diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index ab3496c..0000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,106 +0,0 @@ -name: Documentation - -on: - push: - branches: - - main - paths: - - 'src/**' - - 'Cargo.toml' - - '.github/workflows/docs.yml' - workflow_dispatch: - -# Ensure only one deployment runs at a time -concurrency: - group: pages - cancel-in-progress: false - -permissions: - contents: read - pages: write - id-token: write - -jobs: - build: - name: Build Documentation - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install stable Rust - uses: dtolnay/rust-toolchain@stable - with: - components: rust-docs - - - name: Setup Rust Cache - uses: Swatinem/rust-cache@v2 - with: - cache-all-crates: true - - - name: Build documentation - env: - RUSTDOCFLAGS: >- - --cfg docsrs - -D warnings - --enable-index-page - -Z unstable-options - --extern-html-root-url serde=https://docs.rs/serde/latest/ - --extern-html-root-url serde_json=https://docs.rs/serde_json/latest/ - run: | - cargo +nightly doc \ - --all-features \ - --no-deps \ - --document-private-items \ - --lib - - - name: Add redirect index.html - run: | - cat > target/doc/index.html < - - - - - Redirecting to singularity_language_registry documentation - - -

Redirecting to singularity_language_registry documentation...

- - - EOF - - - name: Add .nojekyll file - run: touch target/doc/.nojekyll - - - name: Setup Pages - uses: actions/configure-pages@v5 - - - name: Upload artifact - uses: actions/upload-pages-artifact@v3 - with: - path: target/doc - - deploy: - name: Deploy to GitHub Pages - needs: build - runs-on: ubuntu-latest - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 - - - name: Add deployment summary - run: | - echo "## Documentation Deployed" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "Documentation has been successfully deployed to GitHub Pages." >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "**URL**: ${{ steps.deployment.outputs.page_url }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "View the documentation: [singularity_language_registry docs](${{ steps.deployment.outputs.page_url }}singularity_language_registry/)" >> $GITHUB_STEP_SUMMARY diff --git a/Cargo.lock b/Cargo.lock index 5c1c98e..a698348 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -95,7 +95,7 @@ dependencies = [ [[package]] name = "singularity-language-registry" -version = "0.1.0" +version = "0.2.0-beta.1" dependencies = [ "anyhow", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 3d4fa68..5eaa336 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "singularity-language-registry" -version = "0.1.0" +version = "0.2.0-beta.1" edition = "2021" license-file = "LICENSE" authors = ["Singularity Team"] diff --git a/LINGUIST_INTEGRATION.md b/LINGUIST_INTEGRATION.md new file mode 100644 index 0000000..f65f1c5 --- /dev/null +++ b/LINGUIST_INTEGRATION.md @@ -0,0 +1,261 @@ +# GitHub Linguist Integration + +## Overview + +Singularity's language registry is aligned with [GitHub Linguist](https://github.com/github-linguist/linguist) as the authoritative source for programming language definitions and file 
classification patterns. + +This ensures consistency across tools and prevents fragmentation of language definitions across the ecosystem. + +## Architecture + +``` +GitHub Linguist (Authoritative Source) + ↓ +Renovate (Weekly Updates) + ↓ +Singularity Language Registry + ├─ Language Definitions (Phase 1: DONE) + ├─ File Classification (Phase 2: READY) + └─ Detection Heuristics (Phase 3: PLANNED) + ↓ +All Singularity Engines +``` + +## Current State: Phase 1 - Language Definitions + +### What's Synced +- **`languages.yml`**: Complete list of 500+ programming languages +- **Metadata per language**: Extensions, aliases, MIME types, language type +- **Linguist attributes**: Color codes, documentation references + +### How It Works +```rust +// All language definitions come from Linguist +let registry = LanguageRegistry::new(); + +// Only explicitly marked languages are supported +if lang.supported_in_singularity { + // Analyze this language +} +``` + +### Renovate Integration +- **Schedule**: Weekly check for Linguist updates +- **Label**: `linguist`, `language-registry` +- **Action**: Manual review required before merge +- **Update**: When Linguist releases a new version + +## Phase 2: File Classification (In Progress) + +### Status +- ✅ **FileClassifier module**: Implemented with 5 tests +- ✅ **Synchronization script**: Created (`scripts/sync_linguist_patterns.py`) +- 🔧 **Integration in progress**: Add `sync-linguist` justfile command +- 📋 **Next**: Add to CI workflow + +### What Will Be Added + +#### Vendored Code Detection +Auto-skip third-party dependencies: +``` +- node_modules/ +- vendor/ +- .yarn/ +- Pods/ +- third_party/ +- Carthage/ +``` + +#### Generated File Detection +Skip auto-generated code: +``` +- *.pb.rs (Protobuf) +- *.pb.go (Protobuf) +- *.generated.ts (GraphQL) +- *.designer.cs (Visual Studio) +- *.meta (Unity3D) +``` + +#### Binary File Detection +Skip non-text files: +``` +- *.png, *.jpg, *.gif (Images) +- *.zip, *.tar (Archives) +- *.exe, *.dll 
(Binaries) +- *.pdf, *.docx (Documents) +``` + +### How It Works + +#### Step 1: Manual Synchronization (Current) +When Linguist updates (Renovate alert): +```bash +# Sync patterns from Linguist to Rust code +python3 scripts/sync_linguist_patterns.py > src/file_classifier_generated.rs + +# Run tests to validate patterns +cargo test + +# Commit the generated patterns +git add src/file_classifier_generated.rs +git commit -m "chore(linguist): sync file classification patterns" +``` + +#### Step 2: Automated Synchronization (Future) +```bash +# Automatic sync via justfile +just sync-linguist + +# Or via cargo xtask +cargo xtask sync-linguist +``` + +### Implementation Details + +#### Synchronization Script (`scripts/sync_linguist_patterns.py`) +1. **Downloads from Linguist**: + - `vendor.yml`: Vendored code patterns (6.5KB) + - `generated.rb`: Generated file detection logic (29.8KB) + - `heuristics.yml`: Language detection rules (35KB, Phase 3) + +2. **Parses patterns**: + - YAML parsing for `vendor.yml` + - Ruby AST parsing for `generated.rb` + - Regex extraction and normalization + +3. **Generates Rust code**: + - Static arrays: `VENDORED_PATTERNS_FROM_LINGUIST` + - Static arrays: `GENERATED_PATTERNS_FROM_LINGUIST` + - Static arrays: `BINARY_PATTERNS_FROM_LINGUIST` + +4. 
**Output**: `src/file_classifier_generated.rs` (auto-generated) + +#### FileClassifier Usage +```rust +use singularity_language_registry::FileClassifier; + +let classifier = FileClassifier::new(); + +if classifier.should_analyze(path) { + // Analyze source code +} else { + match classifier.classify(path) { + FileClass::Vendored => skip("third-party"), + FileClass::Generated => skip("auto-generated"), + FileClass::Binary => skip("non-text"), + FileClass::Source => analyze(), + } +} +``` + +### Source Data +- **`vendor.yml`**: Vendored code patterns (6.5KB) + - Dependency manager directories + - IDE/editor artifacts + - Build output directories + - Framework-specific paths + +- **`generated.rb`**: Generated file detection (29.8KB) + - File path patterns + - Extension matching + - Content header signatures (Generated by, DO NOT EDIT) + - Minification detection + - Metadata inspection + +- **`heuristics.yml`**: Language detection rules (Phase 3) + +## Phase 3: Detection Heuristics (Planned) + +### What Will Be Added + +Fallback language detection for ambiguous file extensions: +``` +.pl → Perl or Prolog? (check for 'use strict' vs 'use_module') +.m → Objective-C or Matlab? (check for @interface vs function) +.rs → Rust or RenderScript? (check for 'fn ' vs '#pragma version') +``` + +### Source Data +- **`heuristics.yml`**: Detection rules (35KB) + - Pattern-based disambiguation + - Content signature matching + - Named pattern reuse + +## Governance Model + +### Who Decides What Becomes Supported? 
+ +**Linguist** decides what languages exist: +- Adding languages to Linguist → Auto-detected by Renovate +- Removing languages from Linguist → Flagged in PR for review + +**Singularity** decides what to support: +- Only languages with `supported_in_singularity: true` are analyzed +- Requires explicit approval to add support + +``` +Global Decision (GitHub Linguist) → Local Decision (Singularity) + 500+ languages 24 languages (current) +``` + +## Maintenance + +### Updating When Renovate Creates a PR + +1. **Review the Linguist changes** + - New languages added? + - Existing languages modified? + - File classification patterns updated? + +2. **Update Singularity** (if needed) + - Add/remove language support + - Update file classification + - Update detection heuristics + +3. **Test** + ```bash + cargo test + cargo clippy -- -D warnings + just quality + ``` + +4. **Merge and Release** + ```bash + cargo release + git push + ``` + +## Benefits + +✅ **Single Source of Truth**: No duplicate language definitions +✅ **Forward Compatible**: New languages auto-included (unsupported) +✅ **Automatic Updates**: Weekly Renovate alerts +✅ **Community Standard**: Uses GitHub's official definitions +✅ **Reduced Friction**: Less code to maintain +✅ **Better File Handling**: Skip vendored/generated automatically + +## Future Extensions + +### Additional Linguist Sources +- **MIME Type Mappings**: From `languages.yml` +- **File Extension Aliases**: Conflicting extensions (e.g., `.h` → C/C++/Objective-C) +- **Shebang Patterns**: Detect from `#!` line (e.g., `#!/usr/bin/env python`) +- **EditorConfig Integration**: From Linguist's `.editorconfig` + +### Integration Points +- **singularity-parsing-engine**: Use `FileClassifier` to skip non-source files +- **singularity-analysis-engine**: Use heuristics for ambiguous languages +- **singularity-linting-engine**: Use file classification to focus on code +- **IDE Extensions**: Use language registry for syntax highlighting + +## Resources + 
+- **GitHub Linguist**: +- **Linguist Languages**: +- **Linguist Vendor Patterns**: +- **Linguist Generated Detection**: +- **Linguist Heuristics**: + +## Questions? + +See [build.rs](build.rs) for the implementation roadmap and current progress. diff --git a/build.rs b/build.rs index 138729b..a82c16e 100644 --- a/build.rs +++ b/build.rs @@ -1,4 +1,62 @@ -//! Build script for validating language metadata +//! Build script for validating language metadata and Linguist integration +//! +//! ## Language Registry Source +//! +//! The language registry is derived from GitHub Linguist's authoritative language list: +//! +//! +//! This ensures Singularity language definitions stay consistent with GitHub's standard. +//! Renovate automatically alerts when Linguist updates (weekly schedule). +//! +//! ## Extended Linguist Integration (Option 2 - In Progress) +//! +//! ### Phase 1: Language Definitions (✅ DONE) +//! - ✅ `languages.yml` synced to registry +//! - ✅ `supported_in_singularity` flag for explicit support +//! - ✅ Weekly Renovate alerts +//! +//! ### Phase 2: File Classification (🔧 IN PROGRESS) +//! +//! #### Implementation Step 1: Manual Synchronization (Current) +//! Run the synchronization script when Linguist updates: +//! ```bash +//! python3 scripts/sync_linguist_patterns.py > src/file_classifier_generated.rs +//! cargo test +//! git add src/file_classifier_generated.rs +//! git commit -m "chore(linguist): sync file classification patterns" +//! ``` +//! +//! #### Implementation Step 2: Automated Synchronization (Future) +//! This build script can be extended to: +//! ```bash +//! cargo xtask sync-linguist +//! ``` +//! +//! Which will: +//! 1. Download `vendor.yml` from Linguist +//! 2. Download `generated.rb` from Linguist +//! 3. Parse and extract patterns +//! 4. Generate Rust code arrays +//! 5. Update `src/file_classifier_generated.rs` +//! 6. Run tests to validate +//! +//! #### Patterns Extracted +//! 
- **Vendored**: `node_modules/`, `vendor/`, `.yarn/`, `Pods/`, `dist/`, `build/` +//! - **Generated**: `.pb.rs`, `.pb.go`, `.generated.ts`, `.designer.cs`, `.meta` +//! - **Binary**: `.png`, `.jpg`, `.zip`, `.exe`, `.dll`, `.pdf` +//! +//! ### Phase 3: Detection Heuristics (📋 PLANNED) +//! - Extract `heuristics.yml` from Linguist (35KB) +//! - Generate fallback language detection for ambiguous extensions +//! - Support: `.pl` (Perl vs Prolog), `.m` (Objective-C vs Matlab), etc. +//! +//! ### Maintenance Workflow +//! When Renovate creates a Linguist update PR: +//! 1. Review language definition changes +//! 2. Run: `python3 scripts/sync_linguist_patterns.py` +//! 3. Run: `cargo test` +//! 4. Commit changes: `git add . && git commit` +//! 5. Merge and create release //! //! This can be used to ensure registry metadata matches actual library capabilities. //! Run with: cargo build --features validate-metadata diff --git a/examples/usage.rs b/examples/usage.rs index 3701f5b..598b389 100644 --- a/examples/usage.rs +++ b/examples/usage.rs @@ -9,6 +9,8 @@ reason = "Examples are meant to demonstrate usage and print output to the user" )] +use std::sync::atomic::Ordering; + use singularity_language_registry::{ ast_grep_supported_languages, detect_from_content, detect_language, get_language, get_language_by_alias, is_detectable, languages_by_families, rca_supported_languages, @@ -41,7 +43,10 @@ fn main() { println!("\n2. 
Language Lookup:"); if let Some(elixir) = get_language("elixir") { println!(" Elixir extensions: {:?}", elixir.extensions); - println!(" RCA supported: {}", elixir.rca_supported); + println!( + " RCA supported: {}", + elixir.rca_supported.load(Ordering::Relaxed) + ); println!(" AST-Grep supported: {}", elixir.ast_grep_supported); } diff --git a/flake.lock b/flake.lock index b518b16..80514f9 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,21 @@ { "nodes": { + "advisory-db": { + "flake": false, + "locked": { + "lastModified": 1762774274, + "narHash": "sha256-tigj2sBL6S7zmjpt5JdXtvtGrClvja+/LAnmpU6+MV4=", + "owner": "rustsec", + "repo": "advisory-db", + "rev": "df17e8c0d170b71c0a4cca3f165c30030a526060", + "type": "github" + }, + "original": { + "owner": "rustsec", + "repo": "advisory-db", + "type": "github" + } + }, "crane": { "locked": { "lastModified": 1762538466, @@ -67,6 +83,7 @@ }, "root": { "inputs": { + "advisory-db": "advisory-db", "crane": "crane", "flake-utils": "flake-utils", "nixpkgs": "nixpkgs", diff --git a/justfile b/justfile index 30eb46e..b39de57 100644 --- a/justfile +++ b/justfile @@ -118,6 +118,19 @@ ci-local: changelog: git log --pretty=format:"- %s (%h)" --reverse > CHANGELOG.md +# Sync file classification patterns from GitHub Linguist (Phase 2) +sync-linguist: + #!/usr/bin/env bash + set -e + echo "Synchronizing file classification patterns from GitHub Linguist..." + python3 scripts/sync_linguist_patterns.py > src/file_classifier_generated.rs + echo "✅ Patterns synced to src/file_classifier_generated.rs" + echo "" + echo "Next steps:" + echo " 1. cargo test" + echo " 2. git add src/file_classifier_generated.rs" + echo " 3. git commit -m 'chore(linguist): sync file classification patterns'" + # Verify everything before PR verify: fmt clippy test audit renovate-validate doc @echo "✅ All checks passed!" 
diff --git a/renovate.json5 b/renovate.json5 index d2964b2..53fa10b 100644 --- a/renovate.json5 +++ b/renovate.json5 @@ -42,6 +42,50 @@ // Package Rules - ordered by priority "packageRules": [ + // =================== + // GitHub Linguist (Language Registry Source) + // =================== + { + "description": "🔤 GitHub Linguist language list updates", + "matchDatasources": ["github-tags"], + "matchPackagePatterns": ["github-linguist/linguist"], + "schedule": ["weekly"], + "labels": ["linguist", "language-registry", "dependencies"], + "prPriority": 5, + "automerge": false, // Manual review for language definition changes + "commitMessagePrefix": "chore(linguist):", + "prBodyNotes": [ + "## ⚠️ Linguist Update Detected", + "", + "GitHub Linguist (the authoritative source for language definitions) has been updated.", + "", + "### What to Review", + "", + "1. **Language Definitions** (Phase 1 - Active):", + " - New languages added to Linguist?", + " - Existing language metadata changed?", + " - Need to update `supported_in_singularity` flags?", + "", + "2. **File Classification** (Phase 2 - Ready):", + " - Changes to vendor patterns (vendor.yml)?", + " - Changes to generated file detection (generated.rb)?", + " - Changes to binary file patterns?", + "", + "3. 
**Detection Heuristics** (Phase 3 - Planned):", + " - Changes to language detection heuristics (heuristics.yml)?", + "", + "See [LINGUIST_INTEGRATION.md](LINGUIST_INTEGRATION.md) for details.", + "", + "### Action Items", + "", + "- [ ] Review language definition changes", + "- [ ] Update supported languages if needed", + "- [ ] Run `cargo test` to validate", + "- [ ] Update file classification patterns if needed (Phase 2)", + "- [ ] Merge and create a new release" + ] + }, + // =================== // Security Updates // =================== diff --git a/scripts/sync_linguist_patterns.py b/scripts/sync_linguist_patterns.py new file mode 100644 index 0000000..961129f --- /dev/null +++ b/scripts/sync_linguist_patterns.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 +""" +Synchronize File Classification Patterns from GitHub Linguist + +This script downloads Linguist's vendor.yml and generated.rb files, +extracts patterns, and generates Rust code for the FileClassifier module. + +Usage: + python3 scripts/sync_linguist_patterns.py > src/file_classifier_generated.rs + +Sources: + - vendor.yml: Vendored code path patterns + - generated.rb: Auto-generated file detection rules +""" + +import re +import sys +import urllib.request +from typing import List, Set +import yaml + +# GitHub Linguist URLs +VENDOR_YML_URL = "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/vendor.yml" +GENERATED_RB_URL = "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/generated.rb" +HEURISTICS_YML_URL = "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/heuristics.yml" + + +def fetch_url(url: str) -> str: + """Fetch content from URL""" + print(f"Fetching {url}...", file=sys.stderr) + try: + with urllib.request.urlopen(url, timeout=10) as response: + return response.read().decode("utf-8") + except Exception as e: + print(f"Error fetching {url}: {e}", file=sys.stderr) + raise + + +def parse_vendor_yml(content: str) 
-> Set[str]: + """ + Parse vendor.yml and extract vendored path patterns. + + Format: + ```yaml + - /path/to/vendor/ + - node_modules/ + - "regex_pattern" + ``` + """ + patterns: Set[str] = set() + + try: + data = yaml.safe_load(content) + if isinstance(data, list): + for item in data: + if isinstance(item, str): + # Simple path patterns + path = item.strip() + if path and not path.startswith("#"): + patterns.add(path) + except yaml.YAMLError as e: + print(f"Error parsing YAML: {e}", file=sys.stderr) + return patterns + + return patterns + + +def parse_generated_rb(content: str) -> Set[str]: + """ + Parse generated.rb and extract generated file patterns. + + Looks for: + - File extensions: ".pb.rs", ".generated.ts" + - Directory paths: "__generated__/", "dist/" + - Content markers for detection + """ + patterns: Set[str] = set() + + # Pattern to match quoted strings in Ruby + # Matches: ".pb.rs", '.generated.ts', "pattern" + string_pattern = re.compile(r'''['"](.*?)['"]''') + + for line in content.split('\n'): + line = line.strip() + + # Skip comments and empty lines + if not line or line.startswith('#'): + continue + + # Extract quoted strings + matches = string_pattern.findall(line) + for match in matches: + if match and len(match) < 50: # Reasonable pattern length + patterns.add(match) + + return patterns + + +def parse_heuristics_yml(content: str) -> dict: + """ + Parse heuristics.yml for language detection rules. + + This is for Phase 3 (future implementation). + """ + try: + data = yaml.safe_load(content) + return data if data else {} + except yaml.YAMLError: + return {} + + +def categorize_patterns(patterns: Set[str]) -> dict: + """ + Categorize patterns into: + - Vendored: node_modules/, vendor/, .yarn/, etc. + - Generated: .pb.rs, .generated.ts, etc. + - Binary: .png, .jpg, .exe, etc. 
+ """ + categories = { + 'vendored': set(), + 'generated': set(), + 'binary': set(), + } + + binary_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.zip', '.tar', '.exe', '.dll', '.pdf'} + + for pattern in patterns: + if any(pattern.startswith(v) for v in ['node_modules', 'vendor', '.yarn', '.idea', 'dist', 'build']): + categories['vendored'].add(pattern) + elif pattern.startswith('.'): + # It's an extension + if any(pattern == ext for ext in binary_extensions): + categories['binary'].add(pattern) + elif 'generated' in pattern.lower() or 'pb' in pattern or 'proto' in pattern: + categories['generated'].add(pattern) + else: + categories['vendored'].add(pattern) + + return categories + + +def generate_rust_code(patterns_dict: dict) -> str: + """Generate Rust code for patterns""" + code = '''// AUTO-GENERATED FILE - DO NOT EDIT MANUALLY +// Generated from GitHub Linguist patterns +// Run: python3 scripts/sync_linguist_patterns.py +// Source: https://github.com/github-linguist/linguist + +//! Auto-generated file classification patterns from GitHub Linguist +//! +//! These patterns are synchronized weekly via Renovate. +//! 
When Linguist updates, run: python3 scripts/sync_linguist_patterns.py + +/// Vendored code path patterns (from Linguist vendor.yml) +pub const VENDORED_PATTERNS_FROM_LINGUIST: &[&str] = &[ +''' + + for pattern in sorted(patterns_dict['vendored']): + code += f' "{pattern}",\n' + + code += ''']; + +/// Generated file patterns (from Linguist generated.rb) +pub const GENERATED_PATTERNS_FROM_LINGUIST: &[&str] = &[ +''' + + for pattern in sorted(patterns_dict['generated']): + escaped = pattern.replace('\\', '\\\\').replace('"', '\\"') + code += f' "{escaped}",\n' + + code += ''']; + +/// Binary file extensions +pub const BINARY_PATTERNS_FROM_LINGUIST: &[&str] = &[ +''' + + for pattern in sorted(patterns_dict['binary']): + code += f' "{pattern}",\n' + + code += ''']; +''' + + return code + + +def main(): + """Main entry point""" + try: + # Fetch files from Linguist + print("Synchronizing patterns from GitHub Linguist...", file=sys.stderr) + + vendor_content = fetch_url(VENDOR_YML_URL) + generated_content = fetch_url(GENERATED_RB_URL) + + # Parse patterns + vendor_patterns = parse_vendor_yml(vendor_content) + generated_patterns = parse_generated_rb(generated_content) + + print(f"Found {len(vendor_patterns)} vendor patterns", file=sys.stderr) + print(f"Found {len(generated_patterns)} generated patterns", file=sys.stderr) + + # Combine and categorize + all_patterns = vendor_patterns | generated_patterns + categorized = categorize_patterns(all_patterns) + + # Generate Rust code + rust_code = generate_rust_code(categorized) + + # Output + print(rust_code, file=sys.stdout) + print("// Pattern sync complete!", file=sys.stderr) + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/file_classifier.rs b/src/file_classifier.rs new file mode 100644 index 0000000..f7617b2 --- /dev/null +++ b/src/file_classifier.rs @@ -0,0 +1,242 @@ +//!
File Classification Engine - Identifies vendored, generated, and binary files +//! +//! This module provides classification rules derived from GitHub Linguist's patterns: +//! +//! +//! ## Classification Categories +//! +//! - **Vendored**: Third-party dependencies (`node_modules/`, `vendor/`, etc.) +//! - **Generated**: Auto-generated files (protobuf, graphql, minified, etc.) +//! - **Binary**: Non-text files (images, archives, compiled binaries) +//! - **Documentation**: Auto-generated docs (Sphinx, Doxygen) +//! +//! ## Usage +//! +//! ```rust,ignore +//! use singularity_language_registry::FileClassifier; +//! use std::path::Path; +//! +//! let classifier = FileClassifier::new(); +//! let path = Path::new("node_modules/package/index.js"); +//! +//! if classifier.is_vendored(path) { +//! println!("Skip vendored code"); +//! } +//! ``` + +use std::path::Path; + +/// File classification result +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum FileClass { + /// Regular source code file + Source, + /// Vendored/third-party dependency + Vendored, + /// Auto-generated file + Generated, + /// Binary file (non-text) + Binary, + /// Auto-generated documentation + Documentation, +} + +/// File classifier using Linguist patterns +#[derive(Debug, Clone)] +pub struct FileClassifier { + /// Vendored file path patterns (from Linguist vendor.yml) + vendored_patterns: Vec<&'static str>, + /// Generated file extensions + generated_extensions: Vec<&'static str>, + /// Binary file extensions + binary_extensions: Vec<&'static str>, + /// Documentation tool markers + documentation_markers: Vec<&'static str>, +} + +impl FileClassifier { + /// Create a new file classifier with Linguist patterns + #[must_use] + pub fn new() -> Self { + Self { + vendored_patterns: vec![ + // Dependency directories + "node_modules/", + "vendor/", + "vendors/", + ".yarn/", + "Pods/", + "Carthage/Build/", + "third_party/", + "dependencies/", + // IDE/Editor artifacts + ".vscode/", + 
".idea/", + ".sublime-project", + // Build artifacts + "dist/", + "build/", + "target/", + "_build/", + // Package lock files + "package-lock.json", + "yarn.lock", + "Cargo.lock", + "poetry.lock", + "Gemfile.lock", + // Gradle/Maven wrappers + "gradlew", + "mvnw", + ], + generated_extensions: vec![ + ".pb.rs", // Protobuf (Rust) + ".pb.go", // Protobuf (Go) + ".pb.py", // Protobuf (Python) + ".pb2.py", // Protobuf v2 (Python) + ".pb.js", // Protobuf (JS) + ".designer.cs", // Visual Studio designer + ".g.ts", // Angular/GraphQL generated + ".generated.ts", + ".generated.js", + ".nib", // Xcode Interface Builder + ".xcworkspacedata", + ".storyboard", + ".xib", + ".meta", // Unity3D metadata + ], + binary_extensions: vec![ + // Images + ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", // Archives + ".zip", ".tar", ".gz", ".rar", ".7z", // Compiled binaries + ".exe", ".bin", ".so", ".dll", ".dylib", // Documents (binary formats) + ".pdf", ".docx", ".xlsx", ".pptx", // Audio/Video + ".mp3", ".mp4", ".wav", ".avi", ".mov", + ], + documentation_markers: vec!["doxygen", "sphinx", "jsdoc", "pandoc"], + } + } + + /// Check if path is vendored code + #[must_use] + pub fn is_vendored(&self, path: &Path) -> bool { + let path_str = path.to_string_lossy(); + self.vendored_patterns + .iter() + .any(|pattern| path_str.contains(pattern)) + } + + /// Check if file is generated + #[must_use] + pub fn is_generated(&self, path: &Path) -> bool { + let path_str = path.to_string_lossy(); + self.generated_extensions + .iter() + .any(|pattern| path_str.ends_with(pattern)) + } + + /// Check if file is binary + #[must_use] + pub fn is_binary(&self, path: &Path) -> bool { + let path_str = path.to_string_lossy(); + self.binary_extensions + .iter() + .any(|pattern| path_str.ends_with(pattern)) + } + + /// Check if file is documentation + #[must_use] + pub fn is_documentation(&self, path: &Path) -> bool { + let path_str = path.to_string_lossy(); + self.documentation_markers + .iter() + 
.any(|marker| path_str.contains(marker)) + } + + /// Classify a file path + #[must_use] + pub fn classify(&self, path: &Path) -> FileClass { + if self.is_binary(path) { + FileClass::Binary + } else if self.is_vendored(path) { + FileClass::Vendored + } else if self.is_generated(path) { + FileClass::Generated + } else if self.is_documentation(path) { + FileClass::Documentation + } else { + FileClass::Source + } + } + + /// Check if file should be analyzed (not vendored, generated, or binary) + #[must_use] + pub fn should_analyze(&self, path: &Path) -> bool { + matches!(self.classify(path), FileClass::Source) + } +} + +impl Default for FileClassifier { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn test_vendored_detection() { + let classifier = FileClassifier::new(); + assert!(classifier.is_vendored(&PathBuf::from("node_modules/package/index.js"))); + assert!(classifier.is_vendored(&PathBuf::from("vendor/lib/helper.rb"))); + assert!(!classifier.is_vendored(&PathBuf::from("src/main.rs"))); + } + + #[test] + fn test_generated_detection() { + let classifier = FileClassifier::new(); + assert!(classifier.is_generated(&PathBuf::from("api.pb.rs"))); + assert!(classifier.is_generated(&PathBuf::from("Component.generated.ts"))); + assert!(!classifier.is_generated(&PathBuf::from("Component.ts"))); + } + + #[test] + fn test_binary_detection() { + let classifier = FileClassifier::new(); + assert!(classifier.is_binary(&PathBuf::from("image.png"))); + assert!(classifier.is_binary(&PathBuf::from("archive.zip"))); + assert!(!classifier.is_binary(&PathBuf::from("script.js"))); + } + + #[test] + fn test_classification() { + let classifier = FileClassifier::new(); + assert_eq!( + classifier.classify(&PathBuf::from("node_modules/pkg/index.js")), + FileClass::Vendored + ); + assert_eq!( + classifier.classify(&PathBuf::from("api.pb.rs")), + FileClass::Generated + ); + assert_eq!( + 
classifier.classify(&PathBuf::from("image.png")), + FileClass::Binary + ); + assert_eq!( + classifier.classify(&PathBuf::from("src/main.rs")), + FileClass::Source + ); + } + + #[test] + fn test_should_analyze() { + let classifier = FileClassifier::new(); + assert!(classifier.should_analyze(&PathBuf::from("src/main.rs"))); + assert!(!classifier.should_analyze(&PathBuf::from("node_modules/pkg/index.js"))); + assert!(!classifier.should_analyze(&PathBuf::from("api.pb.rs"))); + } +} diff --git a/src/lib.rs b/src/lib.rs index 8c876c4..157f71b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,6 +54,7 @@ //! ``` pub mod detection; +pub mod file_classifier; pub mod metadata; pub mod registry; pub mod utils; @@ -85,6 +86,9 @@ pub use metadata::{ MetadataSource, MetadataValidation, }; +// File classification (from Linguist patterns) +pub use file_classifier::{FileClass, FileClassifier}; + // Version information pub const VERSION: &str = env!("CARGO_PKG_VERSION"); pub const NAME: &str = env!("CARGO_PKG_NAME"); diff --git a/src/metadata.rs b/src/metadata.rs index 0b075f1..6bf3fa1 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -4,6 +4,7 @@ //! metadata with the actual capabilities of underlying libraries. 
use crate::registry::LANGUAGE_REGISTRY; +use std::sync::atomic::Ordering; /// Metadata source for language capabilities #[derive(Debug, Clone)] @@ -52,7 +53,7 @@ pub fn validate_metadata(source: &MetadataSource) -> MetadataValidation { // Check RCA support for lang_id in &source.rca_languages { if let Some(lang) = LANGUAGE_REGISTRY.get_language(lang_id) { - if !lang.rca_supported { + if !lang.rca_supported.load(Ordering::Relaxed) { capability_mismatches.push(CapabilityMismatch { language: lang_id.clone(), capability: "RCA".to_owned(), @@ -83,7 +84,7 @@ pub fn validate_metadata(source: &MetadataSource) -> MetadataValidation { // Check for languages in registry but not in sources for lang in LANGUAGE_REGISTRY.supported_languages() { - if lang.rca_supported && !source.rca_languages.contains(&lang.id) { + if lang.rca_supported.load(Ordering::Relaxed) && !source.rca_languages.contains(&lang.id) { capability_mismatches.push(CapabilityMismatch { language: lang.id.clone(), capability: "RCA".to_owned(), @@ -149,7 +150,11 @@ pub fn generate_metadata_report() -> String { "| {} | {} | {} | {} | {} | {} |", lang.name, lang.extensions.join(", "), - if lang.rca_supported { "✓" } else { "✗" }, + if lang.rca_supported.load(Ordering::Relaxed) { + "✓" + } else { + "✗" + }, if lang.ast_grep_supported { "✓" } else { @@ -173,19 +178,7 @@ pub fn generate_metadata_report() -> String { pub fn get_known_support() -> MetadataSource { MetadataSource { // RCA supported languages (from rust-code-analysis) - rca_languages: vec![ - "rust".to_owned(), - "c".to_owned(), - "cpp".to_owned(), - "go".to_owned(), - "java".to_owned(), - "python".to_owned(), - "javascript".to_owned(), - "typescript".to_owned(), - "csharp".to_owned(), - "kotlin".to_owned(), - "lua".to_owned(), - ], + rca_languages: vec![], // AST-Grep supported languages ast_grep_languages: vec![ @@ -198,7 +191,6 @@ pub fn get_known_support() -> MetadataSource { "c".to_owned(), "cpp".to_owned(), "csharp".to_owned(), - "kotlin".to_owned(), 
"elixir".to_owned(), "erlang".to_owned(), "gleam".to_owned(), diff --git a/src/registry.rs b/src/registry.rs index 3198c78..3e32ef7 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -15,6 +15,7 @@ use anyhow::Result; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::path::Path; +use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::LazyLock; /// Language-level pattern signatures (syntax/keywords only, NOT libraries!) @@ -46,7 +47,15 @@ pub struct PatternSignatures { } /// Comprehensive language information -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// This struct represents a programming language in the Singularity registry. +/// The registry is derived from GitHub Linguist's authoritative language list, +/// ensuring consistency across the ecosystem. +/// +/// ## Source of Truth +/// Languages are sourced from +/// and tracked by Renovate for automatic updates. +#[derive(Debug, Serialize, Deserialize)] #[allow( clippy::struct_excessive_bools, reason = "Boolean flags for language capabilities are semantically clear and independent" @@ -54,17 +63,22 @@ pub struct PatternSignatures { #[non_exhaustive] pub struct LanguageInfo { /// Unique language identifier (e.g., `"rust"`, `"elixir"`) + /// Derived from GitHub Linguist language names (lowercased) pub id: String, /// Human-readable language name (e.g., `"Rust"`, `"Elixir"`) pub name: String, /// File extensions for this language (e.g., `rs`, or `ex`/`exs`) + /// Source: GitHub Linguist pub extensions: Vec, /// Alternative names/aliases (e.g., `js`, `javascript`) pub aliases: Vec, + /// Whether this language is supported by Singularity's parsing engine + /// Default: false (only explicitly supported languages are true) + pub supported_in_singularity: bool, /// Tree-sitter language name (if supported) pub tree_sitter_language: Option, /// Whether RCA (rust-code-analysis) supports this language - pub rca_supported: bool, + pub rca_supported: AtomicBool, /// Whether 
AST-Grep supports this language pub ast_grep_supported: bool, /// MIME types for this language @@ -73,6 +87,8 @@ pub struct LanguageInfo { pub family: Option, /// Whether this is a compiled or interpreted language pub is_compiled: bool, + /// Language type from Linguist: "programming", "markup", "data", "prose" + pub language_type: String, /// Pattern signatures for cross-language pattern detection #[serde(default)] pub pattern_signatures: PatternSignatures, @@ -119,8 +135,9 @@ impl LanguageRegistry { name: "Elixir".to_owned(), extensions: vec!["ex".to_owned(), "exs".to_owned()], aliases: vec!["elixir".to_owned()], + supported_in_singularity: true, tree_sitter_language: Some("elixir".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec![ "text/x-elixir".to_owned(), @@ -128,6 +145,7 @@ impl LanguageRegistry { ], family: Some("BEAM".to_owned()), is_compiled: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -136,8 +154,9 @@ impl LanguageRegistry { name: "Erlang".to_owned(), extensions: vec!["erl".to_owned(), "hrl".to_owned()], aliases: vec!["erlang".to_owned()], + supported_in_singularity: true, tree_sitter_language: Some("erlang".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec![ "text/x-erlang".to_owned(), @@ -145,6 +164,7 @@ impl LanguageRegistry { ], family: Some("BEAM".to_owned()), is_compiled: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -153,12 +173,14 @@ impl LanguageRegistry { name: "Gleam".to_owned(), extensions: vec!["gleam".to_owned()], aliases: vec!["gleam".to_owned()], + supported_in_singularity: true, tree_sitter_language: Some("gleam".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-gleam".to_owned(), 
"application/x-gleam".to_owned()], family: Some("BEAM".to_owned()), is_compiled: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -169,11 +191,13 @@ impl LanguageRegistry { extensions: vec!["rs".to_owned()], aliases: vec!["rust".to_owned()], tree_sitter_language: Some("rust".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-rust".to_owned(), "application/x-rust".to_owned()], family: Some("Systems".to_owned()), is_compiled: true, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures { // Only language syntax, NOT libraries! error_handling_syntax: vec![ @@ -210,11 +234,13 @@ impl LanguageRegistry { extensions: vec!["c".to_owned(), "h".to_owned()], aliases: vec!["c".to_owned()], tree_sitter_language: Some("c".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-c".to_owned(), "text/x-csrc".to_owned()], family: Some("C-like".to_owned()), is_compiled: true, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -230,11 +256,13 @@ impl LanguageRegistry { ], aliases: vec!["cpp".to_owned(), "c++".to_owned(), "cplusplus".to_owned()], tree_sitter_language: Some("cpp".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-c++".to_owned(), "text/x-cpp".to_owned()], family: Some("C-like".to_owned()), is_compiled: true, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -245,7 +273,7 @@ impl LanguageRegistry { extensions: vec!["js".to_owned(), "jsx".to_owned()], aliases: vec!["javascript".to_owned(), "js".to_owned()], tree_sitter_language: Some("javascript".to_owned()), - 
rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec![ "text/javascript".to_owned(), @@ -253,6 +281,8 @@ impl LanguageRegistry { ], family: Some("Web".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -262,7 +292,7 @@ impl LanguageRegistry { extensions: vec!["ts".to_owned(), "tsx".to_owned()], aliases: vec!["typescript".to_owned(), "ts".to_owned()], tree_sitter_language: Some("typescript".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec![ "text/typescript".to_owned(), @@ -270,6 +300,8 @@ impl LanguageRegistry { ], family: Some("Web".to_owned()), is_compiled: true, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -280,7 +312,7 @@ impl LanguageRegistry { extensions: vec!["py".to_owned(), "pyw".to_owned()], aliases: vec!["python".to_owned(), "py".to_owned()], tree_sitter_language: Some("python".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec![ "text/x-python".to_owned(), @@ -288,30 +320,37 @@ impl LanguageRegistry { ], family: Some("Scripting".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); + // JVM Languages self.register_language(LanguageInfo { id: "java".to_owned(), name: "Java".to_owned(), extensions: vec!["java".to_owned()], aliases: vec!["java".to_owned()], tree_sitter_language: Some("java".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-java".to_owned(), "application/x-java".to_owned()], family: Some("JVM".to_owned()), is_compiled: true, + supported_in_singularity: true, + 
language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); + // Scripting Languages + self.register_language(LanguageInfo { id: "csharp".to_owned(), name: "C#".to_owned(), extensions: vec!["cs".to_owned()], aliases: vec!["csharp".to_owned(), "cs".to_owned(), "c#".to_owned()], tree_sitter_language: Some("c_sharp".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec![ "text/x-csharp".to_owned(), @@ -319,6 +358,8 @@ impl LanguageRegistry { ], family: Some("CLR".to_owned()), is_compiled: true, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -328,29 +369,13 @@ impl LanguageRegistry { extensions: vec!["go".to_owned()], aliases: vec!["go".to_owned(), "golang".to_owned()], tree_sitter_language: Some("go".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-go".to_owned(), "application/x-go".to_owned()], family: Some("Systems".to_owned()), is_compiled: true, - pattern_signatures: PatternSignatures::default(), - }); - - // JVM Languages - self.register_language(LanguageInfo { - id: "kotlin".to_owned(), - name: "Kotlin".to_owned(), - extensions: vec!["kt".to_owned(), "kts".to_owned()], - aliases: vec!["kotlin".to_owned()], - tree_sitter_language: Some("kotlin".to_owned()), - rca_supported: true, - ast_grep_supported: true, - mime_types: vec![ - "text/x-kotlin".to_owned(), - "application/x-kotlin".to_owned(), - ], - family: Some("JVM".to_owned()), - is_compiled: true, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -361,11 +386,13 @@ impl LanguageRegistry { extensions: vec!["lua".to_owned()], aliases: vec!["lua".to_owned()], tree_sitter_language: Some("lua".to_owned()), - rca_supported: true, + rca_supported: AtomicBool::new(false), 
ast_grep_supported: true, mime_types: vec!["text/x-lua".to_owned(), "application/x-lua".to_owned()], family: Some("Scripting".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -375,11 +402,13 @@ impl LanguageRegistry { extensions: vec!["sh".to_owned(), "bash".to_owned()], aliases: vec!["bash".to_owned(), "sh".to_owned(), "shell".to_owned()], tree_sitter_language: Some("bash".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-sh".to_owned(), "application/x-sh".to_owned()], family: Some("Shell".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -390,11 +419,13 @@ impl LanguageRegistry { extensions: vec!["json".to_owned()], aliases: vec!["json".to_owned()], tree_sitter_language: Some("json".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["application/json".to_owned()], family: Some("Data".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -404,11 +435,13 @@ impl LanguageRegistry { extensions: vec!["yaml".to_owned(), "yml".to_owned()], aliases: vec!["yaml".to_owned(), "yml".to_owned()], tree_sitter_language: Some("yaml".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/yaml".to_owned(), "application/x-yaml".to_owned()], family: Some("Data".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -418,11 +451,13 @@ impl LanguageRegistry { extensions: vec!["toml".to_owned()], aliases: 
vec!["toml".to_owned()], tree_sitter_language: Some("toml".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-toml".to_owned(), "application/toml".to_owned()], family: Some("Data".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -433,11 +468,13 @@ impl LanguageRegistry { extensions: vec!["md".to_owned(), "markdown".to_owned()], aliases: vec!["markdown".to_owned(), "md".to_owned()], tree_sitter_language: Some("markdown".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/markdown".to_owned(), "text/x-markdown".to_owned()], family: Some("Documentation".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -448,11 +485,13 @@ impl LanguageRegistry { extensions: vec!["dockerfile".to_owned(), "Dockerfile".to_owned()], aliases: vec!["dockerfile".to_owned(), "docker".to_owned()], tree_sitter_language: Some("dockerfile".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-dockerfile".to_owned()], family: Some("Infrastructure".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: "programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); @@ -462,11 +501,13 @@ impl LanguageRegistry { extensions: vec!["sql".to_owned()], aliases: vec!["sql".to_owned()], tree_sitter_language: Some("sql".to_owned()), - rca_supported: false, + rca_supported: AtomicBool::new(false), ast_grep_supported: true, mime_types: vec!["text/x-sql".to_owned(), "application/sql".to_owned()], family: Some("Database".to_owned()), is_compiled: false, + supported_in_singularity: true, + language_type: 
"programming".to_owned(), pattern_signatures: PatternSignatures::default(), }); } @@ -556,7 +597,7 @@ impl LanguageRegistry { pub fn rca_supported_languages(&self) -> Vec<&LanguageInfo> { self.languages .values() - .filter(|lang| lang.rca_supported) + .filter(|lang| lang.rca_supported.load(Ordering::Relaxed)) .collect() } @@ -607,6 +648,56 @@ impl LanguageRegistry { pub fn language_count(&self) -> usize { self.languages.len() } + + /// Set RCA support for a language (called by analysis engine) + /// + /// # Errors + /// + /// Returns an error if the language is not found in the registry. + pub fn set_rca_support(&mut self, language_id: &str, supported: bool) -> Result<(), String> { + if let Some(language) = self.languages.get_mut(language_id) { + language.rca_supported.store(supported, Ordering::Relaxed); + Ok(()) + } else { + Err(format!("Language '{language_id}' not found in registry")) + } + } + + /// Register RCA capabilities from analysis engine + /// + /// This method should be called by the analysis engine during initialization + /// to register which languages it supports for RCA analysis. + /// + /// # Errors + /// + /// Returns an error if any of the specified languages are not found. + pub fn register_rca_capabilities( + &mut self, + supported_languages: &[&str], + ) -> Result<(), String> { + // First, set all languages to not supported + for language in self.languages.values_mut() { + language.rca_supported.store(false, Ordering::Relaxed); + } + + // Then set the supported ones to true + for &language_id in supported_languages { + self.set_rca_support(language_id, true)?; + } + + Ok(()) + } + + /// Get mutable reference to language info for advanced operations + /// + /// # Errors + /// + /// Returns an error if the language is not found. 
+ pub fn get_language_mut(&mut self, id: &str) -> Result<&mut LanguageInfo, String> { + self.languages + .get_mut(id) + .ok_or_else(|| format!("Language '{id}' not found")) + } } impl Default for LanguageRegistry { @@ -657,6 +748,32 @@ pub fn get_language_by_mime_type(mime_type: &str) -> Option<&'static LanguageInf LANGUAGE_REGISTRY.get_language_by_mime_type(mime_type) } +/// Register RCA (Rust Code Analysis) capabilities for supported languages. +/// +/// This function should be called by the analysis engine during initialization +/// to mark which languages it supports for RCA analysis. +/// +/// # Errors +/// +/// Returns an error if any of the specified languages are not found. +pub fn register_rca_capabilities(supported_languages: &[&str]) -> Result<(), String> { + // First, set all languages to not supported + for language in LANGUAGE_REGISTRY.supported_languages() { + language.rca_supported.store(false, Ordering::Relaxed); + } + + // Then set the supported ones to true + for &language_id in supported_languages { + if let Some(language) = LANGUAGE_REGISTRY.get_language(language_id) { + language.rca_supported.store(true, Ordering::Relaxed); + } else { + return Err(format!("Language '{language_id}' not found in registry")); + } + } + + Ok(()) +} + #[cfg(test)] #[allow( clippy::unwrap_used, @@ -677,7 +794,7 @@ mod tests { assert_eq!(language.name, "Elixir"); assert!(language.extensions.contains(&"ex".to_owned())); assert!(language.extensions.contains(&"exs".to_owned())); - assert!(!language.rca_supported); + assert!(!language.rca_supported.load(Ordering::Relaxed)); assert!(language.ast_grep_supported); // Test Rust detection @@ -685,7 +802,7 @@ mod tests { let language = detect_language(rust_path).unwrap(); assert_eq!(language.id, "rust"); assert_eq!(language.name, "Rust"); - assert!(language.rca_supported); + assert!(!language.rca_supported.load(Ordering::Relaxed)); assert!(language.ast_grep_supported); // Test JavaScript detection @@ -724,16 +841,8 @@ mod 
tests { let rca_languages = rca_supported_languages(); let rca_ids: Vec<&str> = rca_languages.iter().map(|lang| lang.id.as_str()).collect(); - // RCA should support these languages - assert!(rca_ids.contains(&"rust")); - assert!(rca_ids.contains(&"python")); - assert!(rca_ids.contains(&"javascript")); - assert!(rca_ids.contains(&"typescript")); - assert!(rca_ids.contains(&"java")); - assert!(rca_ids.contains(&"csharp")); - assert!(rca_ids.contains(&"go")); - assert!(rca_ids.contains(&"c")); - assert!(rca_ids.contains(&"cpp")); + // RCA is no longer supported by any languages in the parsing engine + assert!(rca_ids.is_empty()); // RCA should NOT support BEAM languages assert!(!rca_ids.contains(&"elixir")); diff --git a/src/utils.rs b/src/utils.rs index 3e37929..00c8d66 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,6 +2,7 @@ use crate::registry::{LanguageInfo, LANGUAGE_REGISTRY}; use std::collections::HashMap; +use std::sync::atomic::Ordering; /// Get all languages grouped by family pub fn languages_by_families() -> HashMap> { @@ -37,7 +38,10 @@ impl LanguageStats { Self { total_languages: all.len(), - rca_supported: all.iter().filter(|l| l.rca_supported).count(), + rca_supported: all + .iter() + .filter(|l| l.rca_supported.load(Ordering::Relaxed)) + .count(), ast_grep_supported: all.iter().filter(|l| l.ast_grep_supported).count(), compiled_languages: all.iter().filter(|l| l.is_compiled).count(), interpreted_languages: all.iter().filter(|l| !l.is_compiled).count(), @@ -114,7 +118,7 @@ pub fn supports_feature(language: &str, feature: AnalysisFeature) -> bool { }; match feature { - AnalysisFeature::RCA => lang.rca_supported, + AnalysisFeature::RCA => lang.rca_supported.load(Ordering::Relaxed), AnalysisFeature::ASTGrep => lang.ast_grep_supported, AnalysisFeature::TreeSitter => lang.tree_sitter_language.is_some(), AnalysisFeature::Complexity => { diff --git a/tools/linguist_sync.rs b/tools/linguist_sync.rs new file mode 100644 index 0000000..8f3c35c --- /dev/null 
//! Sync File Classification Patterns from GitHub Linguist
//!
//! This tool downloads Linguist's vendor.yml and generated.rb,
//! extracts file patterns, and generates Rust code for FileClassifier.
//!
//! Usage: cargo run --bin linguist_sync > src/file_classifier_generated.rs
//!
//! Patterns extracted:
//! - vendor.yml: Vendored code paths and files
//! - generated.rb: Generated file detection rules

use std::collections::HashSet;
use std::error::Error;

/// Linguist pattern sources.
///
/// NOTE(review): fetching is not implemented yet (see `main`); these URLs
/// document where Phase 2 will pull the patterns from.
#[allow(dead_code)]
const VENDOR_YML_URL: &str =
    "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/vendor.yml";
#[allow(dead_code)]
const GENERATED_RB_URL: &str =
    "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/generated.rb";

/// Parse vendor.yml content and extract simple vendored-path patterns.
///
/// Only lines that are a bare double-quoted string are considered, and
/// patterns containing regex metacharacters (`(` or `[`) are skipped, so
/// only plain path fragments survive. The result is deduplicated **and
/// sorted**: the output feeds code generation, and `HashSet` iteration
/// order is nondeterministic, which previously made the generated file
/// differ between runs.
///
/// # Errors
///
/// Currently infallible; the `Result` is kept for the future YAML-parsing
/// implementation.
#[allow(dead_code)]
fn extract_vendor_patterns(content: &str) -> Result<Vec<String>, Box<dyn Error>> {
    let unique: HashSet<String> = content
        .lines()
        .map(str::trim)
        // A lone `"` both starts and ends with a quote; require length >= 2
        // so it cannot produce an empty pattern.
        .filter(|line| line.len() >= 2 && line.starts_with('"') && line.ends_with('"'))
        .map(|line| line.trim_matches('"').to_string())
        // Only keep simple patterns, not complex regexes.
        .filter(|p| !p.is_empty() && !p.contains('(') && !p.contains('['))
        .collect();

    // Sort for deterministic codegen output.
    let mut patterns: Vec<String> = unique.into_iter().collect();
    patterns.sort();
    Ok(patterns)
}

/// Parse generated.rb content and extract quoted string patterns.
///
/// generated.rb mixes several pattern forms (extensions like `\.pb\.rs`,
/// directory paths like `__generated__/`, content markers). For each
/// non-comment line we take the first quoted string — single- or
/// double-quoted, whichever *opens earliest* in the line. (The previous
/// implementation always preferred `'` over `"`, extracting the wrong
/// string from lines such as `ext == ".pb.rs" # 'note'`, and mixed a byte
/// index from `find` with a char index in `chars().nth`, which broke on
/// non-ASCII lines.) Results are deduplicated and sorted.
///
/// # Errors
///
/// Currently infallible; the `Result` is kept for the future
/// implementation that parses the real file.
#[allow(dead_code)]
fn extract_generated_patterns(content: &str) -> Result<Vec<String>, Box<dyn Error>> {
    let mut patterns = Vec::new();

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with('#') {
            continue; // Ruby comment line
        }

        // Pick whichever quote kind opens first; `find` returns byte
        // indices, and we slice with those same byte indices throughout.
        let single = trimmed.find('\'');
        let double = trimmed.find('"');
        let (start, quote) = match (single, double) {
            (Some(s), Some(d)) if d < s => (d, '"'),
            (Some(s), _) => (s, '\''),
            (None, Some(d)) => (d, '"'),
            (None, None) => continue,
        };

        let rest = &trimmed[start + 1..];
        if let Some(end) = rest.find(quote) {
            let pattern = &rest[..end];
            // Simple pattern validation: non-empty and plausibly short.
            if !pattern.is_empty() && pattern.len() < 100 {
                patterns.push(pattern.to_string());
            }
        }
    }

    // Deduplicate, then sort for deterministic codegen output.
    let mut unique: Vec<String> = patterns
        .into_iter()
        .collect::<HashSet<_>>()
        .into_iter()
        .collect();
    unique.sort();
    Ok(unique)
}

/// Render the extracted pattern lists as Rust `const` slice declarations
/// suitable for inclusion in `src/file_classifier_generated.rs`.
#[allow(dead_code)]
fn generate_rust_code(vendored: Vec<String>, generated: Vec<String>) -> String {
    let mut code = String::from(
        r#"// AUTO-GENERATED FILE - DO NOT EDIT MANUALLY
// This file is auto-generated from GitHub Linguist patterns
// Run: cargo run --bin linguist_sync
// Source: https://github.com/github-linguist/linguist

/// Auto-generated vendored code patterns from Linguist
pub const VENDORED_PATTERNS: &[&str] = &[
"#,
    );

    for pattern in &vendored {
        code.push_str(&format!("    {},\n", format_pattern_string(pattern)));
    }

    code.push_str(
        r#"];

/// Auto-generated file patterns for generated files from Linguist
pub const GENERATED_PATTERNS: &[&str] = &[
"#,
    );

    for pattern in &generated {
        code.push_str(&format!("    {},\n", format_pattern_string(pattern)));
    }

    code.push_str(
        r#"];
"#,
    );

    code
}

/// Quote and escape a pattern so it is a valid Rust string literal in the
/// generated source.
#[allow(dead_code)]
fn format_pattern_string(pattern: &str) -> String {
    // Backslashes must be escaped first, otherwise the backslashes
    // introduced by the later escapes would themselves be doubled.
    let escaped = pattern
        .replace('\\', "\\\\")
        .replace('"', "\\\"")
        .replace('\n', "\\n")
        .replace('\r', "\\r");
    format!("\"{escaped}\"")
}

/// Entry point.
///
/// Phase 2 roadmap: network fetching of `VENDOR_YML_URL` /
/// `GENERATED_RB_URL` is not implemented yet, so this currently only
/// prints the implementation plan to stderr and exits successfully.
fn main() -> Result<(), Box<dyn Error>> {
    eprintln!("Fetching Linguist vendor patterns...");
    // In a real implementation, this would fetch from the URLs
    // For now, we'll document the structure

    eprintln!("Phase 2: Auto-generation from Linguist");
    eprintln!("This is a roadmap implementation.");
    eprintln!("To fully implement:");
    eprintln!("1. Fetch vendor.yml from GitHub");
    eprintln!("2. Parse YAML file for regex patterns");
    eprintln!("3. Extract common patterns (node_modules/, vendor/, etc.)");
    eprintln!("4. Fetch generated.rb from GitHub");
    eprintln!("5. Parse Ruby code for extension/pattern matches");
    eprintln!("6. Generate Rust code arrays");
    eprintln!("7. Embed in build.rs for auto-generation");

    Ok(())
}