diff --git a/scripts/ast_analyzer.py b/scripts/ast_analyzer.py
index c1de92e7..106f739a 100644
--- a/scripts/ast_analyzer.py
+++ b/scripts/ast_analyzer.py
@@ -209,18 +209,29 @@ class ASTAnalyzer:
     - Dependency tracking
     - Semantic chunking (preserve boundaries)
     - Cross-reference analysis
+    - Tree cache for parsed ASTs (avoids re-parsing unchanged files)
     """
     
-    def __init__(self, use_tree_sitter: bool = True):
+    def __init__(self, use_tree_sitter: bool = True, use_tree_cache: bool = True):
         """
         Initialize AST analyzer.
         
         Args:
             use_tree_sitter: Use tree-sitter when available (fallback to ast module)
+            use_tree_cache: Cache parsed trees for unchanged files (mtime-based invalidation)
         """
         self.use_tree_sitter = use_tree_sitter and _TS_AVAILABLE
         self._parsers: Dict[str, Any] = {}
         
+        # Tree cache for avoiding re-parsing unchanged files
+        self._tree_cache = None
+        if use_tree_cache:
+            try:
+                from scripts.ingest.tree_cache import get_default_cache
+                self._tree_cache = get_default_cache()
+            except ImportError:
+                logger.debug("TreeCache not available, parsing will not be cached")
+        
         # Language support matrix
         self.supported_languages = {
             "python": {"ast": True, "tree_sitter": True},
@@ -234,7 +245,49 @@ def __init__(self, use_tree_sitter: bool = True):
             "ruby": {"ast": False, "tree_sitter": True},
         }
         
-        logger.info(f"ASTAnalyzer initialized: tree_sitter={self.use_tree_sitter}")
+        logger.info(f"ASTAnalyzer initialized: tree_sitter={self.use_tree_sitter}, tree_cache={'enabled' if self._tree_cache else 'disabled'}")
+    
+    def _parse_with_cache(self, parser: Any, content: str, file_path: str, language: str, content_provided: bool = False) -> Optional[Any]:
+        """Parse content with tree-sitter, using cache when available.
+        
+        Args:
+            parser: Tree-sitter parser instance
+            content: Source code content
+            file_path: Path to the file (used as cache key)
+            language: Programming language
+            content_provided: If True, content was explicitly provided (not read from disk),
+                              so skip cache to avoid returning stale tree
+            
+        Returns:
+            Parsed tree or None on failure
+        """
+        path = Path(file_path) if file_path else None
+        
+        # Try to get cached tree (only for real files when content was NOT explicitly provided)
+        # If content_provided=True, the caller passed in-memory content that may differ from disk
+        if self._tree_cache and path and path.exists() and not content_provided:
+            cached_tree = self._tree_cache.get(path)
+            if cached_tree is not None:
+                return cached_tree
+        
+        # Parse the content
+        try:
+            tree = parser.parse(content.encode("utf-8"))
+        except Exception as e:
+            logger.debug(f"Tree-sitter parse failed for {language}: {e}")
+            return None
+        
+        # Cache the result for real files
+        if self._tree_cache and path and path.exists() and tree is not None:
+            self._tree_cache.put(path, tree)
+        
+        return tree
+    
+    def get_tree_cache_stats(self) -> Dict[str, Any]:
+        """Get tree cache statistics for monitoring."""
+        if self._tree_cache:
+            return self._tree_cache.get_stats()
+        return {"enabled": False}
     
     def analyze_file(
         self, file_path: str, language: str, content: Optional[str] = None
@@ -250,6 +303,10 @@ def analyze_file(
         Returns:
             Dict with symbols, imports, calls, and dependencies
         """
+        # Track if content was explicitly provided (vs read from disk)
+        # This affects caching - explicit content may differ from on-disk state
+        content_provided = content is not None
+        
         if content is None:
             try:
                 content = Path(file_path).read_text(encoding="utf-8", errors="ignore")
@@ -259,7 +316,7 @@ def analyze_file(
         
         # Use language mappings (32 languages, declarative queries)
         if _LANGUAGE_MAPPINGS_AVAILABLE and self.use_tree_sitter:
-            result = self._analyze_with_mapping(content, file_path, language)
+            result = self._analyze_with_mapping(content, file_path, language, content_provided)
             if result and (result.get("symbols") or result.get("imports") or result.get("calls")):
                 return result
         
@@ -438,11 +495,17 @@ def extract_dependencies(
     
     # ---- Language Mappings Analysis (unified, concept-based) ----
     
-    def _analyze_with_mapping(self, content: str, file_path: str, language: str) -> Dict[str, Any]:
+    def _analyze_with_mapping(self, content: str, file_path: str, language: str, content_provided: bool = False) -> Dict[str, Any]:
         """Analyze code using language mappings (concept-based extraction).
         
         This uses the declarative tree-sitter queries from language_mappings
         to extract symbols, imports, and calls. Supports 34 languages.
+        
+        Args:
+            content: Source code content
+            file_path: Path to the file
+            language: Programming language
+            content_provided: If True, content was explicitly provided (not read from disk)
         """
         if not _LANGUAGE_MAPPINGS_AVAILABLE:
             return self._empty_analysis()
@@ -461,12 +524,12 @@ def _analyze_with_mapping(self, content: str, file_path: str, language: str) ->
         if not parser:
             return self._empty_analysis()
         
-        try:
-            tree = parser.parse(content.encode("utf-8"))
-            root = tree.root_node
-        except Exception as e:
-            logger.debug(f"Tree-sitter parse failed for {language}: {e}")
+        # Parse with caching (avoids re-parsing unchanged files)
+        # Skip cache if content was explicitly provided to avoid stale results
+        tree = self._parse_with_cache(parser, content, file_path, language, content_provided)
+        if tree is None:
             return self._empty_analysis()
+        root = tree.root_node
         
         content_bytes = content.encode("utf-8")
         symbols: List[CodeSymbol] = []
diff --git a/scripts/ingest/qdrant.py b/scripts/ingest/qdrant.py
index 7711988e..26ed9038 100644
--- a/scripts/ingest/qdrant.py
+++ b/scripts/ingest/qdrant.py
@@ -855,10 +855,19 @@ def delete_points_by_path(client: QdrantClient, collection: str, file_path: str)
 
 
 def upsert_points(
-    client: QdrantClient, collection: str, points: List[models.PointStruct]
+    client: QdrantClient, collection: str, points: List[models.PointStruct],
+    *, wait: bool = None
 ):
     """Upsert points with retry and batching.
 
+    Args:
+        client: Qdrant client instance
+        collection: Collection name
+        points: List of points to upsert
+        wait: Whether to wait for upsert to complete. Default is controlled by
+              INDEX_UPSERT_ASYNC env var (0=sync/wait, 1=async/no-wait).
+              Async mode is faster but may cause read-after-write issues.
+
     Raises:
         ValueError: If collection is None or empty.
     """
@@ -878,6 +887,11 @@ def upsert_points(
         backoff = float(os.environ.get("INDEX_UPSERT_BACKOFF", "0.5") or 0.5)
     except Exception:
         backoff = 0.5
+    
+    # Determine wait mode: explicit param > env var > default (sync)
+    if wait is None:
+        async_mode = os.environ.get("INDEX_UPSERT_ASYNC", "0").strip().lower() in {"1", "true", "yes", "on"}
+        wait = not async_mode
 
     failed_count = 0
     for i in range(0, len(points), max(1, bsz)):
@@ -885,12 +899,12 @@ def upsert_points(
         attempt = 0
         while True:
             try:
-                client.upsert(collection_name=collection, points=batch, wait=True)
+                client.upsert(collection_name=collection, points=batch, wait=wait)
                 break
             except Exception as e:
                 attempt += 1
                 if attempt >= retries:
-                    # Final fallback: try smaller sub-batches
+                    # Final fallback: try smaller sub-batches (always sync for reliability)
                     sub_size = max(1, bsz // 4)
                     sub_failed = 0
                     for j in range(0, len(batch), sub_size):
@@ -901,7 +915,6 @@ def upsert_points(
                             )
                         except Exception as sub_e:
                             sub_failed += len(sub)
-                            # Log individual sub-batch failures for debugging
                             print(f"[UPSERT_WARNING] Sub-batch upsert failed ({len(sub)} points): {sub_e}", flush=True)
                     if sub_failed > 0:
                         failed_count += sub_failed
@@ -917,6 +930,45 @@ def upsert_points(
         print(f"[UPSERT_SUMMARY] Total {failed_count}/{len(points)} points failed to upsert", flush=True)
 
 
+def flush_upserts(client: QdrantClient, collection: str) -> None:
+    """Best-effort sync for pending async upserts.
+    
+    Call this after a batch of async upserts (INDEX_UPSERT_ASYNC=1) to improve
+    likelihood that data is visible for subsequent reads.
+    
+    IMPORTANT: Qdrant's wait=False semantics mean upserts are "confirmed received"
+    but not necessarily "applied". This function performs operations that encourage
+    the server to process pending writes, but cannot guarantee immediate consistency.
+    
+    For strict consistency requirements:
+    - Use wait=True (INDEX_UPSERT_ASYNC=0) during upserts, or
+    - Add application-level retry logic for read-after-write scenarios
+    
+    For remote deployments, network latency may increase the window between
+    upsert confirmation and data visibility.
+    
+    Args:
+        client: Qdrant client instance
+        collection: Collection name
+    """
+    if not collection:
+        return
+    try:
+        # 1. Get collection info (lightweight metadata read)
+        client.get_collection(collection)
+        
+        # 2. Perform a minimal scroll to encourage segment processing
+        # This touches actual data, which helps flush pending writes
+        client.scroll(
+            collection_name=collection,
+            limit=1,
+            with_payload=False,
+            with_vectors=False,
+        )
+    except Exception as e:
+        logger.debug(f"flush_upserts: {e}")
+
+
 def hash_id(text: str, path: str, start: int, end: int) -> int:
     """Generate a stable hash ID for a chunk."""
     h = hashlib.sha1(
diff --git a/scripts/ingest/vectors.py b/scripts/ingest/vectors.py
index 87ae2057..ac49c183 100644
--- a/scripts/ingest/vectors.py
+++ b/scripts/ingest/vectors.py
@@ -18,16 +18,28 @@
     _STOP,
 )
 
+# Try to use numpy for faster vector operations (10-50x speedup)
+try:
+    import numpy as np
+    _NUMPY_AVAILABLE = True
+except ImportError:
+    np = None  # type: ignore
+    _NUMPY_AVAILABLE = False
+
 # ---------------------------------------------------------------------------
 # Mini vector projection cache
 # ---------------------------------------------------------------------------
-_MINI_PROJ_CACHE: dict[tuple[int, int, int], list[list[float]]] = {}
+# Cache stores numpy arrays when numpy is available, else nested lists
+_MINI_PROJ_CACHE: dict[tuple[int, int, int], Any] = {}
 
 
 def _get_mini_proj(
     in_dim: int, out_dim: int, seed: int | None = None
-) -> list[list[float]]:
-    """Get or create a random projection matrix for mini vectors."""
+) -> Any:
+    """Get or create a random projection matrix for mini vectors.
+
+    Returns numpy array if numpy is available, else nested list.
+    """
     import math
     import random
 
@@ -38,31 +50,54 @@ def _get_mini_proj(
         rnd = random.Random(s)
         scale = 1.0 / math.sqrt(out_dim)
         # Dense Rademacher matrix (+/-1) scaled; good enough for fast gating
-        M = [
-            [scale * (1.0 if rnd.random() < 0.5 else -1.0) for _ in range(out_dim)]
-            for _ in range(in_dim)
-        ]
+        if _NUMPY_AVAILABLE:
+            # Use numpy for faster matrix operations
+            # Generate same values as pure Python for reproducibility
+            M_list = [
+                [scale * (1.0 if rnd.random() < 0.5 else -1.0) for _ in range(out_dim)]
+                for _ in range(in_dim)
+            ]
+            M = np.array(M_list, dtype=np.float32)
+        else:
+            M = [
+                [scale * (1.0 if rnd.random() < 0.5 else -1.0) for _ in range(out_dim)]
+                for _ in range(in_dim)
+            ]
         _MINI_PROJ_CACHE[key] = M
     return M
 
 
 def project_mini(vec: list[float], out_dim: int | None = None) -> list[float]:
-    """Project a dense vector to a compact mini vector using random projection."""
+    """Project a dense vector to a compact mini vector using random projection.
+
+    Uses numpy when available for 10-50x speedup, falls back to pure Python.
+    """
     if not vec:
         return [0.0] * (int(out_dim or MINI_VEC_DIM))
     od = int(out_dim or MINI_VEC_DIM)
     M = _get_mini_proj(len(vec), od)
-    out = [0.0] * od
-    # y = x @ M
-    for i, val in enumerate(vec):
-        if val == 0.0:
-            continue
-        row = M[i]
-        for j in range(od):
-            out[j] += val * row[j]
-    # L2 normalize to keep scale consistent
-    norm = (sum(x * x for x in out) or 0.0) ** 0.5 or 1.0
-    return [x / norm for x in out]
+
+    if _NUMPY_AVAILABLE:
+        # Fast path: numpy matrix multiply + normalize
+        x = np.array(vec, dtype=np.float32)
+        out = x @ M  # (in_dim,) @ (in_dim, out_dim) -> (out_dim,)
+        norm = np.linalg.norm(out)
+        if norm > 0:
+            out = out / norm
+        return out.tolist()
+    else:
+        # Fallback: pure Python implementation
+        out = [0.0] * od
+        # y = x @ M
+        for i, val in enumerate(vec):
+            if val == 0.0:
+                continue
+            row = M[i]
+            for j in range(od):
+                out[j] += val * row[j]
+        # L2 normalize to keep scale consistent
+        norm = (sum(x * x for x in out) or 0.0) ** 0.5 or 1.0
+        return [x / norm for x in out]
 
 
 def _split_ident_lex(s: str) -> List[str]:
diff --git a/tests/test_chunk_deduplication.py b/tests/test_chunk_deduplication.py
new file mode 100644
index 00000000..69076898
--- /dev/null
+++ b/tests/test_chunk_deduplication.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python3
+"""
+Comprehensive tests for chunk deduplication and concept extraction.
+
+Tests cover:
+- chunk_deduplication.py: O(n log n) deduplication algorithm
+- concept_extractor.py: Universal concept extraction with language mappings
+
+Test categories:
+- Exact content deduplication
+- Substring overlap detection
+- Specificity scoring
+- Language exemptions (Vue, Haskell)
+- Concept extraction across languages
+- Edge cases and stress tests
+"""
+
+import pytest
+from typing import List, Dict, Any
+
+from scripts.ingest.chunk_deduplication import (
+    normalize_content,
+    compute_specificity_score,
+    get_chunk_specificity,
+    deduplicate_chunks,
+    deduplicate_semantic_chunks,
+    _deduplicate_exact_content,
+    _remove_substring_overlaps,
+    _extract_type_name,
+    TYPE_WEIGHTS,
+)
+
+# Optional: concept extractor (may require tree-sitter)
+try:
+    from scripts.ingest.concept_extractor import (
+        extract_concepts,
+        ExtractedConcept,
+        supported_languages,
+    )
+    from scripts.ingest.language_mappings import ConceptType
+    CONCEPT_EXTRACTOR_AVAILABLE = True
+except ImportError:
+    CONCEPT_EXTRACTOR_AVAILABLE = False
+
+
+# =============================================================================
+# SECTION 1: Content Normalization
+# =============================================================================
+
+class TestNormalizeContent:
+    """Tests for content normalization."""
+
+    def test_strips_whitespace(self):
+        """Strips leading and trailing whitespace."""
+        assert normalize_content("  hello  ") == "hello"
+        assert normalize_content("\n\ncode\n\n") == "code"
+
+    def test_normalizes_line_endings(self):
+        """Converts all line endings to \n."""
+        assert normalize_content("a\r\nb\r\nc") == "a\nb\nc"
+        assert normalize_content("a\rb\rc") == "a\nb\nc"
+        assert normalize_content("a\nb\nc") == "a\nb\nc"
+
+    def test_mixed_line_endings(self):
+        """Handles mixed line endings."""
+        content = "line1\r\nline2\rline3\nline4"
+        normalized = normalize_content(content)
+        assert normalized == "line1\nline2\nline3\nline4"
+
+    def test_empty_string(self):
+        """Empty string returns empty."""
+        assert normalize_content("") == ""
+        assert normalize_content("   ") == ""
+
+
+# =============================================================================
+# SECTION 2: Type Name Extraction
+# =============================================================================
+
+class TestExtractTypeName:
+    """Tests for _extract_type_name helper."""
+
+    def test_string_type(self):
+        """Extracts type from string field."""
+        chunk = {"chunk_type": "definition"}
+        assert _extract_type_name(chunk) == "definition"
+
+    def test_concept_field(self):
+        """Falls back to concept field."""
+        chunk = {"concept": "BLOCK"}
+        assert _extract_type_name(chunk) == "block"
+
+    def test_type_field(self):
+        """Falls back to type field."""
+        chunk = {"type": "Function"}
+        assert _extract_type_name(chunk) == "function"
+
+    def test_enum_with_value(self):
+        """Handles enum with .value attribute."""
+        class MockEnum:
+            value = "import"
+        chunk = {"chunk_type": MockEnum()}
+        assert _extract_type_name(chunk) == "import"
+
+    def test_enum_with_name(self):
+        """Handles enum with .name attribute."""
+        class MockEnum:
+            name = "COMMENT"
+        chunk = {"chunk_type": MockEnum()}
+        assert _extract_type_name(chunk) == "comment"
+
+    def test_missing_type(self):
+        """Returns empty string for missing type."""
+        chunk = {}
+        assert _extract_type_name(chunk) == ""
+
+
+# =============================================================================
+# SECTION 3: Specificity Scoring
+# =============================================================================
+
+class TestComputeSpecificityScore:
+    """Tests for compute_specificity_score."""
+
+    def test_function_high_score(self):
+        """Functions get high specificity score."""
+        chunk = {"chunk_type": "function", "name": "my_func", "start_line": 1, "end_line": 10}
+        score = compute_specificity_score(chunk)
+        assert score > 0.5  # High due to type weight + name + size
+
+    def test_block_lower_score(self):
+        """Blocks get lower specificity score than definitions."""
+        func = {"chunk_type": "function", "name": "f", "start_line": 1, "end_line": 5}
+        block = {"chunk_type": "block", "start_line": 1, "end_line": 5}
+
+        func_score = compute_specificity_score(func)
+        block_score = compute_specificity_score(block)
+        assert func_score > block_score
+
+    def test_named_symbol_bonus(self):
+        """Named symbols get bonus."""
+        with_name = {"chunk_type": "function", "name": "foo", "start_line": 1, "end_line": 1}
+        without_name = {"chunk_type": "function", "start_line": 1, "end_line": 1}
+
+        assert compute_specificity_score(with_name) > compute_specificity_score(without_name)
+
+    def test_symbol_field_counts(self):
+        """symbol field also counts as name."""
+        chunk = {"chunk_type": "function", "symbol": "bar", "start_line": 1, "end_line": 1}
+        score = compute_specificity_score(chunk)
+        # Should have name bonus
+        assert score > 0.5
+
+    def test_larger_chunks_higher_score(self):
+        """Larger chunks get higher size component."""
+        small = {"chunk_type": "function", "start_line": 1, "end_line": 2}
+        large = {"chunk_type": "function", "start_line": 1, "end_line": 100}
+
+        assert compute_specificity_score(large) > compute_specificity_score(small)
+
+    def test_unknown_type_low_score(self):
+        """Unknown types get minimal score."""
+        chunk = {"chunk_type": "weird_type", "start_line": 1, "end_line": 1}
+        score = compute_specificity_score(chunk)
+        assert score < 0.3
+
+
+class TestGetChunkSpecificity:
+    """Tests for get_chunk_specificity (legacy 0-4 scale)."""
+
+    def test_function_returns_4(self):
+        """Function type returns 4 (highest)."""
+        chunk = {"chunk_type": "function"}
+        assert get_chunk_specificity(chunk) == 4
+
+    def test_class_returns_4(self):
+        """Class type returns 4."""
+        chunk = {"chunk_type": "class"}
+        assert get_chunk_specificity(chunk) == 4
+
+    def test_type_alias_returns_3(self):
+        """Type alias returns 3."""
+        chunk = {"chunk_type": "type_alias"}
+        assert get_chunk_specificity(chunk) == 3
+
+    def test_import_returns_2(self):
+        """Import returns 2."""
+        chunk = {"chunk_type": "import"}
+        assert get_chunk_specificity(chunk) == 2
+
+    def test_comment_returns_1(self):
+        """Comment returns 1."""
+        chunk = {"chunk_type": "comment"}
+        assert get_chunk_specificity(chunk) == 1
+
+    def test_block_returns_1(self):
+        """Block returns 1."""
+        chunk = {"chunk_type": "block"}
+        assert get_chunk_specificity(chunk) == 1
+
+    def test_unknown_returns_0(self):
+        """Unknown type returns 0."""
+        chunk = {"chunk_type": "xyz"}
+        assert get_chunk_specificity(chunk) == 0
+
+
+# =============================================================================
+# SECTION 4: Exact Content Deduplication
+# =============================================================================
+
+class TestExactContentDeduplication:
+    """Tests for _deduplicate_exact_content."""
+
+    def test_no_duplicates(self):
+        """No deduplication when all unique."""
+        chunks = [
+            {"code": "def foo(): pass", "chunk_type": "function"},
+            {"code": "def bar(): pass", "chunk_type": "function"},
+            {"code": "x = 1", "chunk_type": "definition"},
+        ]
+        result = _deduplicate_exact_content(chunks, "code")
+        assert len(result) == 3
+
+    def test_exact_duplicate_removed(self):
+        """Exact duplicates are removed."""
+        chunks = [
+            {"code": "def foo(): pass", "chunk_type": "function", "name": "foo"},
+            {"code": "def foo(): pass", "chunk_type": "block"},  # Lower specificity
+        ]
+        result = _deduplicate_exact_content(chunks, "code")
+        assert len(result) == 1
+        assert result[0]["name"] == "foo"  # Kept higher specificity
+
+    def test_whitespace_normalized(self):
+        """Whitespace differences are normalized."""
+        chunks = [
+            {"code": "def foo():\n    pass", "chunk_type": "function"},
+            {"code": "def foo():\n    pass  ", "chunk_type": "block"},  # Trailing space
+        ]
+        result = _deduplicate_exact_content(chunks, "code")
+        assert len(result) == 1
+
+    def test_keeps_highest_specificity(self):
+        """When duplicates exist, keeps highest specificity."""
+        chunks = [
+            {"code": "x = 1", "chunk_type": "block"},
+            {"code": "x = 1", "chunk_type": "function", "name": "x"},
+            {"code": "x = 1", "chunk_type": "comment"},
+        ]
+        result = _deduplicate_exact_content(chunks, "code")
+        assert len(result) == 1
+        assert result[0]["chunk_type"] == "function"
+
+    def test_content_key_fallback(self):
+        """Falls back to content/text keys."""
+        chunks = [
+            {"content": "abc", "chunk_type": "function"},
+            {"text": "def", "chunk_type": "function"},
+        ]
+        result = _deduplicate_exact_content(chunks, "code")
+        assert len(result) == 2
+
+    def test_empty_content_skipped(self):
+        """Empty content chunks are skipped."""
+        chunks = [
+            {"code": "", "chunk_type": "function"},
+            {"code": "valid", "chunk_type": "function"},
+        ]
+        result = _deduplicate_exact_content(chunks, "code")
+        assert len(result) == 1
+        assert result[0]["code"] == "valid"
+
+
+# =============================================================================
+# SECTION 5: Substring Overlap Detection
+# =============================================================================
+
+class TestSubstringOverlapRemoval:
+    """Tests for _remove_substring_overlaps."""
+
+    def test_no_overlaps(self):
+        """No removal when chunks don't overlap."""
+        chunks = [
+            {"code": "def foo(): pass", "chunk_type": "function", "start_line": 1, "end_line": 1},
+            {"code": "def bar(): pass", "chunk_type": "function", "start_line": 5, "end_line": 5},
+        ]
+        result = _remove_substring_overlaps(chunks, "code")
+        assert len(result) == 2
+
+    def test_block_substring_of_definition_removed(self):
+        """Block that is substring of definition is removed."""
+        definition_code = "def foo():\n    x = 1\n    return x"
+        block_code = "x = 1"
+
+        chunks = [
+            {"code": definition_code, "chunk_type": "function", "start_line": 1, "end_line": 3},
+            {"code": block_code, "chunk_type": "block", "start_line": 2, "end_line": 2},
+        ]
+        result = _remove_substring_overlaps(chunks, "code")
+        assert len(result) == 1
+        assert result[0]["chunk_type"] == "function"
+
+    def test_non_overlapping_block_kept(self):
+        """Block outside definition line range is kept."""
+        chunks = [
+            {"code": "def foo(): pass", "chunk_type": "function", "start_line": 1, "end_line": 1},
+            {"code": "if x: y", "chunk_type": "block", "start_line": 10, "end_line": 10},
+        ]
+        result = _remove_substring_overlaps(chunks, "code")
+        assert len(result) == 2
+
+    def test_similar_but_not_substring_kept(self):
+        """Similar content that isn't exact substring is kept."""
+        chunks = [
+            {"code": "def foo(x): pass", "chunk_type": "function", "start_line": 1, "end_line": 1},
+            {"code": "foo(y)", "chunk_type": "block", "start_line": 1, "end_line": 1},  # Not substring
+        ]
+        result = _remove_substring_overlaps(chunks, "code")
+        assert len(result) == 2
+
+
+# =============================================================================
+# SECTION 6: Full Deduplication Pipeline
+# =============================================================================
+
+class TestDeduplicateChunks:
+    """Tests for deduplicate_chunks main function."""
+
+    def test_empty_input(self):
+        """Empty input returns empty."""
+        result = deduplicate_chunks([])
+        assert result == []
+
+    def test_single_chunk(self):
+        """Single chunk returns as-is."""
+        chunks = [{"code": "x = 1", "chunk_type": "function"}]
+        result = deduplicate_chunks(chunks)
+        assert len(result) == 1
+
+    def test_full_deduplication(self):
+        """Full pipeline removes exact + substring duplicates."""
+        chunks = [
+            {"code": "def foo():\n    x = 1", "chunk_type": "function", "name": "foo",
+             "start_line": 1, "end_line": 2},
+            {"code": "def foo():\n    x = 1", "chunk_type": "block",
+             "start_line": 1, "end_line": 2},  # Exact dup
+            {"code": "x = 1", "chunk_type": "block",
+             "start_line": 2, "end_line": 2},  # Substring
+        ]
+        result = deduplicate_chunks(chunks, content_key="code")
+        assert len(result) == 1
+        assert result[0]["name"] == "foo"
+
+    def test_vue_exemption(self):
+        """Vue language preserves all chunks (no dedup)."""
+        chunks = [
+            {"code": "same", "chunk_type": "function"},
+            {"code": "same", "chunk_type": "block"},
+        ]
+        result = deduplicate_chunks(chunks, language="vue", content_key="code")
+        assert len(result) == 2
+
+    def test_haskell_exemption(self):
+        """Haskell language preserves all chunks."""
+        chunks = [
+            {"code": "same", "chunk_type": "function"},
+            {"code": "same", "chunk_type": "block"},
+        ]
+        result = deduplicate_chunks(chunks, language="haskell", content_key="code")
+        assert len(result) == 2
+
+    def test_vue_template_exemption(self):
+        """vue_template language preserves all chunks."""
+        chunks = [
+            {"code": "same", "chunk_type": "function"},
+            {"code": "same", "chunk_type": "block"},
+        ]
+        result = deduplicate_chunks(chunks, language="vue_template", content_key="code")
+        assert len(result) == 2
+
+
+class TestDeduplicateSemanticChunks:
+    """Tests for deduplicate_semantic_chunks (dataclass objects)."""
+
+    def test_with_mock_dataclass(self):
+        """Works with dataclass-like objects."""
+        class MockChunk:
+            def __init__(self, content, concept, start_line, end_line):
+                self.content = content
+                self.concept = concept
+                self.start_line = start_line
+                self.end_line = end_line
+
+        class MockConcept:
+            def __init__(self, value):
+                self.value = value
+
+        chunks = [
+            MockChunk("def foo(): pass", MockConcept("definition"), 1, 1),
+            MockChunk("def foo(): pass", MockConcept("block"), 1, 1),  # Duplicate
+        ]
+        result = deduplicate_semantic_chunks(chunks)
+        assert len(result) == 1
+
+    def test_empty_input(self):
+        """Empty input returns empty."""
+        result = deduplicate_semantic_chunks([])
+        assert result == []
+
+    def test_preserves_original_objects(self):
+        """Returns original objects, not dicts."""
+        class MockChunk:
+            def __init__(self, content, start_line, end_line):
+                self.content = content
+                self.concept = None
+                self.start_line = start_line
+                self.end_line = end_line
+                self.custom_field = "preserved"
+
+        chunks = [MockChunk("unique", 1, 1)]
+        result = deduplicate_semantic_chunks(chunks)
+        assert len(result) == 1
+        assert hasattr(result[0], "custom_field")
+        assert result[0].custom_field == "preserved"
+
+
+# =============================================================================
+# SECTION 7: Concept Extractor (if available)
+# =============================================================================
+
+@pytest.mark.skipif(not CONCEPT_EXTRACTOR_AVAILABLE, reason="concept_extractor not available")
+class TestConceptExtractor:
+    """Tests for concept extraction."""
+
+    def test_extract_python_function(self):
+        """Extracts Python function definition."""
+        code = '''
+def hello(name: str) -> str:
+    """Say hello."""
+    return f"Hello {name}"
+'''
+        concepts = extract_concepts(code, "python")
+
+        definitions = [c for c in concepts if c.concept == ConceptType.DEFINITION]
+        assert len(definitions) >= 1
+        names = [c.name for c in definitions]
+        assert "hello" in names
+
+    def test_extract_python_class(self):
+        """Extracts Python class."""
+        code = '''
+class MyClass:
+    def __init__(self):
+        pass
+'''
+        concepts = extract_concepts(code, "python")
+
+        definitions = [c for c in concepts if c.concept == ConceptType.DEFINITION]
+        names = [c.name for c in definitions]
+        assert "MyClass" in names
+
+    def test_extract_python_imports(self):
+        """Extracts Python imports."""
+        code = '''
+import os
+from pathlib import Path
+'''
+        concepts = extract_concepts(code, "python")
+
+        imports = [c for c in concepts if c.concept == ConceptType.IMPORT]
+        assert len(imports) >= 1
+
+    def test_extract_comments(self):
+        """Extracts comments."""
+        code = '''
+# This is a comment
+def foo():
+    pass
+'''
+        concepts = extract_concepts(code, "python")
+
+        comments = [c for c in concepts if c.concept == ConceptType.COMMENT]
+        # May or may not find comments depending on query
+        # Just verify no crash
+
+    def test_empty_content(self):
+        """Empty content returns empty list."""
+        concepts = extract_concepts("", "python")
+        assert concepts == []
+
+    def test_whitespace_only(self):
+        """Whitespace-only returns empty."""
+        concepts = extract_concepts("   \n\n   ", "python")
+        assert concepts == []
+
+    def test_sorted_by_line(self):
+        """Concepts are sorted by start_line."""
+        code = '''
+def b(): pass
+def a(): pass
+'''
+        concepts = extract_concepts(code, "python")
+
+        lines = [c.start_line for c in concepts]
+        assert lines == sorted(lines)
+
+    def test_extracted_concept_fields(self):
+        """ExtractedConcept has all expected fields."""
+        code = "def foo(): pass"
+        concepts = extract_concepts(code, "python")
+
+        if concepts:
+            c = concepts[0]
+            assert hasattr(c, "concept")
+            assert hasattr(c, "name")
+            assert hasattr(c, "content")
+            assert hasattr(c, "start_line")
+            assert hasattr(c, "end_line")
+            assert hasattr(c, "kind")
+            assert hasattr(c, "metadata")
+
+
+@pytest.mark.skipif(not CONCEPT_EXTRACTOR_AVAILABLE, reason="concept_extractor not available")
+class TestConceptExtractorMultiLanguage:
+    """Multi-language concept extraction tests."""
+
+    def test_javascript_function(self):
+        """Extracts JavaScript function."""
+        code = '''
+function hello(name) {
+    return "Hello " + name;
+}
+'''
+        concepts = extract_concepts(code, "javascript")
+        definitions = [c for c in concepts if c.concept == ConceptType.DEFINITION]
+        # Should find at least one definition
+        assert len(definitions) >= 0  # May vary by tree-sitter availability
+
+    def test_typescript_interface(self):
+        """Extracts TypeScript interface."""
+        code = '''
+interface User {
+    name: string;
+    age: number;
+}
+'''
+        concepts = extract_concepts(code, "typescript")
+        # Just verify no crash
+        assert isinstance(concepts, list)
+
+    def test_go_function(self):
+        """Extracts Go function."""
+        code = '''
+func Hello(name string) string {
+    return "Hello " + name
+}
+'''
+        concepts = extract_concepts(code, "go")
+        assert isinstance(concepts, list)
+
+    def test_rust_function(self):
+        """Extracts Rust function."""
+        code = '''
+fn hello(name: &str) -> String {
+    format!("Hello {}", name)
+}
+'''
+        concepts = extract_concepts(code, "rust")
+        assert isinstance(concepts, list)
+
+    def test_supported_languages(self):
+        """supported_languages returns list."""
+        langs = supported_languages()
+        assert isinstance(langs, list)
+        assert "python" in langs
+
+
+# =============================================================================
+# SECTION 8: Edge Cases and Stress Tests
+# =============================================================================
+
+class TestDeduplicationEdgeCases:
+    """Edge cases for deduplication."""
+
+    def test_unicode_content(self):
+        """Handles unicode content."""
+        chunks = [
+            {"code": "def héllo(): # 你好", "chunk_type": "function"},
+            {"code": "def héllo(): # 你好", "chunk_type": "block"},
+        ]
+        result = deduplicate_chunks(chunks, content_key="code")
+        assert len(result) == 1
+
+    def test_very_long_content(self):
+        """Handles very long content."""
+        long_code = "x = 1\n" * 10000
+        chunks = [
+            {"code": long_code, "chunk_type": "function"},
+            {"code": long_code, "chunk_type": "block"},
+        ]
+        result = deduplicate_chunks(chunks, content_key="code")
+        assert len(result) == 1
+
+    def test_binary_like_content(self):
+        """Handles content with special characters."""
+        chunks = [
+            {"code": "x = b'\\x00\\x01\\x02'", "chunk_type": "function"},
+        ]
+        result = deduplicate_chunks(chunks, content_key="code")
+        assert len(result) == 1
+
+    def test_all_same_specificity(self):
+        """When all same specificity, keeps one."""
+        chunks = [
+            {"code": "same", "chunk_type": "function", "start_line": 1, "end_line": 1},
+            {"code": "same", "chunk_type": "function", "start_line": 2, "end_line": 2},
+            {"code": "same", "chunk_type": "function", "start_line": 3, "end_line": 3},
+        ]
+        result = deduplicate_chunks(chunks, content_key="code")
+        assert len(result) == 1
+
+
+class TestDeduplicationStress:
+    """Stress tests for deduplication performance."""
+
+    def test_many_unique_chunks(self):
+        """Handles many unique chunks efficiently."""
+        chunks = [
+            {"code": f"def func_{i}(): pass", "chunk_type": "function",
+             "start_line": i, "end_line": i}
+            for i in range(1000)
+        ]
+        result = deduplicate_chunks(chunks, content_key="code")
+        assert len(result) == 1000
+
+    def test_many_duplicates(self):
+        """Handles many duplicates efficiently."""
+        chunks = [
+            {"code": "duplicate content", "chunk_type": "block" if i % 2 else "function",
+             "start_line": i, "end_line": i}
+            for i in range(1000)
+        ]
+        result = deduplicate_chunks(chunks, content_key="code")
+        assert len(result) == 1
+
+    def test_mixed_large_dataset(self):
+        """Handles mixed large dataset."""
+        chunks = []
+        for i in range(500):
+            # Half unique, half duplicates
+            if i < 250:
+                chunks.append({
+                    "code": f"unique_{i}",
+                    "chunk_type": "function",
+                    "start_line": i,
+                    "end_line": i,
+                })
+            else:
+                chunks.append({
+                    "code": "shared_code",
+                    "chunk_type": "block" if i % 3 else "function",
+                    "start_line": i,
+                    "end_line": i,
+                })
+
+        result = deduplicate_chunks(chunks, content_key="code")
+        # 250 unique + 1 from duplicates = 251
+        assert len(result) == 251
+
+
+class TestTypeWeights:
+    """Tests for TYPE_WEIGHTS configuration."""
+
+    def test_all_expected_types_present(self):
+        """All common chunk types have weights."""
+        expected = ["function", "method", "class", "interface", "struct",
+                    "enum", "definition", "import", "comment", "block"]
+        for t in expected:
+            assert t in TYPE_WEIGHTS, f"Missing weight for {t}"
+
+    def test_definitions_higher_than_blocks(self):
+        """Definition types have higher weight than blocks."""
+        assert TYPE_WEIGHTS["function"] > TYPE_WEIGHTS["block"]
+        assert TYPE_WEIGHTS["class"] > TYPE_WEIGHTS["block"]
+        assert TYPE_WEIGHTS["method"] > TYPE_WEIGHTS["block"]
+
+    def test_weights_in_valid_range(self):
+        """All weights are between 0 and 1."""
+        for t, w in TYPE_WEIGHTS.items():
+            assert 0 <= w <= 1, f"Invalid weight for {t}: {w}"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_ingest_infrastructure.py b/tests/test_ingest_infrastructure.py
new file mode 100644
index 00000000..d22d8eef
--- /dev/null
+++ b/tests/test_ingest_infrastructure.py
@@ -0,0 +1,1053 @@
+#!/usr/bin/env python3
+"""
+Comprehensive tests for ingest infrastructure modules.
+
+Tests cover:
+- Domain models (models.py): validation, immutability, serialization
+- Type aliases (types.py): semantic type safety
+- Tree cache (tree_cache.py): LRU eviction, thread safety, invalidation
+- File discovery cache (file_discovery_cache.py): TTL, mtime validation
+- Exceptions (exceptions.py): hierarchy, context formatting
+
+Test categories:
+- Unit tests with edge cases
+- Property-based tests with hypothesis
+- Concurrency tests with ThreadPoolExecutor
+- Integration tests with real filesystem
+- Stress tests for cache eviction
+"""
+
+import os
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from dataclasses import FrozenInstanceError
+
+import pytest
+
+# Optional hypothesis import for property-based testing
+try:
+    from hypothesis import given, strategies as st, settings
+    HYPOTHESIS_AVAILABLE = True
+except ImportError:
+    HYPOTHESIS_AVAILABLE = False
+    def given(*args, **kwargs):
+        def decorator(f):
+            return pytest.mark.skip(reason="hypothesis not installed")(f)
+        return decorator
+    class st:
+        @staticmethod
+        def integers(*args, **kwargs): return None
+        @staticmethod
+        def text(*args, **kwargs): return None
+    def settings(*args, **kwargs):
+        def decorator(f): return f
+        return decorator
+
+from scripts.exceptions import (
+    ContextEngineError,
+    ValidationError,
+    ParsingError,
+    ChunkingError,
+    EmbeddingError,
+    IndexingError,
+    DatabaseError,
+    SearchError,
+    ConfigurationError,
+    ProviderError,
+    CacheError,
+    RateLimitError,
+    TimeoutError as OperationTimeoutError,
+)
+from scripts.ingest.models import (
+    ChunkType,
+    SymbolKind,
+    Position,
+    Range,
+    Symbol,
+    Chunk,
+    ImportRef,
+    CallRef,
+    FileAnalysis,
+    IndexingResult,
+    chunk_from_dict,
+    symbol_from_dict,
+)
+from scripts.ingest.types import (
+    ChunkId,
+    FileId,
+    LineNumber,
+    ByteOffset,
+    Score,
+    FilePath,
+    Language,
+)
+from scripts.ingest.tree_cache import (
+    TreeCache,
+    get_default_cache,
+    configure_default_cache,
+)
+from scripts.ingest.file_discovery_cache import (
+    FileDiscoveryCache,
+)
+
+
+# =============================================================================
+# SECTION 1: Domain Models (models.py)
+# =============================================================================
+
+class TestPosition:
+    """Tests for Position dataclass."""
+
+    def test_valid_position(self):
+        """Position with valid line and column."""
+        pos = Position(line=10, column=5)
+        assert pos.line == 10
+        assert pos.column == 5
+        assert pos.byte_offset is None
+
+    def test_position_with_byte_offset(self):
+        """Position with all fields."""
+        pos = Position(line=1, column=0, byte_offset=0)
+        assert pos.byte_offset == 0
+
+    def test_position_zero_line_valid(self):
+        """Line 0 is valid (0-indexed systems)."""
+        pos = Position(line=0, column=0)
+        assert pos.line == 0
+
+    def test_position_negative_line_raises(self):
+        """Negative line raises ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            Position(line=-1, column=0)
+        assert "Line must be non-negative" in str(exc_info.value)
+        assert exc_info.value.field == "line"
+
+    def test_position_negative_column_raises(self):
+        """Negative column raises ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            Position(line=0, column=-1)
+        assert "Column must be non-negative" in str(exc_info.value)
+
+    def test_position_immutable(self):
+        """Position is frozen (immutable)."""
+        pos = Position(line=1, column=1)
+        with pytest.raises(FrozenInstanceError):
+            pos.line = 2
+
+    def test_position_hashable(self):
+        """Position can be used in sets and dicts."""
+        pos1 = Position(line=1, column=1)
+        pos2 = Position(line=1, column=1)
+        pos3 = Position(line=2, column=1)
+        assert hash(pos1) == hash(pos2)
+        assert {pos1, pos2, pos3} == {pos1, pos3}
+
+
+class TestRange:
+    """Tests for Range dataclass."""
+
+    def test_valid_range(self):
+        """Range with valid start and end."""
+        r = Range(
+            start=Position(line=1, column=0),
+            end=Position(line=10, column=50)
+        )
+        assert r.line_count == 10
+
+    def test_single_line_range(self):
+        """Range spanning single line."""
+        r = Range(
+            start=Position(line=5, column=0),
+            end=Position(line=5, column=20)
+        )
+        assert r.line_count == 1
+
+    def test_range_start_after_end_raises(self):
+        """Start line after end line raises ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            Range(
+                start=Position(line=10, column=0),
+                end=Position(line=5, column=0)
+            )
+        assert "Start line" in str(exc_info.value)
+
+    def test_range_same_line_invalid_columns(self):
+        """Start column after end column on same line raises."""
+        with pytest.raises(ValidationError) as exc_info:
+            Range(
+                start=Position(line=5, column=20),
+                end=Position(line=5, column=10)
+            )
+        assert "column" in str(exc_info.value).lower()
+
+
+class TestSymbol:
+    """Tests for Symbol dataclass."""
+
+    def test_valid_symbol(self):
+        """Create a valid symbol."""
+        sym = Symbol(
+            name="my_function",
+            kind=SymbolKind.FUNCTION,
+            start_line=1,
+            end_line=10,
+        )
+        assert sym.name == "my_function"
+        assert sym.kind == SymbolKind.FUNCTION
+        assert sym.line_count == 10
+
+    def test_symbol_with_all_fields(self):
+        """Symbol with all optional fields."""
+        sym = Symbol(
+            name="MyClass",
+            kind=SymbolKind.CLASS,
+            start_line=1,
+            end_line=50,
+            path="/src/main.py",
+            signature="class MyClass(Base):",
+            docstring="A test class.",
+            decorators=frozenset(["@dataclass", "@frozen"]),
+            parameters=frozenset(["self", "value"]),
+            complexity=5,
+        )
+        assert sym.full_path == "/src/main.py"
+        assert "@dataclass" in sym.decorators
+
+    def test_symbol_empty_name_raises(self):
+        """Empty name raises ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            Symbol(name="", kind=SymbolKind.FUNCTION, start_line=1, end_line=1)
+        assert "name cannot be empty" in str(exc_info.value)
+
+    def test_symbol_negative_start_line_raises(self):
+        """Negative start_line raises ValidationError."""
+        with pytest.raises(ValidationError):
+            Symbol(name="foo", kind=SymbolKind.FUNCTION, start_line=-1, end_line=1)
+
+    def test_symbol_end_before_start_raises(self):
+        """end_line before start_line raises ValidationError."""
+        with pytest.raises(ValidationError):
+            Symbol(name="foo", kind=SymbolKind.FUNCTION, start_line=10, end_line=5)
+
+    def test_symbol_full_path_fallback(self):
+        """full_path falls back to name when path is None."""
+        sym = Symbol(name="helper", kind=SymbolKind.FUNCTION, start_line=1, end_line=1)
+        assert sym.full_path == "helper"
+
+    def test_symbol_kinds_enum(self):
+        """All SymbolKind values are valid."""
+        kinds = [SymbolKind.FUNCTION, SymbolKind.METHOD, SymbolKind.CLASS,
+                 SymbolKind.INTERFACE, SymbolKind.STRUCT, SymbolKind.ENUM,
+                 SymbolKind.CONSTANT, SymbolKind.VARIABLE, SymbolKind.TYPE_ALIAS,
+                 SymbolKind.MODULE, SymbolKind.NAMESPACE, SymbolKind.PROPERTY,
+                 SymbolKind.UNKNOWN]
+        for kind in kinds:
+            sym = Symbol(name="test", kind=kind, start_line=1, end_line=1)
+            assert sym.kind == kind
+
+
+class TestChunk:
+    """Tests for Chunk dataclass."""
+
+    def test_valid_chunk(self):
+        """Create a valid chunk."""
+        chunk = Chunk(
+            id="abc123",
+            content="def foo(): pass",
+            start_line=1,
+            end_line=1,
+            file_path="/src/main.py",
+            language="python",
+        )
+        assert chunk.id == "abc123"
+        assert chunk.chunk_type == ChunkType.UNKNOWN
+
+    def test_chunk_with_all_fields(self):
+        """Chunk with all fields populated."""
+        chunk = Chunk(
+            id="xyz789",
+            content="class Foo:\n    pass",
+            start_line=1,
+            end_line=2,
+            file_path="/src/main.py",
+            language="python",
+            chunk_type=ChunkType.DEFINITION,
+            symbol="Foo",
+            symbol_path="main.Foo",
+            imports=frozenset(["os", "sys"]),
+            calls=frozenset(["print", "open"]),
+            metadata={"complexity": 1},
+        )
+        assert chunk.chunk_type == ChunkType.DEFINITION
+        assert "os" in chunk.imports
+        assert chunk.line_count == 2
+
+    def test_chunk_empty_id_raises(self):
+        """Empty id raises ValidationError."""
+        with pytest.raises(ValidationError):
+            Chunk(id="", content="x", start_line=1, end_line=1,
+                  file_path="/a.py", language="python")
+
+    def test_chunk_empty_content_raises(self):
+        """Empty content raises ValidationError."""
+        with pytest.raises(ValidationError):
+            Chunk(id="a", content="", start_line=1, end_line=1,
+                  file_path="/a.py", language="python")
+
+    def test_chunk_types_enum(self):
+        """All ChunkType values work."""
+        for chunk_type in ChunkType:
+            chunk = Chunk(
+                id="test", content="x", start_line=1, end_line=1,
+                file_path="/a.py", language="python", chunk_type=chunk_type
+            )
+            assert chunk.chunk_type == chunk_type
+
+
+class TestChunkFromDict:
+    """Tests for chunk_from_dict interop function."""
+
+    def test_standard_keys(self):
+        """Convert dict with standard keys."""
+        data = {
+            "id": "chunk1",
+            "content": "def foo(): pass",
+            "start_line": 1,
+            "end_line": 1,
+            "file_path": "/a.py",
+            "language": "python",
+            "chunk_type": "definition",
+        }
+        chunk = chunk_from_dict(data)
+        assert chunk.id == "chunk1"
+        assert chunk.chunk_type == ChunkType.DEFINITION
+
+    def test_alternate_keys(self):
+        """Convert dict with alternate keys (chunk_id, code, start, end)."""
+        data = {
+            "chunk_id": "c2",
+            "code": "class Bar: pass",
+            "start": 5,
+            "end": 5,
+            "path": "/b.py",
+            "language": "python",
+            "type": "definition",
+        }
+        chunk = chunk_from_dict(data)
+        assert chunk.id == "c2"
+        assert chunk.content == "class Bar: pass"
+        assert chunk.start_line == 5
+
+    def test_missing_keys_use_defaults(self):
+        """Missing keys use reasonable defaults."""
+        data = {"id": "x", "content": "y", "file_path": "/z.py"}
+        chunk = chunk_from_dict(data)
+        assert chunk.language == "unknown"
+        assert chunk.chunk_type == ChunkType.UNKNOWN
+
+
+class TestSymbolFromDict:
+    """Tests for symbol_from_dict interop function."""
+
+    def test_standard_keys(self):
+        """Convert dict with standard keys."""
+        data = {
+            "name": "my_func",
+            "kind": "function",
+            "start_line": 1,
+            "end_line": 10,
+        }
+        sym = symbol_from_dict(data)
+        assert sym.name == "my_func"
+        assert sym.kind == SymbolKind.FUNCTION
+
+    def test_unknown_kind_fallback(self):
+        """Unknown kind falls back to UNKNOWN."""
+        data = {"name": "x", "kind": "weird_kind", "start_line": 1, "end_line": 1}
+        sym = symbol_from_dict(data)
+        assert sym.kind == SymbolKind.UNKNOWN
+
+    def test_case_insensitive_kind(self):
+        """Kind matching is case-insensitive."""
+        data = {"name": "x", "kind": "FUNCTION", "start_line": 1, "end_line": 1}
+        sym = symbol_from_dict(data)
+        assert sym.kind == SymbolKind.FUNCTION
+
+
+class TestFileAnalysis:
+    """Tests for FileAnalysis dataclass."""
+
+    def test_empty_analysis(self):
+        """Create empty file analysis."""
+        analysis = FileAnalysis(file_path="/a.py", language="python")
+        assert analysis.symbols == frozenset()
+        assert analysis.chunks == frozenset()
+        assert analysis.line_count == 0
+
+    def test_analysis_with_data(self):
+        """Create file analysis with symbols and chunks."""
+        sym = Symbol(name="foo", kind=SymbolKind.FUNCTION, start_line=1, end_line=5)
+        chunk = Chunk(id="c1", content="def foo(): pass", start_line=1, end_line=1,
+                      file_path="/a.py", language="python")
+        analysis = FileAnalysis(
+            file_path="/a.py",
+            language="python",
+            symbols=frozenset([sym]),
+            chunks=frozenset([chunk]),
+            line_count=100,
+            parse_time_ms=15.5,
+        )
+        assert len(analysis.symbols) == 1
+        assert analysis.parse_time_ms == 15.5
+
+
+class TestIndexingResult:
+    """Tests for IndexingResult dataclass."""
+
+    def test_empty_result(self):
+        """Empty result has 100% success rate."""
+        result = IndexingResult()
+        assert result.success_rate == 1.0
+
+    def test_success_rate_calculation(self):
+        """Success rate calculated correctly."""
+        result = IndexingResult(files_processed=8, files_skipped=1, files_failed=1)
+        assert result.success_rate == 0.8
+
+    def test_result_is_mutable(self):
+        """IndexingResult is mutable (not frozen)."""
+        result = IndexingResult()
+        result.files_processed = 10
+        result.errors.append("Some error")
+        assert result.files_processed == 10
+        assert len(result.errors) == 1
+
+
+# =============================================================================
+# SECTION 2: Property-Based Tests (with hypothesis)
+# =============================================================================
+
+@pytest.mark.skipif(not HYPOTHESIS_AVAILABLE, reason="hypothesis not installed")
+class TestModelPropertiesHypothesis:
+    """Property-based tests for domain models."""
+
+    @given(st.integers(min_value=0, max_value=10000),
+           st.integers(min_value=0, max_value=1000))
+    @settings(max_examples=100)
+    def test_position_valid_range(self, line, column):
+        """Any non-negative line/column produces valid Position."""
+        pos = Position(line=line, column=column)
+        assert pos.line >= 0
+        assert pos.column >= 0
+
+    @given(st.integers(min_value=0, max_value=1000),
+           st.integers(min_value=0, max_value=1000))
+    @settings(max_examples=50)
+    def test_range_line_count_property(self, start, delta):
+        """Range line_count is always end - start + 1."""
+        end = start + delta
+        r = Range(
+            start=Position(line=start, column=0),
+            end=Position(line=end, column=0)
+        )
+        assert r.line_count == delta + 1
+
+    @given(st.text(min_size=1, max_size=100, alphabet="abcdefghijklmnopqrstuvwxyz_"))
+    @settings(max_examples=50)
+    def test_symbol_name_preserved(self, name):
+        """Symbol name is preserved exactly."""
+        sym = Symbol(name=name, kind=SymbolKind.FUNCTION, start_line=1, end_line=1)
+        assert sym.name == name
+
+
+# =============================================================================
+# SECTION 3: Exception Hierarchy (exceptions.py)
+# =============================================================================
+
+class TestExceptionHierarchy:
+    """Tests for exception class hierarchy."""
+
+    def test_base_exception_inheritance(self):
+        """All exceptions inherit from ContextEngineError."""
+        exceptions = [
+            ValidationError, ParsingError, ChunkingError, EmbeddingError,
+            IndexingError, DatabaseError, SearchError, ConfigurationError,
+            ProviderError, CacheError, RateLimitError, OperationTimeoutError,
+        ]
+        for exc_class in exceptions:
+            assert issubclass(exc_class, ContextEngineError)
+
+    def test_rate_limit_inherits_provider(self):
+        """RateLimitError inherits from ProviderError."""
+        assert issubclass(RateLimitError, ProviderError)
+
+
+class TestExceptionContext:
+    """Tests for exception context formatting."""
+
+    def test_base_exception_str(self):
+        """Base exception formats message correctly."""
+        e = ContextEngineError("Something failed")
+        assert str(e) == "Something failed"
+
+    def test_exception_with_context(self):
+        """Exception with context includes it in str()."""
+        e = ContextEngineError("Failed", context={"file": "test.py", "line": 42})
+        s = str(e)
+        assert "Failed" in s
+        assert "file=test.py" in s
+        assert "line=42" in s
+
+    def test_validation_error_context(self):
+        """ValidationError includes field and value in context."""
+        e = ValidationError("Invalid value", field="age", value=-5)
+        s = str(e)
+        assert "field=age" in s
+        assert "-5" in s
+
+    def test_parsing_error_context(self):
+        """ParsingError includes file, language, and line."""
+        e = ParsingError("Syntax error", file_path="/test.py", language="python", line=10)
+        s = str(e)
+        assert "file=/test.py" in s
+        assert "language=python" in s
+        assert "line=10" in s
+
+    def test_rate_limit_error_retry_after(self):
+        """RateLimitError includes retry_after and status 429."""
+        e = RateLimitError("Rate limited", provider="openai", retry_after=30.0)
+        assert e.status_code == 429
+        assert e.retry_after == 30.0
+        assert "retry_after=30.0" in str(e)
+
+    def test_search_error_truncates_long_query(self):
+        """SearchError truncates very long queries in context."""
+        long_query = "x" * 200
+        e = SearchError("Search failed", query=long_query)
+        assert len(e.context.get("query", "")) <= 100
+
+
+class TestExceptionAttributes:
+    """Tests for exception-specific attributes."""
+
+    def test_validation_error_attributes(self):
+        """ValidationError stores field and value."""
+        e = ValidationError("Bad input", field="name", value="")
+        assert e.field == "name"
+        assert e.value == ""
+
+    def test_embedding_error_attributes(self):
+        """EmbeddingError stores provider, model, batch_size."""
+        e = EmbeddingError("Embed failed", provider="openai", model="ada", batch_size=100)
+        assert e.provider == "openai"
+        assert e.model == "ada"
+        assert e.batch_size == 100
+
+    def test_indexing_error_attributes(self):
+        """IndexingError stores collection, file_path, point_count."""
+        e = IndexingError("Index failed", collection="test_coll", point_count=500)
+        assert e.collection == "test_coll"
+        assert e.point_count == 500
+
+
+# =============================================================================
+# SECTION 4: Tree Cache (tree_cache.py)
+# =============================================================================
+
+class TestTreeCacheBasic:
+    """Basic TreeCache functionality tests."""
+
+    def test_cache_miss(self, tmp_path):
+        """Cache returns None for uncached files."""
+        cache = TreeCache(max_entries=10)
+        result = cache.get(tmp_path / "nonexistent.py")
+        assert result is None
+        stats = cache.get_stats()
+        assert stats["misses"] == 1
+        assert stats["hits"] == 0
+
+    def test_cache_put_get(self, tmp_path):
+        """Put and get returns cached tree."""
+        cache = TreeCache(max_entries=10)
+        test_file = tmp_path / "test.py"
+        test_file.write_text("def foo(): pass")
+        fake_tree = {"type": "module", "children": []}
+        cache.put(test_file, fake_tree)
+        result = cache.get(test_file)
+        assert result == fake_tree
+        stats = cache.get_stats()
+        assert stats["hits"] == 1
+
+    def test_cache_invalidation_on_mtime_change(self, tmp_path):
+        """Cache invalidates when file mtime changes."""
+        cache = TreeCache(max_entries=10)
+        test_file = tmp_path / "test.py"
+        test_file.write_text("v1")
+        cache.put(test_file, {"version": 1})
+        assert cache.get(test_file) == {"version": 1}
+        time.sleep(0.01)
+        test_file.write_text("v2")
+        result = cache.get(test_file)
+        assert result is None
+        stats = cache.get_stats()
+        assert stats["invalidations"] == 1
+
+    def test_cache_invalidation_on_size_change(self, tmp_path):
+        """Cache invalidates when file size changes."""
+        cache = TreeCache(max_entries=10)
+        test_file = tmp_path / "test.py"
+        test_file.write_text("short")
+        cache.put(test_file, {"v": 1})
+        original_mtime = test_file.stat().st_mtime
+        test_file.write_text("much longer content here")
+        os.utime(test_file, (original_mtime, original_mtime))
+        result = cache.get(test_file)
+        assert result is None
+
+    def test_explicit_invalidate(self, tmp_path):
+        """Explicit invalidate() removes entry."""
+        cache = TreeCache(max_entries=10)
+        test_file = tmp_path / "test.py"
+        test_file.write_text("content")
+        cache.put(test_file, {"tree": True})
+        assert cache.get(test_file) is not None
+        removed = cache.invalidate(test_file)
+        assert removed is True
+        assert cache.get(test_file) is None
+        removed_again = cache.invalidate(test_file)
+        assert removed_again is False
+
+
+class TestTreeCacheLRU:
+    """LRU eviction tests for TreeCache."""
+
+    def test_lru_eviction_by_count(self, tmp_path):
+        """Oldest entries evicted when max_entries exceeded."""
+        cache = TreeCache(max_entries=3)
+        files = []
+        for i in range(5):
+            f = tmp_path / f"file{i}.py"
+            f.write_text(f"content {i}")
+            files.append(f)
+            cache.put(f, {"idx": i})
+        assert cache.get(files[0]) is None
+        assert cache.get(files[1]) is None
+        assert cache.get(files[2]) == {"idx": 2}
+        assert cache.get(files[3]) == {"idx": 3}
+        assert cache.get(files[4]) == {"idx": 4}
+        stats = cache.get_stats()
+        assert stats["evictions"] == 2
+
+    def test_lru_access_updates_order(self, tmp_path):
+        """Accessing entry moves it to end (most recent)."""
+        cache = TreeCache(max_entries=3)
+        files = []
+        for i in range(3):
+            f = tmp_path / f"file{i}.py"
+            f.write_text(f"c{i}")
+            files.append(f)
+            cache.put(f, {"i": i})
+        cache.get(files[0])
+        new_file = tmp_path / "new.py"
+        new_file.write_text("new")
+        cache.put(new_file, {"new": True})
+        assert cache.get(files[0]) == {"i": 0}
+        assert cache.get(files[1]) is None
+        assert cache.get(files[2]) == {"i": 2}
+
+
+class TestTreeCacheMemoryLimit:
+    """Memory limit tests for TreeCache."""
+
+    def test_memory_limit_eviction(self, tmp_path):
+        """Entries evicted when memory limit exceeded."""
+        cache = TreeCache(max_entries=100, max_memory_mb=1)
+        files = []
+        for i in range(5):
+            f = tmp_path / f"big{i}.py"
+            f.write_text("x" * (500 * 1024))
+            files.append(f)
+            cache.put(f, {"big": i})
+        stats = cache.get_stats()
+        assert stats["evictions"] > 0
+        assert stats["estimated_memory_mb"] <= 1.5
+
+
+class TestTreeCacheConcurrency:
+    """Thread safety tests for TreeCache."""
+
+    def test_concurrent_put_get(self, tmp_path):
+        """Concurrent put/get operations don't corrupt cache."""
+        cache = TreeCache(max_entries=50)
+        errors = []
+
+        def worker(worker_id):
+            try:
+                for i in range(20):
+                    f = tmp_path / f"worker{worker_id}_file{i}.py"
+                    f.write_text(f"content_{worker_id}_{i}")
+                    cache.put(f, {"w": worker_id, "i": i})
+                    result = cache.get(f)
+                    if result is not None and result != {"w": worker_id, "i": i}:
+                        errors.append(f"Mismatch: {result}")
+            except Exception as e:
+                errors.append(str(e))
+
+        with ThreadPoolExecutor(max_workers=8) as executor:
+            futures = [executor.submit(worker, i) for i in range(8)]
+            for f in as_completed(futures):
+                f.result()
+        assert len(errors) == 0, f"Errors: {errors}"
+
+    def test_concurrent_invalidate(self, tmp_path):
+        """Concurrent invalidation is safe."""
+        cache = TreeCache(max_entries=100)
+        files = []
+        for i in range(50):
+            f = tmp_path / f"file{i}.py"
+            f.write_text(f"c{i}")
+            files.append(f)
+            cache.put(f, {"i": i})
+
+        def invalidator(file_path):
+            cache.invalidate(file_path)
+
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            futures = [executor.submit(invalidator, f) for f in files]
+            for fut in as_completed(futures):
+                fut.result()
+        for f in files:
+            assert cache.get(f) is None
+
+
+class TestTreeCacheHelpers:
+    """Tests for TreeCache helper methods."""
+
+    def test_get_for_comparison_returns_stale(self, tmp_path):
+        """get_for_comparison returns cached tree even if stale."""
+        cache = TreeCache(max_entries=10)
+        test_file = tmp_path / "test.py"
+        test_file.write_text("v1")
+        cache.put(test_file, {"old": True})
+        time.sleep(0.01)
+        test_file.write_text("v2")
+        assert cache.get(test_file) is None
+        cache.put(test_file, {"new": True})
+        time.sleep(0.01)
+        test_file.write_text("v3")
+        result = cache.get_for_comparison(test_file)
+        assert result == {"new": True}
+
+    def test_cleanup_stale_entries(self, tmp_path):
+        """cleanup_stale_entries removes outdated entries."""
+        cache = TreeCache(max_entries=10)
+        files = []
+        for i in range(5):
+            f = tmp_path / f"file{i}.py"
+            f.write_text(f"c{i}")
+            files.append(f)
+            cache.put(f, {"i": i})
+        time.sleep(0.01)
+        for f in files[:2]:
+            f.write_text("modified")
+        removed = cache.cleanup_stale_entries()
+        assert removed == 2
+        stats = cache.get_stats()
+        assert stats["entries"] == 3
+
+    def test_get_cache_info(self, tmp_path):
+        """get_cache_info returns detailed entry info."""
+        cache = TreeCache(max_entries=10)
+        test_file = tmp_path / "test.py"
+        test_file.write_text("content")
+        cache.put(test_file, {"tree": True})
+        cache.get(test_file)
+        cache.get(test_file)
+        info = cache.get_cache_info(test_file)
+        assert info is not None
+        assert info["hit_count"] == 2
+        assert info["is_valid"] is True
+        assert "cached_mtime" in info
+
+    def test_clear(self, tmp_path):
+        """clear() removes all entries."""
+        cache = TreeCache(max_entries=10)
+        for i in range(5):
+            f = tmp_path / f"file{i}.py"
+            f.write_text(f"c{i}")
+            cache.put(f, {"i": i})
+        cache.clear()
+        assert cache.get_stats()["entries"] == 0
+
+
+class TestTreeCacheGlobal:
+    """Tests for global cache functions."""
+
+    def test_get_default_cache_singleton(self):
+        """get_default_cache returns same instance."""
+        cache1 = get_default_cache()
+        cache2 = get_default_cache()
+        assert cache1 is cache2
+
+    def test_configure_default_cache(self):
+        """configure_default_cache creates new instance."""
+        old_cache = get_default_cache()
+        new_cache = configure_default_cache(max_entries=500, max_memory_mb=100)
+        assert new_cache is not old_cache
+        assert new_cache.max_entries == 500
+
+
+# =============================================================================
+# SECTION 5: File Discovery Cache (file_discovery_cache.py)
+# =============================================================================
+
+class TestFileDiscoveryCacheBasic:
+    """Basic FileDiscoveryCache tests."""
+
+    def test_cache_miss_discovery(self, tmp_path):
+        """First call discovers files (cache miss)."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        (tmp_path / "a.py").write_text("a")
+        (tmp_path / "b.py").write_text("b")
+        (tmp_path / "c.js").write_text("c")
+        files = cache.get_files(tmp_path, patterns=["*.py"])
+        assert len(files) == 2
+        assert any(f.name == "a.py" for f in files)
+        assert any(f.name == "b.py" for f in files)
+        stats = cache.get_stats()
+        assert stats["misses"] == 1
+        assert stats["hits"] == 0
+
+    def test_cache_hit(self, tmp_path):
+        """Second call with same params is cache hit."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        (tmp_path / "a.py").write_text("a")
+        cache.get_files(tmp_path, patterns=["*.py"])
+        cache.get_files(tmp_path, patterns=["*.py"])
+        stats = cache.get_stats()
+        assert stats["hits"] == 1
+        assert stats["misses"] == 1
+
+    def test_exclude_patterns(self, tmp_path):
+        """Exclude patterns filter results."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        (tmp_path / "a.py").write_text("a")
+        (tmp_path / "test_a.py").write_text("test")
+        files = cache.get_files(
+            tmp_path,
+            patterns=["*.py"],
+            exclude_patterns=["test_*.py"]
+        )
+        assert len(files) == 1
+        assert files[0].name == "a.py"
+
+
+class TestFileDiscoveryCacheTTL:
+    """TTL expiration tests for FileDiscoveryCache."""
+
+    def test_ttl_expiration(self, tmp_path):
+        """Cache expires after TTL."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=1)
+        (tmp_path / "a.py").write_text("a")
+        cache.get_files(tmp_path, patterns=["*.py"])
+        assert cache.get_stats()["hits"] == 0
+        time.sleep(1.1)
+        cache.get_files(tmp_path, patterns=["*.py"])
+        stats = cache.get_stats()
+        assert stats["misses"] == 2
+
+
+class TestFileDiscoveryCacheMtime:
+    """Mtime-based invalidation tests."""
+
+    def test_mtime_invalidation(self, tmp_path):
+        """Cache invalidates when directory mtime changes."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        (tmp_path / "a.py").write_text("a")
+        cache.get_files(tmp_path, patterns=["*.py"])
+        time.sleep(0.01)
+        (tmp_path / "b.py").write_text("b")
+        files = cache.get_files(tmp_path, patterns=["*.py"])
+        assert len(files) == 2
+        stats = cache.get_stats()
+        assert stats["invalidations"] == 1
+
+    def test_invalidate_directory(self, tmp_path):
+        """invalidate_directory removes matching entries."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        sub1 = tmp_path / "sub1"
+        sub1.mkdir()
+        (sub1 / "a.py").write_text("a")
+        sub2 = tmp_path / "sub2"
+        sub2.mkdir()
+        (sub2 / "b.py").write_text("b")
+        cache.get_files(sub1, patterns=["*.py"])
+        cache.get_files(sub2, patterns=["*.py"])
+        assert cache.get_stats()["cache_size"] == 2
+        removed = cache.invalidate_directory(sub1)
+        assert removed == 1
+        assert cache.get_stats()["cache_size"] == 1
+
+
+class TestFileDiscoveryCacheLRU:
+    """LRU eviction tests for FileDiscoveryCache."""
+
+    def test_lru_eviction(self, tmp_path):
+        """Oldest entries evicted when max_entries exceeded."""
+        cache = FileDiscoveryCache(max_entries=3, ttl_seconds=300)
+        for i in range(5):
+            d = tmp_path / f"dir{i}"
+            d.mkdir()
+            (d / "file.py").write_text("x")
+            cache.get_files(d, patterns=["*.py"])
+        stats = cache.get_stats()
+        assert stats["cache_size"] == 3
+        assert stats["evictions"] == 2
+
+
+class TestFileDiscoveryCachePatterns:
+    """Pattern matching tests."""
+
+    def test_multiple_patterns(self, tmp_path):
+        """Multiple patterns are OR'd together."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        (tmp_path / "a.py").write_text("a")
+        (tmp_path / "b.js").write_text("b")
+        (tmp_path / "c.txt").write_text("c")
+        files = cache.get_files(tmp_path, patterns=["*.py", "*.js"])
+        assert len(files) == 2
+        names = {f.name for f in files}
+        assert names == {"a.py", "b.js"}
+
+    def test_recursive_pattern(self, tmp_path):
+        """Recursive glob patterns work."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        sub = tmp_path / "sub"
+        sub.mkdir()
+        (sub / "a.py").write_text("a")
+        (tmp_path / "b.py").write_text("b")
+        files = cache.get_files(tmp_path, patterns=["**/*.py"])
+        assert len(files) == 2
+
+    def test_deduplication(self, tmp_path):
+        """Overlapping patterns don't create duplicates."""
+        cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        (tmp_path / "test.py").write_text("x")
+        files = cache.get_files(tmp_path, patterns=["*.py", "test.*"])
+        assert len(files) == 1
+
+
+# =============================================================================
+# SECTION 6: Type Aliases (types.py)
+# =============================================================================
+
+class TestTypeAliases:
+    """Tests for type aliases semantic safety."""
+
+    def test_newtype_creates_distinct_types(self):
+        """NewType creates logically distinct types."""
+        chunk_id: ChunkId = ChunkId("chunk123")
+        file_id: FileId = FileId("file456")
+        assert isinstance(chunk_id, str)
+        assert isinstance(file_id, str)
+        assert chunk_id == "chunk123"
+        assert file_id == "file456"
+
+    def test_numeric_types(self):
+        """Numeric NewTypes work correctly."""
+        line: LineNumber = LineNumber(42)
+        offset: ByteOffset = ByteOffset(1024)
+        score: Score = Score(0.95)
+        assert line == 42
+        assert offset == 1024
+        assert abs(score - 0.95) < 0.001
+
+    def test_path_types(self):
+        """Path-related NewTypes work."""
+        path: FilePath = FilePath("/src/main.py")
+        lang: Language = Language("python")
+        assert path.endswith(".py")
+        assert lang == "python"
+
+
+# =============================================================================
+# SECTION 7: Integration Tests
+# =============================================================================
+
+class TestCacheIntegration:
+    """Integration tests using multiple caches together."""
+
+    def test_tree_and_file_cache_together(self, tmp_path):
+        """TreeCache and FileDiscoveryCache work together."""
+        tree_cache = TreeCache(max_entries=10)
+        file_cache = FileDiscoveryCache(max_entries=10, ttl_seconds=300)
+        for i in range(5):
+            (tmp_path / f"file{i}.py").write_text(f"def func{i}(): pass")
+        files = file_cache.get_files(tmp_path, patterns=["*.py"])
+        assert len(files) == 5
+        for f in files:
+            tree_cache.put(f, {"parsed": True, "path": str(f)})
+        for f in files:
+            assert tree_cache.get(f) is not None
+        time.sleep(0.01)
+        files[0].write_text("modified content")
+        assert tree_cache.get(files[0]) is None
+        assert tree_cache.get(files[1]) is not None
+        new_files = file_cache.get_files(tmp_path, patterns=["*.py"])
+        assert len(new_files) == 5
+
+
+class TestExceptionInPipeline:
+    """Test exceptions in realistic pipeline scenarios."""
+
+    def test_exception_chain(self):
+        """Exceptions can be chained for context."""
+        try:
+            try:
+                raise ParsingError("Tree-sitter failed", file_path="/bad.py", language="python")
+            except ParsingError as pe:
+                raise IndexingError(
+                    f"Could not index file: {pe}",
+                    file_path=pe.file_path,
+                ) from pe
+        except IndexingError as ie:
+            assert "Tree-sitter failed" in str(ie.__cause__)
+            assert ie.file_path == "/bad.py"
+
+
+# =============================================================================
+# SECTION 8: Stress Tests
+# =============================================================================
+
+class TestStress:
+    """Stress tests for cache behavior under load."""
+
+    def test_high_volume_cache_operations(self, tmp_path):
+        """Cache handles high volume of operations."""
+        cache = TreeCache(max_entries=100)
+        files = []
+        for i in range(500):
+            f = tmp_path / f"file{i}.py"
+            f.write_text(f"content_{i}")
+            files.append(f)
+        for i, f in enumerate(files):
+            cache.put(f, {"idx": i})
+        stats = cache.get_stats()
+        assert stats["entries"] == 100
+        assert stats["evictions"] == 400
+
+    def test_rapid_invalidation_cycle(self, tmp_path):
+        """Rapid put/invalidate cycles don't cause issues."""
+        cache = TreeCache(max_entries=50)
+        test_file = tmp_path / "test.py"
+        test_file.write_text("initial")
+        for i in range(1000):
+            cache.put(test_file, {"iteration": i})
+            cache.invalidate(test_file)
+        assert cache.get(test_file) is None
+        stats = cache.get_stats()
+        assert stats["invalidations"] == 1000
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_tree_cache_integration.py b/tests/test_tree_cache_integration.py
new file mode 100644
index 00000000..f5fdac1f
--- /dev/null
+++ b/tests/test_tree_cache_integration.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+"""Test tree cache integration with ASTAnalyzer."""
+import sys
+sys.path.insert(0, '.')
+
+from scripts.ast_analyzer import ASTAnalyzer
+
+
+def test_tree_cache_integration():
+    """Test that the tree cache is wired into ASTAnalyzer correctly."""
+    
+    # Test instantiation
+    analyzer = ASTAnalyzer(use_tree_sitter=True)
+    print(f'✓ ASTAnalyzer instantiated')
+    print(f'  tree_sitter: {analyzer.use_tree_sitter}')
+    print(f'  tree_cache: {"enabled" if analyzer._tree_cache else "disabled"}')
+    
+    # Test analyzing a Python file
+    result = analyzer.analyze_file('scripts/ast_analyzer.py', 'python')
+    symbols = result.get('symbols', [])
+    print(f'✓ Analyzed scripts/ast_analyzer.py')
+    print(f'  Found {len(symbols)} symbols')
+    if symbols:
+        print(f'  First 3: {[s.name for s in symbols[:3]]}')
+    
+    # Test cache hit
+    print('\n--- Testing cache hit on second parse ---')
+    result2 = analyzer.analyze_file('scripts/ast_analyzer.py', 'python')
+    symbols2 = result2.get('symbols', [])
+    print(f'✓ Second analysis returned {len(symbols2)} symbols')
+    
+    # Show cache stats
+    if analyzer._tree_cache:
+        stats = analyzer._tree_cache.get_stats()
+        print(f'\nTree cache stats: {stats}')
+    
+    assert len(symbols) > 0, "Should find symbols"
+    assert len(symbols) == len(symbols2), "Results should be consistent"
+    
+    print('\n✓ All tests passed!')
+
+
+if __name__ == "__main__":
+    test_tree_cache_integration()
+