From d84ef48ddd9690e10bc77fffb75f096d9b25b4b0 Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Sat, 24 Jan 2026 12:09:14 -0800 Subject: [PATCH] feat: add context overflow chunked review - ChunkStrategy (FILE, LINES, AUTO) splits large diffs into chunks - merge_review_results combines chunk results with deduplication - Degradation pipeline triggers chunked review on context overflow - AUTO strategy: file-based for multi-file, line-based for single large files - 17 tests covering chunking strategies and result merging Implements #40 Co-Authored-By: Claude Opus 4.5 --- src/pr_review_agent/execution/degradation.py | 41 ++- src/pr_review_agent/review/chunker.py | 201 +++++++++++++ tests/test_chunker.py | 286 +++++++++++++++++++ 3 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 src/pr_review_agent/review/chunker.py create mode 100644 tests/test_chunker.py diff --git a/src/pr_review_agent/execution/degradation.py b/src/pr_review_agent/execution/degradation.py index b986cdd..39bc58c 100644 --- a/src/pr_review_agent/execution/degradation.py +++ b/src/pr_review_agent/execution/degradation.py @@ -10,7 +10,12 @@ from enum import Enum from typing import Any -from pr_review_agent.execution.retry_handler import RetryStrategy, retry_with_adaptation +from pr_review_agent.execution.retry_handler import ( + RetryExhaustedError, + RetryStrategy, + retry_with_adaptation, +) +from pr_review_agent.review.chunker import ChunkStrategy, chunk_diff, merge_review_results from pr_review_agent.review.llm_reviewer import LLMReviewer, LLMReviewResult @@ -67,6 +72,7 @@ def execute(self) -> DegradationResult: """Execute the review pipeline with graceful degradation. Always returns a result, never raises exceptions. + Tries: full → chunked (on context overflow) → reduced → gates-only. 
""" # Try full review with retry/backoff try: @@ -77,6 +83,20 @@ def execute(self) -> DegradationResult: gate_results=self._gate_results, errors=self._errors, ) + except RetryExhaustedError as e: + self._errors.append(f"Full review failed: {e}") + # If context overflow was a failure, try chunked review + if any(a.failure_type == "context_too_long" for a in e.attempts): + try: + review = self._run_chunked_review(self.base_model) + return DegradationResult( + level=DegradationLevel.FULL, + review_result=review, + gate_results=self._gate_results, + errors=self._errors, + ) + except Exception as chunk_err: + self._errors.append(f"Chunked review failed: {chunk_err}") except Exception as e: self._errors.append(f"Full review failed: {e}") @@ -123,6 +143,25 @@ def validate(result: LLMReviewResult) -> bool: ) return retry_result.result + def _run_chunked_review(self, model: str) -> LLMReviewResult: + """Run chunked review by splitting diff into per-file chunks.""" + chunks = chunk_diff(self.diff, strategy=ChunkStrategy.AUTO, max_lines=200) + if not chunks: + raise ValueError("No chunks produced from diff") + + results: list[LLMReviewResult] = [] + for chunk in chunks: + result = self._reviewer.review( + diff=chunk.content, + pr_description=self.pr_description, + model=model, + config=self.config, + focus_areas=self.focus_areas, + ) + results.append(result) + + return merge_review_results(results) + def _run_reduced_review(self) -> LLMReviewResult: """Run reduced review using Haiku with retries.""" fallback_model = self.config.llm.simple_model diff --git a/src/pr_review_agent/review/chunker.py b/src/pr_review_agent/review/chunker.py new file mode 100644 index 0000000..fa97d70 --- /dev/null +++ b/src/pr_review_agent/review/chunker.py @@ -0,0 +1,201 @@ +"""Diff chunking for large PRs that exceed LLM context limits. + +Splits diffs into manageable chunks and merges review results. +""" + +import re +from dataclasses import dataclass +from enum import Enum + +from pr_review_agent.review.llm_reviewer import LLMReviewResult + + +class ChunkStrategy(Enum): + """Strategy for splitting diffs.""" + + FILE = "file" + LINES = "lines" + AUTO = "auto" + + +@dataclass +class DiffChunk: + """A chunk of a diff for individual review.""" + + content: str + file_path: str + chunk_index: int + total_chunks: int + + +# Pattern to match the start of a file diff +_FILE_DIFF_PATTERN = re.compile(r"^diff --git a/(.*?) b/", re.MULTILINE) + + +def _split_by_file(diff: str) -> list[tuple[str, str]]: + """Split a unified diff into per-file sections. + + Returns list of (file_path, diff_content) tuples. + """ + matches = list(_FILE_DIFF_PATTERN.finditer(diff)) + if not matches: + return [] + + sections: list[tuple[str, str]] = [] + for i, match in enumerate(matches): + start = match.start() + end = matches[i + 1].start() if i + 1 < len(matches) else len(diff) + file_path = match.group(1) + content = diff[start:end].rstrip() + sections.append((file_path, content)) + + return sections + + +def _split_by_lines( + diff: str, file_path: str, max_lines: int +) -> list[tuple[str, str]]: + """Split a single-file diff into line-based chunks. + + Preserves the diff header in each chunk for context. + Returns list of (file_path, chunk_content) tuples. 
+    """
+    lines = diff.split("\n")
+
+    # Find the header (everything before the first hunk)
+    header_end = 0
+    for i, line in enumerate(lines):
+        if line.startswith("@@"):
+            header_end = i
+            break
+
+    header = "\n".join(lines[:header_end])
+    body_lines = lines[header_end:]
+
+    if len(body_lines) <= max_lines:
+        return [(file_path, diff)]
+
+    chunks: list[tuple[str, str]] = []
+    for start in range(0, len(body_lines), max_lines):
+        chunk_lines = body_lines[start : start + max_lines]
+        chunk_content = (header + "\n" if header else "") + "\n".join(chunk_lines)
+        chunks.append((file_path, chunk_content.rstrip()))
+
+    return chunks
+
+
+def chunk_diff(
+    diff: str,
+    strategy: ChunkStrategy = ChunkStrategy.AUTO,
+    max_lines: int = 200,
+) -> list[DiffChunk]:
+    """Split a diff into chunks using the specified strategy.
+
+    Args:
+        diff: The full unified diff text.
+        strategy: Chunking strategy to use.
+        max_lines: Maximum lines per chunk (for LINES and AUTO strategies).
+
+    Returns:
+        List of DiffChunk objects ready for individual review.
+    """
+    if not diff.strip():
+        return []
+
+    if strategy == ChunkStrategy.AUTO:
+        file_sections = _split_by_file(diff)
+        if len(file_sections) > 1:
+            # Multiple files: use file-based chunking, then split large files
+            raw_chunks: list[tuple[str, str]] = []
+            for file_path, content in file_sections:
+                file_lines = content.split("\n")
+                if len(file_lines) > max_lines:
+                    raw_chunks.extend(
+                        _split_by_lines(content, file_path, max_lines)
+                    )
+                else:
+                    raw_chunks.append((file_path, content))
+        else:
+            # Single file: use line-based chunking
+            file_path = file_sections[0][0] if file_sections else "unknown"
+            content = file_sections[0][1] if file_sections else diff
+            raw_chunks = _split_by_lines(content, file_path, max_lines)
+
+    elif strategy == ChunkStrategy.FILE:
+        raw_chunks = _split_by_file(diff)
+
+    elif strategy == ChunkStrategy.LINES:
+        file_sections = _split_by_file(diff)
+        raw_chunks = []
+        for file_path, content in file_sections:
+            raw_chunks.extend(_split_by_lines(content, file_path, max_lines))
+
+    total = len(raw_chunks)
+    return [
+        DiffChunk(
+            content=content,
+            file_path=file_path,
+            chunk_index=i,
+            total_chunks=total,
+        )
+        for i, (file_path, content) in enumerate(raw_chunks)
+    ]
+
+
+def merge_review_results(results: list[LLMReviewResult]) -> LLMReviewResult:
+    """Merge multiple chunk review results into a unified result.
+
+    Combines issues (deduplicating by fingerprint), aggregates tokens/cost,
+    and joins summaries.
+    """
+    if not results:
+        return LLMReviewResult()
+
+    if len(results) == 1:
+        return results[0]
+
+    # Deduplicate issues by fingerprint
+    seen_fingerprints: set[str] = set()
+    all_issues = []
+    for r in results:
+        for issue in r.issues:
+            if issue.fingerprint and issue.fingerprint in seen_fingerprints:
+                continue
+            if issue.fingerprint:
+                seen_fingerprints.add(issue.fingerprint)
+            all_issues.append(issue)
+
+    # Combine inline comments
+    all_inline = []
+    for r in results:
+        all_inline.extend(r.inline_comments)
+
+    # Join summaries
+    summaries = [r.summary for r in results if r.summary]
+    merged_summary = " | ".join(summaries)
+
+    # Combine lists (deduplicate)
+    all_strengths = list(dict.fromkeys(s for r in results for s in r.strengths))
+    all_concerns = list(dict.fromkeys(c for r in results for c in r.concerns))
+    all_questions = list(dict.fromkeys(q for r in results for q in r.questions))
+
+    # Aggregate metrics
+    total_input = sum(r.input_tokens for r in results)
+    total_output = sum(r.output_tokens for r in results)
+    total_cost = sum(r.cost_usd for r in results)
+
+    # Use the model from the first result (all chunks use the same model)
+    model = results[0].model
+
+    return LLMReviewResult(
+        issues=all_issues,
+        inline_comments=all_inline,
+        summary=merged_summary,
+        strengths=all_strengths,
+        concerns=all_concerns,
+        questions=all_questions,
+        input_tokens=total_input,
+        output_tokens=total_output,
+        model=model,
+        cost_usd=total_cost,
+    )
diff --git a/tests/test_chunker.py b/tests/test_chunker.py
new file mode 100644
index 0000000..cc3397f
--- /dev/null
+++ b/tests/test_chunker.py
@@ -0,0 +1,286 @@
+"""Tests for diff chunking strategy."""
+
+from pr_review_agent.review.chunker import (
+    ChunkStrategy,
+    DiffChunk,
+    chunk_diff,
+    merge_review_results,
+)
+from pr_review_agent.review.llm_reviewer import LLMReviewResult, ReviewIssue
+
+
+class TestChunkDiffByFile:
+    """Test file-based chunking."""
+
+    def test_single_file_no_chunking_needed(self):
+        diff = """diff --git a/src/app.py b/src/app.py
+--- a/src/app.py
++++ b/src/app.py
+@@ -1,3 +1,4 @@
++import os
+ def main():
+     pass"""
+        chunks = chunk_diff(diff, strategy=ChunkStrategy.FILE)
+        assert len(chunks) == 1
+        assert chunks[0].file_path == "src/app.py"
+        assert "+import os" in chunks[0].content
+
+    def test_multiple_files_split_into_chunks(self):
+        diff = """diff --git a/src/app.py b/src/app.py
+--- a/src/app.py
++++ b/src/app.py
+@@ -1,3 +1,4 @@
++import os
+ def main():
+     pass
+diff --git a/src/utils.py b/src/utils.py
+--- a/src/utils.py
++++ b/src/utils.py
+@@ -1,2 +1,3 @@
++import sys
+ def helper():
+     pass
+diff --git a/tests/test_app.py b/tests/test_app.py
+--- a/tests/test_app.py
++++ b/tests/test_app.py
+@@ -1,2 +1,3 @@
++import pytest
+ def test_main():
+     pass"""
+        chunks = chunk_diff(diff, strategy=ChunkStrategy.FILE)
+        assert len(chunks) == 3
+        assert chunks[0].file_path == "src/app.py"
+        assert chunks[1].file_path == "src/utils.py"
+        assert chunks[2].file_path == "tests/test_app.py"
+
+    def test_empty_diff_returns_empty(self):
+        chunks = chunk_diff("", strategy=ChunkStrategy.FILE)
+        assert chunks == []
+
+    def test_preserves_diff_header_per_chunk(self):
+        diff = """diff --git a/a.py b/a.py
+--- a/a.py
++++ b/a.py
+@@ -1,1 +1,2 @@
++x = 1
+ y = 2
+diff --git a/b.py b/b.py
+--- a/b.py
++++ b/b.py
+@@ -1,1 +1,2 @@
++z = 3
+ w = 4"""
+        chunks = chunk_diff(diff, strategy=ChunkStrategy.FILE)
+        assert "diff --git a/a.py b/a.py" in chunks[0].content
+        assert "diff --git a/b.py b/b.py" in chunks[1].content
+
+
+class TestChunkDiffByLines: + """Test line-based chunking for very large files.""" + + def test_small_diff_not_chunked(self): + diff = """diff --git a/big.py b/big.py +--- a/big.py ++++ b/big.py +@@ -1,3 +1,4 @@ ++line1 + line2 + line3""" + chunks = chunk_diff(diff, strategy=ChunkStrategy.LINES, max_lines=100) + assert len(chunks) == 1 + + def test_large_file_split_by_line_limit(self): + # Generate a diff with 50 added lines + lines = [f"+line{i}" for i in range(50)] + diff = ( + "diff --git a/big.py b/big.py\n" + "--- a/big.py\n" + "+++ b/big.py\n" + "@@ -1,1 +1,51 @@\n" + + "\n".join(lines) + + "\n existing_line" + ) + chunks = chunk_diff(diff, strategy=ChunkStrategy.LINES, max_lines=20) + assert len(chunks) > 1 + # All chunks should reference the same file + assert all(c.file_path == "big.py" for c in chunks) + + def test_chunk_index_tracking(self): + lines = [f"+line{i}" for i in range(40)] + diff = ( + "diff --git a/big.py b/big.py\n" + "--- a/big.py\n" + "+++ b/big.py\n" + "@@ -1,1 +1,41 @@\n" + + "\n".join(lines) + ) + chunks = chunk_diff(diff, strategy=ChunkStrategy.LINES, max_lines=15) + for i, chunk in enumerate(chunks): + assert chunk.chunk_index == i + assert chunk.total_chunks == len(chunks) + + +class TestChunkDiffAuto: + """Test automatic strategy selection.""" + + def test_auto_uses_file_for_multi_file_diff(self): + diff = """diff --git a/a.py b/a.py +--- a/a.py ++++ b/a.py +@@ -1,1 +1,2 @@ ++x = 1 +diff --git a/b.py b/b.py +--- a/b.py ++++ b/b.py +@@ -1,1 +1,2 @@ ++y = 2""" + chunks = chunk_diff(diff, strategy=ChunkStrategy.AUTO, max_lines=100) + assert len(chunks) == 2 + + def test_auto_uses_lines_for_single_large_file(self): + lines = [f"+line{i}" for i in range(50)] + diff = ( + "diff --git a/big.py b/big.py\n" + "--- a/big.py\n" + "+++ b/big.py\n" + "@@ -1,1 +1,51 @@\n" + + "\n".join(lines) + ) + chunks = chunk_diff(diff, strategy=ChunkStrategy.AUTO, max_lines=20) + assert len(chunks) > 1 + + +class TestDiffChunk: + """Test DiffChunk dataclass.""" + + def test_chunk_has_required_fields(self): + chunk = DiffChunk( + content="diff content", + file_path="src/app.py", + chunk_index=0, + total_chunks=3, + ) + assert chunk.content == "diff content" + assert chunk.file_path == "src/app.py" + assert chunk.chunk_index == 0 + assert chunk.total_chunks == 3 + + +class TestMergeReviewResults: + """Test merging chunked review results.""" + + def test_merge_empty_list(self): + result = merge_review_results([]) + assert result.issues == [] + assert result.summary == "" + + def test_merge_single_result(self): + r = LLMReviewResult( + summary="Good code", + issues=[ + ReviewIssue( + severity="minor", + category="style", + file="a.py", + line=1, + description="Unused var", + suggestion="Remove it", + ) + ], + strengths=["Clean"], + concerns=["None"], + ) + merged = merge_review_results([r]) + assert merged.summary == "Good code" + assert len(merged.issues) == 1 + + def test_merge_multiple_results_combines_issues(self): + r1 = LLMReviewResult( + summary="File A review", + issues=[ + ReviewIssue( + severity="major", + category="logic", + file="a.py", + line=10, + description="Bug in a.py", + suggestion="Fix it", + ) + ], + strengths=["Good tests"], + input_tokens=100, + output_tokens=50, + cost_usd=0.01, + ) + r2 = LLMReviewResult( + summary="File B review", + issues=[ + ReviewIssue( + severity="minor", + category="style", + file="b.py", + line=5, + description="Style issue", + suggestion="Rename", + ) + ], + strengths=["Clean code"], + concerns=["Complex logic"], + input_tokens=80, + 
output_tokens=40, + cost_usd=0.008, + ) + merged = merge_review_results([r1, r2]) + assert len(merged.issues) == 2 + assert merged.issues[0].file == "a.py" + assert merged.issues[1].file == "b.py" + + def test_merge_aggregates_tokens_and_cost(self): + r1 = LLMReviewResult( + summary="A", input_tokens=100, output_tokens=50, cost_usd=0.01 + ) + r2 = LLMReviewResult( + summary="B", input_tokens=200, output_tokens=100, cost_usd=0.02 + ) + merged = merge_review_results([r1, r2]) + assert merged.input_tokens == 300 + assert merged.output_tokens == 150 + assert abs(merged.cost_usd - 0.03) < 0.001 + + def test_merge_combines_strengths_and_concerns(self): + r1 = LLMReviewResult( + summary="A", strengths=["s1"], concerns=["c1"], questions=["q1"] + ) + r2 = LLMReviewResult( + summary="B", strengths=["s2"], concerns=["c2"], questions=["q2"] + ) + merged = merge_review_results([r1, r2]) + assert "s1" in merged.strengths + assert "s2" in merged.strengths + assert "c1" in merged.concerns + assert "c2" in merged.concerns + assert "q1" in merged.questions + assert "q2" in merged.questions + + def test_merge_deduplicates_issues_by_fingerprint(self): + """Issues with same fingerprint from overlapping chunks are deduplicated.""" + issue = ReviewIssue( + severity="major", + category="logic", + file="a.py", + line=10, + description="Same bug", + suggestion="Fix", + fingerprint="abc123", + ) + r1 = LLMReviewResult(summary="A", issues=[issue]) + r2 = LLMReviewResult(summary="B", issues=[issue]) + merged = merge_review_results([r1, r2]) + assert len(merged.issues) == 1 + + def test_merge_summary_joins_chunk_summaries(self): + r1 = LLMReviewResult(summary="File A looks good") + r2 = LLMReviewResult(summary="File B has issues") + merged = merge_review_results([r1, r2]) + assert "File A" in merged.summary + assert "File B" in merged.summary
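
Usage sketch (illustrative only, not part of the patch): how the new chunking API composes end to end, mirroring _run_chunked_review above. Here big_diff, pr_description, model, config, focus_areas, and reviewer are stand-ins for values the caller already holds; reviewer.review(...) follows the keyword signature used in the patch.

    from pr_review_agent.review.chunker import (
        ChunkStrategy,
        chunk_diff,
        merge_review_results,
    )

    # AUTO picks file-based chunking for multi-file diffs and line-based
    # chunking for a single oversized file; oversized files inside a
    # multi-file diff are split further by lines.
    chunks = chunk_diff(big_diff, strategy=ChunkStrategy.AUTO, max_lines=200)

    # Review each chunk independently, then merge: issues sharing a
    # fingerprint are deduplicated, token counts and cost are summed,
    # and per-chunk summaries are joined with " | ".
    results = [
        reviewer.review(
            diff=chunk.content,
            pr_description=pr_description,
            model=model,
            config=config,
            focus_areas=focus_areas,
        )
        for chunk in chunks
    ]
    merged = merge_review_results(results)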