From d84ef48ddd9690e10bc77fffb75f096d9b25b4b0 Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Sat, 24 Jan 2026 12:09:14 -0800 Subject: [PATCH] feat: add context overflow chunked review - ChunkStrategy (FILE, LINES, AUTO) splits large diffs into chunks - merge_review_results combines chunk results with deduplication - Degradation pipeline triggers chunked review on context overflow - AUTO strategy: file-based for multi-file, line-based for single large files - 17 tests covering chunking strategies and result merging Implements #40 Co-Authored-By: Claude Opus 4.5 --- src/pr_review_agent/execution/degradation.py | 41 ++- src/pr_review_agent/review/chunker.py | 201 +++++++++++++ tests/test_chunker.py | 286 +++++++++++++++++++ 3 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 src/pr_review_agent/review/chunker.py create mode 100644 tests/test_chunker.py diff --git a/src/pr_review_agent/execution/degradation.py b/src/pr_review_agent/execution/degradation.py index b986cdd..39bc58c 100644 --- a/src/pr_review_agent/execution/degradation.py +++ b/src/pr_review_agent/execution/degradation.py @@ -10,7 +10,12 @@ from enum import Enum from typing import Any -from pr_review_agent.execution.retry_handler import RetryStrategy, retry_with_adaptation +from pr_review_agent.execution.retry_handler import ( + RetryExhaustedError, + RetryStrategy, + retry_with_adaptation, +) +from pr_review_agent.review.chunker import ChunkStrategy, chunk_diff, merge_review_results from pr_review_agent.review.llm_reviewer import LLMReviewer, LLMReviewResult @@ -67,6 +72,7 @@ def execute(self) -> DegradationResult: """Execute the review pipeline with graceful degradation. Always returns a result, never raises exceptions. + Tries: full → chunked (on context overflow) → reduced → gates-only. 
""" # Try full review with retry/backoff try: @@ -77,6 +83,20 @@ def execute(self) -> DegradationResult: gate_results=self._gate_results, errors=self._errors, ) + except RetryExhaustedError as e: + self._errors.append(f"Full review failed: {e}") + # If context overflow was a failure, try chunked review + if any(a.failure_type == "context_too_long" for a in e.attempts): + try: + review = self._run_chunked_review(self.base_model) + return DegradationResult( + level=DegradationLevel.FULL, + review_result=review, + gate_results=self._gate_results, + errors=self._errors, + ) + except Exception as chunk_err: + self._errors.append(f"Chunked review failed: {chunk_err}") except Exception as e: self._errors.append(f"Full review failed: {e}") @@ -123,6 +143,25 @@ def validate(result: LLMReviewResult) -> bool: ) return retry_result.result + def _run_chunked_review(self, model: str) -> LLMReviewResult: + """Run chunked review by splitting diff into per-file chunks.""" + chunks = chunk_diff(self.diff, strategy=ChunkStrategy.AUTO, max_lines=200) + if not chunks: + raise ValueError("No chunks produced from diff") + + results: list[LLMReviewResult] = [] + for chunk in chunks: + result = self._reviewer.review( + diff=chunk.content, + pr_description=self.pr_description, + model=model, + config=self.config, + focus_areas=self.focus_areas, + ) + results.append(result) + + return merge_review_results(results) + def _run_reduced_review(self) -> LLMReviewResult: """Run reduced review using Haiku with retries.""" fallback_model = self.config.llm.simple_model diff --git a/src/pr_review_agent/review/chunker.py b/src/pr_review_agent/review/chunker.py new file mode 100644 index 0000000..fa97d70 --- /dev/null +++ b/src/pr_review_agent/review/chunker.py @@ -0,0 +1,201 @@ +"""Diff chunking for large PRs that exceed LLM context limits. + +Splits diffs into manageable chunks and merges review results. +""" + +import re +from dataclasses import dataclass +from enum import Enum + +from pr_review_agent.review.llm_reviewer import LLMReviewResult + + +class ChunkStrategy(Enum): + """Strategy for splitting diffs.""" + + FILE = "file" + LINES = "lines" + AUTO = "auto" + + +@dataclass +class DiffChunk: + """A chunk of a diff for individual review.""" + + content: str + file_path: str + chunk_index: int + total_chunks: int + + +# Pattern to match the start of a file diff +_FILE_DIFF_PATTERN = re.compile(r"^diff --git a/(.*?) b/", re.MULTILINE) + + +def _split_by_file(diff: str) -> list[tuple[str, str]]: + """Split a unified diff into per-file sections. + + Returns list of (file_path, diff_content) tuples. + """ + matches = list(_FILE_DIFF_PATTERN.finditer(diff)) + if not matches: + return [] + + sections: list[tuple[str, str]] = [] + for i, match in enumerate(matches): + start = match.start() + end = matches[i + 1].start() if i + 1 < len(matches) else len(diff) + file_path = match.group(1) + content = diff[start:end].rstrip() + sections.append((file_path, content)) + + return sections + + +def _split_by_lines( + diff: str, file_path: str, max_lines: int +) -> list[tuple[str, str]]: + """Split a single-file diff into line-based chunks. + + Preserves the diff header in each chunk for context. + Returns list of (file_path, chunk_content) tuples. 
+    """
+    lines = diff.split("\n")
+
+    # Find the header (everything before the first hunk)
+    header_end = 0
+    for i, line in enumerate(lines):
+        if line.startswith("@@"):
+            header_end = i
+            break
+
+    header = "\n".join(lines[:header_end])
+    body_lines = lines[header_end:]
+
+    if len(body_lines) <= max_lines:
+        return [(file_path, diff)]
+
+    chunks: list[tuple[str, str]] = []
+    for start in range(0, len(body_lines), max_lines):
+        chunk_lines = body_lines[start : start + max_lines]
+        chunk_content = (header + "\n" if header else "") + "\n".join(chunk_lines)
+        chunks.append((file_path, chunk_content.rstrip()))
+
+    return chunks
+
+
+def chunk_diff(
+    diff: str,
+    strategy: ChunkStrategy = ChunkStrategy.AUTO,
+    max_lines: int = 200,
+) -> list[DiffChunk]:
+    """Split a diff into chunks using the specified strategy.
+
+    Args:
+        diff: The full unified diff text.
+        strategy: Chunking strategy to use.
+        max_lines: Maximum lines per chunk (for LINES and AUTO strategies).
+
+    Returns:
+        List of DiffChunk objects ready for individual review.
+    """
+    if not diff.strip():
+        return []
+
+    if strategy == ChunkStrategy.AUTO:
+        file_sections = _split_by_file(diff)
+        if len(file_sections) > 1:
+            # Multiple files: use file-based chunking, then split large files
+            raw_chunks: list[tuple[str, str]] = []
+            for file_path, content in file_sections:
+                file_lines = content.split("\n")
+                if len(file_lines) > max_lines:
+                    raw_chunks.extend(
+                        _split_by_lines(content, file_path, max_lines)
+                    )
+                else:
+                    raw_chunks.append((file_path, content))
+        else:
+            # Single file: use line-based chunking
+            file_path = file_sections[0][0] if file_sections else "unknown"
+            content = file_sections[0][1] if file_sections else diff
+            raw_chunks = _split_by_lines(content, file_path, max_lines)
+
+    elif strategy == ChunkStrategy.FILE:
+        raw_chunks = _split_by_file(diff)
+
+    elif strategy == ChunkStrategy.LINES:
+        file_sections = _split_by_file(diff)
+        raw_chunks = []
+        for file_path, content in file_sections:
+            raw_chunks.extend(_split_by_lines(content, file_path, max_lines))
+
+    total = len(raw_chunks)
+    return [
+        DiffChunk(
+            content=content,
+            file_path=file_path,
+            chunk_index=i,
+            total_chunks=total,
+        )
+        for i, (file_path, content) in enumerate(raw_chunks)
+    ]
+
+
+def merge_review_results(results: list[LLMReviewResult]) -> LLMReviewResult:
+    """Merge multiple chunk review results into a unified result.
+
+    Combines issues (deduplicating by fingerprint), aggregates tokens/cost,
+    and joins summaries.
+    """
+    if not results:
+        return LLMReviewResult()
+
+    if len(results) == 1:
+        return results[0]
+
+    # Deduplicate issues by fingerprint
+    seen_fingerprints: set[str] = set()
+    all_issues = []
+    for r in results:
+        for issue in r.issues:
+            if issue.fingerprint and issue.fingerprint in seen_fingerprints:
+                continue
+            if issue.fingerprint:
+                seen_fingerprints.add(issue.fingerprint)
+            all_issues.append(issue)
+
+    # Combine inline comments
+    all_inline = []
+    for r in results:
+        all_inline.extend(r.inline_comments)
+
+    # Join summaries
+    summaries = [r.summary for r in results if r.summary]
+    merged_summary = " | ".join(summaries)
+
+    # Combine lists (deduplicate)
+    all_strengths = list(dict.fromkeys(s for r in results for s in r.strengths))
+    all_concerns = list(dict.fromkeys(c for r in results for c in r.concerns))
+    all_questions = list(dict.fromkeys(q for r in results for q in r.questions))
+
+    # Aggregate metrics
+    total_input = sum(r.input_tokens for r in results)
+    total_output = sum(r.output_tokens for r in results)
+    total_cost = sum(r.cost_usd for r in results)
+
+    # Use the model from the first result (all chunks use the same model)
+    model = results[0].model
+
+    return LLMReviewResult(
+        issues=all_issues,
+        inline_comments=all_inline,
+        summary=merged_summary,
+        strengths=all_strengths,
+        concerns=all_concerns,
+        questions=all_questions,
+        input_tokens=total_input,
+        output_tokens=total_output,
+        model=model,
+        cost_usd=total_cost,
+    )
diff --git a/tests/test_chunker.py b/tests/test_chunker.py
new file mode 100644
index 0000000..cc3397f
--- /dev/null
+++ b/tests/test_chunker.py
@@ -0,0 +1,286 @@
+"""Tests for diff chunking strategy."""
+
+from pr_review_agent.review.chunker import (
+    ChunkStrategy,
+    DiffChunk,
+    chunk_diff,
+    merge_review_results,
+)
+from pr_review_agent.review.llm_reviewer import LLMReviewResult, ReviewIssue
+
+
+class TestChunkDiffByFile:
+    """Test file-based chunking."""
+
+    def test_single_file_no_chunking_needed(self):
+        diff = """diff --git a/src/app.py b/src/app.py
+--- a/src/app.py
++++ b/src/app.py
+@@ -1,3 +1,4 @@
++import os
+ def main():
+     pass"""
+        chunks = chunk_diff(diff, strategy=ChunkStrategy.FILE)
+        assert len(chunks) == 1
+        assert chunks[0].file_path == "src/app.py"
+        assert "+import os" in chunks[0].content
+
+    def test_multiple_files_split_into_chunks(self):
+        diff = """diff --git a/src/app.py b/src/app.py
+--- a/src/app.py
++++ b/src/app.py
+@@ -1,3 +1,4 @@
++import os
+ def main():
+     pass
+diff --git a/src/utils.py b/src/utils.py
+--- a/src/utils.py
++++ b/src/utils.py
+@@ -1,2 +1,3 @@
++import sys
+ def helper():
+     pass
+diff --git a/tests/test_app.py b/tests/test_app.py
+--- a/tests/test_app.py
++++ b/tests/test_app.py
+@@ -1,2 +1,3 @@
++import pytest
+ def test_main():
+     pass"""
+        chunks = chunk_diff(diff, strategy=ChunkStrategy.FILE)
+        assert len(chunks) == 3
+        assert chunks[0].file_path == "src/app.py"
+        assert chunks[1].file_path == "src/utils.py"
+        assert chunks[2].file_path == "tests/test_app.py"
+
+    def test_empty_diff_returns_empty(self):
+        chunks = chunk_diff("", strategy=ChunkStrategy.FILE)
+        assert chunks == []
+
+    def test_preserves_diff_header_per_chunk(self):
+        diff = """diff --git a/a.py b/a.py
+--- a/a.py
++++ b/a.py
+@@ -1,1 +1,2 @@
++x = 1
+ y = 2
+diff --git a/b.py b/b.py
+--- a/b.py
++++ b/b.py
+@@ -1,1 +1,2 @@
++z = 3
+ w = 4"""
+        chunks = chunk_diff(diff, strategy=ChunkStrategy.FILE)
+        assert "diff --git a/a.py b/a.py" in chunks[0].content
+        assert "diff --git a/b.py b/b.py" in chunks[1].content
+
+
+class TestChunkDiffByLines: + """Test line-based chunking for very large files.""" + + def test_small_diff_not_chunked(self): + diff = """diff --git a/big.py b/big.py +--- a/big.py ++++ b/big.py +@@ -1,3 +1,4 @@ ++line1 + line2 + line3""" + chunks = chunk_diff(diff, strategy=ChunkStrategy.LINES, max_lines=100) + assert len(chunks) == 1 + + def test_large_file_split_by_line_limit(self): + # Generate a diff with 50 added lines + lines = [f"+line{i}" for i in range(50)] + diff = ( + "diff --git a/big.py b/big.py\n" + "--- a/big.py\n" + "+++ b/big.py\n" + "@@ -1,1 +1,51 @@\n" + + "\n".join(lines) + + "\n existing_line" + ) + chunks = chunk_diff(diff, strategy=ChunkStrategy.LINES, max_lines=20) + assert len(chunks) > 1 + # All chunks should reference the same file + assert all(c.file_path == "big.py" for c in chunks) + + def test_chunk_index_tracking(self): + lines = [f"+line{i}" for i in range(40)] + diff = ( + "diff --git a/big.py b/big.py\n" + "--- a/big.py\n" + "+++ b/big.py\n" + "@@ -1,1 +1,41 @@\n" + + "\n".join(lines) + ) + chunks = chunk_diff(diff, strategy=ChunkStrategy.LINES, max_lines=15) + for i, chunk in enumerate(chunks): + assert chunk.chunk_index == i + assert chunk.total_chunks == len(chunks) + + +class TestChunkDiffAuto: + """Test automatic strategy selection.""" + + def test_auto_uses_file_for_multi_file_diff(self): + diff = """diff --git a/a.py b/a.py +--- a/a.py ++++ b/a.py +@@ -1,1 +1,2 @@ ++x = 1 +diff --git a/b.py b/b.py +--- a/b.py ++++ b/b.py +@@ -1,1 +1,2 @@ ++y = 2""" + chunks = chunk_diff(diff, strategy=ChunkStrategy.AUTO, max_lines=100) + assert len(chunks) == 2 + + def test_auto_uses_lines_for_single_large_file(self): + lines = [f"+line{i}" for i in range(50)] + diff = ( + "diff --git a/big.py b/big.py\n" + "--- a/big.py\n" + "+++ b/big.py\n" + "@@ -1,1 +1,51 @@\n" + + "\n".join(lines) + ) + chunks = chunk_diff(diff, strategy=ChunkStrategy.AUTO, max_lines=20) + assert len(chunks) > 1 + + +class TestDiffChunk: + """Test DiffChunk dataclass.""" + + def test_chunk_has_required_fields(self): + chunk = DiffChunk( + content="diff content", + file_path="src/app.py", + chunk_index=0, + total_chunks=3, + ) + assert chunk.content == "diff content" + assert chunk.file_path == "src/app.py" + assert chunk.chunk_index == 0 + assert chunk.total_chunks == 3 + + +class TestMergeReviewResults: + """Test merging chunked review results.""" + + def test_merge_empty_list(self): + result = merge_review_results([]) + assert result.issues == [] + assert result.summary == "" + + def test_merge_single_result(self): + r = LLMReviewResult( + summary="Good code", + issues=[ + ReviewIssue( + severity="minor", + category="style", + file="a.py", + line=1, + description="Unused var", + suggestion="Remove it", + ) + ], + strengths=["Clean"], + concerns=["None"], + ) + merged = merge_review_results([r]) + assert merged.summary == "Good code" + assert len(merged.issues) == 1 + + def test_merge_multiple_results_combines_issues(self): + r1 = LLMReviewResult( + summary="File A review", + issues=[ + ReviewIssue( + severity="major", + category="logic", + file="a.py", + line=10, + description="Bug in a.py", + suggestion="Fix it", + ) + ], + strengths=["Good tests"], + input_tokens=100, + output_tokens=50, + cost_usd=0.01, + ) + r2 = LLMReviewResult( + summary="File B review", + issues=[ + ReviewIssue( + severity="minor", + category="style", + file="b.py", + line=5, + description="Style issue", + suggestion="Rename", + ) + ], + strengths=["Clean code"], + concerns=["Complex logic"], + input_tokens=80, + 
output_tokens=40, + cost_usd=0.008, + ) + merged = merge_review_results([r1, r2]) + assert len(merged.issues) == 2 + assert merged.issues[0].file == "a.py" + assert merged.issues[1].file == "b.py" + + def test_merge_aggregates_tokens_and_cost(self): + r1 = LLMReviewResult( + summary="A", input_tokens=100, output_tokens=50, cost_usd=0.01 + ) + r2 = LLMReviewResult( + summary="B", input_tokens=200, output_tokens=100, cost_usd=0.02 + ) + merged = merge_review_results([r1, r2]) + assert merged.input_tokens == 300 + assert merged.output_tokens == 150 + assert abs(merged.cost_usd - 0.03) < 0.001 + + def test_merge_combines_strengths_and_concerns(self): + r1 = LLMReviewResult( + summary="A", strengths=["s1"], concerns=["c1"], questions=["q1"] + ) + r2 = LLMReviewResult( + summary="B", strengths=["s2"], concerns=["c2"], questions=["q2"] + ) + merged = merge_review_results([r1, r2]) + assert "s1" in merged.strengths + assert "s2" in merged.strengths + assert "c1" in merged.concerns + assert "c2" in merged.concerns + assert "q1" in merged.questions + assert "q2" in merged.questions + + def test_merge_deduplicates_issues_by_fingerprint(self): + """Issues with same fingerprint from overlapping chunks are deduplicated.""" + issue = ReviewIssue( + severity="major", + category="logic", + file="a.py", + line=10, + description="Same bug", + suggestion="Fix", + fingerprint="abc123", + ) + r1 = LLMReviewResult(summary="A", issues=[issue]) + r2 = LLMReviewResult(summary="B", issues=[issue]) + merged = merge_review_results([r1, r2]) + assert len(merged.issues) == 1 + + def test_merge_summary_joins_chunk_summaries(self): + r1 = LLMReviewResult(summary="File A looks good") + r2 = LLMReviewResult(summary="File B has issues") + merged = merge_review_results([r1, r2]) + assert "File A" in merged.summary + assert "File B" in merged.summary
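
Usage sketch (illustrative only, not part of the patch): how the new chunking API composes end to end, mirroring _run_chunked_review above. Here big_diff, pr_description, model, config, focus_areas, and reviewer are stand-ins for values the caller already holds; reviewer.review(...) follows the keyword signature used in the patch.

    from pr_review_agent.review.chunker import (
        ChunkStrategy,
        chunk_diff,
        merge_review_results,
    )

    # AUTO picks file-based chunking for multi-file diffs and line-based
    # chunking for a single oversized file; oversized files inside a
    # multi-file diff are split further by lines.
    chunks = chunk_diff(big_diff, strategy=ChunkStrategy.AUTO, max_lines=200)

    # Review each chunk independently, then merge: issues sharing a
    # fingerprint are deduplicated, token counts and cost are summed,
    # and per-chunk summaries are joined with " | ".
    results = [
        reviewer.review(
            diff=chunk.content,
            pr_description=pr_description,
            model=model,
            config=config,
            focus_areas=focus_areas,
        )
        for chunk in chunks
    ]
    merged = merge_review_results(results)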