+
+ Args:
+ file_path: Path to the file
+ old_line: Line number in old file (or None)
+ new_line: Line number in new file (or None)
+
+ Returns:
+ Line code string
+ """
+ import hashlib
+
+ # Generate SHA1 hash of the file path
+ filename_sha = hashlib.sha1(file_path.encode()).hexdigest()
+
+ # Format: sha_old_new, using empty string for None values
+ old_str = str(old_line) if old_line is not None else ""
+ new_str = str(new_line) if new_line is not None else ""
+
+ return f"{filename_sha}_{old_str}_{new_str}"
+
+
def create_position_for_issue(
diff_text: str,
issue_line_start: int,
@@ -970,26 +1051,28 @@ def create_position_for_issue(
current_old += 1
current_new += 1
- # Choose the best line to anchor the discussion:
- # 1. Prefer the first added line (issues are usually about new code)
- # 2. Fall back to middle context line
- # 3. Finally use deleted line or start line
- found_old_line = None
- found_new_line = None
+ # Determine start and end lines for the position
+ # Priority: added lines > context lines > deleted lines
+ start_old_line = None
+ start_new_line = None
+ end_old_line = None
+ end_new_line = None
if added_lines:
- # Use the first added line in the range
- found_old_line, found_new_line = added_lines[0]
+ # Use the first and last added lines in the range
+ start_old_line, start_new_line = added_lines[0]
+ end_old_line, end_new_line = added_lines[-1]
elif context_lines:
- # Use the middle context line
- mid_idx = len(context_lines) // 2
- found_old_line, found_new_line = context_lines[mid_idx]
+ # Use the first and last context lines
+ start_old_line, start_new_line = context_lines[0]
+ end_old_line, end_new_line = context_lines[-1]
elif deleted_lines:
- # Use the first deleted line
- found_old_line, found_new_line = deleted_lines[0]
+ # Use the first and last deleted lines
+ start_old_line, start_new_line = deleted_lines[0]
+ end_old_line, end_new_line = deleted_lines[-1]
# If we didn't find any line in the diff, return None
- if found_old_line is None and found_new_line is None:
+ if start_old_line is None and start_new_line is None:
return None
# Create position object
@@ -1002,11 +1085,37 @@ def create_position_for_issue(
"position_type": "text",
}
- if found_new_line is not None:
- position["new_line"] = found_new_line
-
- if found_old_line is not None:
- position["old_line"] = found_old_line
+ # For single-line issues, use the simple format
+ if issue_line_start == issue_line_end:
+ if start_new_line is not None:
+ position["new_line"] = start_new_line
+ if start_old_line is not None:
+ position["old_line"] = start_old_line
+ else:
+ # For multi-line issues, use line_range
+ # Determine the line type (new for added lines, old for deleted lines)
+ line_type = "new" if start_new_line is not None else "old"
+
+ position["line_range"] = {
+ "start": {
+ "line_code": _generate_line_code(new_path, start_old_line, start_new_line),
+ "type": line_type,
+ },
+ "end": {
+ "line_code": _generate_line_code(new_path, end_old_line, end_new_line),
+ "type": line_type,
+ },
+ }
+
+ # Add line numbers to start and end
+ if start_old_line is not None:
+ position["line_range"]["start"]["old_line"] = start_old_line
+ if start_new_line is not None:
+ position["line_range"]["start"]["new_line"] = start_new_line
+ if end_old_line is not None:
+ position["line_range"]["end"]["old_line"] = end_old_line
+ if end_new_line is not None:
+ position["line_range"]["end"]["new_line"] = end_new_line
return position
@@ -1032,7 +1141,8 @@ def create_discussion(
# GitLab discussions don't have separate titles in the API,
# so we include the title in the body with markdown formatting
- discussion_id = post_discussion(
+ # post_discussion returns (discussion_id, note_id), we only need discussion_id
+ discussion_id, _ = post_discussion(
api_v4=gitlab_config.api_v4,
token=gitlab_config.token,
project_id=gitlab_config.project_id,
diff --git a/src/reviewbot/infra/gitlab/note.py b/src/reviewbot/infra/gitlab/note.py
index 6bfa966..535c6c7 100644
--- a/src/reviewbot/infra/gitlab/note.py
+++ b/src/reviewbot/infra/gitlab/note.py
@@ -35,9 +35,9 @@ def post_discussion(
body: str,
position: dict[str, Any] | None = None,
timeout: int = 30,
-) -> str:
+) -> tuple[str, str | None]:
"""
- Create a new discussion and return its ID.
+ Create a new discussion and return its ID and first note ID.
Args:
api_v4: GitLab API v4 base URL
@@ -49,7 +49,7 @@ def post_discussion(
timeout: Request timeout
Returns:
- The discussion ID from GitLab
+ Tuple of (discussion_id, note_id). note_id may be None if not found.
"""
url = f"{api_v4.rstrip('/')}/projects/{project_id}/merge_requests/{mr_iid}/discussions"
@@ -90,14 +90,18 @@ def post_discussion(
r.raise_for_status()
- # GitLab returns the created discussion with an 'id' field
+ # GitLab returns the created discussion with an 'id' field and notes array
response_data = r.json()
discussion_id = response_data.get("id")
if not discussion_id:
raise RuntimeError(f"Discussion created but no ID returned: {response_data}")
- return discussion_id
+ # Also return the first note ID (the discussion body note)
+ notes = response_data.get("notes", [])
+ note_id = notes[0].get("id") if notes else None
+
+ return discussion_id, note_id
def post_discussion_reply(
@@ -137,7 +141,8 @@ def create_discussion(
# GitLab discussions don't have separate titles, so we include it in the body
full_body = f"## {title}\n\n{body}"
- discussion_id = post_discussion(
+ # post_discussion returns (discussion_id, note_id), we only need discussion_id
+ discussion_id, _ = post_discussion(
api_v4=api_v4,
token=token,
project_id=project_id,
@@ -266,12 +271,14 @@ def update_discussion_note(
timeout=timeout,
)
+ # Check for errors and raise with detailed information
if r.status_code >= 400:
console.print(f"[red]Failed to update note: {r.status_code} {r.reason}[/red]")
try:
error_response = r.json()
console.print(f"[red]Error response: {error_response}[/red]")
- except Exception:
+ except ValueError:
+ # JSON parsing failed, use text
+ error_response = r.text
console.print(f"[red]Error response text: {r.text}[/red]")
-
- r.raise_for_status()
+ raise RuntimeError(f"Failed to update note: {r.status_code} {r.reason}: {error_response}")
From c8c288c8c8ca3a48f08da2e2acf3f2c30398a8f4 Mon Sep 17 00:00:00 2001
From: canefe <8518141+canefe@users.noreply.github.com>
Date: Tue, 6 Jan 2026 18:08:00 +0400
Subject: [PATCH 8/8] feat: integrate reasoning tool and enhance review process
- Added a new `think` tool to record internal reasoning during code reviews, improving context retention and analysis quality.
- Updated the review process to require reasoning before generating output, ensuring deeper analysis of code changes.
- Enhanced the summary generation to include previous reasoning context, providing reviewers with insights into past evaluations.
- Modified the configuration to support thread creation options, allowing for more flexible discussion management.
- Refactored various functions to accommodate these enhancements and improve overall code clarity.
---
src/reviewbot/agent/tasks/issues.py | 105 ++++-
src/reviewbot/agent/workflow.py | 596 ++++++++++++----------------
src/reviewbot/core/config.py | 1 +
src/reviewbot/infra/config/env.py | 3 +
src/reviewbot/infra/gitlab/diff.py | 5 +-
src/reviewbot/infra/gitlab/note.py | 11 +-
src/reviewbot/models/gpt.py | 2 +-
src/reviewbot/tools/__init__.py | 2 +
src/reviewbot/tools/think.py | 45 +++
9 files changed, 411 insertions(+), 359 deletions(-)
create mode 100644 src/reviewbot/tools/think.py
diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py
index a05e314..9a58f23 100644
--- a/src/reviewbot/agent/tasks/issues.py
+++ b/src/reviewbot/agent/tasks/issues.py
@@ -18,6 +18,34 @@
console = Console()
+def get_reasoning_context() -> str:
+ """
+ Retrieve stored reasoning history from the current context.
+
+ Returns:
+ Formatted string of previous reasoning, or empty string if none exists.
+ """
+ try:
+ context = store_manager_ctx.get()
+ issue_store = context.get("issue_store")
+
+ if not issue_store or not hasattr(issue_store, "_reasoning_history"):
+ return ""
+
+ reasoning_history = issue_store._reasoning_history
+ if not reasoning_history:
+ return ""
+
+ # Format reasoning history for context
+ formatted = "\n\n**Your Previous Reasoning:**\n"
+ for i, reasoning in enumerate(reasoning_history, 1):
+ formatted += f"{i}. {reasoning}\n"
+
+ return formatted
+ except Exception:
+ return ""
+
+
def with_retry(func: Callable, settings: ToolCallerSettings, *args, **kwargs) -> Any:
"""
Execute a function with exponential backoff retry logic.
@@ -298,6 +326,7 @@ def quick_scan_file(
- Complex algorithms or data structures
- Error handling changes
- Configuration changes that affect behavior
+- Use tool 'think' to reason. You must reason at least 10 times before giving an answer
Return FALSE if:
- Only formatting/whitespace changes
@@ -383,10 +412,48 @@ def review_single_file(
"""
Review a single diff file and return issues found.
"""
+ # Get any previous reasoning context
+ reasoning_context = get_reasoning_context()
+
+ # Force a reasoning pass to ensure think() is invoked during deep review
+ try:
+ from reviewbot.tools import get_diff as get_diff_tool
+
+ diff_content = get_diff_tool.invoke({"file_path": file_path})
+ think_messages: list[BaseMessage] = [
+ SystemMessage(
+ content=(
+ "You are a senior code reviewer. You MUST call think() exactly once "
+ "with 2-5 sentences of reasoning about the provided diff. "
+ "Do not use any other tools. After calling think(), reply with the "
+ "single word DONE."
+ )
+ ),
+ HumanMessage(
+ content=f"""Diff for {file_path}:
+
+```diff
+{diff_content}
+```
+""",
+ ),
+ ]
+ think_settings = ToolCallerSettings(max_tool_calls=1, max_iterations=1)
+ tool_caller(agent, think_messages, think_settings)
+ except Exception as e:
+ console.print(f"[yellow]⚠ Failed to record reasoning for {file_path}: {e}[/yellow]")
+
messages: list[BaseMessage] = [
SystemMessage(
content=f"""You are a senior code reviewer analyzing a specific file change.
+REASONING TOOL:
+- You have access to a `think()` tool for recording your internal reasoning
+- Use it to plan your approach, analyze patterns, or reason about potential issues
+- Your reasoning is stored and will be available in subsequent requests
+- This helps maintain context and improves review quality{reasoning_context}
+ - During deep reviews, you MUST call think() before producing your JSON output
+
Your task: Review ONLY the file '{file_path}' from the merge request diff.
IMPORTANT GUIDELINES:
@@ -396,36 +463,42 @@ def review_single_file(
- Only report issues with clear negative impact (bugs, security risks, performance problems, logic errors)
- Avoid reporting issues about code style, formatting, or personal preferences unless they violate critical standards
- Medium/High severity issues should be reserved for actual bugs, security vulnerabilities, or broken functionality
+- The `description` field MUST include a fenced ```diff block quoting only the relevant added/removed/context lines (without @@ hunk headers; + and - markers are fine), followed by a short plain-text explanation (1-3 sentences)
CRITICAL - KNOWLEDGE CUTOFF AWARENESS:
-⚠️ Your training data has a cutoff date. The code you're reviewing may use:
+Your training data has a cutoff date. The code you're reviewing may use:
- Package versions released AFTER your training (e.g., v2, v3 of libraries)
- Language versions you don't know about (e.g., Go 1.23+, Python 3.13+)
- Import paths that have changed since your training
- APIs that have been updated
DO NOT FLAG as issues:
-❌ Version numbers (e.g., "Go 1.25 doesn't exist" - it might now!)
-❌ Import paths you don't recognize (e.g., "should be v1 not v2" - v2 might be correct!)
-❌ Package versions (e.g., "mongo-driver/v2" - newer versions exist!)
-❌ Language features you don't recognize (they might be new)
-❌ API methods you don't know (they might have been added)
+Version numbers (e.g., "Go 1.25 doesn't exist" - it might now!)
+Import paths you don't recognize (e.g., "should be v1 not v2" - v2 might be correct!)
+Package versions (e.g., "mongo-driver/v2" - newer versions exist!)
+Language features you don't recognize (they might be new)
+API methods you don't know (they might have been added)
ONLY flag version/import issues if:
-✅ There's an obvious typo (e.g., "monggo" instead of "mongo")
-✅ The code itself shows an error (e.g., import fails in the diff)
-✅ There's a clear pattern mismatch (e.g., mixing v1 and v2 imports inconsistently)
+There's an obvious typo (e.g., "monggo" instead of "mongo")
+The code itself shows an error (e.g., import fails in the diff)
+There's a clear pattern mismatch (e.g., mixing v1 and v2 imports inconsistently)
When in doubt about versions/imports: ASSUME THE DEVELOPER IS CORRECT and skip it.
SUGGESTIONS:
-- When the fix is OBVIOUS and simple, include a "suggestion" field with the corrected code
-- The suggestion should contain ONLY the fixed code (not diff markers like +/-)
-- Only include suggestions for simple fixes (typos, obvious bugs, missing fields, etc.)
-- Do NOT include suggestions for complex refactorings or architectural changes
-- DO NOT suggest version/import changes unless there's an obvious typo
-- Format: just the corrected code, no explanations
-
+- When a fix is simple, provide a "suggestion" field.
+- **GitLab Syntax Requirement**: You must format the suggestion using relative line offsets based on your `start_line` and `end_line`.
+- **The Formula**:
+ 1. Calculate the offset: `L = end_line - start_line`.
+ 2. The header MUST be: ```suggestion:-L+0
+- **Example**: If `start_line` is 7 and `end_line` is 9, the offset `L` is 2. The header is ```suggestion:-2+0.
+- **Content**: The suggestion must include the full corrected code for every line from `start_line` to `end_line`.
+- **Indentation**: You MUST preserve the exact leading whitespace of the original code.
+- Format:
+```suggestion:-L+0
+[CORRECTED CODE BLOCK]
+```
Output format: JSON array of issue objects following this schema:
{IssueModel.model_json_schema()}
diff --git a/src/reviewbot/agent/workflow.py b/src/reviewbot/agent/workflow.py
index 7090015..0de5d04 100644
--- a/src/reviewbot/agent/workflow.py
+++ b/src/reviewbot/agent/workflow.py
@@ -1,4 +1,5 @@
import fnmatch
+import hashlib
import re
from dataclasses import dataclass
from pathlib import Path
@@ -40,6 +41,7 @@
from reviewbot.tools import (
get_diff,
read_file,
+ think,
)
console = Console()
@@ -192,87 +194,44 @@ def filter_diffs(diffs: list[FileDiff], reviewignore_patterns: list[str]) -> lis
return filtered
-def _extract_code_from_diff(diff_patch: str, line_start: int, line_end: int) -> str:
- """
- Extract code lines from a unified diff patch for a given line range.
-
- Args:
- diff_patch: The unified diff patch string
- line_start: Starting line number (1-indexed, in the new file)
- line_end: Ending line number (1-indexed, in the new file)
-
- Returns:
- String containing the code lines from the diff
- """
- import re
-
- lines = diff_patch.splitlines(keepends=True)
- result_lines: list[str] = []
- current_new_line = 0
- current_old_line = 0
- in_target_range = False
+def _extract_code_from_diff(diff_text: str, line_start: int, line_end: int) -> str:
+ hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")
+ lines = diff_text.splitlines()
- # Pattern to match hunk headers: @@ -old_start,old_count +new_start,new_count @@
- hunk_header_re = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")
+ extracted = []
+ current_new = 0
+ in_hunk = False
for line in lines:
- # Check if this is a hunk header
- match = hunk_header_re.match(line)
+ match = hunk_header_pattern.match(line)
if match:
- # new_start is the line number in the new file where this hunk starts
- new_start = int(match.group(3))
- old_start = int(match.group(1))
- current_new_line = new_start
- current_old_line = old_start
- # Check if this hunk overlaps with our target range
- new_count = int(match.group(4)) if match.group(4) else 1
- in_target_range = new_start <= line_end and (new_start + new_count) >= line_start
+ current_new = int(match.group(3))
+ in_hunk = True
continue
- # Skip diff header lines
- if line.startswith("diff --git") or line.startswith("---") or line.startswith("+++"):
+ if not in_hunk:
continue
- # Process diff lines - keep the prefixes to show the actual diff
- # Include context lines to show proper indentation and structure
+ # We only care about the lines in the NEW file (the result of the change)
if line.startswith("+"):
- # Added line - this is in the new file
- if current_new_line >= line_start and current_new_line <= line_end:
- # Ensure space after '+' for proper markdown diff formatting
- if len(line) > 1 and line[1] != " ":
- formatted_line = "+ " + line[1:]
- else:
- formatted_line = line
- result_lines.append(formatted_line)
- current_new_line += 1
+ if line_start <= current_new <= line_end:
+ extracted.append(line[1:]) # Remove '+'
+ current_new += 1
elif line.startswith("-"):
+ # Skip deleted lines for code extraction of the 'new' state
continue
- # Removed line - include it to show what was removed
- # Include removals that are in the same hunk as our target range
- # Also include nearby removals for context
- if in_target_range or (
- current_old_line >= line_start - 3 and current_old_line <= line_end + 3
- ):
- # Ensure space after '-' for proper markdown diff formatting
- if len(line) > 1 and line[1] != " ":
- formatted_line = "- " + line[1:]
- else:
- formatted_line = line
- result_lines.append(formatted_line)
- current_old_line += 1
- elif line.startswith(" "):
- # Context line - this exists in both old and new files
- # Include context lines within the range and a few lines before/after for structure
- if current_new_line >= line_start - 2 and current_new_line <= line_end + 2:
- # Context lines already have space prefix
- result_lines.append(line)
- current_new_line += 1
- current_old_line += 1
- elif line.startswith("\\"):
- # End of file marker - skip
- continue
+ else:
+ # Context line
+ if line_start <= current_new <= line_end:
+ extracted.append(line[1:] if line else "")
+ current_new += 1
- return "".join(result_lines)
+ # FIX: Exit early if we've passed the end of our requested range
+ if current_new > line_end:
+ if extracted: # Only break if we actually found lines
+ break
+
+ return "\n".join(extracted)
@dataclass
@@ -456,6 +415,7 @@ def update_review_summary(
note_id: str,
issues: list[Issue],
diffs: list[FileDiff],
+ diff_refs: dict[str, str],
agent: Agent,
) -> None:
"""
@@ -471,6 +431,7 @@ def update_review_summary(
note_id: Note ID to update
issues: List of issues found during review
diffs: List of file diffs that were reviewed
+ diff_refs: Diff references including head_sha and project_web_url
agent: The agent to use for generating summary
"""
from langchain_core.messages import HumanMessage, SystemMessage
@@ -506,14 +467,24 @@ def update_review_summary(
messages = [
SystemMessage(
- content="""You are a code review assistant. Generate a concise, professional summary of a code review with reasoning.
+ content="""You are a Merge Request reviewer. Generate a concise, professional summary of a code review with reasoning.
IMPORTANT:
-- Keep it SHORT (3-5 sentences max)
-- Provide reasoning about the overall code quality
+- Use EXACTLY two paragraphs, each wrapped in tags.
+- Provide reasoning about the overall merge request purpose and code quality.
- Highlight key concerns or positive aspects
- Be constructive and professional
- DO NOT use any tools
+- Use paragraphs with readable flow.
+Paragraphs should be wrapped with
tags. Use new
tag for a newline.
+Example
+
+paragraph
+
+
+
+paragraph2
+
- Focus on the big picture, not individual issue details"""
),
HumanMessage(
@@ -530,13 +501,11 @@ def update_review_summary(
**Issues found:**
{issues_text}
-Generate a brief summary (3-5 sentences) that:
-1. Provides overall assessment of the code quality
+- Use EXACTLY two paragraphs, each wrapped in tags.
+1. Provides overall assessment of the purpose of the merge request and code quality.
2. Highlights the most important concerns (if any)
3. Gives reasoning about the review findings
-4. Is constructive and actionable
-
-If no issues were found, celebrate the good code quality."""
+4. Is constructive and actionable """
),
]
@@ -556,6 +525,9 @@ def update_review_summary(
]
if issues:
+ project_web_url = diff_refs.get("project_web_url")
+ head_sha = diff_refs.get("head_sha")
+
summary_parts.append("**Issue Breakdown**\n\n")
if high_count > 0:
summary_parts.append(
@@ -570,34 +542,69 @@ def update_review_summary(
f'
\n'
)
- summary_parts.append("\n
\n
\n\n**Issues by File**\n\n")
- for file_path, file_issues in sorted(issues_by_file.items()):
- high = sum(1 for i in file_issues if i.severity == IssueSeverity.HIGH)
- medium = sum(1 for i in file_issues if i.severity == IssueSeverity.MEDIUM)
- low = sum(1 for i in file_issues if i.severity == IssueSeverity.LOW)
+ summary_parts.append("\n
\n
\n\n")
+
+ non_dedicated_issues = [issue for issue in issues if not issue.discussion_id]
+ if non_dedicated_issues:
+ issues_by_file: dict[str, list[Issue]] = {}
+ for issue in non_dedicated_issues:
+ issues_by_file.setdefault(issue.file_path, []).append(issue)
+
+ severity_badge_colors = {
+ IssueSeverity.HIGH: "red",
+ IssueSeverity.MEDIUM: "orange",
+ IssueSeverity.LOW: "green",
+ }
+
+ for file_path, file_issues in sorted(issues_by_file.items()):
+ summary_parts.append(f"### š {file_path}\n\n")
+ for issue in file_issues:
+ file_diff = next((fd for fd in diffs if fd.new_path == issue.file_path), None)
+ code_snippet = ""
+ if file_diff:
+ code_snippet = _extract_code_from_diff(
+ file_diff.patch,
+ issue.start_line,
+ issue.end_line,
+ )
+ if not code_snippet:
+ code_snippet = "(no diff context available)"
+
+ label = issue.severity.value.upper()
+ badge_color = severity_badge_colors[issue.severity]
+ file_url = None
+ if project_web_url and head_sha:
+ escaped_path = quote(issue.file_path, safe="/")
+ if issue.start_line == issue.end_line:
+ anchor = f"#L{issue.start_line}"
+ else:
+ anchor = f"#L{issue.start_line}-L{issue.end_line}"
+ file_url = f"{project_web_url}/-/blob/{head_sha}/{escaped_path}{anchor}"
+ if file_url:
+ location_line = (
+ f''
+ f"#L {issue.start_line}-{issue.end_line}"
+ f""
+ )
+ else:
+ location_line = f"#L {issue.start_line}-{issue.end_line}"
- # File name and issue count
- summary_parts.append(f"`{file_path}`: {len(file_issues)} issue(s) \n\n")
+ issue_body = f"""{issue.description}
+"""
+ summary_parts.append(
+ f"""
+
{issue.title} ({location_line})
+
+{issue_body}
- # Badges on separate lines
- if high > 0:
- summary_parts.append(
- f'
\n'
- )
- if medium > 0:
- summary_parts.append(
- f'
\n'
- )
- if low > 0:
- summary_parts.append(
- f'
\n'
- )
- # Spacing between files
- summary_parts.append("\n
\n
\n\n")
+
+"""
+ )
+ summary_parts.append("\n")
else:
summary_parts.append(
- '\n
**No issues found!**\n'
+ '\n
\n'
)
summary_parts.append("\n---\n*Review powered by ReviewBot*")
@@ -643,6 +650,7 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str:
tools = [
get_diff, # Primary tool: get the diff for the file
read_file, # Optional: get additional context if needed
+ think, # Internal reasoning and thought process
]
agent: Agent = create_agent(
@@ -686,7 +694,7 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str:
)
low_effort_agent: Agent = create_agent(
model=low_effort_model,
- tools=[get_diff], # Only needs get_diff for quick scanning
+ tools=[get_diff, think], # Only needs get_diff for quick scanning
)
# Post acknowledgment that review is starting
@@ -713,6 +721,11 @@ def on_file_review_complete(file_path: str, issues: list[Any]) -> None:
if not issues:
console.print(f"[dim]No issues found in {file_path}, skipping discussion[/dim]")
return
+ if not config.create_threads:
+ console.print(
+ f"[dim]Thread creation disabled, deferring issues in {file_path} to summary[/dim]"
+ )
+ return
# Convert IssueModel to Issue domain objects
from reviewbot.core.issues.issue_model import IssueModel
@@ -749,6 +762,7 @@ def on_file_review_complete(file_path: str, issues: list[Any]) -> None:
note_id=note_id,
issues=issues,
diffs=filtered_diffs,
+ diff_refs=diff_refs,
agent=low_effort_agent,
)
console.print("[dim]update_review_summary completed[/dim]")
@@ -782,7 +796,8 @@ def handle_file_issues(
diff_refs: dict[str, str], # Add this parameter (contains base_sha, head_sha, start_sha)
) -> None:
"""
- Create one discussion per file with the first issue, and reply with subsequent issues.
+ Create positioned discussions for a capped set of high-priority issues, and
+ group the rest into a single per-file discussion with replies.
Args:
file_path: Path to the file being reviewed
@@ -809,180 +824,124 @@ def handle_file_issues(
IssueSeverity.LOW: "#28a745", # green
}
- discussion_id = None
-
- # Process the first issue - create a discussion with position
- first_issue = issues[0]
- discussion_title = ""
-
- color = severity_color_pairs[first_issue.severity].strip("#")
-
- # Build the discussion body with optional suggestion
- discussion_body = f"""
-
-{first_issue.description}
-"""
+ max_dedicated_threads = 3
+ dedicated_issues: list[Issue] = []
+ reply_issues: list[Issue] = []
- # Add suggestion if available (GitLab will render it as an applicable suggestion)
- if first_issue.suggestion:
- discussion_body += f"""
-
-```suggestion
-{first_issue.suggestion}
-```
-"""
-
- # Create position for the first issue
- position = None
- if (
- file_diff
- and base_sha
- and head_sha
- and start_sha
- and file_diff.old_path
- and file_diff.new_path
- ):
- position = create_position_for_issue(
- diff_text=file_diff.patch,
- issue_line_start=first_issue.start_line,
- issue_line_end=first_issue.end_line,
- base_sha=base_sha,
- head_sha=head_sha,
- start_sha=start_sha,
- old_path=file_diff.old_path,
- new_path=file_diff.new_path,
- )
-
- # Create discussion for the first issue
- try:
- discussion_id = create_discussion(
- title=discussion_title,
- body=discussion_body,
- gitlab_config=gitlab_config,
- position=position,
- )
- console.print(
- f"[green]✓ Created discussion for issue at lines {first_issue.start_line}-{first_issue.end_line} (ID: {discussion_id})[/green]"
- )
- except Exception as e:
- if position:
- # If position was provided and it failed, try without position
- console.print(
- f"[yellow]Failed with position for lines {first_issue.start_line}-{first_issue.end_line}, retrying without position: {e}[/yellow]"
- )
- try:
- discussion_id = create_discussion(
- title=discussion_title,
- body=discussion_body,
- gitlab_config=gitlab_config,
- position=None,
- )
- console.print(
- f"[green]✓ Created discussion without position (ID: {discussion_id})[/green]"
- )
- except Exception as e2:
- console.print(
- f"[red]✗ Failed to create discussion for issue at lines {first_issue.start_line}-{first_issue.end_line}: {e2}[/red]"
- )
- import traceback
-
- traceback.print_exc()
- return # Can't proceed without a discussion
+ for issue in issues:
+ needs_dedicated = issue.suggestion is not None or issue.severity == IssueSeverity.HIGH
+ if needs_dedicated and len(dedicated_issues) < max_dedicated_threads:
+ dedicated_issues.append(issue)
else:
- console.print(
- f"[red]✗ Failed to create discussion for issue at lines {first_issue.start_line}-{first_issue.end_line}: {e}[/red]"
+ reply_issues.append(issue)
+
+ def build_position(issue: Issue) -> dict[str, Any] | None:
+ if (
+ file_diff
+ and base_sha
+ and head_sha
+ and start_sha
+ and file_diff.old_path
+ and file_diff.new_path
+ ):
+ return create_position_for_issue(
+ diff_text=file_diff.patch,
+ issue_line_start=issue.start_line,
+ issue_line_end=issue.end_line,
+ base_sha=base_sha,
+ head_sha=head_sha,
+ start_sha=start_sha,
+ old_path=file_diff.old_path,
+ new_path=file_diff.new_path,
)
- import traceback
-
- traceback.print_exc()
- return # Can't proceed without a discussion
-
- # Process remaining issues - reply to the discussion with diff blocks
- for issue in issues[1:]:
- if not discussion_id:
- console.print(
- f"[yellow]⚠ Skipping issue at lines {issue.start_line}-{issue.end_line} (no discussion created)[/yellow]"
- )
- continue
-
- # Extract the relevant code from the diff
- code_snippet = ""
- if file_diff:
- code_snippet = _extract_code_from_diff(
- file_diff.patch,
- issue.start_line,
- issue.end_line,
- )
-
- label = issue.severity.value.upper()
- color = severity_color_pairs[issue.severity]
+ return None
- # Format the reply with a diff block and optional suggestion
- reply_body = f"""
" />
+ def create_discussion_for_issue(issue: Issue, include_suggestion: bool = True) -> str | None:
+ discussion_title = ""
+ color = severity_color_pairs[issue.severity].strip("#")
+ discussion_body = f"""
{issue.description}
"""
+ if include_suggestion and issue.suggestion:
+ discussion_body += f"""
- # Add suggestion if available (GitLab will render it as an applicable suggestion)
- if issue.suggestion:
- reply_body += f"""
-
-```suggestion
{issue.suggestion}
-```
"""
- else:
- # If no suggestion, show the diff context
- reply_body += f"""
-```diff
-{code_snippet}
-```
-"""
+ position = build_position(issue)
+ if position:
+ console.print(
+ f"[dim]Position object for lines {issue.start_line}-{issue.end_line}:[/dim]"
+ )
+ import json
+
+ console.print(f"[dim]{json.dumps(position, indent=2)}[/dim]")
- # Reply to the discussion
try:
- reply_to_discussion(
- discussion_id=discussion_id,
- body=reply_body,
+ discussion_id = create_discussion(
+ title=discussion_title,
+ body=discussion_body,
gitlab_config=gitlab_config,
+ position=position,
)
+ issue.discussion_id = discussion_id
console.print(
- f"[green]✓ Added reply for issue at lines {issue.start_line}-{issue.end_line}[/green]"
+ f"[green]✓ Created discussion for issue at lines {issue.start_line}-{issue.end_line} (ID: {discussion_id})[/green]"
)
+ return discussion_id
except Exception as e:
+ if position:
+ console.print(
+ f"[yellow]Failed with position for lines {issue.start_line}-{issue.end_line}, retrying without position: {e}[/yellow]"
+ )
+ try:
+ discussion_id = create_discussion(
+ title=discussion_title,
+ body=discussion_body,
+ gitlab_config=gitlab_config,
+ position=None,
+ )
+ issue.discussion_id = discussion_id
+ console.print(
+ f"[green]✓ Created discussion without position (ID: {discussion_id})[/green]"
+ )
+ return discussion_id
+ except Exception as e2:
+ console.print(
+ f"[red]✗ Failed to create discussion for issue at lines {issue.start_line}-{issue.end_line}: {e2}[/red]"
+ )
+ import traceback
+
+ traceback.print_exc()
+ return None
+
console.print(
- f"[red]✗ Failed to reply for issue at lines {issue.start_line}-{issue.end_line}: {e}[/red]"
+ f"[red]✗ Failed to create discussion for issue at lines {issue.start_line}-{issue.end_line}: {e}[/red]"
)
import traceback
traceback.print_exc()
+ return None
+ for issue in dedicated_issues:
+ create_discussion_for_issue(issue, include_suggestion=True)
-def _generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str:
- """
- Generate a line_code string for GitLab position API.
-
- Format: __
+ if reply_issues:
+ console.print(
+ f"[dim]Leaving {len(reply_issues)} non-dedicated issue(s) for the summary[/dim]"
+ )
- Args:
- file_path: Path to the file
- old_line: Line number in old file (or None)
- new_line: Line number in new file (or None)
- Returns:
- Line code string
+def generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str:
"""
- import hashlib
-
- # Generate SHA1 hash of the file path
- filename_sha = hashlib.sha1(file_path.encode()).hexdigest()
-
- # Format: sha_old_new, using empty string for None values
- old_str = str(old_line) if old_line is not None else ""
- new_str = str(new_line) if new_line is not None else ""
-
- return f"{filename_sha}_{old_str}_{new_str}"
+ Generates a GitLab-compatible line_code.
+ Format: sha1(path) + "_" + old_line + "_" + new_line
+ """
+ path_hash = hashlib.sha1(file_path.encode()).hexdigest()
+ old_s = str(old_line) if old_line is not None else ""
+ new_s = str(new_line) if new_line is not None else ""
+ return f"{path_hash}_{old_s}_{new_s}"
def create_position_for_issue(
@@ -995,127 +954,88 @@ def create_position_for_issue(
old_path: str,
new_path: str,
) -> dict[str, Any] | None:
- """
- Create a GitLab position object for a specific issue line range.
-
- Args:
- diff_text: The full diff text for the file
- issue_line_start: Start line number of the issue (in new file)
- issue_line_end: End line number of the issue (in new file)
- base_sha, head_sha, start_sha: GitLab diff refs
- old_path, new_path: File paths
-
- Returns:
- Position dict for GitLab API, or None if line not found in diff
- """
hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")
-
lines = diff_text.splitlines()
- current_old = 0
- current_new = 0
+
+ current_old, current_new = 0, 0
in_hunk = False
- # Track all candidate lines in the range
- # Priority: added lines > context lines > deleted lines
- added_lines = []
- context_lines = []
- deleted_lines = []
+ # Track the actual lines found in the diff to build the range
+ matched_lines = [] # List of (old_line, new_line)
for line in lines:
- # Check for hunk header
match = hunk_header_pattern.match(line)
if match:
- current_old = int(match.group(1))
- current_new = int(match.group(3))
+ current_old, current_new = int(match.group(1)), int(match.group(3))
in_hunk = True
continue
if not in_hunk:
continue
- # Collect all matching lines in the range
- if line.startswith("-"):
- # Deletion - only has old line number
- if current_old >= issue_line_start and current_old <= issue_line_end:
- deleted_lines.append((current_old, None))
- current_old += 1
- elif line.startswith("+"):
- # Addition - only has new line number
- if current_new >= issue_line_start and current_new <= issue_line_end:
- added_lines.append((None, current_new))
+ # Logic to determine if this specific line is within our target range
+ if line.startswith("+"):
+ if issue_line_start <= current_new <= issue_line_end:
+ matched_lines.append((None, current_new))
current_new += 1
+ elif line.startswith("-"):
+ if issue_line_start <= current_old <= issue_line_end:
+ matched_lines.append((current_old, None))
+ current_old += 1
else:
- # Context line - has both
- if current_new >= issue_line_start and current_new <= issue_line_end:
- context_lines.append((current_old, current_new))
+ if issue_line_start <= current_new <= issue_line_end:
+ matched_lines.append((current_old, current_new))
current_old += 1
current_new += 1
- # Determine start and end lines for the position
- # Priority: added lines > context lines > deleted lines
- start_old_line = None
- start_new_line = None
- end_old_line = None
- end_new_line = None
-
- if added_lines:
- # Use the first and last added lines in the range
- start_old_line, start_new_line = added_lines[0]
- end_old_line, end_new_line = added_lines[-1]
- elif context_lines:
- # Use the first and last context lines
- start_old_line, start_new_line = context_lines[0]
- end_old_line, end_new_line = context_lines[-1]
- elif deleted_lines:
- # Use the first and last deleted lines
- start_old_line, start_new_line = deleted_lines[0]
- end_old_line, end_new_line = deleted_lines[-1]
-
- # If we didn't find any line in the diff, return None
- if start_old_line is None and start_new_line is None:
+ # FIX: Optimization to prevent "sticky" hunk matching.
+ # If we have passed the end_line in the NEW file, we stop.
+ if current_new > issue_line_end and not line.startswith("-"):
+ if matched_lines:
+ break
+
+ if not matched_lines:
return None
- # Create position object
+ # We anchor the comment to the LAST line of the range so the code is visible
+ start_old, start_new = matched_lines[0]
+ end_old, end_new = matched_lines[-1]
+
+ # Calculate line codes for the range
+ start_code = generate_line_code(new_path if start_new else old_path, start_old, start_new)
+ end_code = generate_line_code(new_path if end_new else old_path, end_old, end_new)
+
position = {
"base_sha": base_sha,
"head_sha": head_sha,
"start_sha": start_sha,
+ "position_type": "text",
"old_path": old_path,
"new_path": new_path,
- "position_type": "text",
- }
-
- # For single-line issues, use the simple format
- if issue_line_start == issue_line_end:
- if start_new_line is not None:
- position["new_line"] = start_new_line
- if start_old_line is not None:
- position["old_line"] = start_old_line
- else:
- # For multi-line issues, use line_range
- # Determine the line type (new for added lines, old for deleted lines)
- line_type = "new" if start_new_line is not None else "old"
-
- position["line_range"] = {
+ # Anchor the main comment on the end of the range
+ "new_line": end_new,
+ "old_line": end_old,
+ "line_range": {
"start": {
- "line_code": _generate_line_code(new_path, start_old_line, start_new_line),
- "type": line_type,
+ "line_code": start_code,
+ "type": "new" if start_new else "old",
+ "new_line": start_new,
+ "old_line": start_old,
},
"end": {
- "line_code": _generate_line_code(new_path, end_old_line, end_new_line),
- "type": line_type,
+ "line_code": end_code,
+ "type": "new" if end_new else "old",
+ "new_line": end_new,
+ "old_line": end_old,
},
- }
-
- # Add line numbers to start and end
- if start_old_line is not None:
- position["line_range"]["start"]["old_line"] = start_old_line
- if start_new_line is not None:
- position["line_range"]["start"]["new_line"] = start_new_line
- if end_old_line is not None:
- position["line_range"]["end"]["old_line"] = end_old_line
- if end_new_line is not None:
- position["line_range"]["end"]["new_line"] = end_new_line
+ },
+ }
+
+ # Cleanup: GitLab doesn't like None values in the schema
+ if position["new_line"] is None:
+ del position["new_line"]
+ if position["old_line"] is None:
+ del position["old_line"]
return position
diff --git a/src/reviewbot/core/config.py b/src/reviewbot/core/config.py
index 91c4767..a44e423 100644
--- a/src/reviewbot/core/config.py
+++ b/src/reviewbot/core/config.py
@@ -11,3 +11,4 @@ class Config:
gitlab_api_v4: str
gitlab_token: str
gemini_project_id: str
+ create_threads: bool = False
diff --git a/src/reviewbot/infra/config/env.py b/src/reviewbot/infra/config/env.py
index e8d631a..e45b957 100644
--- a/src/reviewbot/infra/config/env.py
+++ b/src/reviewbot/infra/config/env.py
@@ -14,6 +14,7 @@ def load_env() -> Config:
gitlab_api_v4 = os.getenv("GITLAB_API_V4_URL")
gitlab_token = os.getenv("GITLAB_BOT_TOKEN")
gemini_project_id = os.getenv("GEMINI_PROJECT_ID")
+ create_threads_raw = os.getenv("REVIEWBOT_CREATE_THREADS", "true")
if (
not llm_api_key
or not llm_base_url
@@ -25,6 +26,7 @@ def load_env() -> Config:
raise ValueError(
"LLM_API_KEY, LLM_BASE_URL, LLM_MODEL, GITLAB_API_V4_URL, GITLAB_BOT_TOKEN, and GEMINI_PROJECT_ID must be set"
)
+ create_threads = create_threads_raw.strip().lower() not in {"0", "false", "no", "off"}
return Config(
llm_api_key=SecretStr(llm_api_key),
llm_base_url=llm_base_url,
@@ -32,4 +34,5 @@ def load_env() -> Config:
gitlab_api_v4=gitlab_api_v4,
gitlab_token=gitlab_token,
gemini_project_id=gemini_project_id,
+ create_threads=create_threads,
)
diff --git a/src/reviewbot/infra/gitlab/diff.py b/src/reviewbot/infra/gitlab/diff.py
index 8e3c2f0..0743a16 100644
--- a/src/reviewbot/infra/gitlab/diff.py
+++ b/src/reviewbot/infra/gitlab/diff.py
@@ -128,10 +128,13 @@ def fetch_mr_diffs(
mr_data = mr_response.json()
# Get diff_refs for position objects
- diff_refs = mr_data.get("diff_refs", {})
+ diff_refs = mr_data.get("diff_refs") or {}
base_sha = diff_refs.get("base_sha")
head_sha = diff_refs.get("head_sha")
start_sha = diff_refs.get("start_sha")
+ mr_web_url = mr_data.get("web_url")
+ if mr_web_url and "/-/merge_requests/" in mr_web_url:
+ diff_refs["project_web_url"] = mr_web_url.split("/-/merge_requests/")[0]
# Try the new JSON changes endpoint first
changes_response = requests.get(changes_url, headers=headers, timeout=timeout)
diff --git a/src/reviewbot/infra/gitlab/note.py b/src/reviewbot/infra/gitlab/note.py
index 535c6c7..5807e05 100644
--- a/src/reviewbot/infra/gitlab/note.py
+++ b/src/reviewbot/infra/gitlab/note.py
@@ -58,9 +58,14 @@ def post_discussion(
# For file-level discussions without specific lines, don't include position
data: dict[str, Any] = {"body": body}
if position:
- # Only include position if it has required fields (new_line or old_line)
- # Otherwise GitLab will reject it as incomplete
- has_line_info = "new_line" in position or "old_line" in position or "line_code" in position
+ # Only include position if it has required fields
+ # Can have: new_line, old_line, line_code (single line) OR line_range (multi-line)
+ has_line_info = (
+ "new_line" in position
+ or "old_line" in position
+ or "line_code" in position
+ or "line_range" in position # Support multi-line positions
+ )
if has_line_info:
data["position"] = position
else:
diff --git a/src/reviewbot/models/gpt.py b/src/reviewbot/models/gpt.py
index 84201d9..db28e8c 100644
--- a/src/reviewbot/models/gpt.py
+++ b/src/reviewbot/models/gpt.py
@@ -16,7 +16,7 @@ def get_gpt_model(
llm_api_key: SecretStr,
base_url: str,
temperature: float = 0.2,
- reasoning_effort: str = "medium",
+ reasoning_effort: str = "low",
):
return ChatOpenAI(
model=llm_model_name,
diff --git a/src/reviewbot/tools/__init__.py b/src/reviewbot/tools/__init__.py
index 9aced75..574e906 100644
--- a/src/reviewbot/tools/__init__.py
+++ b/src/reviewbot/tools/__init__.py
@@ -4,10 +4,12 @@
search_codebase,
search_codebase_semantic_search,
)
+from .think import think
__all__ = [
"get_diff",
"read_file",
"search_codebase",
"search_codebase_semantic_search",
+ "think",
]
diff --git a/src/reviewbot/tools/think.py b/src/reviewbot/tools/think.py
new file mode 100644
index 0000000..0c6b052
--- /dev/null
+++ b/src/reviewbot/tools/think.py
@@ -0,0 +1,45 @@
+from langchain.tools import tool # type: ignore
+
+from reviewbot.context import store_manager_ctx
+
+
@tool
def think(reasoning: str) -> str:
    """Record internal reasoning and thought process.

    Use this tool to think through problems, plan your approach, or reason about code before taking action.
    The reasoning is stored and will be included in subsequent requests to maintain context.

    Args:
        reasoning: Your internal thoughts, analysis, or reasoning about the current task.
            This can include:
            - Analysis of code patterns
            - Planning next steps
            - Reasoning about potential issues
            - Conclusions drawn from observations

    Returns:
        Confirmation that the reasoning was recorded

    Examples:
        - "I notice this function has multiple responsibilities. It handles both data validation
          and API calls, which violates the Single Responsibility Principle."
        - "Before checking for issues, I should first understand the overall structure.
          The code appears to be a REST API with three main endpoints."
        - "This looks like a potential security issue - user input is being directly
          concatenated into a SQL query. I should flag this as high severity."
    """
    # NOTE: the docstring above is runtime behavior — langchain's @tool sends
    # it to the model as the tool description, so editing it changes prompting.
    # Fetch the ambient store context; assumes a mapping with an "issue_store"
    # entry — TODO confirm against store_manager_ctx's contract.
    context = store_manager_ctx.get()
    issue_store = context.get("issue_store")

    if not issue_store:
        return "Context not available for storing reasoning."

    # Store reasoning in the issue store's metadata
    # NOTE(review): this lazily grafts a private attribute onto a foreign
    # object; consider a first-class field on the store instead.
    if not hasattr(issue_store, "_reasoning_history"):
        issue_store._reasoning_history = []

    issue_store._reasoning_history.append(reasoning)
    # NOTE(review): bare prints look like debug leftovers; prefer logging.
    print("Reasoned:")
    print(reasoning)
    # Echo a truncated confirmation (first 100 chars) back to the model.
    return f"Reasoning recorded: {reasoning[:100]}{'...' if len(reasoning) > 100 else ''}"