From 74c8f734d6202ce07249d53bc0ba0df03e6f75f3 Mon Sep 17 00:00:00 2001
From: canefe <8518141+canefe@users.noreply.github.com>
Date: Wed, 7 Jan 2026 17:59:08 +0400
Subject: [PATCH 1/2] refactor: restructure workflow and enhance GitLab
integration
- Moved `post_mr_note` to a new `gitlab_notes` module for better organization.
- Created a `config` module to encapsulate GitLab API configuration settings.
- Introduced a `diff_extract` module for handling code extraction from diffs.
- Added a `discussions` module to manage discussion creation and replies in GitLab.
- Implemented an `ignore` module to handle file ignore patterns and filtering logic.
- Updated the `runner` module to utilize the new structure, improving code clarity and maintainability.
- Enhanced the `IssueModel` to include discussion and note IDs for better tracking of issues in GitLab discussions.
---
api.py | 3 +-
src/reviewbot/agent/tasks/issues.py | 8 +-
src/reviewbot/agent/workflow.py | 1123 ------------------
src/reviewbot/agent/workflow/__init__.py | 4 +
src/reviewbot/agent/workflow/config.py | 11 +
src/reviewbot/agent/workflow/diff_extract.py | 167 +++
src/reviewbot/agent/workflow/discussions.py | 238 ++++
src/reviewbot/agent/workflow/gitlab_notes.py | 393 ++++++
src/reviewbot/agent/workflow/hooks.py | 29 +
src/reviewbot/agent/workflow/ignore.py | 155 +++
src/reviewbot/agent/workflow/runner.py | 183 +++
src/reviewbot/core/issues/issue.py | 1 +
src/reviewbot/core/issues/issue_model.py | 2 +
src/reviewbot/infra/gitlab/note.py | 14 +-
14 files changed, 1197 insertions(+), 1134 deletions(-)
delete mode 100644 src/reviewbot/agent/workflow.py
create mode 100644 src/reviewbot/agent/workflow/__init__.py
create mode 100644 src/reviewbot/agent/workflow/config.py
create mode 100644 src/reviewbot/agent/workflow/diff_extract.py
create mode 100644 src/reviewbot/agent/workflow/discussions.py
create mode 100644 src/reviewbot/agent/workflow/gitlab_notes.py
create mode 100644 src/reviewbot/agent/workflow/hooks.py
create mode 100644 src/reviewbot/agent/workflow/ignore.py
create mode 100644 src/reviewbot/agent/workflow/runner.py
diff --git a/api.py b/api.py
index fdf2130..3a518cc 100644
--- a/api.py
+++ b/api.py
@@ -5,7 +5,8 @@
from fastapi import BackgroundTasks, FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
-from src.reviewbot.agent.workflow import post_mr_note, work_agent
+from src.reviewbot.agent.workflow import work_agent
+from src.reviewbot.agent.workflow.gitlab_notes import post_mr_note
from src.reviewbot.infra.config.env import load_env
dotenv.load_dotenv()
diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py
index 9a58f23..41d0197 100644
--- a/src/reviewbot/agent/tasks/issues.py
+++ b/src/reviewbot/agent/tasks/issues.py
@@ -463,7 +463,7 @@ def review_single_file(
- Only report issues with clear negative impact (bugs, security risks, performance problems, logic errors)
- Avoid reporting issues about code style, formatting, or personal preferences unless they violate critical standards
- Medium/High severity issues should be reserved for actual bugs, security vulnerabilities, or broken functionality
-- The `description` field MUST include a fenced ```diff block quoting only the relevant added/removed/context lines without (@@ but + - is fine), followed by a short plain-text explanation (1-3 sentences)
+- The `description` field MUST include a short plain-text explanation (1-3 sentences).
CRITICAL - KNOWLEDGE CUTOFF AWARENESS:
Your training data has a cutoff date. The code you're reviewing may use:
@@ -490,13 +490,11 @@ def review_single_file(
- When a fix is simple, provide a "suggestion" field.
- **GitLab Syntax Requirement**: You must format the suggestion using relative line offsets based on your `start_line` and `end_line`.
- **The Formula**:
- 1. Calculate the offset: `L = end_line - start_line`.
- 2. The header MUST be: ```suggestion:-L+0
-- **Example**: If `start_line` is 7 and `end_line` is 9, the offset `L` is 2. The header is ```suggestion:-2+0.
+1. The header MUST be: ```diff
- **Content**: The suggestion must include the full corrected code for every line from `start_line` to `end_line`.
- **Indentation**: You MUST preserve the exact leading whitespace of the original code.
- Format:
-```suggestion:-L+0
+```diff
[CORRECTED CODE BLOCK]
```
Output format: JSON array of issue objects following this schema:
diff --git a/src/reviewbot/agent/workflow.py b/src/reviewbot/agent/workflow.py
deleted file mode 100644
index 0de5d04..0000000
--- a/src/reviewbot/agent/workflow.py
+++ /dev/null
@@ -1,1123 +0,0 @@
-import fnmatch
-import hashlib
-import re
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any
-from urllib.parse import quote
-
-from langchain.agents import create_agent # type: ignore
-from langchain.agents.middleware import ( # type: ignore
- AgentState,
- before_agent,
- before_model,
-)
-from langgraph.pregel.main import Runtime # type: ignore
-from rich.console import Console # type: ignore
-
-from reviewbot.agent.base import ( # type: ignore
- AgentRunnerInput,
- agent_runner, # type: ignore
-)
-from reviewbot.agent.tasks.core import ToolCallerSettings
-from reviewbot.context import Context, store_manager_ctx
-from reviewbot.core.agent import Agent
-from reviewbot.core.config import Config
-from reviewbot.core.issues import Issue, IssueSeverity
-from reviewbot.infra.embeddings.store_manager import CodebaseStoreManager
-from reviewbot.infra.git.clone import clone_repo_persistent, get_repo_name
-from reviewbot.infra.git.repo_tree import tree
-from reviewbot.infra.gitlab.clone import build_clone_url
-from reviewbot.infra.gitlab.diff import FileDiff, fetch_mr_diffs, get_mr_branch
-from reviewbot.infra.gitlab.note import (
- get_all_discussions,
- post_discussion,
- post_discussion_reply,
- post_merge_request_note,
- update_discussion_note,
-)
-from reviewbot.infra.issues.in_memory_issue_store import InMemoryIssueStore
-from reviewbot.models.gpt import get_gpt_model, get_gpt_model_low_effort
-from reviewbot.tools import (
- get_diff,
- read_file,
- think,
-)
-
-console = Console()
-
-# Global blacklist for common files that typically don't need code review
-GLOBAL_REVIEW_BLACKLIST = [
- # Dependency management files
- "package-lock.json",
- "yarn.lock",
- "pnpm-lock.yaml",
- "Gemfile.lock",
- "Pipfile.lock",
- "poetry.lock",
- "composer.lock",
- "go.sum",
- "go.mod",
- "Cargo.lock",
- # Build and distribution files
- "*.min.js",
- "*.min.css",
- "*.map",
- "dist/*",
- "build/*",
- "*.pyc",
- "*.pyo",
- "*.so",
- "*.dll",
- "*.exe",
- "*.o",
- "*.a",
- # Generated files
- "*.generated.*",
- "*_pb2.py",
- "*_pb2_grpc.py",
- "*.pb.go",
- # Documentation and assets
- "*.png",
- "*.jpg",
- "*.jpeg",
- "*.gif",
- "*.svg",
- "*.ico",
- "*.woff",
- "*.woff2",
- "*.ttf",
- "*.eot",
- # IDE and editor files
- ".vscode/*",
- ".idea/*",
- "*.swp",
- "*.swo",
- "*~",
-]
-
-
-def parse_reviewignore(repo_path: Path) -> list[str]:
- """
- Parse .reviewignore file from the repository.
-
- Args:
- repo_path: Path to the repository root
-
- Returns:
- List of glob patterns to ignore
- """
- reviewignore_path = repo_path / ".reviewignore"
- patterns = []
-
- if not reviewignore_path.exists():
- console.print("[dim].reviewignore file not found, using global blacklist only[/dim]")
- return patterns
-
- try:
- with open(reviewignore_path, encoding="utf-8") as f:
- for line in f:
- # Strip whitespace
- line = line.strip()
- # Skip empty lines and comments
- if not line or line.startswith("#"):
- continue
- patterns.append(line)
-
- console.print(f"[dim]Loaded {len(patterns)} patterns from .reviewignore[/dim]")
- except Exception as e:
- console.print(f"[yellow]Warning: Failed to read .reviewignore: {e}[/yellow]")
-
- return patterns
-
-
-def should_ignore_file(file_path: str, reviewignore_patterns: list[str]) -> bool:
- """
- Check if a file should be ignored based on .reviewignore patterns and global blacklist.
-
- Args:
- file_path: Path to the file (relative to repo root)
- reviewignore_patterns: Patterns from .reviewignore file
-
- Returns:
- True if the file should be ignored, False otherwise
- """
- # Normalize the file path (remove leading ./ or /)
- normalized_path = file_path.lstrip("./")
-
- # Check against global blacklist
- for pattern in GLOBAL_REVIEW_BLACKLIST:
- if fnmatch.fnmatch(normalized_path, pattern):
- return True
- # Also check just the filename for non-path patterns
- if "/" not in pattern and fnmatch.fnmatch(Path(normalized_path).name, pattern):
- return True
-
- # Check against .reviewignore patterns
- for pattern in reviewignore_patterns:
- if fnmatch.fnmatch(normalized_path, pattern):
- return True
- # Also check just the filename for non-path patterns
- if "/" not in pattern and fnmatch.fnmatch(Path(normalized_path).name, pattern):
- return True
-
- return False
-
-
-def filter_diffs(diffs: list[FileDiff], reviewignore_patterns: list[str]) -> list[FileDiff]:
- """
- Filter out diffs for files that should be ignored.
-
- Args:
- diffs: List of file diffs
- reviewignore_patterns: Patterns from .reviewignore file
-
- Returns:
- Filtered list of diffs
- """
- filtered = []
- ignored_count = 0
-
- for diff in diffs:
- # Use new_path if available, otherwise use old_path
- file_path = diff.new_path or diff.old_path
-
- if file_path and should_ignore_file(file_path, reviewignore_patterns):
- console.print(f"[dim]⊘ Ignoring {file_path}[/dim]")
- ignored_count += 1
- else:
- filtered.append(diff)
-
- if ignored_count > 0:
- console.print(f"[cyan]Filtered out {ignored_count} file(s) based on ignore patterns[/cyan]")
-
- return filtered
-
-
-def _extract_code_from_diff(diff_text: str, line_start: int, line_end: int) -> str:
- hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")
- lines = diff_text.splitlines()
-
- extracted = []
- current_new = 0
- in_hunk = False
-
- for line in lines:
- match = hunk_header_pattern.match(line)
- if match:
- current_new = int(match.group(3))
- in_hunk = True
- continue
-
- if not in_hunk:
- continue
-
- # We only care about the lines in the NEW file (the result of the change)
- if line.startswith("+"):
- if line_start <= current_new <= line_end:
- extracted.append(line[1:]) # Remove '+'
- current_new += 1
- elif line.startswith("-"):
- # Skip deleted lines for code extraction of the 'new' state
- continue
- else:
- # Context line
- if line_start <= current_new <= line_end:
- extracted.append(line[1:] if line else "")
- current_new += 1
-
- # FIX: Exit early if we've passed the end of our requested range
- if current_new > line_end:
- if extracted: # Only break if we actually found lines
- break
-
- return "\n".join(extracted)
-
-
-@dataclass
-class GitLabConfig:
- """GitLab API configuration"""
-
- api_v4: str
- token: str
- project_id: str
- mr_iid: str
-
-
-@before_model(can_jump_to=["end"])
-def check_message_limit(state: AgentState, runtime: Runtime) -> dict[str, Any] | None: # type: ignore
- messages = state["messages"]
- console.print("[blue]Before modelMessages:[/blue]")
- console.print(messages[-5:])
- console.print("[blue]Before model messages end.[/blue]")
- return None
-
-
-@before_agent(can_jump_to=["end"])
-def check_agent_messages(state: AgentState, runtime: Runtime) -> dict[str, Any] | None: # type: ignore
- messages = state["messages"]
- console.print("[red]Before agent messages:[/red]")
- console.print(messages[-5:])
- console.print("[red]Before agent messages end.[/red]")
- return None
-
-
-def post_review_acknowledgment(
- api_v4: str,
- token: str,
- project_id: str,
- mr_iid: str,
- agent: Agent,
- diffs: list[FileDiff],
-) -> tuple[str, str] | None:
- """
- Post a surface-level summary acknowledging the review is starting.
- Creates a discussion so it can be updated later.
- Only posts if no acknowledgment already exists to prevent duplicates.
-
- Args:
- api_v4: GitLab API v4 base URL
- token: GitLab API token
- project_id: Project ID
- mr_iid: Merge request IID
- agent: The agent to use for generating summary
- diffs: List of file diffs
-
- Returns:
- Tuple of (discussion_id, note_id) if created, None if already exists or failed
- """
- from langchain_core.messages import HumanMessage, SystemMessage
-
- # Check if an acknowledgment already exists
- try:
- discussions = get_all_discussions(
- api_v4=api_v4,
- token=token,
- project_id=project_id,
- mr_iid=mr_iid,
- )
-
- # Only reuse "Starting" acknowledgments (in-progress reviews)
- # Don't reuse "Complete" acknowledgments - create new ones for new review runs
- starting_marker = (
- ''
- )
-
- # Find ALL "Starting" acknowledgments, then pick the most recent one
- found_acknowledgments = []
- for discussion in discussions:
- notes = discussion.get("notes", [])
- for note in notes:
- body = note.get("body", "")
- # Only check for "Starting" marker - this means review is in progress
- if starting_marker in body:
- discussion_id = discussion.get("id")
- note_id = note.get("id")
- created_at = note.get("created_at", "")
- if discussion_id and note_id:
- found_acknowledgments.append(
- {
- "discussion_id": str(discussion_id),
- "note_id": str(note_id),
- "created_at": created_at,
- }
- )
-
- # If we found any in-progress acknowledgments, use the most recent one
- if found_acknowledgments:
- # Sort by created_at timestamp (most recent first)
- found_acknowledgments.sort(key=lambda x: x["created_at"], reverse=True)
- most_recent = found_acknowledgments[0]
- console.print(
- f"[dim]Found {len(found_acknowledgments)} in-progress review(s), reusing most recent[/dim]"
- )
- return (most_recent["discussion_id"], most_recent["note_id"])
-
- # No in-progress reviews found - will create a new acknowledgment
- console.print("[dim]No in-progress reviews found, will create new acknowledgment[/dim]")
- except Exception as e:
- console.print(f"[yellow]⚠ Could not check for existing acknowledgment: {e}[/yellow]")
- # Continue anyway - better to post a duplicate than miss it
-
- # Get list of files being reviewed
- file_list = [diff.new_path for diff in diffs if diff.new_path]
- files_summary = "\n".join([f"- `{f}`" for f in file_list[:10]]) # Limit to first 10
- if len(file_list) > 10:
- files_summary += f"\n- ... and {len(file_list) - 10} more files"
-
- # Generate a simple summary with very limited tool calls
- messages = [
- SystemMessage(
- content="""You are a code review assistant. Generate a brief, friendly acknowledgment that a code review is starting.
-
-IMPORTANT:
-- Keep it SHORT (2-3 sentences max)
-- Be surface-level - this is just an acknowledgment, not the actual review
-- DO NOT analyze code yet
-- DO NOT use any tools
-- Just acknowledge what files are being reviewed"""
- ),
- HumanMessage(
- content=f"""A merge request code review is starting for the following files:
-
-{files_summary}
-
-Write a brief acknowledgment message (2-3 sentences) letting the developer know the review is in progress. Be friendly and professional."""
- ),
- ]
-
- try:
- # Get response with no tool calls allowed
- from reviewbot.agent.tasks.core import ToolCallerSettings, tool_caller
-
- summary_settings = ToolCallerSettings(max_tool_calls=0, max_iterations=1)
- summary = tool_caller(agent, messages, summary_settings)
-
- # Post as a discussion (so we can update it later)
- acknowledgment_body = f"""
-
-{summary}
-
----
-*Review powered by ReviewBot*
-"""
-
- # post_discussion now returns both discussion_id and note_id
- discussion_id, note_id = post_discussion(
- api_v4=api_v4,
- token=token,
- project_id=project_id,
- mr_iid=mr_iid,
- body=acknowledgment_body,
- )
-
- if not note_id:
- console.print("[yellow]⚠ Discussion created but no note ID returned[/yellow]")
- return None
-
- console.print(
- f"[green]✓ Posted review acknowledgment (discussion: {discussion_id}, note: {note_id})[/green]"
- )
- return (str(discussion_id), str(note_id))
-
- except Exception as e:
- console.print(f"[yellow]⚠ Failed to post acknowledgment: {e}[/yellow]")
- # Don't fail the whole review if acknowledgment fails
- return None
-
-
-def update_review_summary(
- api_v4: str,
- token: str,
- project_id: str,
- mr_iid: str,
- discussion_id: str,
- note_id: str,
- issues: list[Issue],
- diffs: list[FileDiff],
- diff_refs: dict[str, str],
- agent: Agent,
-) -> None:
- """
- Update the acknowledgment note with a summary of the review results.
- Uses LLM to generate reasoning and summary.
-
- Args:
- api_v4: GitLab API v4 base URL
- token: GitLab API token
- project_id: Project ID
- mr_iid: Merge request IID
- discussion_id: Discussion ID of the acknowledgment
- note_id: Note ID to update
- issues: List of issues found during review
- diffs: List of file diffs that were reviewed
- diff_refs: Diff references including head_sha and project_web_url
- agent: The agent to use for generating summary
- """
- from langchain_core.messages import HumanMessage, SystemMessage
-
- # Count issues by severity
- high_count = sum(1 for issue in issues if issue.severity == IssueSeverity.HIGH)
- medium_count = sum(1 for issue in issues if issue.severity == IssueSeverity.MEDIUM)
- low_count = sum(1 for issue in issues if issue.severity == IssueSeverity.LOW)
-
- # Group issues by file
- issues_by_file: dict[str, list[Issue]] = {}
- for issue in issues:
- if issue.file_path not in issues_by_file:
- issues_by_file[issue.file_path] = []
- issues_by_file[issue.file_path].append(issue)
-
- # Build structured statistics
- total_files = len(diffs)
- files_with_issues = len(issues_by_file)
-
- # Prepare issue details for LLM
- issues_summary = []
- for issue in issues:
- issues_summary.append(
- f"- **{issue.severity.value.upper()}** in `{issue.file_path}` (lines {issue.start_line}-{issue.end_line}): {issue.description}"
- )
-
- issues_text = "\n".join(issues_summary) if issues_summary else "No issues found."
-
- # Generate LLM summary with reasoning
- try:
- from reviewbot.agent.tasks.core import ToolCallerSettings, tool_caller
-
- messages = [
- SystemMessage(
- content="""You are a Merge Request reviewer. Generate a concise, professional summary of a code review with reasoning.
-
-IMPORTANT:
-- Use EXACTLY two paragraphs, each wrapped in
tags. -- Provide reasoning about the overall merge request purpose and code quality. -- Highlight key concerns or positive aspects -- Be constructive and professional -- DO NOT use any tools -- Use paragraphs with readable flow. -Paragraphs should be wrapped with
tags. Use new
tag for a newline. -Example -
-paragraph -
--paragraph2 -
-- Focus on the big picture, not individual issue details""" - ), - HumanMessage( - content=f"""A code review has been completed with the following results: - -**Statistics:** -- Files reviewed: {total_files} -- Files with issues: {files_with_issues} -- Total issues: {len(issues)} - - High severity: {high_count} - - Medium severity: {medium_count} - - Low severity: {low_count} - -**Issues found:** -{issues_text} - -- Use EXACTLY two paragraphs, each wrapped in tags.
-1. Provides overall assessment of the purpose of the merge request purpose and code quality.
-2. Highlights the most important concerns (if any)
-3. Gives reasoning about the review findings
-4. Is constructive and actionable """
- ),
- ]
-
- summary_settings = ToolCallerSettings(max_tool_calls=0, max_iterations=1)
- llm_summary = tool_caller(agent, messages, summary_settings)
-
- except Exception as e:
- console.print(f"[yellow]⚠ Failed to generate LLM summary: {e}[/yellow]")
- llm_summary = "Review completed successfully."
-
- # Build final summary combining statistics and LLM reasoning
- summary_parts = [
- '\n\n',
- f"Reviewed **{total_files}** file(s), found **{len(issues)}** issue(s) across **{files_with_issues}** file(s).\n\n",
- "**Summary**\n\n",
- f"{llm_summary}\n\n",
- ]
-
- if issues:
- project_web_url = diff_refs.get("project_web_url")
- head_sha = diff_refs.get("head_sha")
-
- summary_parts.append("**Issue Breakdown**\n\n")
- if high_count > 0:
- summary_parts.append(
- f'
\n'
- )
- if medium_count > 0:
- summary_parts.append(
- f'
\n'
- )
- if low_count > 0:
- summary_parts.append(
- f'
\n'
- )
-
- summary_parts.append("\n
\n
\n\n")
-
- non_dedicated_issues = [issue for issue in issues if not issue.discussion_id]
- if non_dedicated_issues:
- issues_by_file: dict[str, list[Issue]] = {}
- for issue in non_dedicated_issues:
- issues_by_file.setdefault(issue.file_path, []).append(issue)
-
- severity_badge_colors = {
- IssueSeverity.HIGH: "red",
- IssueSeverity.MEDIUM: "orange",
- IssueSeverity.LOW: "green",
- }
-
- for file_path, file_issues in sorted(issues_by_file.items()):
- summary_parts.append(f"### 📂 {file_path}\n\n")
- for issue in file_issues:
- file_diff = next((fd for fd in diffs if fd.new_path == issue.file_path), None)
- code_snippet = ""
- if file_diff:
- code_snippet = _extract_code_from_diff(
- file_diff.patch,
- issue.start_line,
- issue.end_line,
- )
- if not code_snippet:
- code_snippet = "(no diff context available)"
-
- label = issue.severity.value.upper()
- badge_color = severity_badge_colors[issue.severity]
- file_url = None
- if project_web_url and head_sha:
- escaped_path = quote(issue.file_path, safe="/")
- if issue.start_line == issue.end_line:
- anchor = f"#L{issue.start_line}"
- else:
- anchor = f"#L{issue.start_line}-L{issue.end_line}"
- file_url = f"{project_web_url}/-/blob/{head_sha}/{escaped_path}{anchor}"
- if file_url:
- location_line = (
- f''
- f"#L {issue.start_line}-{issue.end_line}"
- f""
- )
- else:
- location_line = f"#L {issue.start_line}-{issue.end_line}"
-
- issue_body = f"""{issue.description}
-"""
- summary_parts.append(
- f"""
-
-{issue_body}
-
-
- {issue.title} ({location_line})
\n'
- )
-
- summary_parts.append("\n---\n*Review powered by ReviewBot*")
-
- summary_body = "".join(summary_parts)
-
- console.print(
- f"[dim]Updating discussion {discussion_id}, note {note_id} with review summary...[/dim]"
- )
- try:
- update_discussion_note(
- api_v4=api_v4,
- token=token,
- project_id=project_id,
- mr_iid=mr_iid,
- discussion_id=discussion_id,
- note_id=note_id,
- body=summary_body,
- )
- console.print("[green]✓ Updated review acknowledgment with summary[/green]")
- except Exception as e:
- console.print(f"[yellow]⚠ Failed to update acknowledgment: {e}[/yellow]")
- import traceback
-
- traceback.print_exc()
- # Don't fail the whole review if update fails
-
-
-def work_agent(config: Config, project_id: str, mr_iid: str) -> str:
- api_v4 = config.gitlab_api_v4 + "/api/v4"
- token = config.gitlab_token
- model = get_gpt_model(config.llm_model_name, config.llm_api_key, config.llm_base_url)
-
- clone_url = build_clone_url(api_v4, project_id, token)
-
- diffs, diff_refs = fetch_mr_diffs(api_v4, project_id, mr_iid, token)
-
- # Limit tool calls to prevent agent from wandering
- # For diff review: get_diff (1) + maybe read_file for context (1-2) = 3 max
- settings = ToolCallerSettings(max_tool_calls=5, max_iterations=10)
-
- # Only provide essential tools - remove search tools to prevent wandering
- tools = [
- get_diff, # Primary tool: get the diff for the file
- read_file, # Optional: get additional context if needed
- think, # Internal reasoning and thought process
- ]
-
- agent: Agent = create_agent(
- model=model,
- tools=tools,
- # middleware=[check_message_limit, check_agent_messages], # type: ignore
- )
- branch = get_mr_branch(api_v4, project_id, mr_iid, token)
- repo_path = clone_repo_persistent(clone_url, branch=branch)
- repo_path = Path(repo_path).resolve()
- repo_tree = tree(repo_path)
-
- # Parse .reviewignore and filter diffs
- reviewignore_patterns = parse_reviewignore(repo_path)
- filtered_diffs = filter_diffs(diffs, reviewignore_patterns)
- console.print(f"[cyan]Reviewing {len(filtered_diffs)} out of {len(diffs)} changed files[/cyan]")
-
- manager = CodebaseStoreManager()
- manager.set_repo_root(repo_path)
- manager.set_repo_name(get_repo_name(repo_path))
- manager.set_tree(repo_tree)
- manager.set_diffs(filtered_diffs) # Use filtered diffs instead of all diffs
- manager.get_store()
-
- issue_store = InMemoryIssueStore()
- token_ctx = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store))
-
- context = store_manager_ctx.get()
-
- # Create GitLab configuration
- gitlab_config = GitLabConfig(
- api_v4=api_v4,
- token=token,
- project_id=project_id,
- mr_iid=mr_iid,
- )
-
- # Create a low-effort agent for simple tasks like acknowledgments and quick scans
- low_effort_model = get_gpt_model_low_effort(
- config.llm_model_name, config.llm_api_key, config.llm_base_url
- )
- low_effort_agent: Agent = create_agent(
- model=low_effort_model,
- tools=[get_diff, think], # Only needs get_diff for quick scanning
- )
-
- # Post acknowledgment that review is starting
- console.print("[dim]Posting review acknowledgment...[/dim]")
- acknowledgment_ids = post_review_acknowledgment(
- api_v4=api_v4,
- token=token,
- project_id=project_id,
- mr_iid=mr_iid,
- agent=low_effort_agent,
- diffs=filtered_diffs,
- )
- if acknowledgment_ids:
- console.print(
- f"[dim]Acknowledgment created: discussion={acknowledgment_ids[0]}, note={acknowledgment_ids[1]}[/dim]"
- )
- else:
- console.print("[yellow]⚠ Failed to create acknowledgment (returned None)[/yellow]")
-
- try:
- # Define callback to create discussions as each file's review completes
- def on_file_review_complete(file_path: str, issues: list[Any]) -> None:
- """Callback called when a file's review completes."""
- if not issues:
- console.print(f"[dim]No issues found in {file_path}, skipping discussion[/dim]")
- return
- if not config.create_threads:
- console.print(
- f"[dim]Thread creation disabled, deferring issues in {file_path} to summary[/dim]"
- )
- return
-
- # Convert IssueModel to Issue domain objects
- from reviewbot.core.issues.issue_model import IssueModel
-
- domain_issues = [issue.to_domain() for issue in issues if isinstance(issue, IssueModel)]
- handle_file_issues(file_path, domain_issues, gitlab_config, filtered_diffs, diff_refs)
-
- # Pass the callback to the agent runner
- issues: list[Issue] = agent_runner.invoke( # type: ignore
- AgentRunnerInput(
- agent=agent,
- context=context,
- settings=settings,
- on_file_complete=on_file_review_complete,
- quick_scan_agent=low_effort_agent,
- )
- )
-
- console.print(f"[bold cyan]📊 Total issues found: {len(issues)}[/bold cyan]")
-
- # Update the acknowledgment note with summary
- console.print(f"[dim]Checking acknowledgment_ids: {acknowledgment_ids}[/dim]")
- if acknowledgment_ids:
- discussion_id, note_id = acknowledgment_ids
- console.print(
- f"[dim]Calling update_review_summary for discussion {discussion_id}, note {note_id}...[/dim]"
- )
- update_review_summary(
- api_v4=api_v4,
- token=token,
- project_id=project_id,
- mr_iid=mr_iid,
- discussion_id=discussion_id,
- note_id=note_id,
- issues=issues,
- diffs=filtered_diffs,
- diff_refs=diff_refs,
- agent=low_effort_agent,
- )
- console.print("[dim]update_review_summary completed[/dim]")
- else:
- console.print(
- "[yellow]⚠ No acknowledgment to update (initial acknowledgment may have failed)[/yellow]"
- )
-
- # Discussions are now created as reviews complete, but we still need to
- # handle any files that might have been processed but had no issues
- # (though the callback already handles this case)
-
- console.print("[bold green]🎉 All reviews completed and discussions created![/bold green]")
- return "Review completed successfully"
-
- except Exception as e:
- console.print(f"[bold red]❌ Error during review: {e}[/bold red]")
- import traceback
-
- traceback.print_exc()
- raise
- finally:
- store_manager_ctx.reset(token_ctx)
-
-
-def handle_file_issues(
- file_path: str,
- issues: list[Issue],
- gitlab_config: GitLabConfig,
- file_diffs: list[FileDiff], # Add this parameter
- diff_refs: dict[str, str], # Add this parameter (contains base_sha, head_sha, start_sha)
-) -> None:
- """
- Create positioned discussions for a capped set of high-priority issues, and
- group the rest into a single per-file discussion with replies.
-
- Args:
- file_path: Path to the file being reviewed
- issues: List of issues found in this file
- gitlab_config: GitLab API configuration
- file_diffs: List of file diffs from the MR
- diff_refs: Dict with base_sha, head_sha, start_sha
- """
- if not issues:
- return
-
- console.print(f"[cyan]Creating discussion for {file_path} with {len(issues)} issue(s)[/cyan]")
-
- # Get the file diff once
- file_diff = next((fd for fd in file_diffs if fd.new_path == file_path), None)
- base_sha = diff_refs.get("base_sha")
- head_sha = diff_refs.get("head_sha")
- start_sha = diff_refs.get("start_sha")
-
- # Severity, Color pairs
- severity_color_pairs = {
- IssueSeverity.HIGH: "#d73a49", # red
- IssueSeverity.MEDIUM: "#fbca04", # yellow/orange
- IssueSeverity.LOW: "#28a745", # green
- }
-
- max_dedicated_threads = 3
- dedicated_issues: list[Issue] = []
- reply_issues: list[Issue] = []
-
- for issue in issues:
- needs_dedicated = issue.suggestion is not None or issue.severity == IssueSeverity.HIGH
- if needs_dedicated and len(dedicated_issues) < max_dedicated_threads:
- dedicated_issues.append(issue)
- else:
- reply_issues.append(issue)
-
- def build_position(issue: Issue) -> dict[str, Any] | None:
- if (
- file_diff
- and base_sha
- and head_sha
- and start_sha
- and file_diff.old_path
- and file_diff.new_path
- ):
- return create_position_for_issue(
- diff_text=file_diff.patch,
- issue_line_start=issue.start_line,
- issue_line_end=issue.end_line,
- base_sha=base_sha,
- head_sha=head_sha,
- start_sha=start_sha,
- old_path=file_diff.old_path,
- new_path=file_diff.new_path,
- )
- return None
-
- def create_discussion_for_issue(issue: Issue, include_suggestion: bool = True) -> str | None:
- discussion_title = ""
- color = severity_color_pairs[issue.severity].strip("#")
- discussion_body = f"""
-
-{issue.description}
-"""
- if include_suggestion and issue.suggestion:
- discussion_body += f"""
-
-{issue.suggestion}
-"""
-
- position = build_position(issue)
- if position:
- console.print(
- f"[dim]Position object for lines {issue.start_line}-{issue.end_line}:[/dim]"
- )
- import json
-
- console.print(f"[dim]{json.dumps(position, indent=2)}[/dim]")
-
- try:
- discussion_id = create_discussion(
- title=discussion_title,
- body=discussion_body,
- gitlab_config=gitlab_config,
- position=position,
- )
- issue.discussion_id = discussion_id
- console.print(
- f"[green]✓ Created discussion for issue at lines {issue.start_line}-{issue.end_line} (ID: {discussion_id})[/green]"
- )
- return discussion_id
- except Exception as e:
- if position:
- console.print(
- f"[yellow]Failed with position for lines {issue.start_line}-{issue.end_line}, retrying without position: {e}[/yellow]"
- )
- try:
- discussion_id = create_discussion(
- title=discussion_title,
- body=discussion_body,
- gitlab_config=gitlab_config,
- position=None,
- )
- issue.discussion_id = discussion_id
- console.print(
- f"[green]✓ Created discussion without position (ID: {discussion_id})[/green]"
- )
- return discussion_id
- except Exception as e2:
- console.print(
- f"[red]✗ Failed to create discussion for issue at lines {issue.start_line}-{issue.end_line}: {e2}[/red]"
- )
- import traceback
-
- traceback.print_exc()
- return None
-
- console.print(
- f"[red]✗ Failed to create discussion for issue at lines {issue.start_line}-{issue.end_line}: {e}[/red]"
- )
- import traceback
-
- traceback.print_exc()
- return None
-
- for issue in dedicated_issues:
- create_discussion_for_issue(issue, include_suggestion=True)
-
- if reply_issues:
- console.print(
- f"[dim]Leaving {len(reply_issues)} non-dedicated issue(s) for the summary[/dim]"
- )
-
-
-def generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str:
- """
- Generates a GitLab-compatible line_code.
- Format: sha1(path) + "_" + old_line + "_" + new_line
- """
- path_hash = hashlib.sha1(file_path.encode()).hexdigest()
- old_s = str(old_line) if old_line is not None else ""
- new_s = str(new_line) if new_line is not None else ""
- return f"{path_hash}_{old_s}_{new_s}"
-
-
-def create_position_for_issue(
- diff_text: str,
- issue_line_start: int,
- issue_line_end: int,
- base_sha: str,
- head_sha: str,
- start_sha: str,
- old_path: str,
- new_path: str,
-) -> dict[str, Any] | None:
- hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")
- lines = diff_text.splitlines()
-
- current_old, current_new = 0, 0
- in_hunk = False
-
- # Track the actual lines found in the diff to build the range
- matched_lines = [] # List of (old_line, new_line)
-
- for line in lines:
- match = hunk_header_pattern.match(line)
- if match:
- current_old, current_new = int(match.group(1)), int(match.group(3))
- in_hunk = True
- continue
-
- if not in_hunk:
- continue
-
- # Logic to determine if this specific line is within our target range
- if line.startswith("+"):
- if issue_line_start <= current_new <= issue_line_end:
- matched_lines.append((None, current_new))
- current_new += 1
- elif line.startswith("-"):
- if issue_line_start <= current_old <= issue_line_end:
- matched_lines.append((current_old, None))
- current_old += 1
- else:
- if issue_line_start <= current_new <= issue_line_end:
- matched_lines.append((current_old, current_new))
- current_old += 1
- current_new += 1
-
- # FIX: Optimization to prevent "sticky" hunk matching.
- # If we have passed the end_line in the NEW file, we stop.
- if current_new > issue_line_end and not line.startswith("-"):
- if matched_lines:
- break
-
- if not matched_lines:
- return None
-
- # We anchor the comment to the LAST line of the range so the code is visible
- start_old, start_new = matched_lines[0]
- end_old, end_new = matched_lines[-1]
-
- # Calculate line codes for the range
- start_code = generate_line_code(new_path if start_new else old_path, start_old, start_new)
- end_code = generate_line_code(new_path if end_new else old_path, end_old, end_new)
-
- position = {
- "base_sha": base_sha,
- "head_sha": head_sha,
- "start_sha": start_sha,
- "position_type": "text",
- "old_path": old_path,
- "new_path": new_path,
- # Anchor the main comment on the end of the range
- "new_line": end_new,
- "old_line": end_old,
- "line_range": {
- "start": {
- "line_code": start_code,
- "type": "new" if start_new else "old",
- "new_line": start_new,
- "old_line": start_old,
- },
- "end": {
- "line_code": end_code,
- "type": "new" if end_new else "old",
- "new_line": end_new,
- "old_line": end_old,
- },
- },
- }
-
- # Cleanup: GitLab doesn't like None values in the schema
- if position["new_line"] is None:
- del position["new_line"]
- if position["old_line"] is None:
- del position["old_line"]
-
- return position
-
-
-def create_discussion(
- title: str,
- body: str,
- gitlab_config: GitLabConfig,
- position: dict[str, Any] | None = None,
-) -> str:
- """
- Create a discussion with title and body.
-
- Args:
- title: Discussion title
- body: Discussion body content
- gitlab_config: GitLab API configuration
- position: Optional position object for file-based discussions
-
- Returns:
- Discussion ID from GitLab
- """
- # GitLab discussions don't have separate titles in the API,
- # so we include the title in the body with markdown formatting
-
- # post_discussion returns (discussion_id, note_id), we only need discussion_id
- discussion_id, _ = post_discussion(
- api_v4=gitlab_config.api_v4,
- token=gitlab_config.token,
- project_id=gitlab_config.project_id,
- mr_iid=gitlab_config.mr_iid,
- body=body,
- position=position,
- )
-
- return discussion_id
-
-
-def reply_to_discussion(
- discussion_id: str,
- body: str,
- gitlab_config: GitLabConfig,
-) -> None:
- """
- Reply to an existing discussion.
-
- Args:
- discussion_id: ID of the discussion to reply to
- body: Content of the reply
- gitlab_config: GitLab API configuration
- """
- post_discussion_reply(
- api_v4=gitlab_config.api_v4,
- token=gitlab_config.token,
- project_id=gitlab_config.project_id,
- merge_request_id=gitlab_config.mr_iid,
- discussion_id=discussion_id,
- body=body,
- )
-
-
-def post_mr_note(
- api_v4: str,
- token: str,
- project_id: str,
- mr_iid: str,
- body: str,
-) -> None:
- """
- Post a standalone note (comment) to a merge request without creating a discussion.
-
- Args:
- api_v4: GitLab API v4 base URL
- token: GitLab API token
- project_id: Project ID
- mr_iid: Merge request IID
- body: Note content
- """
- post_merge_request_note(
- api_v4=api_v4,
- token=token,
- project_id=project_id,
- mr_iid=mr_iid,
- body=body,
- )
diff --git a/src/reviewbot/agent/workflow/__init__.py b/src/reviewbot/agent/workflow/__init__.py
new file mode 100644
index 0000000..d09e7a1
--- /dev/null
+++ b/src/reviewbot/agent/workflow/__init__.py
@@ -0,0 +1,4 @@
+from reviewbot.agent.workflow.config import GitLabConfig
+from reviewbot.agent.workflow.runner import work_agent
+
+__all__ = ["GitLabConfig", "work_agent"]
diff --git a/src/reviewbot/agent/workflow/config.py b/src/reviewbot/agent/workflow/config.py
new file mode 100644
index 0000000..8ac06c0
--- /dev/null
+++ b/src/reviewbot/agent/workflow/config.py
@@ -0,0 +1,11 @@
+from dataclasses import dataclass
+
+
@dataclass
class GitLabConfig:
    """GitLab API configuration.

    Bundles the connection details needed to address a single merge request,
    so call sites pass one object instead of four separate values.
    """

    # Base URL of the GitLab REST API (v4), e.g. "https://host/api/v4".
    api_v4: str
    # Access token used to authenticate API requests.
    token: str
    # Numeric ID or URL-encoded path of the target project.
    project_id: str
    # Project-scoped IID of the merge request.
    mr_iid: str
diff --git a/src/reviewbot/agent/workflow/diff_extract.py b/src/reviewbot/agent/workflow/diff_extract.py
new file mode 100644
index 0000000..73a76d7
--- /dev/null
+++ b/src/reviewbot/agent/workflow/diff_extract.py
@@ -0,0 +1,167 @@
+import hashlib
+import re
+from typing import Any
+
+
def _extract_code_from_diff(diff_text: str, line_start: int, line_end: int) -> str:
    """Extract post-change source lines ``line_start``..``line_end`` from a unified diff.

    Only the "new" side of the diff contributes: added ('+') and context (' ')
    lines are collected; deleted ('-') lines and ``\\ No newline at end of
    file`` markers are skipped.  Lines outside any hunk (e.g. file headers)
    are ignored.

    Args:
        diff_text: Unified diff text for a single file.
        line_start: First line (1-based, inclusive) in the new file.
        line_end: Last line (1-based, inclusive) in the new file.

    Returns:
        The extracted lines joined with newlines; empty string if the range
        does not appear in the diff.
    """
    hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")
    lines = diff_text.splitlines()

    extracted = []
    current_new = 0  # line number (new side) of the next hunk line
    in_hunk = False

    for line in lines:
        match = hunk_header_pattern.match(line)
        if match:
            # Group 3 is the hunk's starting line on the new side.
            current_new = int(match.group(3))
            in_hunk = True
            continue

        if not in_hunk:
            continue

        # FIX: "\ No newline at end of file" markers are diff metadata, not
        # content; previously they fell into the context branch and skewed
        # the new-side line counter.
        if line.startswith("\\"):
            continue

        # We only care about the lines in the NEW file (the result of the change)
        if line.startswith("+"):
            if line_start <= current_new <= line_end:
                extracted.append(line[1:])  # Remove '+'
            current_new += 1
        elif line.startswith("-"):
            # Skip deleted lines for code extraction of the 'new' state
            continue
        else:
            # Context line; strip the leading space marker.
            if line_start <= current_new <= line_end:
                extracted.append(line[1:] if line else "")
            current_new += 1

        # Exit early if we've passed the end of our requested range
        if current_new > line_end:
            if extracted:  # Only break if we actually found lines
                break

    return "\n".join(extracted)
+
+
+def generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str:
+ """
+ Generates a GitLab-compatible line_code.
+ Format: sha1(path) + "_" + old_line + "_" + new_line
+ """
+ path_hash = hashlib.sha1(file_path.encode()).hexdigest()
+ old_s = str(old_line) if old_line is not None else ""
+ new_s = str(new_line) if new_line is not None else ""
+ return f"{path_hash}_{old_s}_{new_s}"
+
+
def create_position_for_issue(
    diff_text: str,
    issue_line_start: int,
    issue_line_end: int,
    base_sha: str,
    head_sha: str,
    start_sha: str,
    old_path: str,
    new_path: str,
) -> dict[str, Any] | None:
    """Map an issue's line range onto a GitLab ``text`` position payload.

    Walks the unified diff keeping parallel old/new line counters, collects
    every diff line whose old or new line number falls inside
    ``issue_line_start``..``issue_line_end``, and builds a position dict with
    a ``line_range`` anchored on the last matched line.

    Returns:
        The position dict, or None when the range does not intersect the
        diff (caller can then fall back to an unpositioned discussion).
    """
    hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")
    lines = diff_text.splitlines()

    # Running line counters for the old/new side of the current hunk.
    current_old, current_new = 0, 0
    in_hunk = False

    # Track the actual lines found in the diff to build the range
    matched_lines = []  # List of (old_line, new_line)

    for line in lines:
        match = hunk_header_pattern.match(line)
        if match:
            current_old, current_new = int(match.group(1)), int(match.group(3))
            in_hunk = True
            continue

        if not in_hunk:
            continue

        # Logic to determine if this specific line is within our target range
        if line.startswith("+"):
            # Added line: exists only on the new side.
            if issue_line_start <= current_new <= issue_line_end:
                matched_lines.append((None, current_new))
            current_new += 1
        elif line.startswith("-"):
            # Removed line: exists only on the old side.
            if issue_line_start <= current_old <= issue_line_end:
                matched_lines.append((current_old, None))
            current_old += 1
        else:
            # Context line: present on both sides.
            # NOTE(review): "\ No newline at end of file" markers also land
            # here and bump both counters -- confirm that is acceptable.
            if issue_line_start <= current_new <= issue_line_end:
                matched_lines.append((current_old, current_new))
            current_old += 1
            current_new += 1

        # FIX: Optimization to prevent "sticky" hunk matching.
        # If we have passed the end_line in the NEW file, we stop.
        if current_new > issue_line_end and not line.startswith("-"):
            if matched_lines:
                break

    if not matched_lines:
        return None

    # We anchor the comment to the LAST line of the range so the code is visible
    start_old, start_new = matched_lines[0]
    end_old, end_new = matched_lines[-1]

    # Calculate line codes for the range
    start_code = generate_line_code(new_path if start_new else old_path, start_old, start_new)
    end_code = generate_line_code(new_path if end_new else old_path, end_old, end_new)

    position = {
        "base_sha": base_sha,
        "head_sha": head_sha,
        "start_sha": start_sha,
        "position_type": "text",
        "old_path": old_path,
        "new_path": new_path,
        # Anchor the main comment on the end of the range
        "new_line": end_new,
        "old_line": end_old,
        "line_range": {
            "start": {
                "line_code": start_code,
                "type": "new" if start_new else "old",
                "new_line": start_new,
                "old_line": start_old,
            },
            "end": {
                "line_code": end_code,
                "type": "new" if end_new else "old",
                "new_line": end_new,
                "old_line": end_old,
            },
        },
    }

    # Cleanup: GitLab doesn't like None values in the schema
    if position["new_line"] is None:
        del position["new_line"]
    if position["old_line"] is None:
        del position["old_line"]

    return position
+
+
def create_file_position(
    base_sha: str,
    head_sha: str,
    start_sha: str,
    old_path: str,
    new_path: str,
) -> dict[str, Any]:
    """Build a file-level (not line-anchored) GitLab discussion position.

    Used when a discussion should attach to a whole file rather than to a
    specific diff line; the payload carries only the diff refs and paths.
    """
    # Insertion order mirrors the documented payload shape.
    position: dict[str, Any] = dict(
        zip(("base_sha", "head_sha", "start_sha"), (base_sha, head_sha, start_sha))
    )
    position["position_type"] = "file"
    position["old_path"] = old_path
    position["new_path"] = new_path
    return position
diff --git a/src/reviewbot/agent/workflow/discussions.py b/src/reviewbot/agent/workflow/discussions.py
new file mode 100644
index 0000000..4143234
--- /dev/null
+++ b/src/reviewbot/agent/workflow/discussions.py
@@ -0,0 +1,238 @@
+from typing import Any
+from urllib.parse import quote
+
+from rich.console import Console # type: ignore
+
+from reviewbot.agent.workflow.config import GitLabConfig
+from reviewbot.agent.workflow.diff_extract import create_file_position
+from reviewbot.core.issues import IssueModel, IssueSeverity
+from reviewbot.infra.gitlab.diff import FileDiff
+from reviewbot.infra.gitlab.note import post_discussion, post_discussion_reply
+
+console = Console()
+
+
def handle_file_issues(
    file_path: str,
    issues: list[IssueModel],
    gitlab_config: GitLabConfig,
    file_diffs: list[FileDiff],
    diff_refs: dict[str, str],
) -> None:
    """
    Create positioned discussions for a capped set of high-priority issues, and
    group the rest into a single per-file discussion with replies.

    The first issue becomes the root note of a new discussion (anchored with a
    file-level position when diff refs allow); every remaining issue is posted
    as a reply on that same discussion.  Discussion and note IDs are written
    back onto each IssueModel for later linking from the summary.

    Args:
        file_path: Path to the file being reviewed
        issues: List of issues found in this file
        gitlab_config: GitLab API configuration
        file_diffs: List of file diffs from the MR
        diff_refs: Dict with base_sha, head_sha, start_sha
    """
    if not issues:
        return

    console.print(f"[cyan]Creating discussion for {file_path} with {len(issues)} issue(s)[/cyan]")

    # Get the file diff once
    file_diff = next((fd for fd in file_diffs if fd.new_path == file_path), None)
    base_sha = diff_refs.get("base_sha")
    head_sha = diff_refs.get("head_sha")
    start_sha = diff_refs.get("start_sha")
    # NOTE(review): project_web_url is read from diff_refs alongside the SHAs
    # -- confirm the caller populates it.
    project_web_url = diff_refs.get("project_web_url")

    # Severity, Color pairs
    severity_color_pairs = {
        IssueSeverity.HIGH: "red",  # red
        IssueSeverity.MEDIUM: "orange",  # yellow/orange
        IssueSeverity.LOW: "green",  # green
    }

    def build_location_line(issue: IssueModel) -> str:
        # Build a line-range label, hyperlinked to the file at head_sha when
        # the project web URL is known.
        if project_web_url and head_sha:
            escaped_path = quote(issue.file_path, safe="/")
            if issue.start_line == issue.end_line:
                anchor = f"#L{issue.start_line}"
            else:
                anchor = f"#L{issue.start_line}-L{issue.end_line}"
            file_url = f"{project_web_url}/-/blob/{head_sha}/{escaped_path}{anchor}"
            # NOTE(review): the literals below appear mangled in this view
            # (HTML-like markup stripped); file_url is presumably interpolated
            # into the stripped anchor tag -- restore exact strings from VCS.
            return (
                f''
                f"#L {issue.start_line}-{issue.end_line}"
                f""
            )
        return f"lines {issue.start_line}-{issue.end_line}"

    def build_position() -> dict[str, Any] | None:
        # A file-level position needs the full set of diff refs plus both
        # paths; otherwise fall back to an unpositioned discussion.
        if (
            file_diff
            and base_sha
            and head_sha
            and start_sha
            and file_diff.old_path
            and file_diff.new_path
        ):
            return create_file_position(
                base_sha=base_sha,
                head_sha=head_sha,
                start_sha=start_sha,
                old_path=file_diff.old_path,
                new_path=file_diff.new_path,
            )
        return None

    # NOTE(review): these two literals look truncated in this view (stripped
    # markup?) -- verify against VCS.
    discussion_title = ""
    discussion_body = "\n\n"
    first_issue, *remaining_issues = issues
    for issue in [first_issue]:
        # color is presumably used inside the stripped markup below.
        color = severity_color_pairs[issue.severity].strip("#")
        location_line = build_location_line(issue)
        discussion_body += (
            f'
\n\n'
            f"**{issue.title}** ({location_line})\n\n"
            f"{issue.description}\n"
        )
        if issue.suggestion:
            discussion_body += f"\n{issue.suggestion}\n"
        discussion_body += "\n"

    position = build_position()
    if position:
        console.print(f"[dim]Position object for {file_path}:[/dim]")
        import json

        console.print(f"[dim]{json.dumps(position, indent=2)}[/dim]")

    try:
        discussion_id, note_id = create_discussion(
            title=discussion_title,
            body=discussion_body,
            gitlab_config=gitlab_config,
            position=position,
        )
        first_issue.discussion_id = discussion_id
        first_issue.note_id = note_id
        # Remaining issues become replies on the same discussion thread.
        for issue in remaining_issues:
            reply_body = ""
            color = severity_color_pairs[issue.severity].strip("#")
            location_line = build_location_line(issue)
            reply_body += (
                f'
\n\n'
                f"**{issue.title}** ({location_line})\n\n"
                f"{issue.description}\n"
            )
            if issue.suggestion:
                reply_body += f"\n{issue.suggestion}\n"
            note_id = reply_to_discussion(
                discussion_id=discussion_id,
                body=reply_body,
                gitlab_config=gitlab_config,
            )
            issue.discussion_id = discussion_id
            issue.note_id = note_id
        console.print(f"[green]✓ Created discussion for {file_path} (ID: {discussion_id})[/green]")
    except Exception as e:
        # If a positioned discussion failed, retry once without a position.
        if position:
            console.print(
                f"[yellow]Failed with position for {file_path}, retrying without position: {e}[/yellow]"
            )
            try:
                discussion_id, note_id = create_discussion(
                    title=discussion_title,
                    body=discussion_body,
                    gitlab_config=gitlab_config,
                    position=None,
                )
                first_issue.discussion_id = discussion_id
                first_issue.note_id = note_id
                for issue in remaining_issues:
                    reply_body = ""
                    color = severity_color_pairs[issue.severity].strip("#")
                    location_line = build_location_line(issue)
                    reply_body += (
                        f'
\n\n'
                        f"**{issue.title}** ({location_line})\n\n"
                        f"{issue.description}\n"
                    )
                    if issue.suggestion:
                        reply_body += f"\n{issue.suggestion}\n"
                    note_id = reply_to_discussion(
                        discussion_id=discussion_id,
                        body=reply_body,
                        gitlab_config=gitlab_config,
                    )
                    issue.discussion_id = discussion_id
                    issue.note_id = note_id
                console.print(
                    f"[green]✓ Created discussion without position for {file_path} (ID: {discussion_id})[/green]"
                )
                return
            except Exception as e2:
                console.print(f"[red]✗ Failed to create discussion for {file_path}: {e2}[/red]")
                import traceback

                traceback.print_exc()
                return

        console.print(f"[red]✗ Failed to create discussion for {file_path}: {e}[/red]")
        import traceback

        traceback.print_exc()
+
+
def create_discussion(
    title: str,
    body: str,
    gitlab_config: GitLabConfig,
    position: dict[str, Any] | None = None,
) -> tuple[str, str | None]:
    """Open a new merge-request discussion and return its identifiers.

    GitLab's API has no separate title field for discussions, so ``title`` is
    expected to already be folded into ``body`` by the caller.

    Args:
        title: Discussion title (informational only; not sent separately).
        body: Markdown content of the discussion's first note.
        gitlab_config: GitLab API connection settings.
        position: Optional position dict anchoring the discussion to a diff.

    Returns:
        ``(discussion_id, note_id)`` as reported by GitLab.
    """
    cfg = gitlab_config
    return post_discussion(
        api_v4=cfg.api_v4,
        token=cfg.token,
        project_id=cfg.project_id,
        mr_iid=cfg.mr_iid,
        body=body,
        position=position,
    )
+
+
def reply_to_discussion(
    discussion_id: str,
    body: str,
    gitlab_config: GitLabConfig,
) -> str | None:
    """Add a note to an existing merge-request discussion.

    Args:
        discussion_id: ID of the discussion to reply to.
        body: Markdown content of the reply.
        gitlab_config: GitLab API connection settings.

    Returns:
        The note ID of the created reply, if the API layer provides one.
    """
    cfg = gitlab_config
    return post_discussion_reply(
        api_v4=cfg.api_v4,
        token=cfg.token,
        project_id=cfg.project_id,
        merge_request_id=cfg.mr_iid,
        discussion_id=discussion_id,
        body=body,
    )
diff --git a/src/reviewbot/agent/workflow/gitlab_notes.py b/src/reviewbot/agent/workflow/gitlab_notes.py
new file mode 100644
index 0000000..c271a5c
--- /dev/null
+++ b/src/reviewbot/agent/workflow/gitlab_notes.py
@@ -0,0 +1,393 @@
+from urllib.parse import quote
+
+from rich.console import Console # type: ignore
+
+from reviewbot.core.agent import Agent
+from reviewbot.core.issues import Issue, IssueSeverity
+from reviewbot.infra.gitlab.diff import FileDiff
+from reviewbot.infra.gitlab.note import (
+ get_all_discussions,
+ post_discussion,
+ post_merge_request_note,
+ update_discussion_note,
+)
+
+console = Console()
+
+
def post_review_acknowledgment(
    api_v4: str,
    token: str,
    project_id: str,
    mr_iid: str,
    agent: Agent,
    diffs: list[FileDiff],
) -> tuple[str, str] | None:
    """
    Post a surface-level summary acknowledging the review is starting.
    Creates a discussion so it can be updated later.
    Only posts if no acknowledgment already exists to prevent duplicates.

    Args:
        api_v4: GitLab API v4 base URL
        token: GitLab API token
        project_id: Project ID
        mr_iid: Merge request IID
        agent: The agent to use for generating summary
        diffs: List of file diffs

    Returns:
        Tuple of (discussion_id, note_id) if created, None if already exists or failed
    """
    from langchain_core.messages import HumanMessage, SystemMessage

    # Check if an acknowledgment already exists
    try:
        discussions = get_all_discussions(
            api_v4=api_v4,
            token=token,
            project_id=project_id,
            mr_iid=mr_iid,
        )

        # Only reuse "Starting" acknowledgments (in-progress reviews)
        # Don't reuse "Complete" acknowledgments - create new ones for new review runs
        # NOTE(review): the marker literal below is mangled in this view
        # (markup stripped from the patch); restore the exact string from VCS.
        starting_marker = (
            '
'
        )

        # Find ALL "Starting" acknowledgments, then pick the most recent one
        found_acknowledgments = []
        for discussion in discussions:
            notes = discussion.get("notes", [])
            for note in notes:
                body = note.get("body", "")
                # Only check for "Starting" marker - this means review is in progress
                if starting_marker in body:
                    discussion_id = discussion.get("id")
                    note_id = note.get("id")
                    created_at = note.get("created_at", "")
                    if discussion_id and note_id:
                        found_acknowledgments.append(
                            {
                                "discussion_id": str(discussion_id),
                                "note_id": str(note_id),
                                "created_at": created_at,
                            }
                        )

        # If we found any in-progress acknowledgments, use the most recent one
        if found_acknowledgments:
            # Sort by created_at timestamp (most recent first)
            # NOTE(review): created_at is compared as an ISO-8601 string --
            # lexicographic order matches chronological order only for
            # same-format timestamps.
            found_acknowledgments.sort(key=lambda x: x["created_at"], reverse=True)
            most_recent = found_acknowledgments[0]
            console.print(
                f"[dim]Found {len(found_acknowledgments)} in-progress review(s), reusing most recent[/dim]"
            )
            return (most_recent["discussion_id"], most_recent["note_id"])

        # No in-progress reviews found - will create a new acknowledgment
        console.print("[dim]No in-progress reviews found, will create new acknowledgment[/dim]")
    except Exception as e:
        console.print(f"[yellow]⚠ Could not check for existing acknowledgment: {e}[/yellow]")
        # Continue anyway - better to post a duplicate than miss it

    # Get list of files being reviewed
    file_list = [diff.new_path for diff in diffs if diff.new_path]
    files_summary = "\n".join([f"- `{f}`" for f in file_list[:10]])  # Limit to first 10
    if len(file_list) > 10:
        files_summary += f"\n- ... and {len(file_list) - 10} more files"

    # Generate a simple summary with very limited tool calls
    messages = [
        SystemMessage(
            content="""You are a code review assistant. Generate a brief, friendly acknowledgment that a code review is starting.

IMPORTANT:
- Keep it SHORT (2-3 sentences max)
- Be surface-level - this is just an acknowledgment, not the actual review
- DO NOT analyze code yet
- DO NOT use any tools
- Just acknowledge what files are being reviewed"""
        ),
        HumanMessage(
            content=f"""A merge request code review is starting for the following files:

{files_summary}

Write a brief acknowledgment message (2-3 sentences) letting the developer know the review is in progress. Be friendly and professional."""
        ),
    ]

    try:
        # Get response with no tool calls allowed
        from reviewbot.agent.tasks.core import ToolCallerSettings, tool_caller

        summary_settings = ToolCallerSettings(max_tool_calls=0, max_iterations=1)
        summary = tool_caller(agent, messages, summary_settings)

        # Post as a discussion (so we can update it later)
        # NOTE(review): the "Starting" marker (stripped in this view) likely
        # sits at the top of this f-string -- confirm against VCS.
        acknowledgment_body = f"""

{summary}

---
*Review powered by ReviewBot*
"""

        # post_discussion now returns both discussion_id and note_id
        discussion_id, note_id = post_discussion(
            api_v4=api_v4,
            token=token,
            project_id=project_id,
            mr_iid=mr_iid,
            body=acknowledgment_body,
        )

        if not note_id:
            console.print("[yellow]⚠ Discussion created but no note ID returned[/yellow]")
            return None

        console.print(
            f"[green]✓ Posted review acknowledgment (discussion: {discussion_id}, note: {note_id})[/green]"
        )
        return (str(discussion_id), str(note_id))

    except Exception as e:
        console.print(f"[yellow]⚠ Failed to post acknowledgment: {e}[/yellow]")
        # Don't fail the whole review if acknowledgment fails
        return None
+
+
def update_review_summary(
    api_v4: str,
    token: str,
    project_id: str,
    mr_iid: str,
    discussion_id: str,
    note_id: str,
    issues: list[Issue],
    diffs: list[FileDiff],
    diff_refs: dict[str, str],
    agent: Agent,
) -> None:
    """
    Update the acknowledgment note with a summary of the review results.
    Uses LLM to generate reasoning and summary.

    Args:
        api_v4: GitLab API v4 base URL
        token: GitLab API token
        project_id: Project ID
        mr_iid: Merge request IID
        discussion_id: Discussion ID of the acknowledgment
        note_id: Note ID to update
        issues: List of issues found during review
        diffs: List of file diffs that were reviewed
        diff_refs: Diff references including project_web_url
        agent: The agent to use for generating summary
    """
    from langchain_core.messages import HumanMessage, SystemMessage

    # Count issues by severity
    high_count = sum(1 for issue in issues if issue.severity == IssueSeverity.HIGH)
    medium_count = sum(1 for issue in issues if issue.severity == IssueSeverity.MEDIUM)
    low_count = sum(1 for issue in issues if issue.severity == IssueSeverity.LOW)

    # Group issues by file
    issues_by_file: dict[str, list[Issue]] = {}
    for issue in issues:
        if issue.file_path not in issues_by_file:
            issues_by_file[issue.file_path] = []
        issues_by_file[issue.file_path].append(issue)

    # Build structured statistics
    total_files = len(diffs)
    files_with_issues = len(issues_by_file)

    # Prepare issue details for LLM
    issues_summary = []
    for issue in issues:
        issues_summary.append(
            f"- **{issue.severity.value.upper()}** in `{issue.file_path}` (lines {issue.start_line}-{issue.end_line}): {issue.description}"
        )

    issues_text = "\n".join(issues_summary) if issues_summary else "No issues found."

    # Generate LLM summary with reasoning
    try:
        from reviewbot.agent.tasks.core import ToolCallerSettings, tool_caller

        # NOTE(review): the prompt literals below are mangled in this view
        # (HTML-like tags stripped from the patch); line breaks were
        # reconstructed from the diff markers -- restore the exact text,
        # including the stripped paragraph tags, from VCS.
        messages = [
            SystemMessage(
                content="""You are a Merge Request reviewer. Generate a concise, professional summary of a code review with reasoning.

IMPORTANT:
- Use EXACTLY two paragraphs, each wrapped in
tags.
- Provide reasoning about the overall merge request purpose and code quality.
- Highlight key concerns or positive aspects
- Be constructive and professional
- DO NOT use any tools
- Use paragraphs with readable flow. Use two paragrahs with 3-5 sentences.
Paragraphs should be wrapped with
tags. Use new
tag for a newline.
Example

paragraph

paragraph2

- Focus on the big picture, not individual issue details"""
            ),
            HumanMessage(
                content=f"""A code review has been completed with the following results:

**Statistics:**
- Files reviewed: {total_files}
- Files with issues: {files_with_issues}
- Total issues: {len(issues)}
  - High severity: {high_count}
  - Medium severity: {medium_count}
  - Low severity: {low_count}

**Issues found:**
{issues_text}

- Use EXACTLY two paragraphs, each wrapped in
tags.
1. Provides overall assessment of the purpose of the merge request purpose and code quality.
2. Highlights the most important concerns (if any)
3. Gives reasoning about the review findings
4. Is constructive and actionable """
            ),
        ]

        summary_settings = ToolCallerSettings(max_tool_calls=0, max_iterations=1)
        llm_summary = tool_caller(agent, messages, summary_settings)

    except Exception as e:
        console.print(f"[yellow]⚠ Failed to generate LLM summary: {e}[/yellow]")
        # Fall back to a neutral summary so the note update still happens.
        llm_summary = "Review completed successfully."

    # Build final summary combining statistics and LLM reasoning
    # NOTE(review): the first list entry likely carried a "Complete" marker
    # (stripped in this view) -- confirm against VCS.
    summary_parts = [
        '\n\n',
        f"Reviewed **{total_files}** file(s), found **{len(issues)}** issue(s) across **{files_with_issues}** file(s).\n\n",
        "**Summary**\n\n",
        f"{llm_summary}\n\n",
    ]

    if issues:
        project_web_url = diff_refs.get("project_web_url")

        # Severity breakdown badges; the literals below are mangled in this
        # view (stripped badge markup) -- restore from VCS.
        summary_parts.append("**Issue Breakdown**\n\n")
        if high_count > 0:
            summary_parts.append(
                f'
\n'
            )
        if medium_count > 0:
            summary_parts.append(
                f'
\n'
            )
        if low_count > 0:
            summary_parts.append(
                f'
\n'
            )

        summary_parts.append("\n
\n
\n\n")

    if issues:
        summary_parts.append("---\n\n")
        # Rebuilt here (shadowing the grouping above) for per-file sections.
        issues_by_file: dict[str, list[Issue]] = {}
        for issue in issues:
            issues_by_file.setdefault(issue.file_path, []).append(issue)

        severity_badge_colors = {
            IssueSeverity.HIGH: "red",
            IssueSeverity.MEDIUM: "orange",
            IssueSeverity.LOW: "green",
        }

        for file_path, file_issues in sorted(issues_by_file.items()):
            summary_parts.append(f"#### `{file_path}`\n\n")
            severity_order = {
                IssueSeverity.HIGH: 0,
                IssueSeverity.MEDIUM: 1,
                IssueSeverity.LOW: 2,
            }
            for issue in sorted(file_issues, key=lambda item: severity_order[item.severity]):
                # NOTE(review): 'label' appears unused here -- it may be
                # referenced inside the stripped badge markup below; verify.
                label = issue.severity.value.upper()
                badge_color = severity_badge_colors[issue.severity]
                note_url = None
                if project_web_url and issue.note_id:
                    note_url = (
                        f"{project_web_url}/-/merge_requests/{mr_iid}#note_{issue.note_id}"
                    )
                if note_url:
                    link_html = (
                        f''
                        f"{issue.title}"
                    )
                else:
                    link_html = "Comment link unavailable"

                summary_parts.append(
                    f' '
                    f'{link_html}\n\n'
                )
            summary_parts.append("\n")
    else:
        summary_parts.append(
            '
\n'
        )

    summary_parts.append("\n---\n*Review powered by ReviewBot*")

    summary_body = "".join(summary_parts)

    console.print(
        f"[dim]Updating discussion {discussion_id}, note {note_id} with review summary...[/dim]"
    )
    try:
        update_discussion_note(
            api_v4=api_v4,
            token=token,
            project_id=project_id,
            mr_iid=mr_iid,
            discussion_id=discussion_id,
            note_id=note_id,
            body=summary_body,
        )
        console.print("[green]✓ Updated review acknowledgment with summary[/green]")
    except Exception as e:
        console.print(f"[yellow]⚠ Failed to update acknowledgment: {e}[/yellow]")
        import traceback

        traceback.print_exc()
        # Don't fail the whole review if update fails
+
+
def post_mr_note(
    api_v4: str,
    token: str,
    project_id: str,
    mr_iid: str,
    body: str,
) -> None:
    """
    Post a standalone note (comment) to a merge request without creating a discussion.

    Args:
        api_v4: GitLab API v4 base URL
        token: GitLab API token
        project_id: Project ID
        mr_iid: Merge request IID
        body: Note content
    """
    # Thin pass-through: every argument maps 1:1 onto the infra-layer call.
    note_kwargs = {
        "api_v4": api_v4,
        "token": token,
        "project_id": project_id,
        "mr_iid": mr_iid,
        "body": body,
    }
    post_merge_request_note(**note_kwargs)
diff --git a/src/reviewbot/agent/workflow/hooks.py b/src/reviewbot/agent/workflow/hooks.py
new file mode 100644
index 0000000..e815ed5
--- /dev/null
+++ b/src/reviewbot/agent/workflow/hooks.py
@@ -0,0 +1,29 @@
+from typing import Any
+
+from langchain.agents.middleware import ( # type: ignore
+ AgentState,
+ before_agent,
+ before_model,
+)
+from langgraph.pregel.main import Runtime # type: ignore
+from rich.console import Console # type: ignore
+
+console = Console()
+
+
+@before_model(can_jump_to=["end"])
+def check_message_limit(state: AgentState, runtime: Runtime) -> dict[str, Any] | None: # type: ignore
+ messages = state["messages"]
+ console.print("[blue]Before modelMessages:[/blue]")
+ console.print(messages[-5:])
+ console.print("[blue]Before model messages end.[/blue]")
+ return None
+
+
+@before_agent(can_jump_to=["end"])
+def check_agent_messages(state: AgentState, runtime: Runtime) -> dict[str, Any] | None: # type: ignore
+ messages = state["messages"]
+ console.print("[red]Before agent messages:[/red]")
+ console.print(messages[-5:])
+ console.print("[red]Before agent messages end.[/red]")
+ return None
diff --git a/src/reviewbot/agent/workflow/ignore.py b/src/reviewbot/agent/workflow/ignore.py
new file mode 100644
index 0000000..833d2f4
--- /dev/null
+++ b/src/reviewbot/agent/workflow/ignore.py
@@ -0,0 +1,155 @@
+import fnmatch
+from pathlib import Path
+
+from rich.console import Console # type: ignore
+
+from reviewbot.infra.gitlab.diff import FileDiff
+
+console = Console()
+
# Global blacklist for common files that typically don't need code review.
# Each entry is an fnmatch-style glob; should_ignore_file() matches patterns
# against the repo-relative path and, for patterns without a "/", against the
# bare filename as well.
GLOBAL_REVIEW_BLACKLIST = [
    # Dependency management files
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    "Gemfile.lock",
    "Pipfile.lock",
    "poetry.lock",
    "composer.lock",
    "go.sum",
    "go.mod",
    "Cargo.lock",
    # Build and distribution files
    "*.min.js",
    "*.min.css",
    "*.map",
    "dist/*",
    "build/*",
    "*.pyc",
    "*.pyo",
    "*.so",
    "*.dll",
    "*.exe",
    "*.o",
    "*.a",
    # Generated files
    "*.generated.*",
    "*_pb2.py",
    "*_pb2_grpc.py",
    "*.pb.go",
    # Documentation and assets
    "*.png",
    "*.jpg",
    "*.jpeg",
    "*.gif",
    "*.svg",
    "*.ico",
    "*.woff",
    "*.woff2",
    "*.ttf",
    "*.eot",
    # IDE and editor files
    ".vscode/*",
    ".idea/*",
    "*.swp",
    "*.swo",
    "*~",
]
+
+
def parse_reviewignore(repo_path: Path) -> list[str]:
    """
    Read ignore patterns from the repository's ``.reviewignore`` file.

    Blank lines and ``#`` comment lines are skipped.  When the file is
    missing an empty list is returned; a read failure returns whatever was
    parsed before the error (a warning is printed in both cases).

    Args:
        repo_path: Path to the repository root

    Returns:
        List of glob patterns to ignore
    """
    ignore_file = repo_path / ".reviewignore"

    if not ignore_file.exists():
        console.print("[dim].reviewignore file not found, using global blacklist only[/dim]")
        return []

    patterns: list[str] = []
    try:
        with open(ignore_file, encoding="utf-8") as fh:
            for raw_line in fh:
                entry = raw_line.strip()
                # Keep only non-empty, non-comment entries.
                if entry and not entry.startswith("#"):
                    patterns.append(entry)

        console.print(f"[dim]Loaded {len(patterns)} patterns from .reviewignore[/dim]")
    except Exception as e:
        console.print(f"[yellow]Warning: Failed to read .reviewignore: {e}[/yellow]")

    return patterns
+
+
def should_ignore_file(file_path: str, reviewignore_patterns: list[str]) -> bool:
    """
    Check if a file should be ignored based on .reviewignore patterns and the
    global blacklist.

    Patterns are matched with fnmatch against the normalized repo-relative
    path; patterns without a "/" are additionally matched against the bare
    filename.

    Args:
        file_path: Path to the file (relative to repo root)
        reviewignore_patterns: Patterns from .reviewignore file

    Returns:
        True if the file should be ignored, False otherwise
    """
    # Normalize the path: drop leading "./" prefixes and leading slashes.
    # FIX: the previous str.lstrip("./") stripped *any* run of leading '.'
    # and '/' characters, so dotfiles like ".env" became "env" (and "../x"
    # became "x") and were matched against the wrong names.
    normalized_path = file_path
    while normalized_path.startswith("./"):
        normalized_path = normalized_path[2:]
    normalized_path = normalized_path.lstrip("/")
    file_name = Path(normalized_path).name

    # Global blacklist first, then repository-specific patterns.
    for pattern_group in (GLOBAL_REVIEW_BLACKLIST, reviewignore_patterns):
        for pattern in pattern_group:
            if fnmatch.fnmatch(normalized_path, pattern):
                return True
            # Also check just the filename for non-path patterns
            if "/" not in pattern and fnmatch.fnmatch(file_name, pattern):
                return True

    return False
+
+
def filter_diffs(diffs: list[FileDiff], reviewignore_patterns: list[str]) -> list[FileDiff]:
    """
    Drop diffs whose file path matches an ignore pattern.

    Args:
        diffs: List of file diffs
        reviewignore_patterns: Patterns from .reviewignore file

    Returns:
        Filtered list of diffs
    """
    kept: list[FileDiff] = []
    ignored = 0

    for diff in diffs:
        # Prefer the post-change path; fall back to the pre-change one.
        path = diff.new_path or diff.old_path

        if path and should_ignore_file(path, reviewignore_patterns):
            console.print(f"[dim]⊘ Ignoring {path}[/dim]")
            ignored += 1
            continue
        kept.append(diff)

    if ignored:
        console.print(f"[cyan]Filtered out {ignored} file(s) based on ignore patterns[/cyan]")

    return kept
diff --git a/src/reviewbot/agent/workflow/runner.py b/src/reviewbot/agent/workflow/runner.py
new file mode 100644
index 0000000..5940a63
--- /dev/null
+++ b/src/reviewbot/agent/workflow/runner.py
@@ -0,0 +1,183 @@
+from pathlib import Path
+from typing import Any
+
+from langchain.agents import create_agent # type: ignore
+from rich.console import Console # type: ignore
+
+from reviewbot.agent.base import ( # type: ignore
+ AgentRunnerInput,
+ agent_runner, # type: ignore
+)
+from reviewbot.agent.tasks.core import ToolCallerSettings
+from reviewbot.agent.workflow.config import GitLabConfig
+from reviewbot.agent.workflow.discussions import handle_file_issues
+from reviewbot.agent.workflow.gitlab_notes import (
+ post_review_acknowledgment,
+ update_review_summary,
+)
+from reviewbot.agent.workflow.ignore import filter_diffs, parse_reviewignore
+from reviewbot.context import Context, store_manager_ctx
+from reviewbot.core.agent import Agent
+from reviewbot.core.config import Config
+from reviewbot.core.issues import Issue
+from reviewbot.infra.embeddings.store_manager import CodebaseStoreManager
+from reviewbot.infra.git.clone import clone_repo_persistent, get_repo_name
+from reviewbot.infra.git.repo_tree import tree
+from reviewbot.infra.gitlab.clone import build_clone_url
+from reviewbot.infra.gitlab.diff import fetch_mr_diffs, get_mr_branch
+from reviewbot.infra.issues.in_memory_issue_store import InMemoryIssueStore
+from reviewbot.models.gpt import get_gpt_model, get_gpt_model_low_effort
+from reviewbot.tools import get_diff, read_file, think
+
+console = Console()
+
+
def work_agent(config: Config, project_id: str, mr_iid: str) -> str:
    """
    Run a full MR review: clone the repo, review each changed file, and post
    GitLab discussions plus a summary note.

    Args:
        config: Application configuration (GitLab endpoint/token, LLM settings)
        project_id: GitLab project ID
        mr_iid: Merge request IID within the project

    Returns:
        A human-readable status string on success.

    Raises:
        Re-raises any exception from the review pipeline after logging it;
        the context variable is always reset.
    """
    api_v4 = config.gitlab_api_v4 + "/api/v4"
    token = config.gitlab_token
    model = get_gpt_model(config.llm_model_name, config.llm_api_key, config.llm_base_url)

    clone_url = build_clone_url(api_v4, project_id, token)

    diffs, diff_refs = fetch_mr_diffs(api_v4, project_id, mr_iid, token)

    # Limit tool calls to prevent agent from wandering.
    # Per file: get_diff (1) + a few read_file/think calls for context = 5 max.
    settings = ToolCallerSettings(max_tool_calls=5, max_iterations=10)

    # Only provide essential tools - remove search tools to prevent wandering
    tools = [
        get_diff,  # Primary tool: get the diff for the file
        read_file,  # Optional: get additional context if needed
        think,  # Internal reasoning and thought process
    ]

    agent: Agent = create_agent(
        model=model,
        tools=tools,
        # middleware=[check_message_limit, check_agent_messages],  # type: ignore
    )
    branch = get_mr_branch(api_v4, project_id, mr_iid, token)
    repo_path = clone_repo_persistent(clone_url, branch=branch)
    repo_path = Path(repo_path).resolve()
    repo_tree = tree(repo_path)

    # Parse .reviewignore and filter diffs
    reviewignore_patterns = parse_reviewignore(repo_path)
    filtered_diffs = filter_diffs(diffs, reviewignore_patterns)
    console.print(f"[cyan]Reviewing {len(filtered_diffs)} out of {len(diffs)} changed files[/cyan]")

    manager = CodebaseStoreManager()
    manager.set_repo_root(repo_path)
    manager.set_repo_name(get_repo_name(repo_path))
    manager.set_tree(repo_tree)
    manager.set_diffs(filtered_diffs)  # Use filtered diffs instead of all diffs
    manager.get_store()

    issue_store = InMemoryIssueStore()
    token_ctx = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store))

    # Everything after set() runs inside try/finally so the context variable is
    # reset even if acknowledgment posting or agent setup fails. (Previously the
    # try started only after the acknowledgment, leaking the context on early
    # errors.)
    try:
        context = store_manager_ctx.get()

        # Create GitLab configuration
        gitlab_config = GitLabConfig(
            api_v4=api_v4,
            token=token,
            project_id=project_id,
            mr_iid=mr_iid,
        )

        # Create a low-effort agent for simple tasks like acknowledgments and quick scans
        low_effort_model = get_gpt_model_low_effort(
            config.llm_model_name, config.llm_api_key, config.llm_base_url
        )
        low_effort_agent: Agent = create_agent(
            model=low_effort_model,
            tools=[get_diff, think],  # Only needs get_diff for quick scanning
        )

        # Post acknowledgment that review is starting
        console.print("[dim]Posting review acknowledgment...[/dim]")
        acknowledgment_ids = post_review_acknowledgment(
            api_v4=api_v4,
            token=token,
            project_id=project_id,
            mr_iid=mr_iid,
            agent=low_effort_agent,
            diffs=filtered_diffs,
        )
        if acknowledgment_ids:
            console.print(
                f"[dim]Acknowledgment created: discussion={acknowledgment_ids[0]}, note={acknowledgment_ids[1]}[/dim]"
            )
        else:
            console.print("[yellow]⚠ Failed to create acknowledgment (returned None)[/yellow]")

        # Define callback to create discussions as each file's review completes
        def on_file_review_complete(file_path: str, issues: list[Any]) -> None:
            """Callback called when a file's review completes."""
            if not issues:
                console.print(f"[dim]No issues found in {file_path}, skipping discussion[/dim]")
                return
            if not config.create_threads:
                console.print(
                    f"[dim]Thread creation disabled, deferring issues in {file_path} to summary[/dim]"
                )
                return

            handle_file_issues(file_path, issues, gitlab_config, filtered_diffs, diff_refs)

        # Pass the callback to the agent runner
        issues: list[Issue] = agent_runner.invoke(  # type: ignore
            AgentRunnerInput(
                agent=agent,
                context=context,
                settings=settings,
                on_file_complete=on_file_review_complete,
                quick_scan_agent=low_effort_agent,
            )
        )

        console.print(f"[bold cyan]📊 Total issues found: {len(issues)}[/bold cyan]")

        # Update the acknowledgment note with summary
        console.print(f"[dim]Checking acknowledgment_ids: {acknowledgment_ids}[/dim]")
        if acknowledgment_ids:
            discussion_id, note_id = acknowledgment_ids
            console.print(
                f"[dim]Calling update_review_summary for discussion {discussion_id}, note {note_id}...[/dim]"
            )
            update_review_summary(
                api_v4=api_v4,
                token=token,
                project_id=project_id,
                mr_iid=mr_iid,
                discussion_id=discussion_id,
                note_id=note_id,
                issues=issues,
                diffs=filtered_diffs,
                diff_refs=diff_refs,
                agent=low_effort_agent,
            )
            console.print("[dim]update_review_summary completed[/dim]")
        else:
            console.print(
                "[yellow]⚠ No acknowledgment to update (initial acknowledgment may have failed)[/yellow]"
            )

        # Discussions are now created as reviews complete, but we still need to
        # handle any files that might have been processed but had no issues
        # (though the callback already handles this case)

        console.print("[bold green]🎉 All reviews completed and discussions created![/bold green]")
        return "Review completed successfully"

    except Exception as e:
        console.print(f"[bold red]❌ Error during review: {e}[/bold red]")
        import traceback

        traceback.print_exc()
        raise
    finally:
        store_manager_ctx.reset(token_ctx)
diff --git a/src/reviewbot/core/issues/issue.py b/src/reviewbot/core/issues/issue.py
index 05d80ba..645dbfd 100644
--- a/src/reviewbot/core/issues/issue.py
+++ b/src/reviewbot/core/issues/issue.py
@@ -24,3 +24,4 @@ class Issue:
created_at: datetime = field(default_factory=datetime.now)
updated_at: datetime = field(default_factory=datetime.now)
discussion_id: str | None = None
+ note_id: str | None = None
diff --git a/src/reviewbot/core/issues/issue_model.py b/src/reviewbot/core/issues/issue_model.py
index a78d64b..148ce64 100644
--- a/src/reviewbot/core/issues/issue_model.py
+++ b/src/reviewbot/core/issues/issue_model.py
@@ -14,6 +14,8 @@ class IssueModel(BaseModel):
severity: IssueSeverity
status: str
suggestion: str | None = None # Optional code suggestion to fix the issue
+ discussion_id: str | None = None
+ note_id: str | None = None
def to_domain(self) -> Issue:
return Issue(**self.model_dump())
diff --git a/src/reviewbot/infra/gitlab/note.py b/src/reviewbot/infra/gitlab/note.py
index 5807e05..2c859b1 100644
--- a/src/reviewbot/infra/gitlab/note.py
+++ b/src/reviewbot/infra/gitlab/note.py
@@ -66,7 +66,7 @@ def post_discussion(
or "line_code" in position
or "line_range" in position # Support multi-line positions
)
- if has_line_info:
+ if has_line_info or position["position_type"] == "file":
data["position"] = position
else:
# Position is incomplete, skip it for file-level discussions
@@ -117,7 +117,7 @@ def post_discussion_reply(
discussion_id: str,
body: str,
timeout: int = 30,
-) -> None:
+) -> str | None:
url = f"{api_v4.rstrip('/')}/projects/{project_id}/merge_requests/{merge_request_id}/discussions/{discussion_id}/notes"
r = requests.post(
url,
@@ -126,6 +126,10 @@ def post_discussion_reply(
timeout=timeout,
)
r.raise_for_status()
+ try:
+ return r.json().get("id")
+ except Exception:
+ return None
# Wrapper functions for easier use
@@ -165,11 +169,11 @@ def reply_to_discussion(
token: str,
project_id: str,
mr_iid: str,
-) -> None:
+) -> str | None:
"""
- Reply to an existing discussion.
+ Reply to an existing discussion and return the note ID if available.
"""
- post_discussion_reply(
+ return post_discussion_reply(
api_v4=api_v4,
token=token,
project_id=project_id,
From 15081b90178de5facf22a7f9796571693feaff94 Mon Sep 17 00:00:00 2001
From: canefe <8518141+canefe@users.noreply.github.com>
Date: Wed, 7 Jan 2026 18:33:10 +0400
Subject: [PATCH 2/2] feat: validate issues
---
README.md | 4 +-
src/reviewbot/agent/base.py | 2 +-
src/reviewbot/agent/tasks/issues.py | 189 +++++++++++++++++++++++-----
3 files changed, 160 insertions(+), 35 deletions(-)
diff --git a/README.md b/README.md
index 6fd2686..11fe534 100644
--- a/README.md
+++ b/README.md
@@ -20,8 +20,8 @@ posts actionable review notes back to the MR.
Reviewbot wires together diff fetching, codebase context, and LLM reasoning to automate code
reviews on GitLab:
-- **Multi-Agent Review Flow**. Coordinates tasks like diff inspection, context lookup, and issue
- synthesis
+- **Multi-Agent Review Flow**. Coordinates tasks like diff inspection, context lookup, issue
+ synthesis, and issue validation to reduce hallucinations
- **MR-Centric**. Works on GitLab MRs and posts discussions/notes back to the MR
- **Codebase Context**. Optional embeddings + search for better review depth
- **Ignore Rules**. Supports `.reviewignore` and global ignore patterns to skip noise
diff --git a/src/reviewbot/agent/base.py b/src/reviewbot/agent/base.py
index de6d5f8..886a2dd 100644
--- a/src/reviewbot/agent/base.py
+++ b/src/reviewbot/agent/base.py
@@ -39,7 +39,7 @@ def agent_runner(input: AgentRunnerInput) -> list[Issue]:
if not store_manager:
raise ValueError("Store manager not found")
- # Step 1: Identify the issues
+ # Step 1: Identify and validate issues
issues = identify_issues(
ctx=IssuesInput(
agent=agent,
diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py
index 41d0197..2842c04 100644
--- a/src/reviewbot/agent/tasks/issues.py
+++ b/src/reviewbot/agent/tasks/issues.py
@@ -1,3 +1,4 @@
+import json
import threading
import time
from collections.abc import Callable
@@ -514,15 +515,7 @@ def review_single_file(
- Hypothetical edge cases without evidence they're relevant
- Refactoring suggestions unless current code is broken
- Version numbers, import paths, or package versions you're unfamiliar with
-
-CONTEXT AWARENESS:
-- If you need to verify package versions, you can use read_file to check:
- - Go: go.mod, go.sum
- - Python: requirements.txt, pyproject.toml, Pipfile
- - Node: package.json, package-lock.json
- - Rust: Cargo.toml, Cargo.lock
-- Use this to understand what versions are ACTUALLY being used in the project
-- Trust the dependency files over your training data
+- Missing imports
Be specific and reference exact line numbers from the diff."""
),
@@ -531,13 +524,12 @@ def review_single_file(
INSTRUCTIONS:
1. Use the get_diff("{file_path}") tool ONCE to retrieve the diff
-2. Review the diff content directly - DO NOT search for other files or read other files unless absolutely necessary
+2. Review the diff content directly - read other files if absolutely necessary for more context
3. Output your findings immediately in JSON format
Analyze ONLY this file's diff. If you find legitimate issues, output them in JSON format.
If there are no real issues, output an empty array: []
-
-Be efficient with your tool calls, they are limited, so use them wisely."""
+"""
),
]
@@ -552,26 +544,11 @@ def review_single_file(
f"Raw response: {raw[:200]}..." if len(str(raw)) > 200 else f"Raw response: {raw}"
)
- # Parse issues from response
- issues: list[IssueModel] = []
- if isinstance(raw, str):
- try:
- import json
-
- parsed = json.loads(raw)
- if isinstance(parsed, list):
- for issue_data in parsed:
- try:
- issues.append(IssueModel.model_validate(issue_data))
- except Exception as e:
- console.print(f"[yellow]Failed to validate issue: {e}[/yellow]")
- elif isinstance(parsed, dict):
- try:
- issues.append(IssueModel.model_validate(parsed))
- except Exception as e:
- console.print(f"[yellow]Failed to validate issue: {e}[/yellow]")
- except json.JSONDecodeError as e:
- console.print(f"[red]Failed to parse JSON for {file_path}: {e}[/red]")
+ issues = parse_issues_from_response(raw, file_path, "review")
+
+ if issues:
+ # Validate issues against the diff to reduce hallucinations before creating notes.
+ issues = validate_issues_for_file(agent, file_path, issues, settings)
console.print(f"[blue]Found {len(issues)} issues in {file_path}[/blue]")
return issues
@@ -582,3 +559,151 @@ def review_single_file(
traceback.print_exc()
return []
+
+
def parse_issues_from_response(
    raw: Any,
    file_path: str,
    context_label: str,
) -> list[IssueModel]:
    """
    Parse a raw agent response into validated IssueModel objects.

    Args:
        raw: Raw agent output; only str payloads are parsed, anything else
            yields no issues.
        file_path: File the response refers to (used in error messages).
        context_label: Short tag identifying the caller (e.g. "review") for
            log messages.

    Returns:
        The issues that validated successfully. Invalid entries are logged and
        skipped rather than aborting the whole batch; malformed JSON yields [].
    """
    if not isinstance(raw, str):
        return []

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as e:
        console.print(f"[red]Failed to parse JSON for {file_path} ({context_label}): {e}[/red]")
        return []

    # Normalize a single-object response to a one-element list so both shapes
    # share the validation loop below (previously duplicated per branch).
    if isinstance(parsed, dict):
        candidates = [parsed]
    elif isinstance(parsed, list):
        candidates = parsed
    else:
        candidates = []

    issues: list[IssueModel] = []
    for issue_data in candidates:
        try:
            issues.append(IssueModel.model_validate(issue_data))
        except Exception as e:
            console.print(f"[yellow]Failed to validate issue: {e}[/yellow]")
    return issues
+
+
+def validate_issues_for_file(
+ agent: Any,
+ file_path: str,
+ issues: list[IssueModel],
+ settings: ToolCallerSettings,
+) -> list[IssueModel]:
+ if not issues:
+ return []
+
+ try:
+ from reviewbot.tools import get_diff as get_diff_tool
+
+ diff_content = get_diff_tool.invoke({"file_path": file_path})
+ except Exception as e:
+ console.print(f"[yellow]Issue validation skipped for {file_path}: {e}[/yellow]")
+ return []
+
+ # Use JSON-friendly payload so enums serialize cleanly.
+ issues_payload = [issue.model_dump(mode="json") for issue in issues]
+ messages: list[BaseMessage] = [
+ SystemMessage(
+ content=(
+ "You are an issue checker. Validate each issue strictly against the diff.\n"
+ "Keep an issue ONLY if the diff provides direct evidence that the issue is real.\n"
+ "Do NOT create new issues and do NOT modify fields. For any removed issue, provide\n"
+ "a short reason grounded in the diff. Do not use tools."
+ )
+ ),
+ HumanMessage(
+ content=f"""File: {file_path}
+
+Diff:
+```diff
+{diff_content}
+```
+
+Issues to validate (JSON):
+{json.dumps(issues_payload, indent=2)}
+
+Return ONLY a JSON object in this exact shape:
+{{
+ "valid_issues": [