From 3d4d3308f83b07504d8532ca07f39d0bd35c3d97 Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 2 Jan 2026 15:44:50 +0400 Subject: [PATCH 1/8] feat: enhance reviewbot with new dependencies and quick scan functionality - Added `fastapi` and `uvicorn` as dependencies for improved API capabilities. - Introduced `ruff` for linting and code formatting, with configuration settings in `pyproject.toml`. - Implemented a low-effort agent for quick scanning of files to determine if deep review is necessary. - Updated the review process to include quick scans before detailed analysis, optimizing review efficiency. - Refactored various functions to support the new quick scan feature and improve code readability. --- .reviewignore.example | 38 +++ Dockerfile | 26 ++ examples/webhook_api.py => api.py | 69 +++--- docker-compose.dev.yml | 41 ++++ pyproject.toml | 24 ++ src/reviewbot/agent/base.py | 3 + src/reviewbot/agent/tasks/issues.py | 76 +++++- src/reviewbot/agent/workflow.py | 361 ++++++++++++++++++++++------ src/reviewbot/infra/gitlab/note.py | 43 ++++ src/reviewbot/models/gpt.py | 24 +- uv.lock | 49 +++- 11 files changed, 641 insertions(+), 113 deletions(-) create mode 100644 .reviewignore.example create mode 100644 Dockerfile rename examples/webhook_api.py => api.py (70%) create mode 100644 docker-compose.dev.yml diff --git a/.reviewignore.example b/.reviewignore.example new file mode 100644 index 0000000..ecb1212 --- /dev/null +++ b/.reviewignore.example @@ -0,0 +1,38 @@ +# .reviewignore - Patterns for files to exclude from code review +# This file works similar to .gitignore with glob pattern matching + +# Dependency lock files (already in global blacklist, but shown as example) +# go.sum +# go.mod +# package-lock.json + +# Configuration files you might want to skip +# *.yaml +# *.yml +# *.json + +# Test fixtures or mock data +# test/fixtures/* +# **/mocks/* +# **/__snapshots__/* + +# Generated code or build artifacts +# **/*.generated.ts +# **/*.pb.go +# dist/* +# build/* + +# Documentation that doesn't need review +# docs/* +# *.md + +# Specific files you want to exclude +# legacy/old-code.py +# vendor/* + +# Pattern examples: +# - Exact filename: config.json +# - Wildcard: *.min.js +# - Directory: node_modules/* +# - Nested: **/test/**/*.txt +# - Multiple extensions: *.{jpg,png,gif} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..17ad0bf --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.12-slim + +WORKDIR /app + +# base Linux tooling + git +RUN apt-get update && apt-get install -y \ + git \ + bash \ + findutils \ + coreutils \ + ca-certificates \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# install uv +RUN pip install --no-cache-dir uv + +# copy project +COPY . . + +# install deps +RUN uv sync --frozen + +EXPOSE 8122 + +CMD ["uv", "run", "uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8122"] \ No newline at end of file diff --git a/examples/webhook_api.py b/api.py similarity index 70% rename from examples/webhook_api.py rename to api.py index 07c0194..fdf2130 100644 --- a/examples/webhook_api.py +++ b/api.py @@ -1,33 +1,31 @@ -import asyncio import os -import re import dotenv import requests -from fastapi import FastAPI, HTTPException, Request +from fastapi import BackgroundTasks, FastAPI, HTTPException, Request from fastapi.responses import JSONResponse -from src.reviewbot.main import post_merge_request_note, work_agent +from src.reviewbot.agent.workflow import post_mr_note, work_agent +from src.reviewbot.infra.config.env import load_env dotenv.load_dotenv() app = FastAPI() -GITLAB_SECRET = os.environ.get("GITLAB_WEBHOOK_SECRET") -GITLAB_TOKEN = os.environ.get("GITLAB_BOT_TOKEN") -GITLAB_API_V4 = os.environ.get("GITLAB_API_V4_URL") -BOT_USERNAME = os.environ.get("GITLAB_BOT_USERNAME") # without @ +def get_required_env(key: str) -> str: + """Get required environment variable or raise error with helpful message.""" + value = os.environ.get(key) + if not value: + raise ValueError(f"Missing required environment variable: {key}") + return value -def post_mr_note(project_id: str, mr_iid: str, body: str): - url = f"{GITLAB_API_V4.rstrip('/')}/projects/{project_id}/merge_requests/{mr_iid}/notes" - r = requests.post( - url, - headers={"PRIVATE-TOKEN": GITLAB_TOKEN}, - data={"body": body}, - timeout=30, - ) - r.raise_for_status() + +GITLAB_SECRET = get_required_env("GITLAB_WEBHOOK_SECRET") +GITLAB_TOKEN = get_required_env("GITLAB_BOT_TOKEN") +GITLAB_API_V4 = get_required_env("GITLAB_API_V4_URL") + "/api/v4" +BOT_USERNAME = os.environ.get("GITLAB_BOT_USERNAME") +BOT_ID = get_required_env("GITLAB_BOT_AUTHOR_ID") def get_pipeline_status(project_id: str, pipeline_id: int) -> str: @@ -61,7 +59,7 @@ def pipeline_passed(project_id: str, pipeline_id: int) -> bool: @app.post("/webhook") -async def gitlab_webhook(req: Request): +async def gitlab_webhook(req: Request, background_tasks: BackgroundTasks): token = req.headers.get("X-Gitlab-Token") if GITLAB_SECRET and token != GITLAB_SECRET: raise HTTPException(status_code=403, detail="Invalid token") @@ -82,9 +80,12 @@ async def gitlab_webhook(req: Request): return JSONResponse({"ignored": "bot note"}) text = note.get("note", "") - pattern = rf"(?:/review\b.*@{re.escape(BOT_USERNAME)}|@{re.escape(BOT_USERNAME)}.*?/review\b)" - if not re.search(pattern, text): - return JSONResponse({"ignored": "no /review command"}) + # pattern = rf"(?:/review\b.*@{re.escape(BOT_USERNAME)}|@{re.escape(BOT_USERNAME)}.*?/review\b)" + # if not re.search(pattern, text): + # return JSONResponse({"ignored": "no /review command"}) + + if text.strip() != "/reviewbot review": + return JSONResponse({"ignored": "not a review command"}) mr = payload.get("merge_request") if not mr: @@ -93,12 +94,12 @@ async def gitlab_webhook(req: Request): project_id = payload["project"]["id"] mr_iid = mr["iid"] - await asyncio.to_thread( + config = load_env() + background_tasks.add_task( work_agent, - GITLAB_API_V4, + config, project_id, mr_iid, - GITLAB_TOKEN, ) return JSONResponse({"status": "manual review triggered"}) @@ -112,38 +113,42 @@ async def gitlab_webhook(req: Request): detailed_status = attrs.get("detailed_status") project_id = payload["project"]["id"] - mr_iid = mr["iid"] if detailed_status not in ["passed", "failed"]: return JSONResponse({"ignored": "pipeline is not in a final state"}) + if not mr: + return JSONResponse({"ignored": "not an MR pipeline"}) + + mr_iid = mr["iid"] + if detailed_status != "passed": - post_merge_request_note( + post_mr_note( GITLAB_API_V4, GITLAB_TOKEN, project_id, mr_iid, - "Pipeline was not successful. If you want ReviewBot to review your changes, please re-run the pipeline, and make sure it passes. Or you can manually call ReviewBot by typing: \n\n @project_29_bot_5a466f228cb9d019289c41195219f291 /review", + "Pipeline was not successful. If you want ReviewBot to review your changes, please re-run the pipeline, and make sure it passes. Or you can manually call ReviewBot by typing: \n\n /reviewbot review", ) return JSONResponse({"ignored": "pipeline failed"}) # conditions if mr_has_conflicts(mr): - post_merge_request_note( + post_mr_note( GITLAB_API_V4, GITLAB_TOKEN, project_id, mr_iid, - "Merge conflicts present. Please resolve them and commit changes to re-run the pipeline. Or you can manually call ReviewBot by typing: \n\n @project_29_bot_5a466f228cb9d019289c41195219f291 /review", + "Merge conflicts present. Please resolve them and commit changes to re-run the pipeline. Or you can manually call ReviewBot by typing: \n\n /reviewbot review", ) return JSONResponse({"ignored": "merge conflicts present"}) - await asyncio.to_thread( + config = load_env() + background_tasks.add_task( work_agent, - GITLAB_API_V4, + config, project_id, mr_iid, - GITLAB_TOKEN, ) return JSONResponse({"status": "auto review triggered"}) diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml new file mode 100644 index 0000000..a457595 --- /dev/null +++ b/docker-compose.dev.yml @@ -0,0 +1,41 @@ +services: + gitlab: + image: gitlab/gitlab-ce:latest + container_name: gitlab + hostname: gitlab.local + restart: always + ports: + - "8081:80" + - "8443:443" + - "2222:22" + environment: + GITLAB_OMNIBUS_CONFIG: | + external_url 'http://gitlab.reviewbot.orb.local:8081' + gitlab_rails['gitlab_shell_ssh_port'] = 2222 + volumes: + - gitlab_config:/etc/gitlab + - gitlab_logs:/var/log/gitlab + - gitlab_data:/var/opt/gitlab + + reviewbot: + build: . + container_name: reviewbot + ports: + - "8122:8122" + env_file: + - .env + volumes: + - .:/app + - ./logs:/logs + command: > + sh -c "uv run uvicorn api:app + --host 0.0.0.0 + --port 8122 + --reload + 2>&1 | tee /logs/stdout.log" + restart: unless-stopped + +volumes: + gitlab_config: + gitlab_logs: + gitlab_data: diff --git a/pyproject.toml b/pyproject.toml index 40b33e1..49bfeb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ requires-python = ">=3.13" dependencies = [ "dotenv>=0.9.9", "faiss-cpu>=1.13.1", + "fastapi>=0.125.0", "langchain>=1.2.0", "langchain-community>=0.4.1", "langchain-google-genai>=4.1.2", @@ -18,6 +19,7 @@ dependencies = [ "rich>=14.2.0", "transformers>=4.57.3", "typer>=0.20.0", + "uvicorn>=0.40.0", "xai-review>=0.48.0", ] [tool.uv] @@ -27,8 +29,30 @@ reviewbot = "reviewbot.main:app" [tool.pyright] typeCheckingMode = "strict" +[tool.ruff] +line-length = 100 +target-version = "py313" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "B", # flake8-bugbear + "UP", # pyupgrade +] +ignore = [ + "E501", # line too long (handled by formatter) +] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" + [dependency-groups] dev = [ + "ruff>=0.8.6", "ty>=0.0.4", ] [project.optional-dependencies] diff --git a/src/reviewbot/agent/base.py b/src/reviewbot/agent/base.py index b9a9775..4b3b6f8 100644 --- a/src/reviewbot/agent/base.py +++ b/src/reviewbot/agent/base.py @@ -20,6 +20,7 @@ class AgentRunnerInput: context: Context settings: ToolCallerSettings = field(default_factory=ToolCallerSettings) on_file_complete: Optional[Callable[[str, List[IssueModel]], None]] = None + quick_scan_agent: Optional[Agent] = None @entrypoint() @@ -28,6 +29,7 @@ def agent_runner(input: AgentRunnerInput) -> List[Issue]: settings = input.settings context = input.context on_file_complete = input.on_file_complete + quick_scan_agent = input.quick_scan_agent issue_store = context.get("issue_store") if not issue_store: @@ -44,6 +46,7 @@ def agent_runner(input: AgentRunnerInput) -> List[Issue]: context=context, settings=settings, on_file_complete=on_file_complete, + quick_scan_agent=quick_scan_agent, ) ).result() diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py index e7591db..08be8f6 100644 --- a/src/reviewbot/agent/tasks/issues.py +++ b/src/reviewbot/agent/tasks/issues.py @@ -92,6 +92,7 @@ class IssuesInput: context: Context settings: ToolCallerSettings on_file_complete: Optional[Callable[[str, List[IssueModel]], None]] = None + quick_scan_agent: Optional[Agent] = None @task @@ -103,6 +104,7 @@ def identify_issues(ctx: IssuesInput) -> List[Issue]: context = ctx.context settings = ctx.settings on_file_complete = ctx.on_file_complete + quick_scan_agent = ctx.quick_scan_agent issue_store = context.get("issue_store") if not issue_store: @@ -124,7 +126,12 @@ def identify_issues(ctx: IssuesInput) -> List[Issue]: # Run concurrent reviews - pass the context values and callback all_issues = run_concurrent_reviews( - agent, diffs, settings, context, on_file_complete=on_file_complete + agent, + diffs, + settings, + context, + on_file_complete=on_file_complete, + quick_scan_agent=quick_scan_agent, ) # Convert to domain objects @@ -166,6 +173,7 @@ def run_concurrent_reviews( max_workers: int = 3, # Serial processing to avoid thread safety and rate limit issues task_timeout: int = 160, # 5 minutes timeout per file on_file_complete: Optional[Callable[[str, List[IssueModel]], None]] = None, + quick_scan_agent: Any | None = None, ) -> List[IssueModel]: """ Run concurrent reviews of all diff files with context propagation and monitoring. @@ -179,6 +187,8 @@ def run_concurrent_reviews( task_timeout: Timeout per task in seconds on_file_complete: Optional callback function called when each file's review completes. Receives (file_path, issues) as arguments. + quick_scan_agent: Optional low-effort agent for quick prerequisite scanning. + If provided, files are scanned first to determine if deep review is needed. """ diff_file_paths = [diff.new_path for diff in diffs] @@ -196,6 +206,7 @@ def run_concurrent_reviews( agent=agent, settings=settings, context=context, + quick_scan_agent=quick_scan_agent, ) # Submit tasks @@ -268,11 +279,67 @@ def run_concurrent_reviews( return all_issues +def quick_scan_file( + agent: Any, + file_path: str, + settings: ToolCallerSettings, +) -> bool: + """ + Quick scan with low-effort agent to determine if file needs deep review. + Returns True if file needs deep review, False otherwise. + """ + messages: List[BaseMessage] = [ + SystemMessage( + content="""You are a code review triage assistant. Your job is to quickly determine if a file change needs deep review. + +Review the diff and decide if this file needs detailed analysis. Return TRUE if ANY of these apply: +- New code that implements business logic +- Changes to security-sensitive code (auth, permissions, data validation) +- Database queries or migrations +- API endpoint changes +- Complex algorithms or data structures +- Error handling changes +- Configuration changes that affect behavior + +Return FALSE if: +- Only formatting/whitespace changes +- Simple refactoring (renaming variables/functions) +- Adding/updating comments or documentation only +- Import reordering +- Trivial changes (typo fixes in strings, adding logging) + +Output ONLY "true" or "false" (lowercase, no quotes).""" + ), + HumanMessage( + content=f"""Quickly scan this file and determine if it needs deep review: {file_path} + +Use get_diff("{file_path}") to see the changes, then respond with ONLY "true" or "false".""" + ), + ] + + try: + console.print(f"[dim]Quick scanning: {file_path}[/dim]") + raw = with_retry(tool_caller, settings, agent, messages, settings) + result = str(raw).strip().lower() + + needs_review = "true" in result + if needs_review: + console.print(f"[yellow]āœ“ Needs deep review: {file_path}[/yellow]") + else: + console.print(f"[dim]⊘ Skipping deep review: {file_path}[/dim]") + + return needs_review + except Exception as e: + console.print(f"[yellow]Quick scan failed for {file_path}, defaulting to deep review: {e}[/yellow]") + return True # If scan fails, do deep review to be safe + + def review_single_file_with_context( file_path: str, agent: Any, settings: ToolCallerSettings, context: Context, + quick_scan_agent: Any | None = None, ) -> List[IssueModel]: """ Wrapper that sets context before reviewing. @@ -284,6 +351,13 @@ def review_single_file_with_context( console.print(f"[dim]Context set for thread processing: {file_path}[/dim]") + # Quick scan first if agent provided + if quick_scan_agent: + needs_deep_review = quick_scan_file(quick_scan_agent, file_path, settings) + if not needs_deep_review: + console.print(f"[dim]Skipping deep review for: {file_path}[/dim]") + return [] + # Now call the actual review function return review_single_file(agent, file_path, settings) except Exception as e: diff --git a/src/reviewbot/agent/workflow.py b/src/reviewbot/agent/workflow.py index 676cf74..01384cd 100644 --- a/src/reviewbot/agent/workflow.py +++ b/src/reviewbot/agent/workflow.py @@ -2,7 +2,7 @@ import re from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any from langchain.agents import create_agent # type: ignore from langchain.agents.middleware import ( # type: ignore @@ -28,12 +28,14 @@ from reviewbot.infra.gitlab.clone import build_clone_url from reviewbot.infra.gitlab.diff import FileDiff, fetch_mr_diffs, get_mr_branch from reviewbot.infra.gitlab.note import ( + get_all_discussions, post_discussion, post_discussion_reply, post_merge_request_note, + update_discussion_note, ) from reviewbot.infra.issues.in_memory_issue_store import InMemoryIssueStore -from reviewbot.models.gpt import get_gpt_model +from reviewbot.models.gpt import get_gpt_model, get_gpt_model_low_effort from reviewbot.tools import ( get_diff, read_file, @@ -92,7 +94,7 @@ ] -def parse_reviewignore(repo_path: Path) -> List[str]: +def parse_reviewignore(repo_path: Path) -> list[str]: """ Parse .reviewignore file from the repository. @@ -106,13 +108,11 @@ def parse_reviewignore(repo_path: Path) -> List[str]: patterns = [] if not reviewignore_path.exists(): - console.print( - "[dim].reviewignore file not found, using global blacklist only[/dim]" - ) + console.print("[dim].reviewignore file not found, using global blacklist only[/dim]") return patterns try: - with open(reviewignore_path, "r", encoding="utf-8") as f: + with open(reviewignore_path, encoding="utf-8") as f: for line in f: # Strip whitespace line = line.strip() @@ -128,7 +128,7 @@ def parse_reviewignore(repo_path: Path) -> List[str]: return patterns -def should_ignore_file(file_path: str, reviewignore_patterns: List[str]) -> bool: +def should_ignore_file(file_path: str, reviewignore_patterns: list[str]) -> bool: """ Check if a file should be ignored based on .reviewignore patterns and global blacklist. @@ -161,9 +161,7 @@ def should_ignore_file(file_path: str, reviewignore_patterns: List[str]) -> bool return False -def filter_diffs( - diffs: List[FileDiff], reviewignore_patterns: List[str] -) -> List[FileDiff]: +def filter_diffs(diffs: list[FileDiff], reviewignore_patterns: list[str]) -> list[FileDiff]: """ Filter out diffs for files that should be ignored. @@ -188,9 +186,7 @@ def filter_diffs( filtered.append(diff) if ignored_count > 0: - console.print( - f"[cyan]Filtered out {ignored_count} file(s) based on ignore patterns[/cyan]" - ) + console.print(f"[cyan]Filtered out {ignored_count} file(s) based on ignore patterns[/cyan]") return filtered @@ -210,7 +206,7 @@ def _extract_code_from_diff(diff_patch: str, line_start: int, line_end: int) -> import re lines = diff_patch.splitlines(keepends=True) - result_lines: List[str] = [] + result_lines: list[str] = [] current_new_line = 0 current_old_line = 0 in_target_range = False @@ -229,17 +225,11 @@ def _extract_code_from_diff(diff_patch: str, line_start: int, line_end: int) -> current_old_line = old_start # Check if this hunk overlaps with our target range new_count = int(match.group(4)) if match.group(4) else 1 - in_target_range = ( - new_start <= line_end and (new_start + new_count) >= line_start - ) + in_target_range = new_start <= line_end and (new_start + new_count) >= line_start continue # Skip diff header lines - if ( - line.startswith("diff --git") - or line.startswith("---") - or line.startswith("+++") - ): + if line.startswith("diff --git") or line.startswith("---") or line.startswith("+++"): continue # Process diff lines - keep the prefixes to show the actual diff @@ -318,10 +308,11 @@ def post_review_acknowledgment( project_id: str, mr_iid: str, agent: Agent, - diffs: List[FileDiff], -) -> None: + diffs: list[FileDiff], +) -> tuple[str, str] | None: """ Post a surface-level summary acknowledging the review is starting. + Creates a discussion so it can be updated later. Only posts if no acknowledgment already exists to prevent duplicates. Args: @@ -331,31 +322,39 @@ def post_review_acknowledgment( mr_iid: Merge request IID agent: The agent to use for generating summary diffs: List of file diffs + + Returns: + Tuple of (discussion_id, note_id) if created, None if already exists or failed """ from langchain_core.messages import HumanMessage, SystemMessage - from reviewbot.infra.gitlab.note import get_merge_request_notes - # Check if an acknowledgment already exists try: - notes = get_merge_request_notes( + discussions = get_all_discussions( api_v4=api_v4, token=token, project_id=project_id, mr_iid=mr_iid, ) - # Check if any note contains the acknowledgment marker + # Check if any discussion contains the acknowledgment marker acknowledgment_marker = "šŸ¤– **Code Review Starting**" - for note in notes: - body = note.get("body", "") - if acknowledgment_marker in body: - console.print("[dim]Acknowledgment already exists, skipping[/dim]") - return + for discussion in discussions: + notes = discussion.get("notes", []) + for note in notes: + body = note.get("body", "") + if acknowledgment_marker in body: + console.print( + "[dim]Acknowledgment already exists, returning existing IDs[/dim]" + ) + # Return the existing discussion and note IDs + discussion_id = discussion.get("id") + note_id = note.get("id") + if discussion_id and note_id: + return (str(discussion_id), str(note_id)) + return None except Exception as e: - console.print( - f"[yellow]⚠ Could not check for existing acknowledgment: {e}[/yellow]" - ) + console.print(f"[yellow]⚠ Could not check for existing acknowledgment: {e}[/yellow]") # Continue anyway - better to post a duplicate than miss it # Get list of files being reviewed @@ -392,7 +391,7 @@ def post_review_acknowledgment( summary_settings = ToolCallerSettings(max_tool_calls=0, max_iterations=1) summary = tool_caller(agent, messages, summary_settings) - # Post as a general MR note + # Post as a discussion (so we can update it later) acknowledgment_body = f"""šŸ¤– **Code Review Starting** {summary} @@ -401,7 +400,7 @@ def post_review_acknowledgment( *Review powered by ReviewBot* """ - post_merge_request_note( + discussion_id = post_discussion( api_v4=api_v4, token=token, project_id=project_id, @@ -409,19 +408,194 @@ def post_review_acknowledgment( body=acknowledgment_body, ) + # Get the note ID from the discussion + # The first note in a discussion is the original note + discussions = get_all_discussions( + api_v4=api_v4, + token=token, + project_id=project_id, + mr_iid=mr_iid, + ) + + note_id = None + for discussion in discussions: + if str(discussion.get("id")) == str(discussion_id): + notes = discussion.get("notes", []) + if notes: + note_id = str(notes[0].get("id")) + break + + if not note_id: + console.print("[yellow]⚠ Created discussion but could not find note ID[/yellow]") + return None + console.print("[green]āœ“ Posted review acknowledgment[/green]") + return (str(discussion_id), note_id) except Exception as e: console.print(f"[yellow]⚠ Failed to post acknowledgment: {e}[/yellow]") # Don't fail the whole review if acknowledgment fails + return None + + +def update_review_summary( + api_v4: str, + token: str, + project_id: str, + mr_iid: str, + discussion_id: str, + note_id: str, + issues: list[Issue], + diffs: list[FileDiff], + agent: Agent, +) -> None: + """ + Update the acknowledgment note with a summary of the review results. + Uses LLM to generate reasoning and summary. + + Args: + api_v4: GitLab API v4 base URL + token: GitLab API token + project_id: Project ID + mr_iid: Merge request IID + discussion_id: Discussion ID of the acknowledgment + note_id: Note ID to update + issues: List of issues found during review + diffs: List of file diffs that were reviewed + agent: The agent to use for generating summary + """ + from langchain_core.messages import HumanMessage, SystemMessage + + # Count issues by severity + high_count = sum(1 for issue in issues if issue.severity == IssueSeverity.HIGH) + medium_count = sum(1 for issue in issues if issue.severity == IssueSeverity.MEDIUM) + low_count = sum(1 for issue in issues if issue.severity == IssueSeverity.LOW) + + # Group issues by file + issues_by_file: dict[str, list[Issue]] = {} + for issue in issues: + if issue.file_path not in issues_by_file: + issues_by_file[issue.file_path] = [] + issues_by_file[issue.file_path].append(issue) + + # Build structured statistics + total_files = len(diffs) + files_with_issues = len(issues_by_file) + + # Prepare issue details for LLM + issues_summary = [] + for issue in issues: + issues_summary.append( + f"- **{issue.severity.value.upper()}** in `{issue.file_path}` (lines {issue.start_line}-{issue.end_line}): {issue.description}" + ) + + issues_text = "\n".join(issues_summary) if issues_summary else "No issues found." + + # Generate LLM summary with reasoning + try: + from reviewbot.agent.tasks.core import ToolCallerSettings, tool_caller + + messages = [ + SystemMessage( + content="""You are a code review assistant. Generate a concise, professional summary of a code review with reasoning. + +IMPORTANT: +- Keep it SHORT (3-5 sentences max) +- Provide reasoning about the overall code quality +- Highlight key concerns or positive aspects +- Be constructive and professional +- DO NOT use any tools +- Focus on the big picture, not individual issue details""" + ), + HumanMessage( + content=f"""A code review has been completed with the following results: + +**Statistics:** +- Files reviewed: {total_files} +- Files with issues: {files_with_issues} +- Total issues: {len(issues)} + - High severity: {high_count} + - Medium severity: {medium_count} + - Low severity: {low_count} + +**Issues found:** +{issues_text} + +Generate a brief summary (3-5 sentences) that: +1. Provides overall assessment of the code quality +2. Highlights the most important concerns (if any) +3. Gives reasoning about the review findings +4. Is constructive and actionable + +If no issues were found, celebrate the good code quality.""" + ), + ] + + summary_settings = ToolCallerSettings(max_tool_calls=0, max_iterations=1) + llm_summary = tool_caller(agent, messages, summary_settings) + + except Exception as e: + console.print(f"[yellow]⚠ Failed to generate LLM summary: {e}[/yellow]") + llm_summary = "Review completed successfully." + + # Build final summary combining statistics and LLM reasoning + summary_parts = [ + "šŸ¤– **Code Review Complete**\n\n", + f"Reviewed **{total_files}** file(s), found **{len(issues)}** issue(s) across **{files_with_issues}** file(s).\n\n", + "**Summary:**\n", + f"{llm_summary}\n\n", + ] + + if issues: + summary_parts.append("**Issue Breakdown:**\n") + if high_count > 0: + summary_parts.append(f"- šŸ”“ **{high_count}** High severity\n") + if medium_count > 0: + summary_parts.append(f"- 🟠 **{medium_count}** Medium severity\n") + if low_count > 0: + summary_parts.append(f"- 🟢 **{low_count}** Low severity\n") + + summary_parts.append("\n**Issues by File:**\n") + for file_path, file_issues in sorted(issues_by_file.items()): + high = sum(1 for i in file_issues if i.severity == IssueSeverity.HIGH) + medium = sum(1 for i in file_issues if i.severity == IssueSeverity.MEDIUM) + low = sum(1 for i in file_issues if i.severity == IssueSeverity.LOW) + severity_badges = [] + if high > 0: + severity_badges.append(f"šŸ”“ {high}") + if medium > 0: + severity_badges.append(f"🟠 {medium}") + if low > 0: + severity_badges.append(f"🟢 {low}") + badges = " ".join(severity_badges) + summary_parts.append(f"- `{file_path}`: {len(file_issues)} issue(s) {badges}\n") + else: + summary_parts.append("\nāœ… **No issues found!**\n") + + summary_parts.append("\n---\n*Review powered by ReviewBot*") + + summary_body = "".join(summary_parts) + + try: + update_discussion_note( + api_v4=api_v4, + token=token, + project_id=project_id, + mr_iid=mr_iid, + discussion_id=discussion_id, + note_id=note_id, + body=summary_body, + ) + console.print("[green]āœ“ Updated review acknowledgment with summary[/green]") + except Exception as e: + console.print(f"[yellow]⚠ Failed to update acknowledgment: {e}[/yellow]") + # Don't fail the whole review if update fails def work_agent(config: Config, project_id: str, mr_iid: str) -> str: api_v4 = config.gitlab_api_v4 + "/api/v4" token = config.gitlab_token - model = get_gpt_model( - config.llm_model_name, config.llm_api_key, config.llm_base_url - ) + model = get_gpt_model(config.llm_model_name, config.llm_api_key, config.llm_base_url) clone_url = build_clone_url(api_v4, project_id, token) @@ -450,9 +624,7 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str: # Parse .reviewignore and filter diffs reviewignore_patterns = parse_reviewignore(repo_path) filtered_diffs = filter_diffs(diffs, reviewignore_patterns) - console.print( - f"[cyan]Reviewing {len(filtered_diffs)} out of {len(diffs)} changed files[/cyan]" - ) + console.print(f"[cyan]Reviewing {len(filtered_diffs)} out of {len(diffs)} changed files[/cyan]") manager = CodebaseStoreManager() manager.set_repo_root(repo_path) @@ -462,9 +634,7 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str: manager.get_store() issue_store = InMemoryIssueStore() - token_ctx = store_manager_ctx.set( - Context(store_manager=manager, issue_store=issue_store) - ) + token_ctx = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store)) context = store_manager_ctx.get() @@ -476,55 +646,72 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str: mr_iid=mr_iid, ) + # Create a low-effort agent for simple tasks like acknowledgments and quick scans + low_effort_model = get_gpt_model_low_effort( + config.llm_model_name, config.llm_api_key, config.llm_base_url + ) + low_effort_agent: Agent = create_agent( + model=low_effort_model, + tools=[get_diff], # Only needs get_diff for quick scanning + ) + # Post acknowledgment that review is starting - post_review_acknowledgment( + acknowledgment_ids = post_review_acknowledgment( api_v4=api_v4, token=token, project_id=project_id, mr_iid=mr_iid, - agent=agent, + agent=low_effort_agent, diffs=filtered_diffs, ) try: # Define callback to create discussions as each file's review completes - def on_file_review_complete(file_path: str, issues: List[Any]) -> None: + def on_file_review_complete(file_path: str, issues: list[Any]) -> None: """Callback called when a file's review completes.""" if not issues: - console.print( - f"[dim]No issues found in {file_path}, skipping discussion[/dim]" - ) + console.print(f"[dim]No issues found in {file_path}, skipping discussion[/dim]") return # Convert IssueModel to Issue domain objects from reviewbot.core.issues.issue_model import IssueModel - domain_issues = [ - issue.to_domain() for issue in issues if isinstance(issue, IssueModel) - ] - handle_file_issues( - file_path, domain_issues, gitlab_config, filtered_diffs, diff_refs - ) + domain_issues = [issue.to_domain() for issue in issues if isinstance(issue, IssueModel)] + handle_file_issues(file_path, domain_issues, gitlab_config, filtered_diffs, diff_refs) # Pass the callback to the agent runner - issues: List[Issue] = agent_runner.invoke( # type: ignore + issues: list[Issue] = agent_runner.invoke( # type: ignore AgentRunnerInput( agent=agent, context=context, settings=settings, on_file_complete=on_file_review_complete, + quick_scan_agent=low_effort_agent, ) ) console.print(f"[bold cyan]šŸ“Š Total issues found: {len(issues)}[/bold cyan]") + # Update the acknowledgment note with summary + if acknowledgment_ids: + discussion_id, note_id = acknowledgment_ids + update_review_summary( + api_v4=api_v4, + token=token, + project_id=project_id, + mr_iid=mr_iid, + discussion_id=discussion_id, + note_id=note_id, + issues=issues, + diffs=filtered_diffs, + agent=low_effort_agent, + ) + # Discussions are now created as reviews complete, but we still need to # handle any files that might have been processed but had no issues # (though the callback already handles this case) - console.print( - "[bold green]šŸŽ‰ All reviews completed and discussions created![/bold green]" - ) + console.print("[bold green]šŸŽ‰ All reviews completed and discussions created![/bold green]") return "Review completed successfully" except Exception as e: @@ -539,12 +726,10 @@ def on_file_review_complete(file_path: str, issues: List[Any]) -> None: def handle_file_issues( file_path: str, - issues: List[Issue], + issues: list[Issue], gitlab_config: GitLabConfig, - file_diffs: List[FileDiff], # Add this parameter - diff_refs: Dict[ - str, str - ], # Add this parameter (contains base_sha, head_sha, start_sha) + file_diffs: list[FileDiff], # Add this parameter + diff_refs: dict[str, str], # Add this parameter (contains base_sha, head_sha, start_sha) ) -> None: """ Create one discussion per file with the first issue, and reply with subsequent issues. @@ -559,9 +744,7 @@ def handle_file_issues( if not issues: return - console.print( - f"[cyan]Creating discussion for {file_path} with {len(issues)} issue(s)[/cyan]" - ) + console.print(f"[cyan]Creating discussion for {file_path} with {len(issues)} issue(s)[/cyan]") # Get the file diff once file_diff = next((fd for fd in file_diffs if fd.new_path == file_path), None) @@ -730,7 +913,7 @@ def create_position_for_issue( start_sha: str, old_path: str, new_path: str, -) -> Optional[Dict[str, Any]]: +) -> dict[str, Any] | None: """ Create a GitLab position object for a specific issue line range. @@ -744,9 +927,7 @@ def create_position_for_issue( Returns: Position dict for GitLab API, or None if line not found in diff """ - hunk_header_pattern = re.compile( - r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@" - ) + hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@") lines = diff_text.splitlines() current_old = 0 @@ -834,7 +1015,7 @@ def create_discussion( title: str, body: str, gitlab_config: GitLabConfig, - position: Dict[str, Any] | None = None, + position: dict[str, Any] | None = None, ) -> str: """ Create a discussion with title and body. @@ -884,3 +1065,29 @@ def reply_to_discussion( discussion_id=discussion_id, body=body, ) + + +def post_mr_note( + api_v4: str, + token: str, + project_id: str, + mr_iid: str, + body: str, +) -> None: + """ + Post a standalone note (comment) to a merge request without creating a discussion. + + Args: + api_v4: GitLab API v4 base URL + token: GitLab API token + project_id: Project ID + mr_iid: Merge request IID + body: Note content + """ + post_merge_request_note( + api_v4=api_v4, + token=token, + project_id=project_id, + mr_iid=mr_iid, + body=body, + ) diff --git a/src/reviewbot/infra/gitlab/note.py b/src/reviewbot/infra/gitlab/note.py index c02c620..ebd13e9 100644 --- a/src/reviewbot/infra/gitlab/note.py +++ b/src/reviewbot/infra/gitlab/note.py @@ -236,3 +236,46 @@ def get_merge_request_notes( ) r.raise_for_status() return r.json() + + +def update_discussion_note( + api_v4: str, + token: str, + project_id: str, + mr_iid: str, + discussion_id: str, + note_id: str, + body: str, + timeout: int = 30, +) -> None: + """ + Update a note in a discussion. + + Args: + api_v4: GitLab API v4 base URL + token: GitLab API token + project_id: Project ID + mr_iid: Merge request IID + discussion_id: Discussion ID + note_id: Note ID to update + body: New body content for the note + timeout: Request timeout + """ + url = f"{api_v4.rstrip('/')}/projects/{project_id}/merge_requests/{mr_iid}/discussions/{discussion_id}/notes/{note_id}" + + r = requests.put( + url, + headers={"PRIVATE-TOKEN": token}, + json={"body": body}, + timeout=timeout, + ) + + if r.status_code >= 400: + console.print(f"[red]Failed to update note: {r.status_code} {r.reason}[/red]") + try: + error_response = r.json() + console.print(f"[red]Error response: {error_response}[/red]") + except Exception: + console.print(f"[red]Error response text: {r.text}[/red]") + + r.raise_for_status() diff --git a/src/reviewbot/models/gpt.py b/src/reviewbot/models/gpt.py index ce2b79e..84201d9 100644 --- a/src/reviewbot/models/gpt.py +++ b/src/reviewbot/models/gpt.py @@ -12,12 +12,32 @@ def get_gpt_model( - llm_model_name: str, llm_api_key: SecretStr, base_url: str, temperature: float = 0.2 + llm_model_name: str, + llm_api_key: SecretStr, + base_url: str, + temperature: float = 0.2, + reasoning_effort: str = "medium", ): return ChatOpenAI( model=llm_model_name, api_key=llm_api_key, base_url=base_url, temperature=temperature, - reasoning_effort="medium", + reasoning_effort=reasoning_effort, + ) + + +def get_gpt_model_low_effort( + llm_model_name: str, + llm_api_key: SecretStr, + base_url: str, + temperature: float = 0.2, +): + """Get a GPT model with low reasoning effort for simple tasks like greetings.""" + return ChatOpenAI( + model=llm_model_name, + api_key=llm_api_key, + base_url=base_url, + temperature=temperature, + reasoning_effort="low", ) diff --git a/uv.lock b/uv.lock index 762453d..3f0bb95 100644 --- a/uv.lock +++ b/uv.lock @@ -1465,6 +1465,7 @@ source = { editable = "." } dependencies = [ { name = "dotenv" }, { name = "faiss-cpu" }, + { name = "fastapi" }, { name = "langchain" }, { name = "langchain-community" }, { name = "langchain-google-genai" }, @@ -1476,6 +1477,7 @@ dependencies = [ { name = "rich" }, { name = "transformers" }, { name = "typer" }, + { name = "uvicorn" }, { name = "xai-review" }, ] @@ -1486,6 +1488,7 @@ examples = [ [package.dev-dependencies] dev = [ + { name = "ruff" }, { name = "ty" }, ] @@ -1493,6 +1496,7 @@ dev = [ requires-dist = [ { name = "dotenv", specifier = ">=0.9.9" }, { name = "faiss-cpu", specifier = ">=1.13.1" }, + { name = "fastapi", specifier = ">=0.125.0" }, { name = "fastapi", marker = "extra == 'examples'" }, { name = "langchain", specifier = ">=1.2.0" }, { name = "langchain-community", specifier = ">=0.4.1" }, @@ -1505,12 +1509,16 @@ requires-dist = [ { name = "rich", specifier = ">=14.2.0" }, { name = "transformers", specifier = ">=4.57.3" }, { name = "typer", specifier = ">=0.20.0" }, + { name = "uvicorn", specifier = ">=0.40.0" }, { name = "xai-review", specifier = ">=0.48.0" }, ] provides-extras = ["examples"] [package.metadata.requires-dev] -dev = [{ name = "ty", specifier = ">=0.0.4" }] +dev = [ + { name = "ruff", specifier = ">=0.8.6" }, + { name = "ty", specifier = ">=0.0.4" }, +] [[package]] name = "rich" @@ -1537,6 +1545,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, ] +[[package]] +name = "ruff" +version = "0.14.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/08/52232a877978dd8f9cf2aeddce3e611b40a63287dfca29b6b8da791f5e8d/ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4", size = 5859763, upload-time = "2025-12-18T19:28:57.98Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/01/933704d69f3f05ee16ef11406b78881733c186fe14b6a46b05cfcaf6d3b2/ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49", size = 13527080, upload-time = "2025-12-18T19:29:25.642Z" }, + { url = "https://files.pythonhosted.org/packages/df/58/a0349197a7dfa603ffb7f5b0470391efa79ddc327c1e29c4851e85b09cc5/ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f", size = 13797320, upload-time = "2025-12-18T19:29:02.571Z" }, + { url = "https://files.pythonhosted.org/packages/7b/82/36be59f00a6082e38c23536df4e71cdbc6af8d7c707eade97fcad5c98235/ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d", size = 12918434, upload-time = "2025-12-18T19:28:51.202Z" }, + { url = "https://files.pythonhosted.org/packages/a6/00/45c62a7f7e34da92a25804f813ebe05c88aa9e0c25e5cb5a7d23dd7450e3/ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77", size = 13371961, upload-time = "2025-12-18T19:29:04.991Z" }, + { url = "https://files.pythonhosted.org/packages/40/31/a5906d60f0405f7e57045a70f2d57084a93ca7425f22e1d66904769d1628/ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a", size = 13275629, upload-time = "2025-12-18T19:29:21.381Z" }, + { url = "https://files.pythonhosted.org/packages/3e/60/61c0087df21894cf9d928dc04bcd4fb10e8b2e8dca7b1a276ba2155b2002/ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f", size = 14029234, upload-time = "2025-12-18T19:29:00.132Z" }, + { url = "https://files.pythonhosted.org/packages/44/84/77d911bee3b92348b6e5dab5a0c898d87084ea03ac5dc708f46d88407def/ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935", size = 15449890, upload-time = "2025-12-18T19:28:53.573Z" }, + { url = "https://files.pythonhosted.org/packages/e9/36/480206eaefa24a7ec321582dda580443a8f0671fdbf6b1c80e9c3e93a16a/ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e", size = 15123172, upload-time = "2025-12-18T19:29:23.453Z" }, + { url = "https://files.pythonhosted.org/packages/5c/38/68e414156015ba80cef5473d57919d27dfb62ec804b96180bafdeaf0e090/ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d", size = 14460260, upload-time = "2025-12-18T19:29:27.808Z" }, + { url = "https://files.pythonhosted.org/packages/b3/19/9e050c0dca8aba824d67cc0db69fb459c28d8cd3f6855b1405b3f29cc91d/ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f", size = 14229978, upload-time = "2025-12-18T19:29:11.32Z" }, + { url = "https://files.pythonhosted.org/packages/51/eb/e8dd1dd6e05b9e695aa9dd420f4577debdd0f87a5ff2fedda33c09e9be8c/ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f", size = 14338036, upload-time = "2025-12-18T19:29:09.184Z" }, + { url = "https://files.pythonhosted.org/packages/6a/12/f3e3a505db7c19303b70af370d137795fcfec136d670d5de5391e295c134/ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d", size = 13264051, upload-time = "2025-12-18T19:29:13.431Z" }, + { url = "https://files.pythonhosted.org/packages/08/64/8c3a47eaccfef8ac20e0484e68e0772013eb85802f8a9f7603ca751eb166/ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405", size = 13283998, upload-time = "2025-12-18T19:29:06.994Z" }, + { url = "https://files.pythonhosted.org/packages/12/84/534a5506f4074e5cc0529e5cd96cfc01bb480e460c7edf5af70d2bcae55e/ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60", size = 13601891, upload-time = "2025-12-18T19:28:55.811Z" }, + { url = "https://files.pythonhosted.org/packages/0d/1e/14c916087d8598917dbad9b2921d340f7884824ad6e9c55de948a93b106d/ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830", size = 14336660, upload-time = "2025-12-18T19:29:16.531Z" }, + { url = "https://files.pythonhosted.org/packages/f2/1c/d7b67ab43f30013b47c12b42d1acd354c195351a3f7a1d67f59e54227ede/ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6", size = 13196187, upload-time = "2025-12-18T19:29:19.006Z" }, + { url = "https://files.pythonhosted.org/packages/fb/9c/896c862e13886fae2af961bef3e6312db9ebc6adc2b156fe95e615dee8c1/ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154", size = 14661283, upload-time = "2025-12-18T19:29:30.16Z" }, + { url = "https://files.pythonhosted.org/packages/74/31/b0e29d572670dca3674eeee78e418f20bdf97fa8aa9ea71380885e175ca0/ruff-0.14.10-py3-none-win_arm64.whl", hash = "sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6", size = 13729839, upload-time = "2025-12-18T19:28:48.636Z" }, +] + [[package]] name = "safetensors" version = "0.7.0" @@ -1830,6 +1864,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/f9/52ab0359618987331a1f739af837d26168a4b16281c9c3ab46519940c628/uuid_utils-0.12.0-cp39-abi3-win_arm64.whl", hash = "sha256:c9bea7c5b2aa6f57937ebebeee4d4ef2baad10f86f1b97b58a3f6f34c14b4e84", size = 182975, upload-time = "2025-12-01T17:29:46.444Z" }, ] +[[package]] +name = "uvicorn" +version = "0.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, +] + [[package]] name = "websockets" version = "15.0.1" From 40a7a132c28fc68a066fd689fc7e73414b8e0176 Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 2 Jan 2026 15:52:02 +0400 Subject: [PATCH 2/8] chore: format --- .gitignore | 3 +- src/reviewbot/agent/tasks/core.py | 28 ++++--------- src/reviewbot/agent/tasks/issues.py | 36 +++++----------- src/reviewbot/infra/embeddings/openai.py | 8 +--- src/reviewbot/infra/git/clone.py | 10 +++-- src/reviewbot/infra/gitlab/diff.py | 16 ++------ src/reviewbot/infra/gitlab/note.py | 8 +--- src/reviewbot/tools/search_codebase.py | 4 +- tests/test_agent.py | 12 ++---- tests/test_gitlab_discussions.py | 52 ++++++------------------ 10 files changed, 54 insertions(+), 123 deletions(-) diff --git a/.gitignore b/.gitignore index 55c8a38..a1f458d 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,5 @@ wheels/ .DS_Store .env vectors/ -.env.* \ No newline at end of file +.env.* +logs/stdout.log diff --git a/src/reviewbot/agent/tasks/core.py b/src/reviewbot/agent/tasks/core.py index aca26c2..ce52257 100644 --- a/src/reviewbot/agent/tasks/core.py +++ b/src/reviewbot/agent/tasks/core.py @@ -35,9 +35,7 @@ class ToolCallerSettings: """ -def tool_caller( - agent: Any, messages: List[BaseMessage], settings: ToolCallerSettings -) -> str: +def tool_caller(agent: Any, messages: List[BaseMessage], settings: ToolCallerSettings) -> str: finished = False final_response = None max_tool_calls = settings.max_tool_calls @@ -74,22 +72,16 @@ def tool_caller( text_parts.append(block.get("text", "")) elif "text" in block: text_parts.append(block["text"]) - final_response = ( - "\n".join(text_parts) if text_parts else str(content) - ) + final_response = "\n".join(text_parts) if text_parts else str(content) else: final_response = content - console.print( - f"[dim]Got final response: {final_response[:100]}...[/dim]" - ) + console.print(f"[dim]Got final response: {final_response[:100]}...[/dim]") finished = True elif isinstance(latest_message, ToolMessage): total_tool_calls += 1 - console.print( - f"[dim]Tool call completed ({total_tool_calls} total)[/dim]" - ) + console.print(f"[dim]Tool call completed ({total_tool_calls} total)[/dim]") if max_tool_calls != -1 and total_tool_calls >= max_tool_calls: console.print( f"[yellow]Max tool calls ({max_tool_calls}) reached - forcing final response[/yellow]" @@ -111,7 +103,9 @@ def tool_caller( for block in final_response: if isinstance(block, dict) and block.get("type") == "text": text_parts.append(block.get("text", "")) - final_response = "\n".join(text_parts) if text_parts else str(final_response) + final_response = ( + "\n".join(text_parts) if text_parts else str(final_response) + ) else: final_response = "[]" # Empty array as fallback except Exception as e: @@ -128,9 +122,7 @@ def tool_caller( final_response = None if not isinstance(final_response, str): - console.print( - f"Final response is not a string: {final_response}, returning None" - ) + console.print(f"Final response is not a string: {final_response}, returning None") return "None" return final_response @@ -216,8 +208,6 @@ def tool_caller_stream( console.print(last_chunk) messages = last_chunk["messages"] if not isinstance(final_response, str): - console.print( - f"Final response is not a string : {final_response}, returning None" - ) + console.print(f"Final response is not a string : {final_response}, returning None") return "None" return final_response diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py index 08be8f6..5d5ecb4 100644 --- a/src/reviewbot/agent/tasks/issues.py +++ b/src/reviewbot/agent/tasks/issues.py @@ -46,9 +46,7 @@ def with_retry(func: Callable, settings: ToolCallerSettings, *args, **kwargs) -> # If this was the last attempt, raise the exception if attempt >= max_retries: - console.print( - f"[red]All {max_retries} retries failed. Last error: {e}[/red]" - ) + console.print(f"[red]All {max_retries} retries failed. Last error: {e}[/red]") raise # Calculate delay with exponential backoff @@ -74,9 +72,7 @@ def with_retry(func: Callable, settings: ToolCallerSettings, *args, **kwargs) -> console.print(f"[yellow]Non-retryable error encountered: {e}[/yellow]") raise - console.print( - f"[yellow]Attempt {attempt + 1}/{max_retries + 1} failed: {e}[/yellow]" - ) + console.print(f"[yellow]Attempt {attempt + 1}/{max_retries + 1} failed: {e}[/yellow]") console.print(f"[yellow]Retrying in {delay:.1f} seconds...[/yellow]") time.sleep(delay) @@ -192,9 +188,7 @@ def run_concurrent_reviews( """ diff_file_paths = [diff.new_path for diff in diffs] - console.print( - f"[bold]Starting concurrent review of {len(diff_file_paths)} files[/bold]" - ) + console.print(f"[bold]Starting concurrent review of {len(diff_file_paths)} files[/bold]") console.print(f"[dim]Files: {', '.join(diff_file_paths)}[/dim]\n") all_issues: List[IssueModel] = [] @@ -225,17 +219,13 @@ def run_concurrent_reviews( monitor_thread.start() # Process results with timeout - for future in as_completed( - future_to_file, timeout=task_timeout * len(diff_file_paths) - ): + for future in as_completed(future_to_file, timeout=task_timeout * len(diff_file_paths)): file_path = future_to_file[future] try: # Get result with per-task timeout issues = future.result(timeout=task_timeout) all_issues.extend(issues) - console.print( - f"[green]āœ“[/green] Processed {file_path}: {len(issues)} issues" - ) + console.print(f"[green]āœ“[/green] Processed {file_path}: {len(issues)} issues") # Call the callback if provided, allowing immediate discussion creation if on_file_complete: @@ -249,9 +239,7 @@ def run_concurrent_reviews( traceback.print_exc() except TimeoutError: - console.print( - f"[red]āœ—[/red] TIMEOUT: {file_path} took longer than {task_timeout}s" - ) + console.print(f"[red]āœ—[/red] TIMEOUT: {file_path} took longer than {task_timeout}s") except Exception as e: console.print(f"[red]āœ—[/red] Failed {file_path}: {e}") import traceback @@ -330,7 +318,9 @@ def quick_scan_file( return needs_review except Exception as e: - console.print(f"[yellow]Quick scan failed for {file_path}, defaulting to deep review: {e}[/yellow]") + console.print( + f"[yellow]Quick scan failed for {file_path}, defaulting to deep review: {e}[/yellow]" + ) return True # If scan fails, do deep review to be safe @@ -471,9 +461,7 @@ def review_single_file( console.print(f"[green]Completed review of: {file_path}[/green]") console.print( - f"Raw response: {raw[:200]}..." - if len(str(raw)) > 200 - else f"Raw response: {raw}" + f"Raw response: {raw[:200]}..." if len(str(raw)) > 200 else f"Raw response: {raw}" ) # Parse issues from response @@ -488,9 +476,7 @@ def review_single_file( try: issues.append(IssueModel.model_validate(issue_data)) except Exception as e: - console.print( - f"[yellow]Failed to validate issue: {e}[/yellow]" - ) + console.print(f"[yellow]Failed to validate issue: {e}[/yellow]") elif isinstance(parsed, dict): try: issues.append(IssueModel.model_validate(parsed)) diff --git a/src/reviewbot/infra/embeddings/openai.py b/src/reviewbot/infra/embeddings/openai.py index d4b4594..b9c52ae 100644 --- a/src/reviewbot/infra/embeddings/openai.py +++ b/src/reviewbot/infra/embeddings/openai.py @@ -154,9 +154,7 @@ def build(self) -> None: self.save() def compile(self, command: str) -> str: - return subprocess.run( - command, shell=True, capture_output=True, text=True - ).stdout + return subprocess.run(command, shell=True, capture_output=True, text=True).stdout def _build_metadata_index(self) -> None: self.metadata_index = {} @@ -205,9 +203,7 @@ def search( filter = {} if path: filter["path"] = path - results = self.vector_store.similarity_search_with_score( - query, k=top_k, filter=filter - ) + results = self.vector_store.similarity_search_with_score(query, k=top_k, filter=filter) out = [] for doc, score in results: out.append( diff --git a/src/reviewbot/infra/git/clone.py b/src/reviewbot/infra/git/clone.py index 45ffb7d..9c22e00 100644 --- a/src/reviewbot/infra/git/clone.py +++ b/src/reviewbot/infra/git/clone.py @@ -3,13 +3,12 @@ from tempfile import TemporaryDirectory import hashlib + class GitCloneError(RuntimeError): pass -def clone_repo_tmp( - repo_url: str, *, branch: str | None = None -) -> TemporaryDirectory[str]: +def clone_repo_tmp(repo_url: str, *, branch: str | None = None) -> TemporaryDirectory[str]: tmp = TemporaryDirectory(prefix="reviewbot-") dest = Path(tmp.name) @@ -31,11 +30,15 @@ def clone_repo_tmp( return tmp + def _repo_key(repo_url: str, branch: str | None) -> str: key = f"{repo_url}::{branch or 'default'}" return hashlib.sha1(key.encode()).hexdigest()[:16] + BASE_REPO_DIR = Path.home() / "reviewbot" / "repos" + + def clone_repo_persistent( repo_url: str, *, @@ -87,6 +90,7 @@ def clone_repo_persistent( return repo_dir + def get_repo_name(repo_dir: Path) -> str: cmd = ["git", "rev-parse", "--short", "HEAD"] result = subprocess.run( diff --git a/src/reviewbot/infra/gitlab/diff.py b/src/reviewbot/infra/gitlab/diff.py index 7d823ea..3090a0d 100644 --- a/src/reviewbot/infra/gitlab/diff.py +++ b/src/reviewbot/infra/gitlab/diff.py @@ -225,9 +225,7 @@ def fetch_mr_diffs( if not diff_text or change.get("too_large", False): # Fallback to raw diff endpoint for this file raw_diff_url = f"{mr_url}/diffs" - raw_response = requests.get( - raw_diff_url, headers=headers, timeout=timeout - ) + raw_response = requests.get(raw_diff_url, headers=headers, timeout=timeout) raw_response.raise_for_status() raw_diff = raw_response.text @@ -238,9 +236,7 @@ def fetch_mr_diffs( if not lines or not lines[0].startswith("diff --git "): continue - chunk_old_path, chunk_new_path, _, _, _ = ( - _parse_paths_from_chunk(lines) - ) + chunk_old_path, chunk_new_path, _, _, _ = _parse_paths_from_chunk(lines) if (chunk_new_path == change_new_path) or ( chunk_old_path == change_old_path ): @@ -299,9 +295,7 @@ def fetch_mr_diffs( extracted_old_line: Optional[int] = None # Parse diff to find first hunk and line numbers - hunk_header_pattern = re.compile( - r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@" - ) + hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@") for diff_line in chunk.splitlines(): match = hunk_header_pattern.match(diff_line) if match: @@ -344,9 +338,7 @@ def fetch_mr_diffs( return out, diff_refs -def get_mr_branch( - api_v4: str, project_id: str, mr_iid: str, token: str, timeout: int = 30 -) -> str: +def get_mr_branch(api_v4: str, project_id: str, mr_iid: str, token: str, timeout: int = 30) -> str: api_v4 = api_v4.rstrip("/") headers = {"PRIVATE-TOKEN": token} mr_url = f"{api_v4}/projects/{project_id}/merge_requests/{mr_iid}" diff --git a/src/reviewbot/infra/gitlab/note.py b/src/reviewbot/infra/gitlab/note.py index ebd13e9..fad2c82 100644 --- a/src/reviewbot/infra/gitlab/note.py +++ b/src/reviewbot/infra/gitlab/note.py @@ -24,9 +24,7 @@ def post_merge_request_note( ) if r.status_code >= 300: - raise RuntimeError( - f"gitlab note post failed: {r.status_code} {r.reason}: {r.text}" - ) + raise RuntimeError(f"gitlab note post failed: {r.status_code} {r.reason}: {r.text}") def post_discussion( @@ -62,9 +60,7 @@ def post_discussion( if position: # Only include position if it has required fields (new_line or old_line) # Otherwise GitLab will reject it as incomplete - has_line_info = ( - "new_line" in position or "old_line" in position or "line_code" in position - ) + has_line_info = "new_line" in position or "old_line" in position or "line_code" in position if has_line_info: data["position"] = position else: diff --git a/src/reviewbot/tools/search_codebase.py b/src/reviewbot/tools/search_codebase.py index bd390d2..a6e98a7 100644 --- a/src/reviewbot/tools/search_codebase.py +++ b/src/reviewbot/tools/search_codebase.py @@ -87,9 +87,7 @@ def search_codebase_semantic_search(query: str, path: Optional[str] = None) -> s @tool -def read_file( - path: str, line_start: Optional[int] = None, line_end: Optional[int] = None -) -> str: +def read_file(path: str, line_start: Optional[int] = None, line_end: Optional[int] = None) -> str: """Read the file at the given path. Args: diff --git a/tests/test_agent.py b/tests/test_agent.py index d134828..1a3e3ec 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -62,9 +62,7 @@ def test_agent(): project_id = "29" mr_iid = "5" - model = get_gpt_model( - config.llm_model_name, config.llm_api_key, config.llm_base_url - ) + model = get_gpt_model(config.llm_model_name, config.llm_api_key, config.llm_base_url) clone_url = build_clone_url(api_v4, project_id, token) @@ -96,15 +94,11 @@ def test_agent(): manager.get_store() issue_store = InMemoryIssueStore() - token = store_manager_ctx.set( - Context(store_manager=manager, issue_store=issue_store) - ) + token = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store)) context = store_manager_ctx.get() - diff_file_paths = " ".join( - [diff.new_path for diff in diffs if diff.new_path is not None] - ) + diff_file_paths = " ".join([diff.new_path for diff in diffs if diff.new_path is not None]) try: response = agent.invoke( # type: ignore { diff --git a/tests/test_gitlab_discussions.py b/tests/test_gitlab_discussions.py index b175258..77c2564 100644 --- a/tests/test_gitlab_discussions.py +++ b/tests/test_gitlab_discussions.py @@ -92,27 +92,19 @@ def test_delete_debug_discussions( for note in discussion["notes"]: if note.get("author", {}).get("id") == 83: # Add this discussion with its first note ID (required for deletion) - discussion_note_ids.append( - (discussion["id"], discussion["notes"][0]["id"]) - ) + discussion_note_ids.append((discussion["id"], discussion["notes"][0]["id"])) break # Only add once per discussion - console.print( - f"[yellow]Found {len(discussion_note_ids)} discussions from author 83[/yellow]" - ) + console.print(f"[yellow]Found {len(discussion_note_ids)} discussions from author 83[/yellow]") # Delete each discussion for discussion_id, note_id in discussion_note_ids: try: - console.print( - f"[yellow]Deleting discussion {discussion_id}, note {note_id}[/yellow]" - ) + console.print(f"[yellow]Deleting discussion {discussion_id}, note {note_id}[/yellow]") delete_discussion(api_v4, token, project_id, mr_iid, discussion_id, note_id) console.print(f"[green]āœ“ Deleted discussion {discussion_id}[/green]") except Exception as e: - console.print( - f"[red]āœ— Failed to delete discussion {discussion_id}: {e}[/red]" - ) + console.print(f"[red]āœ— Failed to delete discussion {discussion_id}: {e}[/red]") def test_handle_file_issues_creates_discussion( @@ -144,9 +136,7 @@ def test_handle_file_issues_creates_discussion( # Set up context for tools (needed for read_file.invoke()) issue_store = InMemoryIssueStore() - token_ctx = store_manager_ctx.set( - Context(store_manager=manager, issue_store=issue_store) - ) + token_ctx = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store)) try: # Call the function - this will make real API calls to GitLab @@ -195,9 +185,7 @@ def test_code_block_in_markdown_between_line_numbers( # Set up context for tools issue_store = InMemoryIssueStore() - token_ctx = store_manager_ctx.set( - Context(store_manager=manager, issue_store=issue_store) - ) + token_ctx = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store)) # Find a file that actually has changes in the diff if not diffs: @@ -205,9 +193,7 @@ def test_code_block_in_markdown_between_line_numbers( # Use the first file with a diff # get the first .go file - go_files = [ - file for file in diffs if file.new_path and file.new_path.endswith(".go") - ] + go_files = [file for file in diffs if file.new_path and file.new_path.endswith(".go")] if not go_files: pytest.skip("No .go files found in diff") file_diff = go_files[1] @@ -231,9 +217,7 @@ def test_code_block_in_markdown_between_line_numbers( new_start = int(match.group(3)) new_count = int(match.group(4)) if match.group(4) else 1 line_start = new_start - line_end = min( - new_start + new_count - 1, new_start + 10 - ) # Limit to reasonable range + line_end = min(new_start + new_count - 1, new_start + 10) # Limit to reasonable range break if line_start is None or line_end is None: @@ -264,12 +248,8 @@ def test_code_block_in_markdown_between_line_numbers( try: # Mock the GitLab API calls to capture the reply body with ( - patch( - "reviewbot.agent.workflow.create_discussion" - ) as mock_create_discussion, - patch( - "reviewbot.agent.workflow.reply_to_discussion" - ) as mock_reply_to_discussion, + patch("reviewbot.agent.workflow.create_discussion") as mock_create_discussion, + patch("reviewbot.agent.workflow.reply_to_discussion") as mock_reply_to_discussion, ): mock_create_discussion.return_value = "discussion-123" @@ -283,9 +263,7 @@ def test_code_block_in_markdown_between_line_numbers( reply_body = call_args.kwargs["body"] # Verify the code block uses diff syntax - assert "```diff" in reply_body, ( - "Code block should use diff syntax highlighting" - ) + assert "```diff" in reply_body, "Code block should use diff syntax highlighting" # Extract the code block content code_block_start = reply_body.find("```diff") @@ -293,15 +271,11 @@ def test_code_block_in_markdown_between_line_numbers( assert code_block_end != -1, "Code block should be properly closed" # Get the content between the code block markers (skip the ```diff part) - code_block_content = reply_body[ - code_block_start + 7 : code_block_end - ].strip() + code_block_content = reply_body[code_block_start + 7 : code_block_end].strip() # Verify the code block contains diff markers assert ( - "+" in code_block_content - or "-" in code_block_content - or " " in code_block_content + "+" in code_block_content or "-" in code_block_content or " " in code_block_content ), "Code block should contain diff markers (+, -, or space)" # Verify the code block contains code from the diff From 31c1f9fd950a6e63a7ee8054485d42490506ed0f Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 2 Jan 2026 15:55:44 +0400 Subject: [PATCH 3/8] chore: remove outdated tests --- tests/test_agent.py | 128 ------------- tests/test_ai.py | 2 - tests/test_gitlab_discussions.py | 307 ------------------------------- tests/test_search_codebase.py | 41 ----- 4 files changed, 478 deletions(-) delete mode 100644 tests/test_agent.py delete mode 100644 tests/test_ai.py delete mode 100644 tests/test_gitlab_discussions.py delete mode 100644 tests/test_search_codebase.py diff --git a/tests/test_agent.py b/tests/test_agent.py deleted file mode 100644 index 1a3e3ec..0000000 --- a/tests/test_agent.py +++ /dev/null @@ -1,128 +0,0 @@ -from typing import Any - -from langchain.agents import create_agent # type: ignore -from langchain.agents.middleware import AgentState, after_model, before_model -from langchain_core.messages import HumanMessage -from langgraph.pregel.main import Runtime # type: ignore -from rich.console import Console - -from reviewbot.agent.tasks.core import ToolCallerSettings -from reviewbot.context import Context, store_manager_ctx -from reviewbot.core.agent import Agent -from reviewbot.infra.config.env import load_env -from reviewbot.infra.embeddings.store_manager import CodebaseStoreManager -from reviewbot.infra.git.clone import clone_repo_persistent, get_repo_name -from reviewbot.infra.git.repo_tree import tree -from reviewbot.infra.gitlab.clone import build_clone_url -from reviewbot.infra.gitlab.diff import fetch_mr_diffs, get_mr_branch -from reviewbot.infra.issues.in_memory_issue_store import InMemoryIssueStore -from reviewbot.models.gpt import get_gpt_model -from reviewbot.tools.diff import get_diff, get_tree -from reviewbot.tools.search_codebase import read_file, search_codebase_semantic_search - -console = Console() - -MESSAGE = [] - - -@before_model(can_jump_to=["end"]) -def check_message_limit(state: AgentState, runtime: Runtime) -> dict[str, Any] | None: # type: ignore - global MESSAGE - MESSAGE = state["messages"] - return None - - -@after_model(can_jump_to=["end"]) -def after_model_check(state: AgentState, runtime: Runtime) -> dict[str, Any] | None: # type: ignore - last_message = state["messages"][-1] - if last_message.content is not None: - if isinstance(last_message.content, list): - if last_message.content[-1].get("type") == "reasoning": - # print(f"Reasoning: {last_message.content[-1].get('content')}") - messages = state["messages"] - # delete the last message - # messages.pop() - # messages.append( - # HumanMessage( - # content="You attempted an invalid tool call. Please avoid this in future. Your faulty tool call was: " - # + str(last_message.content[-1].get("content", "Unknown")) - # ) - # ) - print("Faulty tool call!") - MESSAGE = messages - return {"messages": messages} - return None - - -def test_agent(): - global MESSAGE - config = load_env() - api_v4 = config.gitlab_api_v4 - token = config.gitlab_token - project_id = "29" - mr_iid = "5" - - model = get_gpt_model(config.llm_model_name, config.llm_api_key, config.llm_base_url) - - clone_url = build_clone_url(api_v4, project_id, token) - - diffs = fetch_mr_diffs(api_v4, project_id, mr_iid, token) - - settings = ToolCallerSettings(max_tool_calls=10, max_iterations=50) - - tools = [ - search_codebase_semantic_search, - get_diff, - read_file, - get_tree, - ] - - agent: Agent = create_agent( - model=model, - tools=tools, - middleware=[check_message_limit, after_model_check], # type: ignore - ) # type: ignore - branch = get_mr_branch(api_v4, project_id, mr_iid, token) - repo_path = clone_repo_persistent(clone_url, branch=branch) - repo_tree = tree(repo_path) - - manager = CodebaseStoreManager() - manager.set_repo_root(repo_path) - manager.set_repo_name(get_repo_name(repo_path)) - manager.set_tree(repo_tree) - manager.set_diffs(diffs) - manager.get_store() - - issue_store = InMemoryIssueStore() - token = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store)) - - context = store_manager_ctx.get() - - diff_file_paths = " ".join([diff.new_path for diff in diffs if diff.new_path is not None]) - try: - response = agent.invoke( # type: ignore - { - "messages": [ - HumanMessage( - content="Check my merge request diff and code review it. Use the tools provided, you got all you need you don't have to ask questions." - + "The codebase tree is: " - + repo_tree - + "The merge request diff file paths are: " - + diff_file_paths - ) - ] - } - ) - - if isinstance(response, str): - assert response is not None - else: - print(f"Response: {response['messages'][-1]}") - - except Exception as e: - console.print(f"Error: {e}") - open("errors.txt", "w").write(str(MESSAGE)) - raise e - - finally: - store_manager_ctx.reset(token) diff --git a/tests/test_ai.py b/tests/test_ai.py deleted file mode 100644 index 1d7a5ae..0000000 --- a/tests/test_ai.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_ai(): - pass diff --git a/tests/test_gitlab_discussions.py b/tests/test_gitlab_discussions.py deleted file mode 100644 index 77c2564..0000000 --- a/tests/test_gitlab_discussions.py +++ /dev/null @@ -1,307 +0,0 @@ -"""Integration test for GitLab discussion creation with predefined issues. - -This test actually calls the GitLab API to create discussions. -Requires the following environment variables to be set: -- GITLAB_API_V4_URL: GitLab API base URL -- GITLAB_BOT_TOKEN: GitLab API token -""" - -import os -from pathlib import Path - -import pytest -from rich.console import Console - -from reviewbot.agent.workflow import GitLabConfig, handle_file_issues -from reviewbot.context import Context, store_manager_ctx -from reviewbot.core.issues.issue import Issue, IssueSeverity -from reviewbot.infra.embeddings.store_manager import CodebaseStoreManager -from reviewbot.infra.git.clone import clone_repo_persistent, get_repo_name -from reviewbot.infra.git.repo_tree import tree -from reviewbot.infra.gitlab.clone import build_clone_url -from reviewbot.infra.gitlab.diff import fetch_mr_diffs, get_mr_branch -from reviewbot.infra.gitlab.note import delete_discussion, get_all_discussions -from reviewbot.infra.issues.in_memory_issue_store import InMemoryIssueStore - - -@pytest.fixture -def gitlab_config(): - """Create a GitLab configuration from environment variables.""" - api_v4 = os.getenv("GITLAB_API_V4_URL") - token = os.getenv("GITLAB_BOT_TOKEN") - project_id = "29" - mr_iid = "5" - - if not all([api_v4, token, project_id, mr_iid]): - pytest.skip( - "GitLab credentials not set. " - "Set GITLAB_API_V4_URL, GITLAB_BOT_TOKEN, GITLAB_TEST_PROJECT_ID, and GITLAB_TEST_MR_IID" - ) - - # Type narrowing: we know these are not None after the check above - assert api_v4 is not None - assert token is not None - assert project_id is not None - assert mr_iid is not None - api_v4 = api_v4 + "/api/v4" - return GitLabConfig( - api_v4=api_v4, - token=token, - project_id=project_id, - mr_iid=mr_iid, - ) - - -@pytest.fixture -def sample_issues(): - """Create a list of predefined issues for testing.""" - return [ - Issue( - title="[TEST] Potential null pointer exception", - description="The variable 'user' might be None when accessing user.name. This is a test issue.", - file_path="api/feature_testing/mdsl/mdsl.go", - start_line=30, - end_line=30, - severity=IssueSeverity.HIGH, - status="open", - ), - ] - - -def test_delete_debug_discussions( - gitlab_config: GitLabConfig, -): - """Test that we can delete debug discussions.""" - api_v4 = gitlab_config.api_v4 - token = gitlab_config.token - project_id = gitlab_config.project_id - mr_iid = gitlab_config.mr_iid - - # get all discussions for the merge request - discussions = get_all_discussions(api_v4, token, project_id, mr_iid) - console = Console() - console.print(f"[cyan]Found {len(discussions)} total discussions[/cyan]") - - # Filter discussions where ANY note has author.id == 83 - discussion_note_ids = [] - for discussion in discussions: - if not discussion.get("notes"): - continue - - # Check if ANY note in this discussion is from author 83 - for note in discussion["notes"]: - if note.get("author", {}).get("id") == 83: - # Add this discussion with its first note ID (required for deletion) - discussion_note_ids.append((discussion["id"], discussion["notes"][0]["id"])) - break # Only add once per discussion - - console.print(f"[yellow]Found {len(discussion_note_ids)} discussions from author 83[/yellow]") - - # Delete each discussion - for discussion_id, note_id in discussion_note_ids: - try: - console.print(f"[yellow]Deleting discussion {discussion_id}, note {note_id}[/yellow]") - delete_discussion(api_v4, token, project_id, mr_iid, discussion_id, note_id) - console.print(f"[green]āœ“ Deleted discussion {discussion_id}[/green]") - except Exception as e: - console.print(f"[red]āœ— Failed to delete discussion {discussion_id}: {e}[/red]") - - -def test_handle_file_issues_creates_discussion( - gitlab_config: GitLabConfig, - sample_issues: list[Issue], -): - """Integration test that actually creates a GitLab discussion with predefined issues.""" - # Set up GitLab API connection - api_v4 = gitlab_config.api_v4 - token = gitlab_config.token - project_id = gitlab_config.project_id - mr_iid = gitlab_config.mr_iid - - # Clone the repository - clone_url = build_clone_url(api_v4, project_id, token) - diffs, diff_refs = fetch_mr_diffs(api_v4, project_id, mr_iid, token) - branch = get_mr_branch(api_v4, project_id, mr_iid, token) - repo_path = clone_repo_persistent(clone_url, branch=branch) - repo_path = Path(repo_path).resolve() - repo_tree = tree(repo_path) - - # Initialize store manager - manager = CodebaseStoreManager() - manager.set_repo_root(repo_path) - manager.set_repo_name(get_repo_name(repo_path)) - manager.set_tree(repo_tree) - manager.set_diffs(diffs) - manager.get_store() - - # Set up context for tools (needed for read_file.invoke()) - issue_store = InMemoryIssueStore() - token_ctx = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store)) - - try: - # Call the function - this will make real API calls to GitLab - handle_file_issues( - "api/feature_testing/mdsl/mdsl.go", - sample_issues, - gitlab_config, - diffs, - diff_refs, - ) - - # If we get here without an exception, the discussion was created successfully - # You can verify by checking the merge request in GitLab - finally: - # Clean up context - store_manager_ctx.reset(token_ctx) - - -def test_code_block_in_markdown_between_line_numbers( - gitlab_config: GitLabConfig, -): - """Test that code blocks in markdown correctly extract code from diff between line numbers.""" - from unittest.mock import patch - - # Set up GitLab API connection - api_v4 = gitlab_config.api_v4 - token = gitlab_config.token - project_id = gitlab_config.project_id - mr_iid = gitlab_config.mr_iid - - # Clone the repository - clone_url = build_clone_url(api_v4, project_id, token) - diffs, diff_refs = fetch_mr_diffs(api_v4, project_id, mr_iid, token) - branch = get_mr_branch(api_v4, project_id, mr_iid, token) - repo_path = clone_repo_persistent(clone_url, branch=branch) - repo_path = Path(repo_path).resolve() - repo_tree = tree(repo_path) - - # Initialize store manager - manager = CodebaseStoreManager() - manager.set_repo_root(repo_path) - manager.set_repo_name(get_repo_name(repo_path)) - manager.set_tree(repo_tree) - manager.set_diffs(diffs) - manager.get_store() - - # Set up context for tools - issue_store = InMemoryIssueStore() - token_ctx = store_manager_ctx.set(Context(store_manager=manager, issue_store=issue_store)) - - # Find a file that actually has changes in the diff - if not diffs: - pytest.skip("No diffs found in merge request") - - # Use the first file with a diff - # get the first .go file - go_files = [file for file in diffs if file.new_path and file.new_path.endswith(".go")] - if not go_files: - pytest.skip("No .go files found in diff") - file_diff = go_files[1] - test_file_path = file_diff.new_path or file_diff.old_path - if not test_file_path: - pytest.skip("No valid file path in diff") - - # Find a reasonable line range in the diff - # Look for the first hunk to get line numbers - import re - - hunk_header_re = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@") - patch_lines = file_diff.patch.splitlines() - - line_start = None - line_end = None - - for line in patch_lines: - match = hunk_header_re.match(line) - if match: - new_start = int(match.group(3)) - new_count = int(match.group(4)) if match.group(4) else 1 - line_start = new_start - line_end = min(new_start + new_count - 1, new_start + 10) # Limit to reasonable range - break - - if line_start is None or line_end is None: - pytest.skip("Could not find valid line range in diff") - - issue_with_line_range = Issue( - title="[TEST] Code block extraction test", - description="Testing that code blocks correctly extract code from diff between line numbers.", - file_path=test_file_path, - start_line=line_start, - end_line=line_end, - severity=IssueSeverity.MEDIUM, - status="open", - ) - - # Verify the diff contains content for our line range - # We'll check that the diff has content in the expected range - patch_lines = file_diff.patch.splitlines() - has_content_in_range = False - for line in patch_lines: - if line.startswith(("+", "-", " ")) and len(line) > 1: - has_content_in_range = True - break - - if not has_content_in_range: - pytest.skip("Diff does not contain sufficient content for testing") - - try: - # Mock the GitLab API calls to capture the reply body - with ( - patch("reviewbot.agent.workflow.create_discussion") as mock_create_discussion, - patch("reviewbot.agent.workflow.reply_to_discussion") as mock_reply_to_discussion, - ): - mock_create_discussion.return_value = "discussion-123" - - # Call the function - handle_file_issues( - test_file_path, [issue_with_line_range], gitlab_config, diffs, diff_refs - ) - - # Get the reply body that was sent - call_args = mock_reply_to_discussion.call_args - reply_body = call_args.kwargs["body"] - - # Verify the code block uses diff syntax - assert "```diff" in reply_body, "Code block should use diff syntax highlighting" - - # Extract the code block content - code_block_start = reply_body.find("```diff") - code_block_end = reply_body.find("```", code_block_start + 7) - assert code_block_end != -1, "Code block should be properly closed" - - # Get the content between the code block markers (skip the ```diff part) - code_block_content = reply_body[code_block_start + 7 : code_block_end].strip() - - # Verify the code block contains diff markers - assert ( - "+" in code_block_content or "-" in code_block_content or " " in code_block_content - ), "Code block should contain diff markers (+, -, or space)" - - # Verify the code block contains code from the diff - # It should have some actual code content (not just empty) - assert len(code_block_content.strip()) > 0, ( - "Code block should contain code extracted from diff" - ) - - # Verify it contains lines that could be from the diff - # (either added lines with +, removed with -, or context with space) - assert any( - line.startswith(("+", "-", " ")) - for line in code_block_content.splitlines() - if line.strip() - ), "Code block should contain diff-formatted lines" - - # Verify the line info is correct in the markdown - assert ( - f"Line {line_start}-{line_end}" in reply_body - or f"Line {line_end}-{line_start}" in reply_body - ), "Line range should be displayed correctly" - - # Write to a markdown file for inspection - with open("reply_body.md", "w") as f: - f.write(reply_body) - - finally: - # Clean up context - store_manager_ctx.reset(token_ctx) diff --git a/tests/test_search_codebase.py b/tests/test_search_codebase.py deleted file mode 100644 index c7a3078..0000000 --- a/tests/test_search_codebase.py +++ /dev/null @@ -1,41 +0,0 @@ -import subprocess -import tempfile -from pathlib import Path - -import dotenv - -from reviewbot.infra.embeddings.store_manager import CodebaseStoreManager -from reviewbot.tools.search_codebase import search_codebase - - -def main() -> None: - dotenv.load_dotenv() - with tempfile.TemporaryDirectory(prefix="reviewbot-test-") as tmp: - repo_dir = Path(tmp) / "repo" - - subprocess.run( - [ - "git", - "clone", - "https://github.com/canefe/npcdrops", - str(repo_dir), - ], - check=True, - ) - - manager = CodebaseStoreManager() - manager.set_repo_root(repo_dir) - manager.set_repo_name("npcdrops") - manager.get_store() - - print("=== SEARCH: npcDrops ===") - res = search_codebase.invoke("how does npcdrops trigger the drop chance?") # type: ignore - print(res) - - print("\n=== SEARCH: drop chance ===") - res = search_codebase.invoke("where is the hook that triggers npc death") # type: ignore - print(res) - - -if __name__ == "__main__": - main() From ae3f6fe1e807d198d75a5e6cf3c9c346fcc42291 Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 2 Jan 2026 16:06:07 +0400 Subject: [PATCH 4/8] chore: lint --- src/reviewbot/agent/base.py | 8 ++-- src/reviewbot/agent/tasks/core.py | 18 ++------ src/reviewbot/agent/tasks/issues.py | 27 ++++++------ src/reviewbot/context.py | 13 +++--- src/reviewbot/core/issues/issue.py | 5 +-- src/reviewbot/core/issues/issue_model.py | 3 +- src/reviewbot/core/issues/issue_store.py | 4 +- src/reviewbot/core/reviews/review.py | 3 +- src/reviewbot/infra/embeddings/openai.py | 15 +++---- .../infra/embeddings/store_manager.py | 15 +++---- src/reviewbot/infra/git/clone.py | 14 +++--- src/reviewbot/infra/gitlab/diff.py | 44 +++++++++---------- src/reviewbot/infra/gitlab/note.py | 10 ++--- .../infra/issues/in_memory_issue_store.py | 6 +-- src/reviewbot/models/openrouter.py | 6 +-- src/reviewbot/tools/search_codebase.py | 12 +++-- 16 files changed, 92 insertions(+), 111 deletions(-) diff --git a/src/reviewbot/agent/base.py b/src/reviewbot/agent/base.py index 4b3b6f8..de6d5f8 100644 --- a/src/reviewbot/agent/base.py +++ b/src/reviewbot/agent/base.py @@ -1,5 +1,5 @@ +from collections.abc import Callable from dataclasses import dataclass, field -from typing import Callable, List, Optional from langgraph.func import entrypoint # type: ignore from rich.console import Console @@ -19,12 +19,12 @@ class AgentRunnerInput: agent: Agent context: Context settings: ToolCallerSettings = field(default_factory=ToolCallerSettings) - on_file_complete: Optional[Callable[[str, List[IssueModel]], None]] = None - quick_scan_agent: Optional[Agent] = None + on_file_complete: Callable[[str, list[IssueModel]], None] | None = None + quick_scan_agent: Agent | None = None @entrypoint() -def agent_runner(input: AgentRunnerInput) -> List[Issue]: +def agent_runner(input: AgentRunnerInput) -> list[Issue]: agent = input.agent settings = input.settings context = input.context diff --git a/src/reviewbot/agent/tasks/core.py b/src/reviewbot/agent/tasks/core.py index ce52257..329f1b4 100644 --- a/src/reviewbot/agent/tasks/core.py +++ b/src/reviewbot/agent/tasks/core.py @@ -1,6 +1,6 @@ import json from dataclasses import dataclass -from typing import Any, List +from typing import Any from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage from rich.console import Console @@ -35,7 +35,7 @@ class ToolCallerSettings: """ -def tool_caller(agent: Any, messages: List[BaseMessage], settings: ToolCallerSettings) -> str: +def tool_caller(agent: Any, messages: list[BaseMessage], settings: ToolCallerSettings) -> str: finished = False final_response = None max_tool_calls = settings.max_tool_calls @@ -128,7 +128,7 @@ def tool_caller(agent: Any, messages: List[BaseMessage], settings: ToolCallerSet def tool_caller_stream( - agent: Any, messages: List[BaseMessage], settings: ToolCallerSettings + agent: Any, messages: list[BaseMessage], settings: ToolCallerSettings ) -> str: finished = False final_response = None @@ -143,17 +143,7 @@ def tool_caller_stream( latest_message = chunk["messages"][-1] if isinstance(latest_message, AIMessage): - content = latest_message.content - if isinstance(content, list): - if content and "content" in content[-1]: - reason = content[-1]["content"][-1]["text"] - elif content and "text" in content[-1]: - reason = content[-1]["text"] - else: - reason = str(content) - else: - reason = content - final_response = content + final_response = latest_message.content elif isinstance(latest_message, ToolMessage): tool_call_count += 1 diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py index 5d5ecb4..820f894 100644 --- a/src/reviewbot/agent/tasks/issues.py +++ b/src/reviewbot/agent/tasks/issues.py @@ -1,9 +1,10 @@ import threading import time +from collections.abc import Callable from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed from dataclasses import dataclass from functools import partial -from typing import Any, Callable, List, Optional +from typing import Any from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from langgraph.func import task @@ -87,12 +88,12 @@ class IssuesInput: agent: Agent context: Context settings: ToolCallerSettings - on_file_complete: Optional[Callable[[str, List[IssueModel]], None]] = None - quick_scan_agent: Optional[Agent] = None + on_file_complete: Callable[[str, list[IssueModel]], None] | None = None + quick_scan_agent: Agent | None = None @task -def identify_issues(ctx: IssuesInput) -> List[Issue]: +def identify_issues(ctx: IssuesInput) -> list[Issue]: """ Identify the issues in the codebase using concurrent agents per file. """ @@ -163,14 +164,14 @@ def monitor_progress( def run_concurrent_reviews( agent: Any, - diffs: List[Any], + diffs: list[Any], settings: ToolCallerSettings, context: Context, max_workers: int = 3, # Serial processing to avoid thread safety and rate limit issues task_timeout: int = 160, # 5 minutes timeout per file - on_file_complete: Optional[Callable[[str, List[IssueModel]], None]] = None, + on_file_complete: Callable[[str, list[IssueModel]], None] | None = None, quick_scan_agent: Any | None = None, -) -> List[IssueModel]: +) -> list[IssueModel]: """ Run concurrent reviews of all diff files with context propagation and monitoring. @@ -191,7 +192,7 @@ def run_concurrent_reviews( console.print(f"[bold]Starting concurrent review of {len(diff_file_paths)} files[/bold]") console.print(f"[dim]Files: {', '.join(diff_file_paths)}[/dim]\n") - all_issues: List[IssueModel] = [] + all_issues: list[IssueModel] = [] with ThreadPoolExecutor(max_workers=max_workers) as executor: # Create a partial function with context baked in @@ -276,7 +277,7 @@ def quick_scan_file( Quick scan with low-effort agent to determine if file needs deep review. Returns True if file needs deep review, False otherwise. """ - messages: List[BaseMessage] = [ + messages: list[BaseMessage] = [ SystemMessage( content="""You are a code review triage assistant. Your job is to quickly determine if a file change needs deep review. @@ -330,7 +331,7 @@ def review_single_file_with_context( settings: ToolCallerSettings, context: Context, quick_scan_agent: Any | None = None, -) -> List[IssueModel]: +) -> list[IssueModel]: """ Wrapper that sets context before reviewing. This runs in each worker thread. @@ -362,11 +363,11 @@ def review_single_file( agent: Any, file_path: str, settings: ToolCallerSettings, -) -> List[IssueModel]: +) -> list[IssueModel]: """ Review a single diff file and return issues found. """ - messages: List[BaseMessage] = [ + messages: list[BaseMessage] = [ SystemMessage( content=f"""You are a senior code reviewer analyzing a specific file change. @@ -465,7 +466,7 @@ def review_single_file( ) # Parse issues from response - issues: List[IssueModel] = [] + issues: list[IssueModel] = [] if isinstance(raw, str): try: import json diff --git a/src/reviewbot/context.py b/src/reviewbot/context.py index 761f564..0eab583 100644 --- a/src/reviewbot/context.py +++ b/src/reviewbot/context.py @@ -1,15 +1,16 @@ from contextvars import ContextVar -from typing import Optional, TypedDict +from typing import TypedDict from reviewbot.core.issues import IssueStore from reviewbot.infra.embeddings.store_manager import CodebaseStoreManager class Context(TypedDict): - store_manager: Optional[CodebaseStoreManager] - issue_store: Optional[IssueStore] + store_manager: CodebaseStoreManager | None + issue_store: IssueStore | None -store_manager_ctx: ContextVar[Context] = ContextVar( - "store_manager", default=Context(store_manager=None, issue_store=None) -) +store_manager_ctx: ContextVar[Context | None] = ContextVar("store_manager", default=None) + +def init_ctx() -> None: + store_manager_ctx.set({"store_manager": None, "issue_store": None}) diff --git a/src/reviewbot/core/issues/issue.py b/src/reviewbot/core/issues/issue.py index c843a76..05d80ba 100644 --- a/src/reviewbot/core/issues/issue.py +++ b/src/reviewbot/core/issues/issue.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field from datetime import datetime from enum import Enum -from typing import Optional from uuid import UUID, uuid4 @@ -20,8 +19,8 @@ class Issue: end_line: int severity: IssueSeverity status: str - suggestion: Optional[str] = None # Optional code suggestion to fix the issue + suggestion: str | None = None # Optional code suggestion to fix the issue id: UUID = field(default_factory=uuid4) created_at: datetime = field(default_factory=datetime.now) updated_at: datetime = field(default_factory=datetime.now) - discussion_id: Optional[str] = None + discussion_id: str | None = None diff --git a/src/reviewbot/core/issues/issue_model.py b/src/reviewbot/core/issues/issue_model.py index 941a35a..21f3ef9 100644 --- a/src/reviewbot/core/issues/issue_model.py +++ b/src/reviewbot/core/issues/issue_model.py @@ -1,4 +1,3 @@ -from typing import Optional from pydantic import BaseModel, ConfigDict @@ -15,7 +14,7 @@ class IssueModel(BaseModel): end_line: int severity: IssueSeverity status: str - suggestion: Optional[str] = None # Optional code suggestion to fix the issue + suggestion: str | None = None # Optional code suggestion to fix the issue def to_domain(self) -> Issue: return Issue(**self.model_dump()) diff --git a/src/reviewbot/core/issues/issue_store.py b/src/reviewbot/core/issues/issue_store.py index 7e61264..25c8a09 100644 --- a/src/reviewbot/core/issues/issue_store.py +++ b/src/reviewbot/core/issues/issue_store.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Iterable, Optional +from collections.abc import Iterable from uuid import UUID from .issue import Issue @@ -10,7 +10,7 @@ class IssueStore(ABC): def add(self, issue: Issue) -> None: ... @abstractmethod - def get(self, issue_id: UUID) -> Optional[Issue]: ... + def get(self, issue_id: UUID) -> Issue | None: ... @abstractmethod def list(self) -> Iterable[Issue]: ... diff --git a/src/reviewbot/core/reviews/review.py b/src/reviewbot/core/reviews/review.py index bdba569..d85aef8 100644 --- a/src/reviewbot/core/reviews/review.py +++ b/src/reviewbot/core/reviews/review.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from typing import List from uuid import UUID, uuid4 from reviewbot.core.issues import Issue @@ -10,5 +9,5 @@ class Review: id: UUID = field(default_factory=uuid4) repo: str = "" commit: str = "" - issues: List[Issue] = field(default_factory=list) + issues: list[Issue] = field(default_factory=list) summary: str = "" diff --git a/src/reviewbot/infra/embeddings/openai.py b/src/reviewbot/infra/embeddings/openai.py index b9c52ae..6a6a9b1 100644 --- a/src/reviewbot/infra/embeddings/openai.py +++ b/src/reviewbot/infra/embeddings/openai.py @@ -3,8 +3,8 @@ import json import os import subprocess +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, List, Optional from langchain_community.vectorstores import FAISS from langchain_core.documents import Document @@ -50,7 +50,6 @@ "tmp", "tempdata", "tempfiles", - "tempfiles", } @@ -108,8 +107,8 @@ def _iter_source_files(self) -> Iterable[Path]: continue yield path - def _load_documents(self) -> List[Document]: - docs: List[Document] = [] + def _load_documents(self) -> list[Document]: + docs: list[Document] = [] for file in self._iter_source_files(): try: @@ -185,7 +184,7 @@ def load(self) -> bool: ) if self.metadata_path.exists(): - with open(self.metadata_path, "r", encoding="utf-8") as f: + with open(self.metadata_path, encoding="utf-8") as f: self.metadata_index = json.load(f) return True @@ -195,7 +194,7 @@ def search( query: str, *, top_k: int = 10, - path: Optional[str] = None, + path: str | None = None, ) -> list[dict]: if not self.vector_store: raise RuntimeError("Vector store not loaded") @@ -221,8 +220,8 @@ def search( def read_file( self, path: str, - line_start: Optional[int] = None, - line_end: Optional[int] = None, + line_start: int | None = None, + line_end: int | None = None, ) -> str: file_path = Path(path) # the path is relative to the repo root so add the repo root to the path diff --git a/src/reviewbot/infra/embeddings/store_manager.py b/src/reviewbot/infra/embeddings/store_manager.py index 67c1a71..11a89bf 100644 --- a/src/reviewbot/infra/embeddings/store_manager.py +++ b/src/reviewbot/infra/embeddings/store_manager.py @@ -1,5 +1,4 @@ from pathlib import Path -from typing import List, Optional from reviewbot.infra.embeddings.openai import CodebaseVectorStore from reviewbot.infra.gitlab.diff import FileDiff @@ -7,11 +6,11 @@ class CodebaseStoreManager: def __init__(self) -> None: - self._store: Optional[CodebaseVectorStore] = None - self._repo_root: Optional[Path] = None - self._repo_name: Optional[str] = None - self._tree: Optional[str] = None - self._diffs: Optional[List[FileDiff]] = None + self._store: CodebaseVectorStore | None = None + self._repo_root: Path | None = None + self._repo_name: str | None = None + self._tree: str | None = None + self._diffs: list[FileDiff] | None = None def set_repo_root(self, path: str | Path) -> None: self._repo_root = Path(path).resolve() @@ -28,7 +27,7 @@ def set_repo_name(self, name: str) -> None: def set_tree(self, tree: str) -> None: self._tree = tree - def set_diffs(self, diffs: List[FileDiff]) -> None: + def set_diffs(self, diffs: list[FileDiff]) -> None: self._diffs = diffs def get_tree(self) -> str: @@ -36,7 +35,7 @@ def get_tree(self) -> str: raise ValueError("Tree not set") return self._tree - def get_diffs(self) -> List[FileDiff]: + def get_diffs(self) -> list[FileDiff]: if self._diffs is None: raise ValueError("Diff not set") return self._diffs diff --git a/src/reviewbot/infra/git/clone.py b/src/reviewbot/infra/git/clone.py index 9c22e00..a65db83 100644 --- a/src/reviewbot/infra/git/clone.py +++ b/src/reviewbot/infra/git/clone.py @@ -1,7 +1,7 @@ +import hashlib import subprocess from pathlib import Path from tempfile import TemporaryDirectory -import hashlib class GitCloneError(RuntimeError): @@ -20,8 +20,7 @@ def clone_repo_tmp(repo_url: str, *, branch: str | None = None) -> TemporaryDire subprocess.run( cmd, check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + capture_output=True, text=True, ) except subprocess.CalledProcessError as e: @@ -55,8 +54,7 @@ def clone_repo_persistent( ["git", "fetch", "--all", "--prune"], cwd=repo_dir, check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + capture_output=True, text=True, ) if branch: @@ -81,8 +79,7 @@ def clone_repo_persistent( subprocess.run( cmd, check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + capture_output=True, text=True, ) except subprocess.CalledProcessError as e: @@ -97,8 +94,7 @@ def get_repo_name(repo_dir: Path) -> str: cmd, check=True, cwd=repo_dir, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + capture_output=True, text=True, ) return result.stdout.strip() diff --git a/src/reviewbot/infra/gitlab/diff.py b/src/reviewbot/infra/gitlab/diff.py index 3090a0d..8e3c2f0 100644 --- a/src/reviewbot/infra/gitlab/diff.py +++ b/src/reviewbot/infra/gitlab/diff.py @@ -1,7 +1,7 @@ import json import re from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import requests from rich.console import Console @@ -11,13 +11,13 @@ @dataclass(frozen=True) class FileDiff: - old_path: Optional[str] # None for new files - new_path: Optional[str] # None for deleted files + old_path: str | None # None for new files + new_path: str | None # None for deleted files is_new_file: bool is_deleted_file: bool is_renamed: bool patch: str # full unified diff for this file - position: Optional[Dict[str, Any]] = None # GitLab position object for discussions + position: dict[str, Any] | None = None # GitLab position object for discussions _DIFF_HEADER_RE = re.compile(r"^diff --git a/(.+?) b/(.+?)\s*$") @@ -30,10 +30,10 @@ def _strip_prefix(p: str) -> str: def _parse_paths_from_chunk( - lines: List[str], -) -> Tuple[Optional[str], Optional[str], bool, bool, bool]: - old_path: Optional[str] = None - new_path: Optional[str] = None + lines: list[str], +) -> tuple[str | None, str | None, bool, bool, bool]: + old_path: str | None = None + new_path: str | None = None is_new_file = False is_deleted_file = False is_renamed = False @@ -82,11 +82,11 @@ def _parse_paths_from_chunk( return old_path, new_path, is_new_file, is_deleted_file, is_renamed -def _split_raw_diff_by_file(raw: str) -> List[str]: +def _split_raw_diff_by_file(raw: str) -> list[str]: # Split on "diff --git", keeping the header line with each chunk lines = raw.splitlines(keepends=True) - chunks: List[List[str]] = [] - cur: List[str] = [] + chunks: list[list[str]] = [] + cur: list[str] = [] for ln in lines: if ln.startswith("diff --git "): @@ -109,7 +109,7 @@ def fetch_mr_diffs( mr_iid: str, token: str, timeout: int = 30, -) -> Tuple[List[FileDiff], Dict[str, str]]: +) -> tuple[list[FileDiff], dict[str, str]]: """ Fetch merge request diffs from GitLab API. @@ -143,18 +143,18 @@ def fetch_mr_diffs( if isinstance(changes_data, dict) and "changes" in changes_data: # New JSON format with changes array - file_diffs: List[FileDiff] = [] + file_diffs: list[FileDiff] = [] for change in changes_data["changes"]: - change_old_path: Optional[str] = change.get("old_path") - change_new_path: Optional[str] = change.get("new_path") + change_old_path: str | None = change.get("old_path") + change_new_path: str | None = change.get("new_path") diff_text: str = change.get("diff", "") change_is_new_file: bool = change.get("new_file", False) change_is_deleted_file: bool = change.get("deleted_file", False) change_is_renamed: bool = change.get("renamed_file", False) # Create position object for discussions - change_position: Optional[Dict[str, Any]] = None + change_position: dict[str, Any] | None = None if base_sha and head_sha and start_sha: # Parse diff to find first hunk with line range information # Parse diff to find first hunk @@ -163,8 +163,8 @@ def fetch_mr_diffs( r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@" ) - change_old_line: Optional[int] = None - change_new_line: Optional[int] = None + change_old_line: int | None = None + change_new_line: int | None = None lines = diff_text.splitlines() in_hunk = False @@ -269,7 +269,7 @@ def fetch_mr_diffs( file_chunks = _split_raw_diff_by_file(raw) - out: List[FileDiff] = [] + out: list[FileDiff] = [] for chunk in file_chunks: lines = chunk.splitlines(keepends=False) if not lines: @@ -288,11 +288,11 @@ def fetch_mr_diffs( # Create position object for discussions # GitLab requires line_code or line numbers (new_line/old_line) # Extract the first line number from the diff for file-level positioning - raw_position: Optional[Dict[str, Any]] = None + raw_position: dict[str, Any] | None = None if base_sha and head_sha and start_sha: # Try to extract the first line number from the diff - extracted_new_line: Optional[int] = None - extracted_old_line: Optional[int] = None + extracted_new_line: int | None = None + extracted_old_line: int | None = None # Parse diff to find first hunk and line numbers hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@") diff --git a/src/reviewbot/infra/gitlab/note.py b/src/reviewbot/infra/gitlab/note.py index fad2c82..6bfa966 100644 --- a/src/reviewbot/infra/gitlab/note.py +++ b/src/reviewbot/infra/gitlab/note.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any import requests from rich.console import Console @@ -33,7 +33,7 @@ def post_discussion( project_id: str, mr_iid: str, body: str, - position: Optional[Dict[str, Any]] = None, + position: dict[str, Any] | None = None, timeout: int = 30, ) -> str: """ @@ -56,7 +56,7 @@ def post_discussion( # Prepare request data # Note: GitLab requires either line_code or complete position with line numbers # For file-level discussions without specific lines, don't include position - data: Dict[str, Any] = {"body": body} + data: dict[str, Any] = {"body": body} if position: # Only include position if it has required fields (new_line or old_line) # Otherwise GitLab will reject it as incomplete @@ -193,7 +193,7 @@ def get_all_discussions( project_id: str, mr_iid: str, timeout: int = 30, -) -> List[Dict[str, Any]]: +) -> list[dict[str, Any]]: url = f"{api_v4.rstrip('/')}/projects/{project_id}/merge_requests/{mr_iid}/discussions" r = requests.get( url, @@ -210,7 +210,7 @@ def get_merge_request_notes( project_id: str, mr_iid: str, timeout: int = 30, -) -> List[Dict[str, Any]]: +) -> list[dict[str, Any]]: """ Get all notes (comments) for a merge request. diff --git a/src/reviewbot/infra/issues/in_memory_issue_store.py b/src/reviewbot/infra/issues/in_memory_issue_store.py index 4d48f0a..5a03fe5 100644 --- a/src/reviewbot/infra/issues/in_memory_issue_store.py +++ b/src/reviewbot/infra/issues/in_memory_issue_store.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterable, Optional +from collections.abc import Iterable from uuid import UUID from reviewbot.core.issues.issue import Issue @@ -7,12 +7,12 @@ class InMemoryIssueStore(IssueStore): def __init__(self) -> None: - self._items: Dict[UUID, Issue] = {} + self._items: dict[UUID, Issue] = {} def add(self, issue: Issue) -> None: self._items[issue.id] = issue - def get(self, issue_id: UUID) -> Optional[Issue]: + def get(self, issue_id: UUID) -> Issue | None: return self._items.get(issue_id) def list(self) -> Iterable[Issue]: diff --git a/src/reviewbot/models/openrouter.py b/src/reviewbot/models/openrouter.py index 153a98e..d779aa8 100644 --- a/src/reviewbot/models/openrouter.py +++ b/src/reviewbot/models/openrouter.py @@ -1,5 +1,5 @@ import os -from typing import Any, Optional +from typing import Any from dotenv import load_dotenv from langchain_core.utils.utils import secret_from_env @@ -10,7 +10,7 @@ class ChatOpenRouter(ChatOpenAI): - api_key: Optional[SecretStr] = Field( + api_key: SecretStr | None = Field( alias="api_key", default_factory=secret_from_env("OPENROUTER_API_KEY", default=None), ) @@ -19,7 +19,7 @@ class ChatOpenRouter(ChatOpenAI): def lc_secrets(self) -> dict[str, str]: return {"openai_api_key": "OPENROUTER_API_KEY"} - def __init__(self, api_key: Optional[SecretStr] = None, **kwargs: Any): + def __init__(self, api_key: SecretStr | None = None, **kwargs: Any): api_key = api_key or SecretStr(os.getenv("OPENROUTER_API_KEY", "")) super().__init__( base_url="https://openrouter.ai/api/v1", diff --git a/src/reviewbot/tools/search_codebase.py b/src/reviewbot/tools/search_codebase.py index a6e98a7..e7b8dd5 100644 --- a/src/reviewbot/tools/search_codebase.py +++ b/src/reviewbot/tools/search_codebase.py @@ -2,7 +2,6 @@ import shlex import subprocess -from typing import Optional from langchain.tools import tool # type: ignore from rich.console import Console @@ -46,10 +45,9 @@ def search_codebase(query: str) -> str: ] result = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, + cmd, + capture_output=True, + text=True, ) print(cmd) print("================================================") @@ -65,7 +63,7 @@ def search_codebase(query: str) -> str: @tool -def search_codebase_semantic_search(query: str, path: Optional[str] = None) -> str: +def search_codebase_semantic_search(query: str, path: str | None = None) -> str: """Search the codebase for the given query. If a path is provided, search the codebase for the given query in the given path. Args: @@ -87,7 +85,7 @@ def search_codebase_semantic_search(query: str, path: Optional[str] = None) -> s @tool -def read_file(path: str, line_start: Optional[int] = None, line_end: Optional[int] = None) -> str: +def read_file(path: str, line_start: int | None = None, line_end: int | None = None) -> str: """Read the file at the given path. Args: From ebdb59305e90ec617ee8dc9ca40207da9309566b Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 2 Jan 2026 16:09:18 +0400 Subject: [PATCH 5/8] feat: enhance quick scan functionality by integrating diff fetching --- src/reviewbot/agent/tasks/issues.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py index 820f894..6cc3989 100644 --- a/src/reviewbot/agent/tasks/issues.py +++ b/src/reviewbot/agent/tasks/issues.py @@ -277,6 +277,17 @@ def quick_scan_file( Quick scan with low-effort agent to determine if file needs deep review. Returns True if file needs deep review, False otherwise. """ + # Fetch the diff first to include in prompt + from reviewbot.tools import get_diff as get_diff_tool + + try: + diff_content = get_diff_tool.invoke({"file_path": file_path}) + # Truncate diff for display if too long + display_diff = diff_content[:500] + "..." if len(diff_content) > 500 else diff_content + except Exception as e: + console.print(f"[yellow]Could not fetch diff for {file_path}: {e}[/yellow]") + return True # If can't get diff, do deep review to be safe + messages: list[BaseMessage] = [ SystemMessage( content="""You are a code review triage assistant. Your job is to quickly determine if a file change needs deep review. @@ -302,12 +313,20 @@ def quick_scan_file( HumanMessage( content=f"""Quickly scan this file and determine if it needs deep review: {file_path} -Use get_diff("{file_path}") to see the changes, then respond with ONLY "true" or "false".""" +Here is the diff: + +```diff +{diff_content} +``` + +Respond with ONLY "true" or "false" based on the criteria above.""" ), ] try: console.print(f"[dim]Quick scanning: {file_path}[/dim]") + console.print(f"[dim]Diff preview:\n{display_diff}[/dim]") + raw = with_retry(tool_caller, settings, agent, messages, settings) result = str(raw).strip().lower() From 91222132d98f59e5c752ec71d054fb9e26eb97aa Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 2 Jan 2026 16:15:39 +0400 Subject: [PATCH 6/8] chore: format --- src/reviewbot/context.py | 1 + src/reviewbot/core/issues/issue_model.py | 1 - src/reviewbot/tools/search_codebase.py | 6 +++--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/reviewbot/context.py b/src/reviewbot/context.py index 0eab583..08fcf8b 100644 --- a/src/reviewbot/context.py +++ b/src/reviewbot/context.py @@ -12,5 +12,6 @@ class Context(TypedDict): store_manager_ctx: ContextVar[Context | None] = ContextVar("store_manager", default=None) + def init_ctx() -> None: store_manager_ctx.set({"store_manager": None, "issue_store": None}) diff --git a/src/reviewbot/core/issues/issue_model.py b/src/reviewbot/core/issues/issue_model.py index 21f3ef9..a78d64b 100644 --- a/src/reviewbot/core/issues/issue_model.py +++ b/src/reviewbot/core/issues/issue_model.py @@ -1,4 +1,3 @@ - from pydantic import BaseModel, ConfigDict from reviewbot.core.issues.issue import Issue, IssueSeverity diff --git a/src/reviewbot/tools/search_codebase.py b/src/reviewbot/tools/search_codebase.py index e7b8dd5..a1bb303 100644 --- a/src/reviewbot/tools/search_codebase.py +++ b/src/reviewbot/tools/search_codebase.py @@ -45,9 +45,9 @@ def search_codebase(query: str) -> str: ] result = subprocess.run( - cmd, - capture_output=True, - text=True, + cmd, + capture_output=True, + text=True, ) print(cmd) print("================================================") From 0f4ea1fd0ba5ddf777ec89f9ebbbeeec08f6a8eb Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Tue, 6 Jan 2026 12:14:19 +0400 Subject: [PATCH 7/8] feat: improve review acknowledgment and summary handling - Updated the acknowledgment process to reuse existing "Starting" acknowledgments for in-progress reviews, enhancing efficiency. - Changed acknowledgment messages to use badge images for better visual representation. - Modified `post_discussion` to return both discussion and note IDs, streamlining the acknowledgment creation process. - Enhanced summary generation to include badges for issue severity and improved formatting for clarity. - Added a new utility function to generate line codes for GitLab position API, improving line tracking in discussions. - Refactored various functions to support these enhancements and improve code readability. --- src/reviewbot/agent/tasks/core.py | 2 +- src/reviewbot/agent/tasks/issues.py | 3 - src/reviewbot/agent/workflow.py | 250 ++++++++++++++++++++-------- src/reviewbot/infra/gitlab/note.py | 25 ++- 4 files changed, 197 insertions(+), 83 deletions(-) diff --git a/src/reviewbot/agent/tasks/core.py b/src/reviewbot/agent/tasks/core.py index 329f1b4..334c73f 100644 --- a/src/reviewbot/agent/tasks/core.py +++ b/src/reviewbot/agent/tasks/core.py @@ -76,7 +76,7 @@ def tool_caller(agent: Any, messages: list[BaseMessage], settings: ToolCallerSet else: final_response = content - console.print(f"[dim]Got final response: {final_response[:100]}...[/dim]") + console.print(f"[dim]Got final response: \n{final_response}...[/dim]") finished = True elif isinstance(latest_message, ToolMessage): diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py index 6cc3989..a05e314 100644 --- a/src/reviewbot/agent/tasks/issues.py +++ b/src/reviewbot/agent/tasks/issues.py @@ -282,8 +282,6 @@ def quick_scan_file( try: diff_content = get_diff_tool.invoke({"file_path": file_path}) - # Truncate diff for display if too long - display_diff = diff_content[:500] + "..." if len(diff_content) > 500 else diff_content except Exception as e: console.print(f"[yellow]Could not fetch diff for {file_path}: {e}[/yellow]") return True # If can't get diff, do deep review to be safe @@ -325,7 +323,6 @@ def quick_scan_file( try: console.print(f"[dim]Quick scanning: {file_path}[/dim]") - console.print(f"[dim]Diff preview:\n{display_diff}[/dim]") raw = with_retry(tool_caller, settings, agent, messages, settings) result = str(raw).strip().lower() diff --git a/src/reviewbot/agent/workflow.py b/src/reviewbot/agent/workflow.py index 01384cd..7090015 100644 --- a/src/reviewbot/agent/workflow.py +++ b/src/reviewbot/agent/workflow.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Any +from urllib.parse import quote from langchain.agents import create_agent # type: ignore from langchain.agents.middleware import ( # type: ignore @@ -337,22 +338,44 @@ def post_review_acknowledgment( mr_iid=mr_iid, ) - # Check if any discussion contains the acknowledgment marker - acknowledgment_marker = "šŸ¤– **Code Review Starting**" + # Only reuse "Starting" acknowledgments (in-progress reviews) + # Don't reuse "Complete" acknowledgments - create new ones for new review runs + starting_marker = ( + '' + ) + + # Find ALL "Starting" acknowledgments, then pick the most recent one + found_acknowledgments = [] for discussion in discussions: notes = discussion.get("notes", []) for note in notes: body = note.get("body", "") - if acknowledgment_marker in body: - console.print( - "[dim]Acknowledgment already exists, returning existing IDs[/dim]" - ) - # Return the existing discussion and note IDs + # Only check for "Starting" marker - this means review is in progress + if starting_marker in body: discussion_id = discussion.get("id") note_id = note.get("id") + created_at = note.get("created_at", "") if discussion_id and note_id: - return (str(discussion_id), str(note_id)) - return None + found_acknowledgments.append( + { + "discussion_id": str(discussion_id), + "note_id": str(note_id), + "created_at": created_at, + } + ) + + # If we found any in-progress acknowledgments, use the most recent one + if found_acknowledgments: + # Sort by created_at timestamp (most recent first) + found_acknowledgments.sort(key=lambda x: x["created_at"], reverse=True) + most_recent = found_acknowledgments[0] + console.print( + f"[dim]Found {len(found_acknowledgments)} in-progress review(s), reusing most recent[/dim]" + ) + return (most_recent["discussion_id"], most_recent["note_id"]) + + # No in-progress reviews found - will create a new acknowledgment + console.print("[dim]No in-progress reviews found, will create new acknowledgment[/dim]") except Exception as e: console.print(f"[yellow]⚠ Could not check for existing acknowledgment: {e}[/yellow]") # Continue anyway - better to post a duplicate than miss it @@ -392,7 +415,7 @@ def post_review_acknowledgment( summary = tool_caller(agent, messages, summary_settings) # Post as a discussion (so we can update it later) - acknowledgment_body = f"""šŸ¤– **Code Review Starting** + acknowledgment_body = f""" {summary} @@ -400,7 +423,8 @@ def post_review_acknowledgment( *Review powered by ReviewBot* """ - discussion_id = post_discussion( + # post_discussion now returns both discussion_id and note_id + discussion_id, note_id = post_discussion( api_v4=api_v4, token=token, project_id=project_id, @@ -408,29 +432,14 @@ def post_review_acknowledgment( body=acknowledgment_body, ) - # Get the note ID from the discussion - # The first note in a discussion is the original note - discussions = get_all_discussions( - api_v4=api_v4, - token=token, - project_id=project_id, - mr_iid=mr_iid, - ) - - note_id = None - for discussion in discussions: - if str(discussion.get("id")) == str(discussion_id): - notes = discussion.get("notes", []) - if notes: - note_id = str(notes[0].get("id")) - break - if not note_id: - console.print("[yellow]⚠ Created discussion but could not find note ID[/yellow]") + console.print("[yellow]⚠ Discussion created but no note ID returned[/yellow]") return None - console.print("[green]āœ“ Posted review acknowledgment[/green]") - return (str(discussion_id), note_id) + console.print( + f"[green]āœ“ Posted review acknowledgment (discussion: {discussion_id}, note: {note_id})[/green]" + ) + return (str(discussion_id), str(note_id)) except Exception as e: console.print(f"[yellow]⚠ Failed to post acknowledgment: {e}[/yellow]") @@ -540,42 +549,64 @@ def update_review_summary( # Build final summary combining statistics and LLM reasoning summary_parts = [ - "šŸ¤– **Code Review Complete**\n\n", + '\n\n', f"Reviewed **{total_files}** file(s), found **{len(issues)}** issue(s) across **{files_with_issues}** file(s).\n\n", - "**Summary:**\n", + "**Summary**\n\n", f"{llm_summary}\n\n", ] if issues: - summary_parts.append("**Issue Breakdown:**\n") + summary_parts.append("**Issue Breakdown**\n\n") if high_count > 0: - summary_parts.append(f"- šŸ”“ **{high_count}** High severity\n") + summary_parts.append( + f' \n' + ) if medium_count > 0: - summary_parts.append(f"- 🟠 **{medium_count}** Medium severity\n") + summary_parts.append( + f' \n' + ) if low_count > 0: - summary_parts.append(f"- 🟢 **{low_count}** Low severity\n") + summary_parts.append( + f' \n' + ) - summary_parts.append("\n**Issues by File:**\n") + summary_parts.append("\n
\n
\n\n**Issues by File**\n\n") for file_path, file_issues in sorted(issues_by_file.items()): high = sum(1 for i in file_issues if i.severity == IssueSeverity.HIGH) medium = sum(1 for i in file_issues if i.severity == IssueSeverity.MEDIUM) low = sum(1 for i in file_issues if i.severity == IssueSeverity.LOW) - severity_badges = [] + + # File name and issue count + summary_parts.append(f"`{file_path}`: {len(file_issues)} issue(s) \n\n") + + # Badges on separate lines if high > 0: - severity_badges.append(f"šŸ”“ {high}") + summary_parts.append( + f'\n' + ) if medium > 0: - severity_badges.append(f"🟠 {medium}") + summary_parts.append( + f'\n' + ) if low > 0: - severity_badges.append(f"🟢 {low}") - badges = " ".join(severity_badges) - summary_parts.append(f"- `{file_path}`: {len(file_issues)} issue(s) {badges}\n") + summary_parts.append( + f'\n' + ) + + # Spacing between files + summary_parts.append("\n
\n
\n\n") else: - summary_parts.append("\nāœ… **No issues found!**\n") + summary_parts.append( + '\n **No issues found!**\n' + ) summary_parts.append("\n---\n*Review powered by ReviewBot*") summary_body = "".join(summary_parts) + console.print( + f"[dim]Updating discussion {discussion_id}, note {note_id} with review summary...[/dim]" + ) try: update_discussion_note( api_v4=api_v4, @@ -589,6 +620,9 @@ def update_review_summary( console.print("[green]āœ“ Updated review acknowledgment with summary[/green]") except Exception as e: console.print(f"[yellow]⚠ Failed to update acknowledgment: {e}[/yellow]") + import traceback + + traceback.print_exc() # Don't fail the whole review if update fails @@ -656,6 +690,7 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str: ) # Post acknowledgment that review is starting + console.print("[dim]Posting review acknowledgment...[/dim]") acknowledgment_ids = post_review_acknowledgment( api_v4=api_v4, token=token, @@ -664,6 +699,12 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str: agent=low_effort_agent, diffs=filtered_diffs, ) + if acknowledgment_ids: + console.print( + f"[dim]Acknowledgment created: discussion={acknowledgment_ids[0]}, note={acknowledgment_ids[1]}[/dim]" + ) + else: + console.print("[yellow]⚠ Failed to create acknowledgment (returned None)[/yellow]") try: # Define callback to create discussions as each file's review completes @@ -693,8 +734,12 @@ def on_file_review_complete(file_path: str, issues: list[Any]) -> None: console.print(f"[bold cyan]šŸ“Š Total issues found: {len(issues)}[/bold cyan]") # Update the acknowledgment note with summary + console.print(f"[dim]Checking acknowledgment_ids: {acknowledgment_ids}[/dim]") if acknowledgment_ids: discussion_id, note_id = acknowledgment_ids + console.print( + f"[dim]Calling update_review_summary for discussion {discussion_id}, note {note_id}...[/dim]" + ) update_review_summary( api_v4=api_v4, token=token, @@ -706,6 +751,11 @@ def on_file_review_complete(file_path: str, issues: list[Any]) -> None: diffs=filtered_diffs, agent=low_effort_agent, ) + console.print("[dim]update_review_summary completed[/dim]") + else: + console.print( + "[yellow]⚠ No acknowledgment to update (initial acknowledgment may have failed)[/yellow]" + ) # Discussions are now created as reviews complete, but we still need to # handle any files that might have been processed but had no issues @@ -754,9 +804,9 @@ def handle_file_issues( # Severity, Color pairs severity_color_pairs = { - IssueSeverity.HIGH: "red", - IssueSeverity.MEDIUM: "orange", - IssueSeverity.LOW: "green", + IssueSeverity.HIGH: "#d73a49", # red + IssueSeverity.MEDIUM: "#fbca04", # yellow/orange + IssueSeverity.LOW: "#28a745", # green } discussion_id = None @@ -765,8 +815,10 @@ def handle_file_issues( first_issue = issues[0] discussion_title = "" + color = severity_color_pairs[first_issue.severity].strip("#") + # Build the discussion body with optional suggestion - discussion_body = f""" + discussion_body = f""" {first_issue.description} """ @@ -862,8 +914,11 @@ def handle_file_issues( issue.end_line, ) + label = issue.severity.value.upper() + color = severity_color_pairs[issue.severity] + # Format the reply with a diff block and optional suggestion - reply_body = f""" + reply_body = f"""" /> {issue.description} """ @@ -904,6 +959,32 @@ def handle_file_issues( traceback.print_exc() +def _generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str: + """ + Generate a line_code string for GitLab position API. + + Format: __ + + Args: + file_path: Path to the file + old_line: Line number in old file (or None) + new_line: Line number in new file (or None) + + Returns: + Line code string + """ + import hashlib + + # Generate SHA1 hash of the file path + filename_sha = hashlib.sha1(file_path.encode()).hexdigest() + + # Format: sha_old_new, using empty string for None values + old_str = str(old_line) if old_line is not None else "" + new_str = str(new_line) if new_line is not None else "" + + return f"{filename_sha}_{old_str}_{new_str}" + + def create_position_for_issue( diff_text: str, issue_line_start: int, @@ -970,26 +1051,28 @@ def create_position_for_issue( current_old += 1 current_new += 1 - # Choose the best line to anchor the discussion: - # 1. Prefer the first added line (issues are usually about new code) - # 2. Fall back to middle context line - # 3. Finally use deleted line or start line - found_old_line = None - found_new_line = None + # Determine start and end lines for the position + # Priority: added lines > context lines > deleted lines + start_old_line = None + start_new_line = None + end_old_line = None + end_new_line = None if added_lines: - # Use the first added line in the range - found_old_line, found_new_line = added_lines[0] + # Use the first and last added lines in the range + start_old_line, start_new_line = added_lines[0] + end_old_line, end_new_line = added_lines[-1] elif context_lines: - # Use the middle context line - mid_idx = len(context_lines) // 2 - found_old_line, found_new_line = context_lines[mid_idx] + # Use the first and last context lines + start_old_line, start_new_line = context_lines[0] + end_old_line, end_new_line = context_lines[-1] elif deleted_lines: - # Use the first deleted line - found_old_line, found_new_line = deleted_lines[0] + # Use the first and last deleted lines + start_old_line, start_new_line = deleted_lines[0] + end_old_line, end_new_line = deleted_lines[-1] # If we didn't find any line in the diff, return None - if found_old_line is None and found_new_line is None: + if start_old_line is None and start_new_line is None: return None # Create position object @@ -1002,11 +1085,37 @@ def create_position_for_issue( "position_type": "text", } - if found_new_line is not None: - position["new_line"] = found_new_line - - if found_old_line is not None: - position["old_line"] = found_old_line + # For single-line issues, use the simple format + if issue_line_start == issue_line_end: + if start_new_line is not None: + position["new_line"] = start_new_line + if start_old_line is not None: + position["old_line"] = start_old_line + else: + # For multi-line issues, use line_range + # Determine the line type (new for added lines, old for deleted lines) + line_type = "new" if start_new_line is not None else "old" + + position["line_range"] = { + "start": { + "line_code": _generate_line_code(new_path, start_old_line, start_new_line), + "type": line_type, + }, + "end": { + "line_code": _generate_line_code(new_path, end_old_line, end_new_line), + "type": line_type, + }, + } + + # Add line numbers to start and end + if start_old_line is not None: + position["line_range"]["start"]["old_line"] = start_old_line + if start_new_line is not None: + position["line_range"]["start"]["new_line"] = start_new_line + if end_old_line is not None: + position["line_range"]["end"]["old_line"] = end_old_line + if end_new_line is not None: + position["line_range"]["end"]["new_line"] = end_new_line return position @@ -1032,7 +1141,8 @@ def create_discussion( # GitLab discussions don't have separate titles in the API, # so we include the title in the body with markdown formatting - discussion_id = post_discussion( + # post_discussion returns (discussion_id, note_id), we only need discussion_id + discussion_id, _ = post_discussion( api_v4=gitlab_config.api_v4, token=gitlab_config.token, project_id=gitlab_config.project_id, diff --git a/src/reviewbot/infra/gitlab/note.py b/src/reviewbot/infra/gitlab/note.py index 6bfa966..535c6c7 100644 --- a/src/reviewbot/infra/gitlab/note.py +++ b/src/reviewbot/infra/gitlab/note.py @@ -35,9 +35,9 @@ def post_discussion( body: str, position: dict[str, Any] | None = None, timeout: int = 30, -) -> str: +) -> tuple[str, str | None]: """ - Create a new discussion and return its ID. + Create a new discussion and return its ID and first note ID. Args: api_v4: GitLab API v4 base URL @@ -49,7 +49,7 @@ def post_discussion( timeout: Request timeout Returns: - The discussion ID from GitLab + Tuple of (discussion_id, note_id). note_id may be None if not found. """ url = f"{api_v4.rstrip('/')}/projects/{project_id}/merge_requests/{mr_iid}/discussions" @@ -90,14 +90,18 @@ def post_discussion( r.raise_for_status() - # GitLab returns the created discussion with an 'id' field + # GitLab returns the created discussion with an 'id' field and notes array response_data = r.json() discussion_id = response_data.get("id") if not discussion_id: raise RuntimeError(f"Discussion created but no ID returned: {response_data}") - return discussion_id + # Also return the first note ID (the discussion body note) + notes = response_data.get("notes", []) + note_id = notes[0].get("id") if notes else None + + return discussion_id, note_id def post_discussion_reply( @@ -137,7 +141,8 @@ def create_discussion( # GitLab discussions don't have separate titles, so we include it in the body full_body = f"## {title}\n\n{body}" - discussion_id = post_discussion( + # post_discussion returns (discussion_id, note_id), we only need discussion_id + discussion_id, _ = post_discussion( api_v4=api_v4, token=token, project_id=project_id, @@ -266,12 +271,14 @@ def update_discussion_note( timeout=timeout, ) + # Check for errors and raise with detailed information if r.status_code >= 400: console.print(f"[red]Failed to update note: {r.status_code} {r.reason}[/red]") try: error_response = r.json() console.print(f"[red]Error response: {error_response}[/red]") - except Exception: + except ValueError: + # JSON parsing failed, use text + error_response = r.text console.print(f"[red]Error response text: {r.text}[/red]") - - r.raise_for_status() + raise RuntimeError(f"Failed to update note: {r.status_code} {r.reason}: {error_response}") From c8c288c8c8ca3a48f08da2e2acf3f2c30398a8f4 Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Tue, 6 Jan 2026 18:08:00 +0400 Subject: [PATCH 8/8] feat: integrate reasoning tool and enhance review process - Added a new `think` tool to record internal reasoning during code reviews, improving context retention and analysis quality. - Updated the review process to require reasoning before generating output, ensuring deeper analysis of code changes. - Enhanced the summary generation to include previous reasoning context, providing reviewers with insights into past evaluations. - Modified the configuration to support thread creation options, allowing for more flexible discussion management. - Refactored various functions to accommodate these enhancements and improve overall code clarity. --- src/reviewbot/agent/tasks/issues.py | 105 ++++- src/reviewbot/agent/workflow.py | 596 ++++++++++++---------------- src/reviewbot/core/config.py | 1 + src/reviewbot/infra/config/env.py | 3 + src/reviewbot/infra/gitlab/diff.py | 5 +- src/reviewbot/infra/gitlab/note.py | 11 +- src/reviewbot/models/gpt.py | 2 +- src/reviewbot/tools/__init__.py | 2 + src/reviewbot/tools/think.py | 45 +++ 9 files changed, 411 insertions(+), 359 deletions(-) create mode 100644 src/reviewbot/tools/think.py diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py index a05e314..9a58f23 100644 --- a/src/reviewbot/agent/tasks/issues.py +++ b/src/reviewbot/agent/tasks/issues.py @@ -18,6 +18,34 @@ console = Console() +def get_reasoning_context() -> str: + """ + Retrieve stored reasoning history from the current context. + + Returns: + Formatted string of previous reasoning, or empty string if none exists. + """ + try: + context = store_manager_ctx.get() + issue_store = context.get("issue_store") + + if not issue_store or not hasattr(issue_store, "_reasoning_history"): + return "" + + reasoning_history = issue_store._reasoning_history + if not reasoning_history: + return "" + + # Format reasoning history for context + formatted = "\n\n**Your Previous Reasoning:**\n" + for i, reasoning in enumerate(reasoning_history, 1): + formatted += f"{i}. {reasoning}\n" + + return formatted + except Exception: + return "" + + def with_retry(func: Callable, settings: ToolCallerSettings, *args, **kwargs) -> Any: """ Execute a function with exponential backoff retry logic. @@ -298,6 +326,7 @@ def quick_scan_file( - Complex algorithms or data structures - Error handling changes - Configuration changes that affect behavior +- Use tool 'think' to reason. You must reason at least 10 times before giving an answer Return FALSE if: - Only formatting/whitespace changes @@ -383,10 +412,48 @@ def review_single_file( """ Review a single diff file and return issues found. """ + # Get any previous reasoning context + reasoning_context = get_reasoning_context() + + # Force a reasoning pass to ensure think() is invoked during deep review + try: + from reviewbot.tools import get_diff as get_diff_tool + + diff_content = get_diff_tool.invoke({"file_path": file_path}) + think_messages: list[BaseMessage] = [ + SystemMessage( + content=( + "You are a senior code reviewer. You MUST call think() exactly once " + "with 2-5 sentences of reasoning about the provided diff. " + "Do not use any other tools. After calling think(), reply with the " + "single word DONE." + ) + ), + HumanMessage( + content=f"""Diff for {file_path}: + +```diff +{diff_content} +``` +""", + ), + ] + think_settings = ToolCallerSettings(max_tool_calls=1, max_iterations=1) + tool_caller(agent, think_messages, think_settings) + except Exception as e: + console.print(f"[yellow]⚠ Failed to record reasoning for {file_path}: {e}[/yellow]") + messages: list[BaseMessage] = [ SystemMessage( content=f"""You are a senior code reviewer analyzing a specific file change. +REASONING TOOL: +- You have access to a `think()` tool for recording your internal reasoning +- Use it to plan your approach, analyze patterns, or reason about potential issues +- Your reasoning is stored and will be available in subsequent requests +- This helps maintain context and improves review quality{reasoning_context} + - During deep reviews, you MUST call think() before producing your JSON output + Your task: Review ONLY the file '{file_path}' from the merge request diff. IMPORTANT GUIDELINES: @@ -396,36 +463,42 @@ def review_single_file( - Only report issues with clear negative impact (bugs, security risks, performance problems, logic errors) - Avoid reporting issues about code style, formatting, or personal preferences unless they violate critical standards - Medium/High severity issues should be reserved for actual bugs, security vulnerabilities, or broken functionality +- The `description` field MUST include a fenced ```diff block quoting only the relevant added/removed/context lines without (@@ but + - is fine), followed by a short plain-text explanation (1-3 sentences) CRITICAL - KNOWLEDGE CUTOFF AWARENESS: -āš ļø Your training data has a cutoff date. The code you're reviewing may use: +Your training data has a cutoff date. The code you're reviewing may use: - Package versions released AFTER your training (e.g., v2, v3 of libraries) - Language versions you don't know about (e.g., Go 1.23+, Python 3.13+) - Import paths that have changed since your training - APIs that have been updated DO NOT FLAG as issues: -āŒ Version numbers (e.g., "Go 1.25 doesn't exist" - it might now!) -āŒ Import paths you don't recognize (e.g., "should be v1 not v2" - v2 might be correct!) -āŒ Package versions (e.g., "mongo-driver/v2" - newer versions exist!) -āŒ Language features you don't recognize (they might be new) -āŒ API methods you don't know (they might have been added) +Version numbers (e.g., "Go 1.25 doesn't exist" - it might now!) +Import paths you don't recognize (e.g., "should be v1 not v2" - v2 might be correct!) +Package versions (e.g., "mongo-driver/v2" - newer versions exist!) +Language features you don't recognize (they might be new) +API methods you don't know (they might have been added) ONLY flag version/import issues if: -āœ… There's an obvious typo (e.g., "monggo" instead of "mongo") -āœ… The code itself shows an error (e.g., import fails in the diff) -āœ… There's a clear pattern mismatch (e.g., mixing v1 and v2 imports inconsistently) +There's an obvious typo (e.g., "monggo" instead of "mongo") +The code itself shows an error (e.g., import fails in the diff) +There's a clear pattern mismatch (e.g., mixing v1 and v2 imports inconsistently) When in doubt about versions/imports: ASSUME THE DEVELOPER IS CORRECT and skip it. SUGGESTIONS: -- When the fix is OBVIOUS and simple, include a "suggestion" field with the corrected code -- The suggestion should contain ONLY the fixed code (not diff markers like +/-) -- Only include suggestions for simple fixes (typos, obvious bugs, missing fields, etc.) -- Do NOT include suggestions for complex refactorings or architectural changes -- DO NOT suggest version/import changes unless there's an obvious typo -- Format: just the corrected code, no explanations - +- When a fix is simple, provide a "suggestion" field. +- **GitLab Syntax Requirement**: You must format the suggestion using relative line offsets based on your `start_line` and `end_line`. +- **The Formula**: + 1. Calculate the offset: `L = end_line - start_line`. + 2. The header MUST be: ```suggestion:-L+0 +- **Example**: If `start_line` is 7 and `end_line` is 9, the offset `L` is 2. The header is ```suggestion:-2+0. +- **Content**: The suggestion must include the full corrected code for every line from `start_line` to `end_line`. +- **Indentation**: You MUST preserve the exact leading whitespace of the original code. +- Format: +```suggestion:-L+0 +[CORRECTED CODE BLOCK] +``` Output format: JSON array of issue objects following this schema: {IssueModel.model_json_schema()} diff --git a/src/reviewbot/agent/workflow.py b/src/reviewbot/agent/workflow.py index 7090015..0de5d04 100644 --- a/src/reviewbot/agent/workflow.py +++ b/src/reviewbot/agent/workflow.py @@ -1,4 +1,5 @@ import fnmatch +import hashlib import re from dataclasses import dataclass from pathlib import Path @@ -40,6 +41,7 @@ from reviewbot.tools import ( get_diff, read_file, + think, ) console = Console() @@ -192,87 +194,44 @@ def filter_diffs(diffs: list[FileDiff], reviewignore_patterns: list[str]) -> lis return filtered -def _extract_code_from_diff(diff_patch: str, line_start: int, line_end: int) -> str: - """ - Extract code lines from a unified diff patch for a given line range. - - Args: - diff_patch: The unified diff patch string - line_start: Starting line number (1-indexed, in the new file) - line_end: Ending line number (1-indexed, in the new file) - - Returns: - String containing the code lines from the diff - """ - import re - - lines = diff_patch.splitlines(keepends=True) - result_lines: list[str] = [] - current_new_line = 0 - current_old_line = 0 - in_target_range = False +def _extract_code_from_diff(diff_text: str, line_start: int, line_end: int) -> str: + hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@") + lines = diff_text.splitlines() - # Pattern to match hunk headers: @@ -old_start,old_count +new_start,new_count @@ - hunk_header_re = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@") + extracted = [] + current_new = 0 + in_hunk = False for line in lines: - # Check if this is a hunk header - match = hunk_header_re.match(line) + match = hunk_header_pattern.match(line) if match: - # new_start is the line number in the new file where this hunk starts - new_start = int(match.group(3)) - old_start = int(match.group(1)) - current_new_line = new_start - current_old_line = old_start - # Check if this hunk overlaps with our target range - new_count = int(match.group(4)) if match.group(4) else 1 - in_target_range = new_start <= line_end and (new_start + new_count) >= line_start + current_new = int(match.group(3)) + in_hunk = True continue - # Skip diff header lines - if line.startswith("diff --git") or line.startswith("---") or line.startswith("+++"): + if not in_hunk: continue - # Process diff lines - keep the prefixes to show the actual diff - # Include context lines to show proper indentation and structure + # We only care about the lines in the NEW file (the result of the change) if line.startswith("+"): - # Added line - this is in the new file - if current_new_line >= line_start and current_new_line <= line_end: - # Ensure space after '+' for proper markdown diff formatting - if len(line) > 1 and line[1] != " ": - formatted_line = "+ " + line[1:] - else: - formatted_line = line - result_lines.append(formatted_line) - current_new_line += 1 + if line_start <= current_new <= line_end: + extracted.append(line[1:]) # Remove '+' + current_new += 1 elif line.startswith("-"): + # Skip deleted lines for code extraction of the 'new' state continue - # Removed line - include it to show what was removed - # Include removals that are in the same hunk as our target range - # Also include nearby removals for context - if in_target_range or ( - current_old_line >= line_start - 3 and current_old_line <= line_end + 3 - ): - # Ensure space after '-' for proper markdown diff formatting - if len(line) > 1 and line[1] != " ": - formatted_line = "- " + line[1:] - else: - formatted_line = line - result_lines.append(formatted_line) - current_old_line += 1 - elif line.startswith(" "): - # Context line - this exists in both old and new files - # Include context lines within the range and a few lines before/after for structure - if current_new_line >= line_start - 2 and current_new_line <= line_end + 2: - # Context lines already have space prefix - result_lines.append(line) - current_new_line += 1 - current_old_line += 1 - elif line.startswith("\\"): - # End of file marker - skip - continue + else: + # Context line + if line_start <= current_new <= line_end: + extracted.append(line[1:] if line else "") + current_new += 1 - return "".join(result_lines) + # FIX: Exit early if we've passed the end of our requested range + if current_new > line_end: + if extracted: # Only break if we actually found lines + break + + return "\n".join(extracted) @dataclass @@ -456,6 +415,7 @@ def update_review_summary( note_id: str, issues: list[Issue], diffs: list[FileDiff], + diff_refs: dict[str, str], agent: Agent, ) -> None: """ @@ -471,6 +431,7 @@ def update_review_summary( note_id: Note ID to update issues: List of issues found during review diffs: List of file diffs that were reviewed + diff_refs: Diff references including head_sha and project_web_url agent: The agent to use for generating summary """ from langchain_core.messages import HumanMessage, SystemMessage @@ -506,14 +467,24 @@ def update_review_summary( messages = [ SystemMessage( - content="""You are a code review assistant. Generate a concise, professional summary of a code review with reasoning. + content="""You are a Merge Request reviewer. Generate a concise, professional summary of a code review with reasoning. IMPORTANT: -- Keep it SHORT (3-5 sentences max) -- Provide reasoning about the overall code quality +- Use EXACTLY two paragraphs, each wrapped in

tags. +- Provide reasoning about the overall merge request purpose and code quality. - Highlight key concerns or positive aspects - Be constructive and professional - DO NOT use any tools +- Use paragraphs with readable flow. +Paragraphs should be wrapped with

tags. Use new

tag for a newline. +Example +

+paragraph +

+
+

+paragraph2 +

- Focus on the big picture, not individual issue details""" ), HumanMessage( @@ -530,13 +501,11 @@ def update_review_summary( **Issues found:** {issues_text} -Generate a brief summary (3-5 sentences) that: -1. Provides overall assessment of the code quality +- Use EXACTLY two paragraphs, each wrapped in

tags. +1. Provides overall assessment of the purpose of the merge request purpose and code quality. 2. Highlights the most important concerns (if any) 3. Gives reasoning about the review findings -4. Is constructive and actionable - -If no issues were found, celebrate the good code quality.""" +4. Is constructive and actionable """ ), ] @@ -556,6 +525,9 @@ def update_review_summary( ] if issues: + project_web_url = diff_refs.get("project_web_url") + head_sha = diff_refs.get("head_sha") + summary_parts.append("**Issue Breakdown**\n\n") if high_count > 0: summary_parts.append( @@ -570,34 +542,69 @@ def update_review_summary( f' \n' ) - summary_parts.append("\n
\n
\n\n**Issues by File**\n\n") - for file_path, file_issues in sorted(issues_by_file.items()): - high = sum(1 for i in file_issues if i.severity == IssueSeverity.HIGH) - medium = sum(1 for i in file_issues if i.severity == IssueSeverity.MEDIUM) - low = sum(1 for i in file_issues if i.severity == IssueSeverity.LOW) + summary_parts.append("\n
\n
\n\n") + + non_dedicated_issues = [issue for issue in issues if not issue.discussion_id] + if non_dedicated_issues: + issues_by_file: dict[str, list[Issue]] = {} + for issue in non_dedicated_issues: + issues_by_file.setdefault(issue.file_path, []).append(issue) + + severity_badge_colors = { + IssueSeverity.HIGH: "red", + IssueSeverity.MEDIUM: "orange", + IssueSeverity.LOW: "green", + } + + for file_path, file_issues in sorted(issues_by_file.items()): + summary_parts.append(f"### šŸ“‚ {file_path}\n\n") + for issue in file_issues: + file_diff = next((fd for fd in diffs if fd.new_path == issue.file_path), None) + code_snippet = "" + if file_diff: + code_snippet = _extract_code_from_diff( + file_diff.patch, + issue.start_line, + issue.end_line, + ) + if not code_snippet: + code_snippet = "(no diff context available)" + + label = issue.severity.value.upper() + badge_color = severity_badge_colors[issue.severity] + file_url = None + if project_web_url and head_sha: + escaped_path = quote(issue.file_path, safe="/") + if issue.start_line == issue.end_line: + anchor = f"#L{issue.start_line}" + else: + anchor = f"#L{issue.start_line}-L{issue.end_line}" + file_url = f"{project_web_url}/-/blob/{head_sha}/{escaped_path}{anchor}" + if file_url: + location_line = ( + f'' + f"#L {issue.start_line}-{issue.end_line}" + f"" + ) + else: + location_line = f"#L {issue.start_line}-{issue.end_line}" - # File name and issue count - summary_parts.append(f"`{file_path}`: {len(file_issues)} issue(s) \n\n") + issue_body = f"""{issue.description} +""" + summary_parts.append( + f"""

+  {issue.title} ({location_line}) + +{issue_body} - # Badges on separate lines - if high > 0: - summary_parts.append( - f'\n' - ) - if medium > 0: - summary_parts.append( - f'\n' - ) - if low > 0: - summary_parts.append( - f'\n' - ) - # Spacing between files - summary_parts.append("\n
\n
\n\n") +
+""" + ) + summary_parts.append("\n") else: summary_parts.append( - '\n **No issues found!**\n' + '\n\n' ) summary_parts.append("\n---\n*Review powered by ReviewBot*") @@ -643,6 +650,7 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str: tools = [ get_diff, # Primary tool: get the diff for the file read_file, # Optional: get additional context if needed + think, # Internal reasoning and thought process ] agent: Agent = create_agent( @@ -686,7 +694,7 @@ def work_agent(config: Config, project_id: str, mr_iid: str) -> str: ) low_effort_agent: Agent = create_agent( model=low_effort_model, - tools=[get_diff], # Only needs get_diff for quick scanning + tools=[get_diff, think], # Only needs get_diff for quick scanning ) # Post acknowledgment that review is starting @@ -713,6 +721,11 @@ def on_file_review_complete(file_path: str, issues: list[Any]) -> None: if not issues: console.print(f"[dim]No issues found in {file_path}, skipping discussion[/dim]") return + if not config.create_threads: + console.print( + f"[dim]Thread creation disabled, deferring issues in {file_path} to summary[/dim]" + ) + return # Convert IssueModel to Issue domain objects from reviewbot.core.issues.issue_model import IssueModel @@ -749,6 +762,7 @@ def on_file_review_complete(file_path: str, issues: list[Any]) -> None: note_id=note_id, issues=issues, diffs=filtered_diffs, + diff_refs=diff_refs, agent=low_effort_agent, ) console.print("[dim]update_review_summary completed[/dim]") @@ -782,7 +796,8 @@ def handle_file_issues( diff_refs: dict[str, str], # Add this parameter (contains base_sha, head_sha, start_sha) ) -> None: """ - Create one discussion per file with the first issue, and reply with subsequent issues. + Create positioned discussions for a capped set of high-priority issues, and + group the rest into a single per-file discussion with replies. Args: file_path: Path to the file being reviewed @@ -809,180 +824,124 @@ def handle_file_issues( IssueSeverity.LOW: "#28a745", # green } - discussion_id = None - - # Process the first issue - create a discussion with position - first_issue = issues[0] - discussion_title = "" - - color = severity_color_pairs[first_issue.severity].strip("#") - - # Build the discussion body with optional suggestion - discussion_body = f""" - -{first_issue.description} -""" + max_dedicated_threads = 3 + dedicated_issues: list[Issue] = [] + reply_issues: list[Issue] = [] - # Add suggestion if available (GitLab will render it as an applicable suggestion) - if first_issue.suggestion: - discussion_body += f""" - -```suggestion -{first_issue.suggestion} -``` -""" - - # Create position for the first issue - position = None - if ( - file_diff - and base_sha - and head_sha - and start_sha - and file_diff.old_path - and file_diff.new_path - ): - position = create_position_for_issue( - diff_text=file_diff.patch, - issue_line_start=first_issue.start_line, - issue_line_end=first_issue.end_line, - base_sha=base_sha, - head_sha=head_sha, - start_sha=start_sha, - old_path=file_diff.old_path, - new_path=file_diff.new_path, - ) - - # Create discussion for the first issue - try: - discussion_id = create_discussion( - title=discussion_title, - body=discussion_body, - gitlab_config=gitlab_config, - position=position, - ) - console.print( - f"[green]āœ“ Created discussion for issue at lines {first_issue.start_line}-{first_issue.end_line} (ID: {discussion_id})[/green]" - ) - except Exception as e: - if position: - # If position was provided and it failed, try without position - console.print( - f"[yellow]Failed with position for lines {first_issue.start_line}-{first_issue.end_line}, retrying without position: {e}[/yellow]" - ) - try: - discussion_id = create_discussion( - title=discussion_title, - body=discussion_body, - gitlab_config=gitlab_config, - position=None, - ) - console.print( - f"[green]āœ“ Created discussion without position (ID: {discussion_id})[/green]" - ) - except Exception as e2: - console.print( - f"[red]āœ— Failed to create discussion for issue at lines {first_issue.start_line}-{first_issue.end_line}: {e2}[/red]" - ) - import traceback - - traceback.print_exc() - return # Can't proceed without a discussion + for issue in issues: + needs_dedicated = issue.suggestion is not None or issue.severity == IssueSeverity.HIGH + if needs_dedicated and len(dedicated_issues) < max_dedicated_threads: + dedicated_issues.append(issue) else: - console.print( - f"[red]āœ— Failed to create discussion for issue at lines {first_issue.start_line}-{first_issue.end_line}: {e}[/red]" + reply_issues.append(issue) + + def build_position(issue: Issue) -> dict[str, Any] | None: + if ( + file_diff + and base_sha + and head_sha + and start_sha + and file_diff.old_path + and file_diff.new_path + ): + return create_position_for_issue( + diff_text=file_diff.patch, + issue_line_start=issue.start_line, + issue_line_end=issue.end_line, + base_sha=base_sha, + head_sha=head_sha, + start_sha=start_sha, + old_path=file_diff.old_path, + new_path=file_diff.new_path, ) - import traceback - - traceback.print_exc() - return # Can't proceed without a discussion - - # Process remaining issues - reply to the discussion with diff blocks - for issue in issues[1:]: - if not discussion_id: - console.print( - f"[yellow]⚠ Skipping issue at lines {issue.start_line}-{issue.end_line} (no discussion created)[/yellow]" - ) - continue - - # Extract the relevant code from the diff - code_snippet = "" - if file_diff: - code_snippet = _extract_code_from_diff( - file_diff.patch, - issue.start_line, - issue.end_line, - ) - - label = issue.severity.value.upper() - color = severity_color_pairs[issue.severity] + return None - # Format the reply with a diff block and optional suggestion - reply_body = f"""" /> + def create_discussion_for_issue(issue: Issue, include_suggestion: bool = True) -> str | None: + discussion_title = "" + color = severity_color_pairs[issue.severity].strip("#") + discussion_body = f""" {issue.description} """ + if include_suggestion and issue.suggestion: + discussion_body += f""" - # Add suggestion if available (GitLab will render it as an applicable suggestion) - if issue.suggestion: - reply_body += f""" - -```suggestion {issue.suggestion} -``` """ - else: - # If no suggestion, show the diff context - reply_body += f""" -```diff -{code_snippet} -``` -""" + position = build_position(issue) + if position: + console.print( + f"[dim]Position object for lines {issue.start_line}-{issue.end_line}:[/dim]" + ) + import json + + console.print(f"[dim]{json.dumps(position, indent=2)}[/dim]") - # Reply to the discussion try: - reply_to_discussion( - discussion_id=discussion_id, - body=reply_body, + discussion_id = create_discussion( + title=discussion_title, + body=discussion_body, gitlab_config=gitlab_config, + position=position, ) + issue.discussion_id = discussion_id console.print( - f"[green]āœ“ Added reply for issue at lines {issue.start_line}-{issue.end_line}[/green]" + f"[green]āœ“ Created discussion for issue at lines {issue.start_line}-{issue.end_line} (ID: {discussion_id})[/green]" ) + return discussion_id except Exception as e: + if position: + console.print( + f"[yellow]Failed with position for lines {issue.start_line}-{issue.end_line}, retrying without position: {e}[/yellow]" + ) + try: + discussion_id = create_discussion( + title=discussion_title, + body=discussion_body, + gitlab_config=gitlab_config, + position=None, + ) + issue.discussion_id = discussion_id + console.print( + f"[green]āœ“ Created discussion without position (ID: {discussion_id})[/green]" + ) + return discussion_id + except Exception as e2: + console.print( + f"[red]āœ— Failed to create discussion for issue at lines {issue.start_line}-{issue.end_line}: {e2}[/red]" + ) + import traceback + + traceback.print_exc() + return None + console.print( - f"[red]āœ— Failed to reply for issue at lines {issue.start_line}-{issue.end_line}: {e}[/red]" + f"[red]āœ— Failed to create discussion for issue at lines {issue.start_line}-{issue.end_line}: {e}[/red]" ) import traceback traceback.print_exc() + return None + for issue in dedicated_issues: + create_discussion_for_issue(issue, include_suggestion=True) -def _generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str: - """ - Generate a line_code string for GitLab position API. - - Format: __ + if reply_issues: + console.print( + f"[dim]Leaving {len(reply_issues)} non-dedicated issue(s) for the summary[/dim]" + ) - Args: - file_path: Path to the file - old_line: Line number in old file (or None) - new_line: Line number in new file (or None) - Returns: - Line code string +def generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str: """ - import hashlib - - # Generate SHA1 hash of the file path - filename_sha = hashlib.sha1(file_path.encode()).hexdigest() - - # Format: sha_old_new, using empty string for None values - old_str = str(old_line) if old_line is not None else "" - new_str = str(new_line) if new_line is not None else "" - - return f"{filename_sha}_{old_str}_{new_str}" + Generates a GitLab-compatible line_code. + Format: sha1(path) + "_" + old_line + "_" + new_line + """ + path_hash = hashlib.sha1(file_path.encode()).hexdigest() + old_s = str(old_line) if old_line is not None else "" + new_s = str(new_line) if new_line is not None else "" + return f"{path_hash}_{old_s}_{new_s}" def create_position_for_issue( @@ -995,127 +954,88 @@ def create_position_for_issue( old_path: str, new_path: str, ) -> dict[str, Any] | None: - """ - Create a GitLab position object for a specific issue line range. - - Args: - diff_text: The full diff text for the file - issue_line_start: Start line number of the issue (in new file) - issue_line_end: End line number of the issue (in new file) - base_sha, head_sha, start_sha: GitLab diff refs - old_path, new_path: File paths - - Returns: - Position dict for GitLab API, or None if line not found in diff - """ hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@") - lines = diff_text.splitlines() - current_old = 0 - current_new = 0 + + current_old, current_new = 0, 0 in_hunk = False - # Track all candidate lines in the range - # Priority: added lines > context lines > deleted lines - added_lines = [] - context_lines = [] - deleted_lines = [] + # Track the actual lines found in the diff to build the range + matched_lines = [] # List of (old_line, new_line) for line in lines: - # Check for hunk header match = hunk_header_pattern.match(line) if match: - current_old = int(match.group(1)) - current_new = int(match.group(3)) + current_old, current_new = int(match.group(1)), int(match.group(3)) in_hunk = True continue if not in_hunk: continue - # Collect all matching lines in the range - if line.startswith("-"): - # Deletion - only has old line number - if current_old >= issue_line_start and current_old <= issue_line_end: - deleted_lines.append((current_old, None)) - current_old += 1 - elif line.startswith("+"): - # Addition - only has new line number - if current_new >= issue_line_start and current_new <= issue_line_end: - added_lines.append((None, current_new)) + # Logic to determine if this specific line is within our target range + if line.startswith("+"): + if issue_line_start <= current_new <= issue_line_end: + matched_lines.append((None, current_new)) current_new += 1 + elif line.startswith("-"): + if issue_line_start <= current_old <= issue_line_end: + matched_lines.append((current_old, None)) + current_old += 1 else: - # Context line - has both - if current_new >= issue_line_start and current_new <= issue_line_end: - context_lines.append((current_old, current_new)) + if issue_line_start <= current_new <= issue_line_end: + matched_lines.append((current_old, current_new)) current_old += 1 current_new += 1 - # Determine start and end lines for the position - # Priority: added lines > context lines > deleted lines - start_old_line = None - start_new_line = None - end_old_line = None - end_new_line = None - - if added_lines: - # Use the first and last added lines in the range - start_old_line, start_new_line = added_lines[0] - end_old_line, end_new_line = added_lines[-1] - elif context_lines: - # Use the first and last context lines - start_old_line, start_new_line = context_lines[0] - end_old_line, end_new_line = context_lines[-1] - elif deleted_lines: - # Use the first and last deleted lines - start_old_line, start_new_line = deleted_lines[0] - end_old_line, end_new_line = deleted_lines[-1] - - # If we didn't find any line in the diff, return None - if start_old_line is None and start_new_line is None: + # FIX: Optimization to prevent "sticky" hunk matching. + # If we have passed the end_line in the NEW file, we stop. + if current_new > issue_line_end and not line.startswith("-"): + if matched_lines: + break + + if not matched_lines: return None - # Create position object + # We anchor the comment to the LAST line of the range so the code is visible + start_old, start_new = matched_lines[0] + end_old, end_new = matched_lines[-1] + + # Calculate line codes for the range + start_code = generate_line_code(new_path if start_new else old_path, start_old, start_new) + end_code = generate_line_code(new_path if end_new else old_path, end_old, end_new) + position = { "base_sha": base_sha, "head_sha": head_sha, "start_sha": start_sha, + "position_type": "text", "old_path": old_path, "new_path": new_path, - "position_type": "text", - } - - # For single-line issues, use the simple format - if issue_line_start == issue_line_end: - if start_new_line is not None: - position["new_line"] = start_new_line - if start_old_line is not None: - position["old_line"] = start_old_line - else: - # For multi-line issues, use line_range - # Determine the line type (new for added lines, old for deleted lines) - line_type = "new" if start_new_line is not None else "old" - - position["line_range"] = { + # Anchor the main comment on the end of the range + "new_line": end_new, + "old_line": end_old, + "line_range": { "start": { - "line_code": _generate_line_code(new_path, start_old_line, start_new_line), - "type": line_type, + "line_code": start_code, + "type": "new" if start_new else "old", + "new_line": start_new, + "old_line": start_old, }, "end": { - "line_code": _generate_line_code(new_path, end_old_line, end_new_line), - "type": line_type, + "line_code": end_code, + "type": "new" if end_new else "old", + "new_line": end_new, + "old_line": end_old, }, - } - - # Add line numbers to start and end - if start_old_line is not None: - position["line_range"]["start"]["old_line"] = start_old_line - if start_new_line is not None: - position["line_range"]["start"]["new_line"] = start_new_line - if end_old_line is not None: - position["line_range"]["end"]["old_line"] = end_old_line - if end_new_line is not None: - position["line_range"]["end"]["new_line"] = end_new_line + }, + } + + # Cleanup: GitLab doesn't like None values in the schema + if position["new_line"] is None: + del position["new_line"] + if position["old_line"] is None: + del position["old_line"] return position diff --git a/src/reviewbot/core/config.py b/src/reviewbot/core/config.py index 91c4767..a44e423 100644 --- a/src/reviewbot/core/config.py +++ b/src/reviewbot/core/config.py @@ -11,3 +11,4 @@ class Config: gitlab_api_v4: str gitlab_token: str gemini_project_id: str + create_threads: bool = False diff --git a/src/reviewbot/infra/config/env.py b/src/reviewbot/infra/config/env.py index e8d631a..e45b957 100644 --- a/src/reviewbot/infra/config/env.py +++ b/src/reviewbot/infra/config/env.py @@ -14,6 +14,7 @@ def load_env() -> Config: gitlab_api_v4 = os.getenv("GITLAB_API_V4_URL") gitlab_token = os.getenv("GITLAB_BOT_TOKEN") gemini_project_id = os.getenv("GEMINI_PROJECT_ID") + create_threads_raw = os.getenv("REVIEWBOT_CREATE_THREADS", "true") if ( not llm_api_key or not llm_base_url @@ -25,6 +26,7 @@ def load_env() -> Config: raise ValueError( "LLM_API_KEY, LLM_BASE_URL, LLM_MODEL, GITLAB_API_V4_URL, GITLAB_BOT_TOKEN, and GEMINI_PROJECT_ID must be set" ) + create_threads = create_threads_raw.strip().lower() not in {"0", "false", "no", "off"} return Config( llm_api_key=SecretStr(llm_api_key), llm_base_url=llm_base_url, @@ -32,4 +34,5 @@ def load_env() -> Config: gitlab_api_v4=gitlab_api_v4, gitlab_token=gitlab_token, gemini_project_id=gemini_project_id, + create_threads=create_threads, ) diff --git a/src/reviewbot/infra/gitlab/diff.py b/src/reviewbot/infra/gitlab/diff.py index 8e3c2f0..0743a16 100644 --- a/src/reviewbot/infra/gitlab/diff.py +++ b/src/reviewbot/infra/gitlab/diff.py @@ -128,10 +128,13 @@ def fetch_mr_diffs( mr_data = mr_response.json() # Get diff_refs for position objects - diff_refs = mr_data.get("diff_refs", {}) + diff_refs = mr_data.get("diff_refs") or {} base_sha = diff_refs.get("base_sha") head_sha = diff_refs.get("head_sha") start_sha = diff_refs.get("start_sha") + mr_web_url = mr_data.get("web_url") + if mr_web_url and "/-/merge_requests/" in mr_web_url: + diff_refs["project_web_url"] = mr_web_url.split("/-/merge_requests/")[0] # Try the new JSON changes endpoint first changes_response = requests.get(changes_url, headers=headers, timeout=timeout) diff --git a/src/reviewbot/infra/gitlab/note.py b/src/reviewbot/infra/gitlab/note.py index 535c6c7..5807e05 100644 --- a/src/reviewbot/infra/gitlab/note.py +++ b/src/reviewbot/infra/gitlab/note.py @@ -58,9 +58,14 @@ def post_discussion( # For file-level discussions without specific lines, don't include position data: dict[str, Any] = {"body": body} if position: - # Only include position if it has required fields (new_line or old_line) - # Otherwise GitLab will reject it as incomplete - has_line_info = "new_line" in position or "old_line" in position or "line_code" in position + # Only include position if it has required fields + # Can have: new_line, old_line, line_code (single line) OR line_range (multi-line) + has_line_info = ( + "new_line" in position + or "old_line" in position + or "line_code" in position + or "line_range" in position # Support multi-line positions + ) if has_line_info: data["position"] = position else: diff --git a/src/reviewbot/models/gpt.py b/src/reviewbot/models/gpt.py index 84201d9..db28e8c 100644 --- a/src/reviewbot/models/gpt.py +++ b/src/reviewbot/models/gpt.py @@ -16,7 +16,7 @@ def get_gpt_model( llm_api_key: SecretStr, base_url: str, temperature: float = 0.2, - reasoning_effort: str = "medium", + reasoning_effort: str = "low", ): return ChatOpenAI( model=llm_model_name, diff --git a/src/reviewbot/tools/__init__.py b/src/reviewbot/tools/__init__.py index 9aced75..574e906 100644 --- a/src/reviewbot/tools/__init__.py +++ b/src/reviewbot/tools/__init__.py @@ -4,10 +4,12 @@ search_codebase, search_codebase_semantic_search, ) +from .think import think __all__ = [ "get_diff", "read_file", "search_codebase", "search_codebase_semantic_search", + "think", ] diff --git a/src/reviewbot/tools/think.py b/src/reviewbot/tools/think.py new file mode 100644 index 0000000..0c6b052 --- /dev/null +++ b/src/reviewbot/tools/think.py @@ -0,0 +1,45 @@ +from langchain.tools import tool # type: ignore + +from reviewbot.context import store_manager_ctx + + +@tool +def think(reasoning: str) -> str: + """Record internal reasoning and thought process. + + Use this tool to think through problems, plan your approach, or reason about code before taking action. + The reasoning is stored and will be included in subsequent requests to maintain context. + + Args: + reasoning: Your internal thoughts, analysis, or reasoning about the current task. + This can include: + - Analysis of code patterns + - Planning next steps + - Reasoning about potential issues + - Conclusions drawn from observations + + Returns: + Confirmation that the reasoning was recorded + + Examples: + - "I notice this function has multiple responsibilities. It handles both data validation + and API calls, which violates the Single Responsibility Principle." + - "Before checking for issues, I should first understand the overall structure. + The code appears to be a REST API with three main endpoints." + - "This looks like a potential security issue - user input is being directly + concatenated into a SQL query. I should flag this as high severity." + """ + context = store_manager_ctx.get() + issue_store = context.get("issue_store") + + if not issue_store: + return "Context not available for storing reasoning." + + # Store reasoning in the issue store's metadata + if not hasattr(issue_store, "_reasoning_history"): + issue_store._reasoning_history = [] + + issue_store._reasoning_history.append(reasoning) + print("Reasoned:") + print(reasoning) + return f"Reasoning recorded: {reasoning[:100]}{'...' if len(reasoning) > 100 else ''}"