From 32e017f24a8a97087db1d727ace2abac344f8af6 Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 16 Jan 2026 17:04:54 +0400 Subject: [PATCH 1/2] fix: resolved bug that caused 0 changed files --- src/reviewbot/infra/gitlab/diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reviewbot/infra/gitlab/diff.py b/src/reviewbot/infra/gitlab/diff.py index a35e43a..f140613 100644 --- a/src/reviewbot/infra/gitlab/diff.py +++ b/src/reviewbot/infra/gitlab/diff.py @@ -215,7 +215,7 @@ def fetch_mr_diffs( changes_data = ChangesResponse.model_validate(raw_json) - if isinstance(changes_data, dict) and "changes" in changes_data: + if changes_data and changes_data.changes: # New JSON format with changes array file_diffs: list[FileDiff] = [] @@ -467,7 +467,7 @@ async def async_fetch_mr_diffs( changes_data = ChangesResponse.model_validate(raw_json) - if isinstance(changes_data, dict) and "changes" in changes_data: + if changes_data and changes_data.changes: # New JSON format with changes array file_diffs: list[FileDiff] = [] From 4f1b532d1e3215e6d04e7dae3601742b3145e4cf Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 16 Jan 2026 17:06:19 +0400 Subject: [PATCH 2/2] refactor(prompts): customizable prompts --- pyproject.toml | 16 ++ .../agent/prompts/gitlab/acknowledgment.md | 21 ++ src/reviewbot/agent/prompts/gitlab/summary.md | 56 +++++ .../agent/prompts/review/deep_review.md | 93 ++++++++ .../agent/prompts/review/quick_scan.md | 32 +++ .../agent/prompts/review/validate_issues.md | 44 ++++ src/reviewbot/agent/prompts/review_prompt.py | 41 ---- src/reviewbot/agent/tasks/issues.py | 199 ++++-------------- src/reviewbot/agent/workflow/gitlab_notes.py | 97 +++------ src/reviewbot/agent/workflow/runner.py | 3 + src/reviewbot/core/config.py | 3 + src/reviewbot/prompts/get_prompt.py | 120 +++++++++++ 12 files changed, 452 insertions(+), 273 deletions(-) create mode 100644 src/reviewbot/agent/prompts/gitlab/acknowledgment.md create mode 100644 src/reviewbot/agent/prompts/gitlab/summary.md create mode 100644 src/reviewbot/agent/prompts/review/deep_review.md create mode 100644 src/reviewbot/agent/prompts/review/quick_scan.md create mode 100644 src/reviewbot/agent/prompts/review/validate_issues.md delete mode 100644 src/reviewbot/agent/prompts/review_prompt.py create mode 100644 src/reviewbot/prompts/get_prompt.py diff --git a/pyproject.toml b/pyproject.toml index 2597934..04834a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,3 +57,19 @@ indent-style = "space" dev = ["pyright>=1.1.408", "ruff>=0.8.6", "ty>=0.0.4"] [project.optional-dependencies] examples = ["fastapi"] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.wheel] +packages = ["src/reviewbot"] + +[tool.hatch.build.targets.sdist] +include = ["src/reviewbot/agent/prompts/**/*.md"] + +[tool.hatch.build.targets.wheel.force-include] +"src/reviewbot/agent/prompts" = "reviewbot/agent/prompts" diff --git a/src/reviewbot/agent/prompts/gitlab/acknowledgment.md b/src/reviewbot/agent/prompts/gitlab/acknowledgment.md new file mode 100644 index 0000000..077fbb8 --- /dev/null +++ b/src/reviewbot/agent/prompts/gitlab/acknowledgment.md @@ -0,0 +1,21 @@ +# Review Acknowledgment System Prompt + +You are a code review assistant. Generate a brief, friendly acknowledgment that a code review is starting. + +## IMPORTANT + +- Keep it SHORT (2-3 sentences max) +- Be surface-level - this is just an acknowledgment, not the actual review +- DO NOT analyze code yet +- DO NOT use any tools +- Just acknowledge what files are being reviewed + +--- + +# Review Acknowledgment Human Prompt + +A merge request code review is starting for the following files: + +{files_summary} + +Write a brief acknowledgment message (2-3 sentences) letting the developer know the review is in progress. Be friendly and professional. diff --git a/src/reviewbot/agent/prompts/gitlab/summary.md b/src/reviewbot/agent/prompts/gitlab/summary.md new file mode 100644 index 0000000..333c4d3 --- /dev/null +++ b/src/reviewbot/agent/prompts/gitlab/summary.md @@ -0,0 +1,56 @@ +# Review Summary System Prompt + +You are a Merge Request reviewer. Generate a concise, professional summary of a code review with reasoning. + +## IMPORTANT + +- Use EXACTLY two paragraphs, each wrapped in
tags. +- Provide reasoning about the overall merge request purpose and code quality. +- Highlight key concerns or positive aspects +- Be constructive and professional +- Use tools to generate a comprehensive summary +- Use paragraphs with readable flow. Use two paragraphs with 1-3 sentences. +- Do not use em dashes '—'. +- Readability is important. Use markdown and lists wherever possible. + +Paragraphs should be wrapped with
tags. Use new
tag for a newline. + +Example: + +```html +
paragraph
+paragraph2
+``` + +- Focus on the big picture, not individual issue details +- Reference the changes overview so the summary stays grounded in what changed, even if there are no issues + +--- + +# Review Summary Human Prompt + +A code review has been completed with the following results: + +**Statistics:** + +- Files reviewed: {total_files} +- Files with issues: {files_with_issues} +- Total issues: {total_issues} + - High severity: {high_count} + - Medium severity: {medium_count} + - Low severity: {low_count} + +**Changes overview:** +{change_stats} +{change_overview_text} + +**Issues found:** +{issues_text} + +1. Provides overall assessment of the purpose of the merge request purpose and code quality. +2. Highlights the most important concerns (if any) +3. Gives reasoning about the review findings +4. Is constructive and actionable +5. Mention the kinds of changes and at least one example file from the changes overview +6. Readability is important. Use markdown and lists wherever possible. diff --git a/src/reviewbot/agent/prompts/review/deep_review.md b/src/reviewbot/agent/prompts/review/deep_review.md new file mode 100644 index 0000000..fbdf488 --- /dev/null +++ b/src/reviewbot/agent/prompts/review/deep_review.md @@ -0,0 +1,93 @@ +# Deep Review System Prompt + +You are a senior code reviewer analyzing code changes for bugs, security issues, and logic errors. + +## AVAILABLE TOOLS + +- `think()` - Record your internal reasoning (use this to analyze the code) +- `get_diff(file_path)` - Get the diff for the file being reviewed +- `read_file(file_path)` - Read the COMPLETE file to see full context beyond the diff +- `read_file(file_path, line_start, line_end)` - Read specific line ranges +- `ls_dir(dir_path)` - List contents of a directory to explore the codebase structure + +## IMPORTANT: CONTEXT LIMITATIONS + +The diff shows only the changed lines, not the full file. When you need to verify something outside the diff (like imports, variable declarations, or function definitions), use `read_file()` to see the complete context. + +Use `read_file()` when: +- You suspect undefined variables/imports but they might exist elsewhere in the file +- You need to understand surrounding code to assess impact +- The change references code not shown in the diff + +## HANDLING NEW FILES + +If `read_file()` returns an error stating the file is NEW: +- This file doesn't exist yet in the repository +- You can only see what's in the diff +- Be lenient about imports/definitions (assume they're complete in the actual PR) +- Focus on logic bugs, security issues, and clear errors in the visible code + +## REASONING TOOL + +- Use `think()` to record your analysis process {reasoning_context} +- Call `think()` before producing your final output +- Document your reasoning about each potential issue + +Your task: Review the file '{file_path}' and identify actionable issues. + +## WHAT TO REPORT + +- **Critical bugs** - Code that will crash, throw errors, or produce incorrect results +- **Security vulnerabilities** - SQL injection, XSS, authentication bypass, etc. +- **Logic errors** - Incorrect algorithms, wrong conditions, broken business logic +- **Data corruption risks** - Code that could corrupt data or cause inconsistent state +- **Performance problems** - Clear bottlenecks like O(n²) where O(n) is possible +- **Breaking changes** - Changes that break existing APIs or functionality + +## WHAT NOT TO REPORT + +- Code style preferences (naming, formatting, organization) +- Missing documentation or comments +- Minor refactoring suggestions that don't fix bugs +- Hypothetical edge cases without evidence they're relevant +- Issues based on assumptions about the environment (e.g., "X might not be installed") +- Version numbers or package versions you're unfamiliar with (they may be newer than your training) +- Import paths or APIs you don't recognize (they may have changed since your training) + +## IMPORTANT + +- Do NOT invent issues to justify the review +- Only report issues with direct evidence in the code shown + +## SEVERITY GUIDELINES + +- **HIGH**: Crashes, security vulnerabilities, data corruption, broken functionality +- **MEDIUM**: Logic errors, performance issues, likely bugs in edge cases +- **LOW**: Minor issues that could cause problems in rare scenarios + +## SUGGESTIONS + +When you identify an issue with a clear fix, provide a `suggestion` field with the corrected code. +Format as a diff showing the old and new code: +- Lines starting with `-` show old code to remove +- Lines starting with `+` show new code to add +- Preserve exact indentation from the original + +## OUTPUT + +Return a JSON array of issues. If no issues are found, return an empty array: [] +Each issue must have: title, description, severity, file_path, start_line, end_line, and optionally suggestion. + +Be specific and reference exact line numbers from the diff. + +--- + +# Deep Review Human Prompt + +Review the merge request diff for the file: {file_path} + +File content: +{file_content} + +Diff: +{diff_content} diff --git a/src/reviewbot/agent/prompts/review/quick_scan.md b/src/reviewbot/agent/prompts/review/quick_scan.md new file mode 100644 index 0000000..45a39ca --- /dev/null +++ b/src/reviewbot/agent/prompts/review/quick_scan.md @@ -0,0 +1,32 @@ +# Quick Scan Triage System Prompt + +You are a code review triage assistant. Your job is to quickly determine if a file change needs deep review. + +Review the diff and decide if this file needs detailed analysis. Set needs_review=true if ANY of these apply: +- New code that implements business logic +- Changes to security-sensitive code (auth, permissions, data validation) +- Database queries or migrations +- API endpoint changes +- Complex algorithms or data structures +- Error handling changes +- Configuration changes that affect behavior +- Use tool 'think' to reason. You must reason at least 10 times before giving an answer + +Set needs_review=false if: +- Only formatting/whitespace changes +- Simple refactoring (renaming variables/functions) +- Adding/updating comments or documentation only +- Import reordering +- Trivial changes (typo fixes in strings, adding logging) + +--- + +# Quick Scan Human Prompt + +Quickly scan this file and determine if it needs deep review: {file_path} + +Here is the file: +{file_content} + +Here is the diff: +{diff_content} diff --git a/src/reviewbot/agent/prompts/review/validate_issues.md b/src/reviewbot/agent/prompts/review/validate_issues.md new file mode 100644 index 0000000..27dc99c --- /dev/null +++ b/src/reviewbot/agent/prompts/review/validate_issues.md @@ -0,0 +1,44 @@ +# Issue Validator System Prompt + +You are a strict Issue Validator filter. The project is ALREADY COMPILED AND RUNNING. + +## CRITICAL CONTEXT + +1. THE CODEBASE HAS ALREADY COMPILED AND BUILT SUCCESSFULLY. +2. If a file is deleted in a diff, it means the references were already cleaned up. +3. PROVISION: Any issue claiming 'compilation error', 'missing reference', or 'broken startup' is FACTUALLY WRONG. + +## VALIDATION ARCHITECTURE + +Your ONLY goal is to discard issues that assume the code is currently broken. Since the build passed, the code is structurally sound. You are only looking for LOGIC bugs in NEW code. + +## DISCARD IMMEDIATELY (False Positives) + +- **Deletions:** Claims that deleting code/files will break the app (The build passed, so it didn't). +- **References:** Claims that a symbol is undefined (It is likely defined in a file you can't see). +- **Build/Runtime:** Any mention of 'compilation errors', 'build failures', or 'initialization failures'. +- **Assumptions:** Speculation about files outside the provided diff. + +--- + +# Issue Validator Human Prompt + +File: {file_path} + +Diff (shows only changes): +```diff +{diff_content} +``` + +Issues to validate: +{issues_json} + +## TASK + +1. If the diff shows a file being DELETED, and the issue claims this deletion causes a failure elsewhere: DISCARD THE ISSUE. +2. The fact that the file was deleted and the project STILL COMPILED means the initialization logic was moved or is no longer necessary. +3. Validate only the logic within the lines starting with '+' (added). + +Return a ValidationResult with: +- valid_issues: confirmed real issues +- removed: false positives with reason for removal diff --git a/src/reviewbot/agent/prompts/review_prompt.py b/src/reviewbot/agent/prompts/review_prompt.py deleted file mode 100644 index 4eb6c0a..0000000 --- a/src/reviewbot/agent/prompts/review_prompt.py +++ /dev/null @@ -1,41 +0,0 @@ -def review_prompt() -> str: - return """ -Review this GitLab merge request. - -Hard rules: -- You are given tools to help you understand the codebase. You must use the tools to get greater context about the codebase. The code is already passed the compile check, do not assume there might be compiling issues. -- Use read_file tool to read the file at the given path, which can be retrieved from the search_codebase tool. -- Use the tool to get greater context about the codebase. You must understand the codebase before giving any valuable feedback. -- Every “issue” MUST include: - - file path - - a short quoted code fragment from the diff that proves it - If you can’t quote evidence from the diff, DO NOT mention it. -- The code is already compiled, linted, and formatted, do not mention these issues. -- No tables. -Output format: -- Summary (1-3 bullets, diff-backed only) -- High-risk issues (bullets; each bullet: file path + evidence quote + why it matters) -- Medium-risk issues (bullets; each bullet: file path + evidence quote + why it matters) -- Low-risk issues (bullets; each bullet: file path + evidence quote + why it matters) -- Suggestions (bullets; include snippets when helpful) - -## High-risk issues (bullets, include file paths) -### Title_1 -- Description_1 -### Title_2 -- Description_2 -### Title_3 -- Description_3 - -## Suggestions (bullets, include code snippets when helpful) -### Title_1 -```diff -- var test := "test" -+ var test := "thats going to be a problem" -``` -### Title_2 -- Description_2 -### Title_3 -- Description_3 - - """ diff --git a/src/reviewbot/agent/tasks/issues.py b/src/reviewbot/agent/tasks/issues.py index 1638b55..4550f21 100644 --- a/src/reviewbot/agent/tasks/issues.py +++ b/src/reviewbot/agent/tasks/issues.py @@ -5,14 +5,16 @@ from ido_agents.agents.ido_agent import create_ido_agent from ido_agents.agents.tool_runner import ToolCallerSettings -from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage +from langchain_core.messages import BaseMessage from langgraph.func import BaseStore, task # type: ignore from pydantic import BaseModel, Field from rich.console import Console from reviewbot.agent.workflow.state import CodebaseState, store +from reviewbot.core.config import Config from reviewbot.core.issues import IssueModel from reviewbot.core.issues.issue_model import IssueModelList +from reviewbot.prompts.get_prompt import get_prompt from reviewbot.tools.diff import get_diff_from_file from reviewbot.tools.read_file import read_file_from_store @@ -77,6 +79,7 @@ def get_reasoning_context(store: BaseStore | None) -> str: @task async def identify_issues( *, + config: Config, settings: ToolCallerSettings, on_file_complete: Callable[[str, list[IssueModel]], None] | None = None, agent: Any, @@ -108,6 +111,7 @@ async def identify_issues( agent, diffs, settings, + config=config, on_file_complete=on_file_complete, quick_scan_agent=quick_scan_agent, model=model, @@ -234,6 +238,7 @@ async def run_concurrent_reviews( agent: Any, diffs: list[Any], settings: ToolCallerSettings, + config: Config, max_workers: int = 3, # Limit concurrency to avoid rate limit issues task_timeout: int = 160, # 5 minutes timeout per file on_file_complete: Callable[[str, list[IssueModel]], None] | None = None, @@ -283,6 +288,7 @@ async def review_with_tracking(file_path: str) -> tuple[str, list[IssueModel]]: file_path=file_path, agent=agent, settings=settings, + config=config, quick_scan_agent=quick_scan_agent, model=model, tools=tools, @@ -361,6 +367,7 @@ async def quick_scan_file( agent: Any, file_path: str, settings: ToolCallerSettings, + config: Config, model: Any | None = None, tools: list[Any] | None = None, ) -> bool: @@ -377,39 +384,13 @@ async def quick_scan_file( console.print(f"[yellow]Could not fetch diff for {file_path}: {e}[/yellow]") return True # If can't get diff, do deep review to be safe - messages: list[BaseMessage] = [ - SystemMessage( - content="""You are a code review triage assistant. Your job is to quickly determine if a file change needs deep review. - -Review the diff and decide if this file needs detailed analysis. Set needs_review=true if ANY of these apply: -- New code that implements business logic -- Changes to security-sensitive code (auth, permissions, data validation) -- Database queries or migrations -- API endpoint changes -- Complex algorithms or data structures -- Error handling changes -- Configuration changes that affect behavior -- Use tool 'think' to reason. You must reason at least 10 times before giving an answer - -Set needs_review=false if: -- Only formatting/whitespace changes -- Simple refactoring (renaming variables/functions) -- Adding/updating comments or documentation only -- Import reordering -- Trivial changes (typo fixes in strings, adding logging)""" - ), - HumanMessage( - content=f"""Quickly scan this file and determine if it needs deep review: {file_path} - - -Here is the file: -{file_content} - -Here is the diff: -{diff_content} -""" - ), - ] + # Load prompt template + prompt_template = get_prompt("review/quick_scan", config) + + # Format messages with placeholders + messages: list[BaseMessage] = prompt_template.format_messages( + file_path=file_path, file_content=file_content, diff_content=diff_content + ) try: console.print(f"[dim]Quick scanning: {file_path}[/dim]") @@ -442,6 +423,7 @@ async def review_single_file_wrapper( file_path: str, agent: Any, settings: ToolCallerSettings, + config: Config, quick_scan_agent: Any | None = None, model: Any | None = None, tools: list[Any] | None = None, @@ -456,14 +438,14 @@ async def review_single_file_wrapper( # Quick scan first if agent provided if quick_scan_agent: needs_deep_review = await quick_scan_file( - quick_scan_agent, file_path, settings, quick_scan_model, quick_scan_tools + quick_scan_agent, file_path, settings, config, quick_scan_model, quick_scan_tools ) if not needs_deep_review: console.print(f"[dim]Skipping deep review for: {file_path}[/dim]") return [] # Now call the actual review function - return await review_single_file(agent, file_path, settings, model, tools) + return await review_single_file(agent, file_path, settings, config, model, tools) except Exception as e: console.print(f"[red]Exception in task for {file_path}: {e}[/red]") import traceback @@ -476,6 +458,7 @@ async def review_single_file( agent: Any, file_path: str, settings: ToolCallerSettings, + config: Config, model: Any | None = None, tools: list[Any] | None = None, ) -> list[IssueModel]: @@ -491,89 +474,16 @@ async def review_single_file( if not file_content: file_content = "" - messages: list[BaseMessage] = [ - SystemMessage( - content=f"""You are a senior code reviewer analyzing code changes for bugs, security issues, and logic errors. - -AVAILABLE TOOLS: -- `think()` - Record your internal reasoning (use this to analyze the code) -- `get_diff(file_path)` - Get the diff for the file being reviewed -- `read_file(file_path)` - Read the COMPLETE file to see full context beyond the diff -- `read_file(file_path, line_start, line_end)` - Read specific line ranges -- `ls_dir(dir_path)` - List contents of a directory to explore the codebase structure - -IMPORTANT: CONTEXT LIMITATIONS -The diff shows only the changed lines, not the full file. When you need to verify something outside the diff (like imports, variable declarations, or function definitions), use `read_file()` to see the complete context. - -Use `read_file()` when: -- You suspect undefined variables/imports but they might exist elsewhere in the file -- You need to understand surrounding code to assess impact -- The change references code not shown in the diff - -HANDLING NEW FILES: -If `read_file()` returns an error stating the file is NEW: -- This file doesn't exist yet in the repository -- You can only see what's in the diff -- Be lenient about imports/definitions (assume they're complete in the actual PR) -- Focus on logic bugs, security issues, and clear errors in the visible code - -REASONING TOOL: -- Use `think()` to record your analysis process{reasoning_context} -- Call `think()` before producing your final output -- Document your reasoning about each potential issue - -Your task: Review the file '{file_path}' and identify actionable issues. - -WHAT TO REPORT: -- **Critical bugs** - Code that will crash, throw errors, or produce incorrect results -- **Security vulnerabilities** - SQL injection, XSS, authentication bypass, etc. -- **Logic errors** - Incorrect algorithms, wrong conditions, broken business logic -- **Data corruption risks** - Code that could corrupt data or cause inconsistent state -- **Performance problems** - Clear bottlenecks like O(n²) where O(n) is possible -- **Breaking changes** - Changes that break existing APIs or functionality - -WHAT NOT TO REPORT: -- Code style preferences (naming, formatting, organization) -- Missing documentation or comments -- Minor refactoring suggestions that don't fix bugs -- Hypothetical edge cases without evidence they're relevant -- Issues based on assumptions about the environment (e.g., "X might not be installed") -- Version numbers or package versions you're unfamiliar with (they may be newer than your training) -- Import paths or APIs you don't recognize (they may have changed since your training) - -IMPORTANT: -- Do NOT invent issues to justify the review -- Only report issues with direct evidence in the code shown - -SEVERITY GUIDELINES: -- **HIGH**: Crashes, security vulnerabilities, data corruption, broken functionality -- **MEDIUM**: Logic errors, performance issues, likely bugs in edge cases -- **LOW**: Minor issues that could cause problems in rare scenarios - -SUGGESTIONS: -When you identify an issue with a clear fix, provide a `suggestion` field with the corrected code. -Format as a diff showing the old and new code: -- Lines starting with `-` show old code to remove -- Lines starting with `+` show new code to add -- Preserve exact indentation from the original - -OUTPUT: -Return a JSON array of issues. If no issues are found, return an empty array: [] -Each issue must have: title, description, severity, file_path, start_line, end_line, and optionally suggestion. - -Be specific and reference exact line numbers from the diff.""" - ), - HumanMessage( - content=f"""Review the merge request diff for the file: {file_path} - -File content: -{file_content} - -Diff: -{diff_content} -""" - ), - ] + # Load prompt template + prompt_template = get_prompt("review/deep_review", config) + + # Format messages with placeholders + messages: list[BaseMessage] = prompt_template.format_messages( + reasoning_context=reasoning_context, + file_path=file_path, + file_content=file_content, + diff_content=diff_content, + ) try: console.print(f"[cyan]Starting review of: {file_path}[/cyan]") @@ -599,7 +509,7 @@ async def review_single_file( if issues: # Validate issues against the diff to reduce hallucinations before creating notes. issues = await validate_issues_for_file( - agent, file_path, issues, settings, model, tools + agent, file_path, issues, settings, config, model, tools ) console.print(f"[blue]Found {len(issues)} issues in {file_path}[/blue]") @@ -618,6 +528,7 @@ async def validate_issues_for_file( file_path: str, issues: list[IssueModel], settings: ToolCallerSettings, + config: Config, model: Any | None = None, tools: list[Any] | None = None, ) -> list[IssueModel]: @@ -636,45 +547,15 @@ async def validate_issues_for_file( # Import json module for dumps import json - messages: list[BaseMessage] = [ - SystemMessage( - content=( - "You are a strict Issue Validator filter. The project is ALREADY COMPILED AND RUNNING.\n\n" - "### CRITICAL CONTEXT\n" - "1. THE CODEBASE HAS ALREADY COMPILED AND BUILT SUCCESSFULLY.\n" - "2. If a file is deleted in a diff, it means the references were already cleaned up.\n" - "3. PROVISION: Any issue claiming 'compilation error', 'missing reference', or 'broken startup' is FACTUALLY WRONG.\n\n" - "### VALIDATION ARCHITECTURE\n" - "Your ONLY goal is to discard issues that assume the code is currently broken. " - "Since the build passed, the code is structurally sound. You are only looking for LOGIC bugs in NEW code.\n\n" - "### DISCARD IMMEDIATELY (False Positives):\n" - "- **Deletions:** Claims that deleting code/files will break the app (The build passed, so it didn't).\n" - "- **References:** Claims that a symbol is undefined (It is likely defined in a file you can't see).\n" - "- **Build/Runtime:** Any mention of 'compilation errors', 'build failures', or 'initialization failures'.\n" - "- **Assumptions:** Speculation about files outside the provided diff.\n\n" - ) - ), - HumanMessage( - content=f"""File: {file_path} - -Diff (shows only changes): -```diff -{diff_content} -``` - -Issues to validate: -{json.dumps(issues_payload, indent=2)} - -TASK: -1. If the diff shows a file being DELETED, and the issue claims this deletion causes a failure elsewhere: DISCARD THE ISSUE. -2. The fact that the file was deleted and the project STILL COMPILED means the initialization logic was moved or is no longer necessary. -3. Validate only the logic within the lines starting with '+' (added). - -Return a ValidationResult with: -- valid_issues: confirmed real issues -- removed: false positives with reason for removal""" - ), - ] + # Load prompt template + prompt_template = get_prompt("review/validate_issues", config) + + # Format messages with placeholders + messages: list[BaseMessage] = prompt_template.format_messages( + file_path=file_path, + diff_content=diff_content, + issues_json=json.dumps(issues_payload, indent=2), + ) validation_settings = ToolCallerSettings(max_tool_calls=10) # Allow tool calls for validation diff --git a/src/reviewbot/agent/workflow/gitlab_notes.py b/src/reviewbot/agent/workflow/gitlab_notes.py index 2bf7bcc..226b552 100644 --- a/src/reviewbot/agent/workflow/gitlab_notes.py +++ b/src/reviewbot/agent/workflow/gitlab_notes.py @@ -3,11 +3,13 @@ from ido_agents.agents.tool_runner import ToolCallerSettings from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import BaseMessage from langgraph.func import task # type: ignore from rich.console import Console # type: ignore from reviewbot.agent.workflow.config import GitProviderConfig from reviewbot.core.agent import Agent +from reviewbot.core.config import Config from reviewbot.core.issues import Issue, IssueSeverity from reviewbot.core.reviews.review import Acknowledgment from reviewbot.core.reviews.review_model import ReviewSummary @@ -18,6 +20,7 @@ post_merge_request_note, update_discussion_note, ) +from reviewbot.prompts.get_prompt import get_prompt class AcknowledgmentResult(NamedTuple): @@ -30,7 +33,7 @@ class AcknowledgmentResult(NamedTuple): @task async def post_review_acknowledgment( - *, gitlab: GitProviderConfig, diffs: list[FileDiff], model: BaseChatModel + *, gitlab: GitProviderConfig, diffs: list[FileDiff], model: BaseChatModel, config: Config ) -> AcknowledgmentResult | None: """ Posts an initial acknowledgment discussion for the MR review. @@ -61,8 +64,6 @@ async def post_review_acknowledgment( Returns: Tuple of (discussion_id, note_id) if created, None if already exists or failed """ - from langchain_core.messages import HumanMessage, SystemMessage - api_v4 = gitlab.get_api_base_url() token = gitlab.token.get_secret_value() project_id = gitlab.get_project_identifier() @@ -124,26 +125,11 @@ async def post_review_acknowledgment( if len(file_list) > 10: files_summary += f"\n- ... and {len(file_list) - 10} more files" + # Load prompt template + prompt_template = get_prompt("gitlab/acknowledgment", config) + # Generate a simple summary with very limited tool calls - messages = [ - SystemMessage( - content="""You are a code review assistant. Generate a brief, friendly acknowledgment that a code review is starting. - -IMPORTANT: -- Keep it SHORT (2-3 sentences max) -- Be surface-level - this is just an acknowledgment, not the actual review -- DO NOT analyze code yet -- DO NOT use any tools -- Just acknowledge what files are being reviewed""" - ), - HumanMessage( - content=f"""A merge request code review is starting for the following files: - -{files_summary} - -Write a brief acknowledgment message (2-3 sentences) letting the developer know the review is in progress. Be friendly and professional.""" - ), - ] + messages: list[BaseMessage] = prompt_template.format_messages(files_summary=files_summary) try: # Get response with no tool calls allowed @@ -198,6 +184,7 @@ async def update_review_summary( diffs: list[FileDiff], diff_refs: dict[str, str], agent: Agent, + config: Config, model: BaseChatModel | None = None, tools: list[Any] | None = None, ) -> None: @@ -217,8 +204,6 @@ async def update_review_summary( diff_refs: Diff references including project_web_url agent: The agent to use for generating summary """ - from langchain_core.messages import HumanMessage, SystemMessage - # Count issues by severity high_count = sum(1 for issue in issues if issue.severity == IssueSeverity.HIGH) medium_count = sum(1 for issue in issues if issue.severity == IssueSeverity.MEDIUM) @@ -285,55 +270,21 @@ async def update_review_summary( try: from ido_agents.agents.ido_agent import create_ido_agent - messages = [ - SystemMessage( - content="""You are a Merge Request reviewer. Generate a concise, professional summary of a code review with reasoning. - -IMPORTANT: -- Use EXACTLY two paragraphs, each wrapped intags. -- Provide reasoning about the overall merge request purpose and code quality. -- Highlight key concerns or positive aspects -- Be constructive and professional -- Use tools to generate a comprehensive summary -- Use paragraphs with readable flow. Use two paragrahs with 3-5 sentences. -Paragraphs should be wrapped with
tags. Use new
tag for a newline. -Example -
-paragraph -
--paragraph2 -
-- Focus on the big picture, not individual issue details -- Reference the changes overview so the summary stays grounded in what changed, even if there are no issues""" - ), - HumanMessage( - content=f"""A code review has been completed with the following results: - -**Statistics:** -- Files reviewed: {total_files} -- Files with issues: {files_with_issues} -- Total issues: {len(issues)} - - High severity: {high_count} - - Medium severity: {medium_count} - - Low severity: {low_count} - -**Changes overview:** -{change_stats} -{change_overview_text} - -**Issues found:** -{issues_text} - -- Use EXACTLY two paragraphs, each wrapped intags. -1. Provides overall assessment of the purpose of the merge request purpose and code quality. -2. Highlights the most important concerns (if any) -3. Gives reasoning about the review findings -4. Is constructive and actionable -5. Mention the kinds of changes and at least one example file from the changes overview """ - ), - ] + # Load prompt template + prompt_template = get_prompt("gitlab/summary", config) + + # Format messages with placeholders + messages: list[BaseMessage] = prompt_template.format_messages( + total_files=total_files, + files_with_issues=files_with_issues, + total_issues=len(issues), + high_count=high_count, + medium_count=medium_count, + low_count=low_count, + change_stats=change_stats, + change_overview_text=change_overview_text, + issues_text=issues_text, + ) if model is None: raise ValueError("model parameter is required for ido-agents migration") diff --git a/src/reviewbot/agent/workflow/runner.py b/src/reviewbot/agent/workflow/runner.py index c020270..f5f5380 100644 --- a/src/reviewbot/agent/workflow/runner.py +++ b/src/reviewbot/agent/workflow/runner.py @@ -120,6 +120,7 @@ async def work_agent(inputs: dict[Any, Any]) -> str: gitlab=gitlab_config, diffs=filtered_diffs, model=low_effort_model, + config=config, ) if ack is not None: @@ -149,6 +150,7 @@ def on_file_review_complete(file_path: str, issues: list[Any]) -> None: # Call identify_issues task directly issue_models = await identify_issues( + config=config, settings=settings, on_file_complete=on_file_review_complete, agent=main_agent, @@ -183,6 +185,7 @@ def on_file_review_complete(file_path: str, issues: list[Any]) -> None: diffs=filtered_diffs, diff_refs=diff_refs, agent=low_effort_agent, + config=config, model=low_effort_model, tools=[get_diff, read_file, ls_dir, think], ) diff --git a/src/reviewbot/core/config.py b/src/reviewbot/core/config.py index 50d602c..4b966a5 100644 --- a/src/reviewbot/core/config.py +++ b/src/reviewbot/core/config.py @@ -1,3 +1,5 @@ +from pathlib import Path + from pydantic import BaseModel, SecretStr @@ -9,6 +11,7 @@ class Config(BaseModel): gitlab_token: str gemini_project_id: str create_threads: bool = False + custom_prompts_dir: Path | None = None model_config = { "extra": "forbid", diff --git a/src/reviewbot/prompts/get_prompt.py b/src/reviewbot/prompts/get_prompt.py new file mode 100644 index 0000000..f1404b1 --- /dev/null +++ b/src/reviewbot/prompts/get_prompt.py @@ -0,0 +1,120 @@ +from importlib import resources + +from langchain_core.prompts import ChatPromptTemplate + +from reviewbot.core.config import Config + + +def get_prompt(prompt_name: str, config: Config) -> ChatPromptTemplate: + """ + Load a prompt template from markdown file and return as ChatPromptTemplate. + + The markdown file should have this structure: + ``` + # System Prompt + + [system message content] + + --- + + # Human Prompt + + [human message content] + ``` + + Args: + prompt_name: Name of the prompt (without extension), can include subdirs like "review/quick_scan" + config: Config object with optional custom_prompts_dir + + Returns: + ChatPromptTemplate with system and human messages + + Raises: + FileNotFoundError: If prompt file not found + """ + filename = f"{prompt_name}.md" + + # 1. Try Custom Override + prompt_content = None + if config.custom_prompts_dir: + custom_path = config.custom_prompts_dir / filename + if custom_path.exists(): + prompt_content = custom_path.read_text(encoding="utf-8") + + # 2. Fallback to Package Default + if prompt_content is None: + try: + prompt_content = ( + resources.files("reviewbot.agent.prompts") + .joinpath(filename) + .read_text(encoding="utf-8") + ) + except FileNotFoundError as e: + raise FileNotFoundError( + f"Prompt '{filename}' not found in custom dir or package defaults." + ) from e + + # Parse the markdown into system and human prompts + system_prompt, human_prompt = _parse_prompt_markdown(prompt_content) + + # Create ChatPromptTemplate + return ChatPromptTemplate.from_messages([("system", system_prompt), ("human", human_prompt)]) # pyright: ignore[reportUnknownMemberType] + + +def _parse_prompt_markdown(content: str) -> tuple[str, str]: + """ + Parse a markdown prompt file into system and human message components. + + Expected format: + # System Prompt + + [system content] + + --- + + # Human Prompt + + [human content] + + Args: + content: Raw markdown content + + Returns: + Tuple of (system_prompt, human_prompt) + """ + # Split by the --- separator + parts = content.split("---", 1) + + if len(parts) != 2: + raise ValueError("Prompt markdown must have system and human sections separated by '---'") + + system_section = parts[0].strip() + human_section = parts[1].strip() + + # Extract content after the first header (if present) + system_prompt = _extract_prompt_content(system_section) + human_prompt = _extract_prompt_content(human_section) + + return system_prompt, human_prompt + + +def _extract_prompt_content(section: str) -> str: + """ + Extract the actual prompt content from a section, removing the header. + r + Args: + section: A section of the markdown (system or human) + + Returns: + The prompt content with header removed + """ + lines = section.split("\n") + + # Skip the first line if it's a header (starts with #) + start_idx = 0 + if lines and lines[0].strip().startswith("#"): + start_idx = 1 + + # Join remaining lines and strip + content = "\n".join(lines[start_idx:]).strip() + return content