From 2d7100920340a970c152dd6e0db049e39a4a8e09 Mon Sep 17 00:00:00 2001 From: Ernest Osiak Date: Wed, 11 Mar 2026 16:47:45 +0100 Subject: [PATCH] OPS-24224 add review metadata and context controls --- AUTO1_PATCHES.md | 1 + pr_agent/algo/context_enrichment.py | 87 ++++++++ pr_agent/algo/review_output_filter.py | 125 ++++++++++++ pr_agent/algo/utils.py | 64 +++++- pr_agent/git_providers/gitlab_provider.py | 9 +- pr_agent/git_providers/local_git_provider.py | 3 + pr_agent/settings/configuration.toml | 6 + pr_agent/settings/pr_reviewer_prompts.toml | 23 +++ pr_agent/tools/pr_code_suggestions.py | 21 +- pr_agent/tools/pr_reviewer.py | 54 ++++- tests/unittest/test_context_enrichment.py | 160 +++++++++++++++ tests/unittest/test_convert_to_markdown.py | 72 +++++++ tests/unittest/test_parse_code_suggestion.py | 12 +- tests/unittest/test_review_output_filter.py | 199 +++++++++++++++++++ 14 files changed, 822 insertions(+), 14 deletions(-) create mode 100644 pr_agent/algo/context_enrichment.py create mode 100644 pr_agent/algo/review_output_filter.py create mode 100644 tests/unittest/test_context_enrichment.py create mode 100644 tests/unittest/test_review_output_filter.py diff --git a/AUTO1_PATCHES.md b/AUTO1_PATCHES.md index 8e603d7fcd..cf2d50b610 100644 --- a/AUTO1_PATCHES.md +++ b/AUTO1_PATCHES.md @@ -19,6 +19,7 @@ Keep this list minimal to ease upstream rebases. | improve-reflect-fallback-score | OPS-24090 | Fix | pr_agent/tools/pr_code_suggestions.py | Prevent inline suggestion spam when self-reflection fails by defaulting all suggestion scores to 0. | Not upstreamed yet. | Remove once upstream handles reflect failure without inflating scores. | | describe-mermaid-sanitize | OPS-24090 | Fix | pr_agent/tools/pr_description.py | Strip backticks from Mermaid diagrams to avoid GitHub render failures. | Not upstreamed yet. | Remove once upstream sanitizes Mermaid diagrams or fixes prompt output. | | jira-cloud-ticket-context | OPS-24091 | Enhancement | pr_agent/algo/utils.py, pr_agent/tickets/jira_cloud.py, pr_agent/tools/ticket_pr_compliance_check.py, pr_agent/tools/pr_reviewer.py, pr_agent/tools/pr_description.py, pr_agent/tools/pr_config.py, tests/unittest/test_convert_to_markdown.py, tests/unittest/test_jira_cloud_ticket_context.py, tests/unittest/test_pr_reviewer_ticket_note.py, tests/unittest/test_ticket_pr_compliance_check.py | Fetch Jira Cloud ticket context using PR title first and branch name only as fallback, support both basic auth and scoped-token Jira Cloud API access, ignore placeholder compliance bullets and placeholder text like `None.`, show review notes when the Jira ticket cannot be fetched, and normalize empty discovered tickets before prompt rendering. | Not upstreamed yet. | Remove once upstream supports Jira Cloud ticket context from PR metadata with title-first precedence, scoped-token auth, placeholder-safe compliance rendering, comment-level fetch-failure notes, and prompt-safe normalization for empty ticket payloads. 
| +| review-metadata-context | OPS-24224 | Enhancement | pr_agent/algo/context_enrichment.py, pr_agent/algo/review_output_filter.py, pr_agent/algo/utils.py, pr_agent/git_providers/gitlab_provider.py, pr_agent/git_providers/local_git_provider.py, pr_agent/settings/configuration.toml, pr_agent/settings/pr_reviewer_prompts.toml, pr_agent/tools/pr_code_suggestions.py, pr_agent/tools/pr_reviewer.py, tests/unittest/test_context_enrichment.py, tests/unittest/test_convert_to_markdown.py, tests/unittest/test_parse_code_suggestion.py, tests/unittest/test_review_output_filter.py | Add optional review findings metadata (`confidence`, `evidence_type`), normalize parsed findings and enforce the configured finding cap at runtime, allow bounded full-file context for small touched files to improve review precision, let metadata badges be toggled independently from metadata generation, suppress inline code-suggestion label/importance suffixes when badges are disabled, rename the downstream config keys to `findings_metadata`, `findings_metadata_badges`, and `small_file_context`, and reuse cached local diff files for local review runs. | Not upstreamed yet. | Remove once upstream supports structured review findings metadata, runtime normalization, bounded small-file context enrichment for `/review`, independent metadata-badge rendering control across review output and inline suggestions, the renamed downstream config surface, and local review no longer needs the downstream diff-file cache adjustment. | ## Rebase checklist diff --git a/pr_agent/algo/context_enrichment.py b/pr_agent/algo/context_enrichment.py new file mode 100644 index 0000000000..ccc3f0ce9d --- /dev/null +++ b/pr_agent/algo/context_enrichment.py @@ -0,0 +1,87 @@ +import re + +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo + + +FILE_HEADER_PATTERN = r'^## File: (?:\'([^\']+)\'|"([^\"]+)")$' + + +def append_small_file_context_to_diff( + diff_text: str, + diff_files: list[FilePatchInfo], + token_handler, + *, + max_lines: int, + max_tokens: int, +) -> str: + context = build_small_file_context( + diff_text, + diff_files, + token_handler, + max_lines=max_lines, + max_tokens=max_tokens, + ) + if not context: + return diff_text + + return f"{diff_text}\n\nAdditional context for small touched files:\n======\n{context}\n======" + + +def build_small_file_context( + diff_text: str, + diff_files: list[FilePatchInfo], + token_handler, + *, + max_lines: int, + max_tokens: int, +) -> str: + if not diff_text or not diff_files or max_lines <= 0 or max_tokens <= 0: + return "" + + included_files = extract_files_in_diff(diff_text) + if not included_files: + return "" + + remaining_tokens = max_tokens + context_blocks = [] + candidate_files = [] + + for file in diff_files: + if file.filename not in included_files: + continue + if file.edit_type == EDIT_TYPE.DELETED or not file.patch or not file.head_file: + continue + + line_count = len(file.head_file.splitlines()) + if line_count > max_lines: + continue + + candidate_files.append((line_count, file.filename, file)) + + for _, _, file in sorted(candidate_files, key=lambda item: (item[0], item[1])): + context_block = render_full_file_context(file) + context_tokens = token_handler.count_tokens(context_block) + if context_tokens > remaining_tokens: + continue + + context_blocks.append(context_block) + remaining_tokens -= context_tokens + + return "\n\n".join(context_blocks) + + +def extract_files_in_diff(diff_text: str) -> set[str]: + matches = re.findall(FILE_HEADER_PATTERN, diff_text, flags=re.MULTILINE) 
+ return {single_quoted or double_quoted for single_quoted, double_quoted in matches} + + +def render_full_file_context(file: FilePatchInfo) -> str: + numbered_lines = [] + for line_number, line in enumerate(file.head_file.splitlines(), start=1): + numbered_lines.append(f"{line_number} {line}") + + return ( + f"## Full file context: '{file.filename.strip()}'\n" + "### Current file content after PR changes\n" + + "\n".join(numbered_lines) + ) diff --git a/pr_agent/algo/review_output_filter.py b/pr_agent/algo/review_output_filter.py new file mode 100644 index 0000000000..d9425383dd --- /dev/null +++ b/pr_agent/algo/review_output_filter.py @@ -0,0 +1,125 @@ +import copy +import logging + + +VALID_CONFIDENCE_VALUES = {"high", "medium", "low"} +VALID_EVIDENCE_TYPES = {"diff", "ticket", "inferred"} +FINDINGS_FILTER_MODE_OFF = "off" +FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED = "drop_low_confidence_inferred" +LOGGER = logging.getLogger(__name__) + + +def normalize_review_output( + data: dict, + max_findings: int, + findings_metadata: bool = False, + filter_mode: str = FINDINGS_FILTER_MODE_OFF, +) -> dict: + if not isinstance(data, dict) or "review" not in data or not isinstance(data["review"], dict): + return data + + if filter_mode != FINDINGS_FILTER_MODE_OFF and not findings_metadata: + _log_warning("findings_filter_mode has no effect when findings_metadata is false") + filter_mode = FINDINGS_FILTER_MODE_OFF + elif filter_mode not in {FINDINGS_FILTER_MODE_OFF, FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED}: + _log_warning(f"Unknown findings_filter_mode: {filter_mode!r}, defaulting to off") + filter_mode = FINDINGS_FILTER_MODE_OFF + + normalized_data = copy.deepcopy(data) + issues = normalized_data["review"].get("key_issues_to_review") + if not isinstance(issues, list): + return normalized_data + + normalized_issues = [] + for issue in issues: + normalized_issue = normalize_issue(issue, findings_metadata=findings_metadata) + if normalized_issue: + normalized_issues.append(normalized_issue) + + filtered_issues, suppressed_count = filter_issues(normalized_issues, filter_mode=filter_mode) + if suppressed_count: + _log_debug("Suppressed low-confidence inferred findings", artifact={"count": suppressed_count}) + if max_findings > 0: + filtered_issues = filtered_issues[:max_findings] + normalized_data["review"]["key_issues_to_review"] = filtered_issues + + return normalized_data + + +def normalize_issue(issue: dict, findings_metadata: bool = False) -> dict | None: + if not isinstance(issue, dict): + return None + + normalized_issue = copy.deepcopy(issue) + normalized_issue["relevant_file"] = issue.get("relevant_file", "") + normalized_issue["issue_header"] = issue.get("issue_header", "") + normalized_issue["issue_content"] = issue.get("issue_content", "") + + if findings_metadata: + confidence = normalize_enum_value(issue.get("confidence"), VALID_CONFIDENCE_VALUES) + if confidence: + normalized_issue["confidence"] = confidence + else: + normalized_issue.pop("confidence", None) + + evidence_type = normalize_enum_value(issue.get("evidence_type"), VALID_EVIDENCE_TYPES) + if evidence_type: + normalized_issue["evidence_type"] = evidence_type + else: + normalized_issue.pop("evidence_type", None) + else: + normalized_issue.pop("confidence", None) + normalized_issue.pop("evidence_type", None) + + normalized_issue["start_line"] = issue.get("start_line", 0) + normalized_issue["end_line"] = issue.get("end_line", 0) + + return normalized_issue + + +def normalize_enum_value(value, valid_values: set[str]) 
-> str | None: + if value is None: + return None + + normalized_value = str(value).strip().lower() + if normalized_value in valid_values: + return normalized_value + + return None + + +def filter_issues(issues: list[dict], filter_mode: str = FINDINGS_FILTER_MODE_OFF) -> tuple[list[dict], int]: + if filter_mode != FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED: + return issues, 0 + + filtered_issues = [] + suppressed_count = 0 + for issue in issues: + if issue.get("confidence") == "low" and issue.get("evidence_type") == "inferred": + suppressed_count += 1 + continue + filtered_issues.append(issue) + + return filtered_issues, suppressed_count + + +def _log_warning(message: str) -> None: + logger = _resolve_logger() + logger.warning(message) + + +def _log_debug(message: str, artifact: dict) -> None: + logger = _resolve_logger() + try: + logger.debug(message, artifact=artifact) + except TypeError: + logger.debug(message, extra={"artifact": artifact}) + + +def _resolve_logger(): + try: + from pr_agent.log import get_logger + + return get_logger() + except Exception: + return LOGGER diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 49411a2d0a..3164631c35 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -83,8 +83,6 @@ def get_setting(key: str) -> Any: return context.get("settings", global_settings).get(key, global_settings.get(key, None)) except Exception: return global_settings.get(key, None) - - def emphasize_header(text: str, only_markdown=False, reference_link=None) -> str: try: # Finding the position of the first occurrence of ": " @@ -130,7 +128,8 @@ def convert_to_markdown_v2(output_data: dict, incremental_review=None, git_provider=None, files=None, - ticket_note: str = '') -> str: + ticket_note: str = '', + findings_metadata_badges: bool | None = None) -> str: """ Convert a dictionary of data into markdown format. Args: @@ -139,6 +138,9 @@ def convert_to_markdown_v2(output_data: dict, str: The markdown formatted text generated from the input dictionary. """ + if findings_metadata_badges is None: + findings_metadata_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False) + emojis = { "Can be split": "🔀", "Key issues to review": "⚡", @@ -287,6 +289,16 @@ def convert_to_markdown_v2(output_data: dict, if issue_header.lower() == 'possible bug': issue_header = 'Possible Issue' # Make the header less frightening issue_content = issue.get('issue_content', '').strip() + issue_metadata = format_review_issue_metadata( + issue, + gfm_supported, + findings_metadata_badges, + ) + if issue_metadata: + if gfm_supported: + issue_content = f"{issue_metadata}
{issue_content}" if issue_content else issue_metadata + else: + issue_content = f"{issue_metadata}\n\n{issue_content}" if issue_content else issue_metadata start_line = int(str(issue.get('start_line', 0)).strip()) end_line = int(str(issue.get('end_line', 0)).strip()) @@ -339,6 +351,52 @@ def convert_to_markdown_v2(output_data: dict, return markdown_text + +def format_review_issue_metadata(issue: dict, gfm_supported: bool, enable_badges: bool = False) -> str: + if not isinstance(issue, dict): + return "" + if not enable_badges: + return "" + + metadata_labels = [] + confidence = str(issue.get("confidence", "")).strip().lower() + if confidence in {"high", "medium", "low"}: + metadata_labels.append(f"{confidence.capitalize()} confidence") + + evidence_type = str(issue.get("evidence_type", "")).strip().lower() + evidence_labels = { + "diff": "Diff evidence", + "ticket": "Ticket evidence", + "inferred": "Inferred evidence", + } + if evidence_type in evidence_labels: + metadata_labels.append(evidence_labels[evidence_type]) + + if not metadata_labels: + return "" + + if gfm_supported: + return " ".join([f"{label}" for label in metadata_labels]) + return " ".join([f"`{label}`" for label in metadata_labels]) + + +def format_code_suggestion_metadata(label: str, score=None, enable_badges: bool | None = None) -> str: + if enable_badges is None: + enable_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False) + if not enable_badges: + return "" + + label = str(label).strip() + if not label: + return "" + + score_value = "" if score is None else str(score).strip() + if score_value: + return f" [{label}, importance: {score_value}]" + + return f" [{label}]" + + def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str: """ Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content. diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 7f5937343a..6fc9cdd7d9 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -16,6 +16,7 @@ from ..algo.git_patch_processing import decode_if_bytes from ..algo.language_handler import is_valid_file from ..algo.utils import (clip_tokens, + format_code_suggestion_metadata, find_line_number_of_relevant_line_in_file, load_large_diff) from ..config_loader import get_settings @@ -606,7 +607,13 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f else: language = '' link = self.get_line_link(relevant_file, line_start, line_end) - body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n" + findings_metadata_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False) + suggestion_metadata = format_code_suggestion_metadata( + label, + score, + enable_badges=findings_metadata_badges, + ) + body_fallback = f"**Suggestion:** {content}{suggestion_metadata}\n\n" body_fallback +=f"\n\n
<details><summary>[{target_file.filename} [{line_start}-{line_end}]]({link}):</summary>\n\n" body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`" body_fallback+="</details>
\n\n" diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 420289761c..bab2e9f50b 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -63,6 +63,9 @@ def is_supported(self, capability: str) -> bool: return True def get_diff_files(self) -> list[FilePatchInfo]: + if self.diff_files is not None: + return self.diff_files + diffs = self.repo.head.commit.diff( self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]), create_patch=True, diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 12d8b5d0ce..05190b9c42 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -79,6 +79,12 @@ persistent_comment=true extra_instructions = "" num_max_findings = 3 final_update_message = true +findings_metadata = false +findings_metadata_badges = false +small_file_context = false +small_file_context_max_lines = 150 +small_file_context_max_tokens = 3500 +findings_filter_mode = "off" # review labels enable_review_labels_security=true enable_review_labels_effort=true diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 477f8553de..95e473ac18 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -46,6 +46,13 @@ __new hunk__ - When quoting variables, names or file paths from the code, use backticks (`) instead of single quote ('). - Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or questioning code elements (like variables declarations or import statements) that may be defined elsewhere in the codebase. - Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. Instead, acknowledge the visible scope boundary and analyze only the code shown. +{%- if small_file_context %} +- Some files may include a `## Full file context:` section with the complete post-change contents of a small touched file. Use this only to validate issues related to the changed file, and do not report issues from untouched regions unless they are necessary to explain a defect introduced by the PR. +{%- endif %} +{%- if findings_metadata %} +- For each issue, set `confidence` from `high`, `medium`, or `low` based only on the visible evidence. +- Set `evidence_type` to `diff` when changed lines directly support the issue, `ticket` for a clear ticket mismatch, or `inferred` when the issue depends on indirect clues. +{%- endif %} {%- if extra_instructions %} @@ -69,6 +76,10 @@ class KeyIssuesComponentLink(BaseModel): relevant_file: str = Field(description="The full file path of the relevant file") issue_header: str = Field(description="One or two word title for the issue. For example: 'Possible Bug', etc.") issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not mention line numbers in this field.") +{%- if findings_metadata %} + confidence: str = Field(description="Your confidence in this issue based only on the visible evidence. Allowed values: high, medium, low") + evidence_type: str = Field(description="What directly supports this issue. 
Allowed values: diff, ticket, inferred") +{%- endif %} start_line: int = Field(description="The start line that corresponds to this issue in the relevant file") end_line: int = Field(description="The end line that corresponds to this issue in the relevant file") @@ -164,6 +175,12 @@ review: Possible Bug issue_content: | ... +{%- if findings_metadata %} + confidence: | + high + evidence_type: | + diff +{%- endif %} start_line: 12 end_line: 14 - ... @@ -302,6 +319,12 @@ review: ... issue_content: | ... +{%- if findings_metadata %} + confidence: | + high + evidence_type: | + diff +{%- endif %} start_line: ... end_line: ... - ... diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 7bdf809760..b6e7d75edd 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -18,8 +18,11 @@ get_pr_diff, get_pr_multi_diffs, retry_with_fallback_models) from pr_agent.algo.token_handler import TokenHandler -from pr_agent.algo.utils import (ModelType, load_yaml, replace_code_tags, - show_relevant_configurations, get_max_tokens, clip_tokens, get_model) +from pr_agent.algo.utils import (ModelType, clip_tokens, + format_code_suggestion_metadata, get_max_tokens, + get_model, load_yaml, + replace_code_tags, + show_relevant_configurations) from pr_agent.config_loader import get_settings from pr_agent.git_providers import (AzureDevopsProvider, GithubProvider, GitLabProvider, get_git_provider, @@ -547,6 +550,8 @@ async def push_inline_code_suggestions(self, data): else: return self.git_provider.publish_comment('No suggestions found to improve this PR.') + findings_metadata_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False) + for d in data['code_suggestions']: try: if get_settings().config.verbosity_level >= 2: @@ -561,10 +566,12 @@ async def push_inline_code_suggestions(self, data): if new_code_snippet: new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet) - if d.get('score'): - body = f"**Suggestion:** {content} [{label}, importance: {d.get('score')}]\n```suggestion\n" + new_code_snippet + "\n```" - else: - body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```" + suggestion_metadata = format_code_suggestion_metadata( + label, + d.get('score'), + enable_badges=findings_metadata_badges, + ) + body = f"**Suggestion:** {content}{suggestion_metadata}\n```suggestion\n" + new_code_snippet + "\n```" code_suggestions.append({'body': body, 'relevant_file': relevant_file, 'relevant_lines_start': relevant_lines_start, 'relevant_lines_end': relevant_lines_end, @@ -941,4 +948,4 @@ async def self_reflect_on_suggestions(self, except Exception as e: get_logger().info(f"Could not reflect on suggestions, error: {e}") return "" - return response_reflect \ No newline at end of file + return response_reflect diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index f4cb563370..f5a7ec9dc9 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -9,12 +9,16 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from pr_agent.algo.context_enrichment import append_small_file_context_to_diff from pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files, + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD, get_pr_diff, retry_with_fallback_models) +from pr_agent.algo.review_output_filter import normalize_review_output from 
pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import (ModelType, PRReviewHeader, - convert_to_markdown_v2, github_action_output, + convert_to_markdown_v2, get_max_tokens, + github_action_output, load_yaml, parse_requirement_items, show_relevant_configurations) from pr_agent.config_loader import get_settings @@ -27,6 +31,9 @@ from pr_agent.tools.ticket_pr_compliance_check import extract_and_cache_pr_tickets +SMALL_FILE_CONTEXT_PROMPT_OVERHEAD_TOKENS = 64 + + def append_ticket_compliance_note(ticket_note: str, extra_note: str) -> str: ticket_note = (ticket_note or "").strip() extra_note = (extra_note or "").strip() @@ -109,6 +116,9 @@ def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False, get_settings().set("config.enable_ai_metadata", False) get_logger().debug(f"AI metadata is disabled for this command") + self.findings_metadata = get_settings().pr_reviewer.get("findings_metadata", False) + self.small_file_context = get_settings().pr_reviewer.get("small_file_context", False) + self.vars = { "title": self.git_provider.pr.title, "branch": self.git_provider.get_pr_branch(), @@ -130,6 +140,8 @@ def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False, "commit_messages_str": self.git_provider.get_commit_messages(), "custom_labels": "", "enable_custom_labels": get_settings().config.enable_custom_labels, + "findings_metadata": self.findings_metadata, + "small_file_context": self.small_file_context, "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), "related_tickets": get_settings().get('related_tickets', []), "ticket_compliance_note": "", @@ -228,6 +240,7 @@ async def _prepare_prediction(self, model: str) -> None: model, add_line_numbers_to_hunks=True, disable_extra_lines=False,) + self.patches_diff = self._append_small_file_context(self.patches_diff, model) if self.patches_diff: get_logger().debug(f"PR diff", diff=self.patches_diff) @@ -273,12 +286,19 @@ def _prepare_pr_review(self) -> str: keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", "relevant_file:", "relevant_line:", "suggestion:"], first_key=first_key, last_key=last_key) - github_action_output(data, 'review') if 'review' not in data: get_logger().exception("Failed to parse review data", artifact={"data": data}) return "" + data = normalize_review_output( + data, + max_findings=get_settings().pr_reviewer.num_max_findings, + findings_metadata=self.findings_metadata, + filter_mode=get_settings().pr_reviewer.get("findings_filter_mode", "off"), + ) + github_action_output(data, 'review') + # move data['review'] 'key_issues_to_review' key to the end of the dictionary if 'key_issues_to_review' in data['review']: key_issues_to_review = data['review'].pop('key_issues_to_review') @@ -320,6 +340,36 @@ def _prepare_pr_review(self) -> str: return markdown_text + def _append_small_file_context(self, diff_text: str, model: str) -> str: + if not diff_text or not self.small_file_context: + return diff_text + + max_context_lines = int(get_settings().pr_reviewer.get("small_file_context_max_lines", 0)) + max_context_tokens = int(get_settings().pr_reviewer.get("small_file_context_max_tokens", 0)) + if max_context_lines <= 0 or max_context_tokens <= 0: + return diff_text + + diff_tokens = self.token_handler.count_tokens(diff_text) + # prompt_tokens is computed before the runtime diff is rendered into the prompt, so account for diff tokens here. 
+ available_tokens = ( + get_max_tokens(model) + - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD + - self.token_handler.prompt_tokens + - diff_tokens + - SMALL_FILE_CONTEXT_PROMPT_OVERHEAD_TOKENS + ) + if available_tokens <= 0: + return diff_text + + enriched_diff = append_small_file_context_to_diff( + diff_text, + self.git_provider.get_diff_files(), + self.token_handler, + max_lines=max_context_lines, + max_tokens=min(max_context_tokens, available_tokens), + ) + return enriched_diff + def _get_user_answers(self) -> Tuple[str, str]: """ Retrieves the question and answer strings from the discussion messages related to a pull request. diff --git a/tests/unittest/test_context_enrichment.py b/tests/unittest/test_context_enrichment.py new file mode 100644 index 0000000000..0cfeedcf29 --- /dev/null +++ b/tests/unittest/test_context_enrichment.py @@ -0,0 +1,160 @@ +from pr_agent.algo.context_enrichment import append_small_file_context_to_diff, extract_files_in_diff, render_full_file_context +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo + + +class MockTokenHandler: + def count_tokens(self, text: str) -> int: + return len(text) + + +class TestContextEnrichment: + def test_append_small_file_context_to_diff_adds_only_small_included_files(self): + diff_text = "## File: 'src/small.py'\n\n@@ ... @@\n__new hunk__\n1 +print('hi')\n" + diff_files = [ + FilePatchInfo( + base_file="", + head_file="print('hi')\nprint('bye')\n", + patch="@@ -0,0 +1,2 @@\n+print('hi')\n+print('bye')\n", + filename="src/small.py", + edit_type=EDIT_TYPE.ADDED, + ), + FilePatchInfo( + base_file="", + head_file="\n".join([f"line {i}" for i in range(1, 201)]), + patch="@@ -0,0 +1,200 @@\n+...", + filename="src/large.py", + edit_type=EDIT_TYPE.ADDED, + ), + ] + + enriched_diff = append_small_file_context_to_diff( + diff_text, + diff_files, + MockTokenHandler(), + max_lines=10, + max_tokens=500, + ) + + assert "Additional context for small touched files" in enriched_diff + assert "## Full file context: 'src/small.py'" in enriched_diff + assert "1 print('hi')" in enriched_diff + assert "src/large.py" not in enriched_diff + + def test_append_small_file_context_to_diff_respects_token_budget(self): + diff_text = "## File: 'src/one.py'\n\n@@ ... @@\n__new hunk__\n1 +print('hi')\n" + diff_files = [ + FilePatchInfo( + base_file="", + head_file="print('hi')\nprint('bye')\n", + patch="@@ -0,0 +1,2 @@\n+print('hi')\n+print('bye')\n", + filename="src/one.py", + edit_type=EDIT_TYPE.ADDED, + ), + ] + + enriched_diff = append_small_file_context_to_diff( + diff_text, + diff_files, + MockTokenHandler(), + max_lines=10, + max_tokens=10, + ) + + assert enriched_diff == diff_text + + def test_append_small_file_context_to_diff_skips_deleted_or_missing_head_files(self): + diff_text = ( + "## File: 'src/deleted.py'\n\n@@ ... @@\n__new hunk__\n" + "\n## File: 'src/missing.py'\n\n@@ ... 
@@\n__new hunk__\n1 +print('hi')\n" + ) + diff_files = [ + FilePatchInfo( + base_file="print('bye')\n", + head_file="", + patch="@@ -1 +0,0 @@\n-print('bye')\n", + filename="src/deleted.py", + edit_type=EDIT_TYPE.DELETED, + ), + FilePatchInfo( + base_file="", + head_file=None, + patch="@@ -0,0 +1 @@\n+print('hi')\n", + filename="src/missing.py", + edit_type=EDIT_TYPE.ADDED, + ), + ] + + enriched_diff = append_small_file_context_to_diff( + diff_text, + diff_files, + MockTokenHandler(), + max_lines=10, + max_tokens=500, + ) + + assert enriched_diff == diff_text + + def test_append_small_file_context_to_diff_prefers_smaller_files_when_budget_is_limited(self): + diff_text = ( + "## File: 'src/medium.py'\n\n@@ ... @@\n__new hunk__\n1 +medium\n" + "\n## File: 'src/small_a.py'\n\n@@ ... @@\n__new hunk__\n1 +small_a\n" + "\n## File: 'src/small_b.py'\n\n@@ ... @@\n__new hunk__\n1 +small_b\n" + ) + medium_file = FilePatchInfo( + base_file="", + head_file="\n".join(["medium"] * 3), + patch="@@ -0,0 +1,3 @@\n+medium\n+medium\n+medium\n", + filename="src/medium.py", + edit_type=EDIT_TYPE.ADDED, + ) + small_a_file = FilePatchInfo( + base_file="", + head_file="small_a\n", + patch="@@ -0,0 +1 @@\n+small_a\n", + filename="src/small_a.py", + edit_type=EDIT_TYPE.ADDED, + ) + small_b_file = FilePatchInfo( + base_file="", + head_file="small_b\n", + patch="@@ -0,0 +1 @@\n+small_b\n", + filename="src/small_b.py", + edit_type=EDIT_TYPE.ADDED, + ) + max_tokens = len(render_full_file_context(small_a_file)) + len(render_full_file_context(small_b_file)) + + enriched_diff = append_small_file_context_to_diff( + diff_text, + [medium_file, small_a_file, small_b_file], + MockTokenHandler(), + max_lines=10, + max_tokens=max_tokens, + ) + + assert "## Full file context: 'src/medium.py'" not in enriched_diff + assert "## Full file context: 'src/small_a.py'" in enriched_diff + assert "## Full file context: 'src/small_b.py'" in enriched_diff + + def test_render_full_file_context_keeps_blank_line_numbers(self): + file = FilePatchInfo( + base_file="", + head_file="alpha\n\nomega\n", + patch="@@ -0,0 +1,3 @@\n+alpha\n+\n+omega\n", + filename="src/blank_lines.py", + edit_type=EDIT_TYPE.ADDED, + ) + + rendered_context = render_full_file_context(file) + + assert "1 alpha\n2 \n3 omega" in rendered_context + + def test_extract_files_in_diff_supports_matching_quotes_only(self): + diff_text = ( + "## File: 'src/single.py'\n" + '## File: "src/double.py"\n' + "## File: 'src/mismatch.py\"\n" + ) + + extracted_files = extract_files_in_diff(diff_text) + + assert extracted_files == {"src/single.py", "src/double.py"} diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py index 73f9882a78..06d46a9318 100644 --- a/tests/unittest/test_convert_to_markdown.py +++ b/tests/unittest/test_convert_to_markdown.py @@ -126,6 +126,78 @@ def test_key_issues_to_review(self): assert convert_to_markdown_v2(input_data, git_provider=mock_git_provider).strip() == expected_output.strip() mock_git_provider.get_line_link.assert_called_with('src/utils.py', 30, 50) + def test_key_issues_to_review_hides_metadata_badges_by_default(self): + input_data = {'review': { + 'key_issues_to_review': [ + { + 'relevant_file': 'src/utils.py', + 'issue_header': 'Code Smell', + 'issue_content': 'The function is too long and complex.', + 'confidence': 'high', + 'evidence_type': 'diff', + 'start_line': 30, + 'end_line': 50, + } + ] + }} + mock_git_provider = Mock() + mock_git_provider.get_line_link.return_value = '' + + result = 
convert_to_markdown_v2(input_data, git_provider=mock_git_provider) + + assert 'High confidence' not in result + assert 'Diff evidence' not in result + assert 'The function is too long and complex.' in result + + def test_key_issues_to_review_renders_metadata_badges_when_enabled(self): + input_data = {'review': { + 'key_issues_to_review': [ + { + 'relevant_file': 'src/utils.py', + 'issue_header': 'Code Smell', + 'issue_content': 'The function is too long and complex.', + 'confidence': 'high', + 'evidence_type': 'diff', + 'start_line': 30, + 'end_line': 50, + } + ] + }} + + mock_git_provider = Mock() + mock_git_provider.get_line_link.return_value = '' + + result = convert_to_markdown_v2( + input_data, + git_provider=mock_git_provider, + findings_metadata_badges=True, + ) + + assert 'High confidence Diff evidence
The function is too long and complex.' in result + + def test_key_issues_to_review_renders_metadata_badges_without_gfm_when_enabled(self): + input_data = {'review': { + 'key_issues_to_review': [ + { + 'relevant_file': 'src/utils.py', + 'issue_header': 'Code Smell', + 'issue_content': 'The function is too long and complex.', + 'confidence': 'medium', + 'evidence_type': 'ticket', + 'start_line': 30, + 'end_line': 50, + } + ] + }} + + result = convert_to_markdown_v2( + input_data, + gfm_supported=False, + findings_metadata_badges=True, + ) + + assert '`Medium confidence` `Ticket evidence`' in result + def test_ticket_compliance(self): input_data = {'review': { 'ticket_compliance_check': [ diff --git a/tests/unittest/test_parse_code_suggestion.py b/tests/unittest/test_parse_code_suggestion.py index c959a2dcb6..367f61ce40 100644 --- a/tests/unittest/test_parse_code_suggestion.py +++ b/tests/unittest/test_parse_code_suggestion.py @@ -1,6 +1,7 @@ # Generated by CodiumAI -from pr_agent.algo.utils import parse_code_suggestion +from pr_agent.algo.utils import (format_code_suggestion_metadata, + parse_code_suggestion) """ Code Analysis @@ -35,6 +36,15 @@ class TestParseCodeSuggestion: + def test_format_code_suggestion_metadata_hidden_by_default(self): + assert format_code_suggestion_metadata("possible issue", 7) == "" + + def test_format_code_suggestion_metadata_renders_label_and_score_when_enabled(self): + assert format_code_suggestion_metadata("possible issue", 7, enable_badges=True) == " [possible issue, importance: 7]" + + def test_format_code_suggestion_metadata_renders_label_only_when_enabled(self): + assert format_code_suggestion_metadata("possible issue", enable_badges=True) == " [possible issue]" + # Tests that function returns empty string when input is an empty dictionary def test_empty_dict(self): input_data = {} diff --git a/tests/unittest/test_review_output_filter.py b/tests/unittest/test_review_output_filter.py new file mode 100644 index 0000000000..69c9b93e3d --- /dev/null +++ b/tests/unittest/test_review_output_filter.py @@ -0,0 +1,199 @@ +import pr_agent.algo.review_output_filter as review_output_filter + + +class TestReviewOutputFilter: + def test_normalize_review_output_caps_findings_and_keeps_valid_metadata(self): + data = { + "review": { + "key_issues_to_review": [ + { + "relevant_file": "src/one.py", + "issue_header": "Possible Bug", + "issue_content": "Issue one", + "confidence": "HIGH", + "evidence_type": "DIFF", + "start_line": 10, + "end_line": 12, + "unknown": "preserved value", + }, + { + "relevant_file": "src/two.py", + "issue_header": "Possible Issue", + "issue_content": "Issue two", + "confidence": "unsupported", + "evidence_type": "ticket", + "start_line": 20, + "end_line": 21, + }, + { + "relevant_file": "src/three.py", + "issue_header": "Possible Issue", + "issue_content": "Issue three", + "confidence": "low", + "evidence_type": "inferred", + "start_line": 30, + "end_line": 31, + }, + ] + } + } + + normalized = review_output_filter.normalize_review_output( + data, + max_findings=2, + findings_metadata=True, + filter_mode="off", + ) + + issues = normalized["review"]["key_issues_to_review"] + assert len(issues) == 2 + assert issues[0]["confidence"] == "high" + assert issues[0]["evidence_type"] == "diff" + assert issues[0]["unknown"] == "preserved value" + assert "confidence" not in issues[1] + assert issues[1]["evidence_type"] == "ticket" + + def test_normalize_review_output_can_drop_low_confidence_inferred_findings(self): + data = { + "review": { + 
"key_issues_to_review": [ + { + "relevant_file": "src/one.py", + "issue_header": "Possible Bug", + "issue_content": "Keep me", + "confidence": "medium", + "evidence_type": "diff", + "start_line": 10, + "end_line": 12, + }, + { + "relevant_file": "src/two.py", + "issue_header": "Possible Issue", + "issue_content": "Drop me", + "confidence": "low", + "evidence_type": "inferred", + "start_line": 20, + "end_line": 21, + }, + ] + } + } + + normalized = review_output_filter.normalize_review_output( + data, + max_findings=5, + findings_metadata=True, + filter_mode="drop_low_confidence_inferred", + ) + + issues = normalized["review"]["key_issues_to_review"] + assert len(issues) == 1 + assert issues[0]["relevant_file"] == "src/one.py" + + def test_normalize_review_output_keeps_all_findings_when_max_findings_is_zero(self): + data = { + "review": { + "key_issues_to_review": [ + { + "relevant_file": "src/one.py", + "issue_header": "Possible Bug", + "issue_content": "Issue one", + "start_line": 10, + "end_line": 12, + }, + { + "relevant_file": "src/two.py", + "issue_header": "Possible Issue", + "issue_content": "Issue two", + "start_line": 20, + "end_line": 21, + }, + ] + } + } + + normalized = review_output_filter.normalize_review_output(data, max_findings=0) + + issues = normalized["review"]["key_issues_to_review"] + assert len(issues) == 2 + + def test_normalize_review_output_strips_metadata_when_feature_is_disabled(self): + data = { + "review": { + "key_issues_to_review": [ + { + "relevant_file": "src/one.py", + "issue_header": "Possible Bug", + "issue_content": "Issue one", + "confidence": "high", + "evidence_type": "diff", + "start_line": 10, + "end_line": 12, + } + ] + } + } + + normalized = review_output_filter.normalize_review_output(data, max_findings=5, findings_metadata=False) + + issue = normalized["review"]["key_issues_to_review"][0] + assert "confidence" not in issue + assert "evidence_type" not in issue + + def test_normalize_review_output_disables_filter_when_metadata_is_disabled(self, monkeypatch): + warnings = [] + data = { + "review": { + "key_issues_to_review": [ + { + "relevant_file": "src/one.py", + "issue_header": "Possible Bug", + "issue_content": "Keep me", + "confidence": "low", + "evidence_type": "inferred", + "start_line": 10, + "end_line": 12, + } + ] + } + } + monkeypatch.setattr(review_output_filter, "_log_warning", warnings.append) + + normalized = review_output_filter.normalize_review_output( + data, + max_findings=5, + findings_metadata=False, + filter_mode="drop_low_confidence_inferred", + ) + + assert warnings == ["findings_filter_mode has no effect when findings_metadata is false"] + assert len(normalized["review"]["key_issues_to_review"]) == 1 + + def test_normalize_review_output_warns_on_unknown_filter_mode(self, monkeypatch): + warnings = [] + data = { + "review": { + "key_issues_to_review": [ + { + "relevant_file": "src/one.py", + "issue_header": "Possible Bug", + "issue_content": "Keep me", + "confidence": "low", + "evidence_type": "inferred", + "start_line": 10, + "end_line": 12, + } + ] + } + } + monkeypatch.setattr(review_output_filter, "_log_warning", warnings.append) + + normalized = review_output_filter.normalize_review_output( + data, + max_findings=5, + findings_metadata=True, + filter_mode="unknown_mode", + ) + + assert warnings == ["Unknown findings_filter_mode: 'unknown_mode', defaulting to off"] + assert len(normalized["review"]["key_issues_to_review"]) == 1