diff --git a/AUTO1_PATCHES.md b/AUTO1_PATCHES.md
index 8e603d7fcd..cf2d50b610 100644
--- a/AUTO1_PATCHES.md
+++ b/AUTO1_PATCHES.md
@@ -19,6 +19,7 @@ Keep this list minimal to ease upstream rebases.
| improve-reflect-fallback-score | OPS-24090 | Fix | pr_agent/tools/pr_code_suggestions.py | Prevent inline suggestion spam when self-reflection fails by defaulting all suggestion scores to 0. | Not upstreamed yet. | Remove once upstream handles reflect failure without inflating scores. |
| describe-mermaid-sanitize | OPS-24090 | Fix | pr_agent/tools/pr_description.py | Strip backticks from Mermaid diagrams to avoid GitHub render failures. | Not upstreamed yet. | Remove once upstream sanitizes Mermaid diagrams or fixes prompt output. |
| jira-cloud-ticket-context | OPS-24091 | Enhancement | pr_agent/algo/utils.py, pr_agent/tickets/jira_cloud.py, pr_agent/tools/ticket_pr_compliance_check.py, pr_agent/tools/pr_reviewer.py, pr_agent/tools/pr_description.py, pr_agent/tools/pr_config.py, tests/unittest/test_convert_to_markdown.py, tests/unittest/test_jira_cloud_ticket_context.py, tests/unittest/test_pr_reviewer_ticket_note.py, tests/unittest/test_ticket_pr_compliance_check.py | Fetch Jira Cloud ticket context using PR title first and branch name only as fallback, support both basic auth and scoped-token Jira Cloud API access, ignore placeholder compliance bullets and placeholder text like `None.`, show review notes when the Jira ticket cannot be fetched, and normalize empty discovered tickets before prompt rendering. | Not upstreamed yet. | Remove once upstream supports Jira Cloud ticket context from PR metadata with title-first precedence, scoped-token auth, placeholder-safe compliance rendering, comment-level fetch-failure notes, and prompt-safe normalization for empty ticket payloads. |
+| review-metadata-context | OPS-24224 | Enhancement | pr_agent/algo/context_enrichment.py, pr_agent/algo/review_output_filter.py, pr_agent/algo/utils.py, pr_agent/git_providers/gitlab_provider.py, pr_agent/git_providers/local_git_provider.py, pr_agent/settings/configuration.toml, pr_agent/settings/pr_reviewer_prompts.toml, pr_agent/tools/pr_code_suggestions.py, pr_agent/tools/pr_reviewer.py, tests/unittest/test_context_enrichment.py, tests/unittest/test_convert_to_markdown.py, tests/unittest/test_parse_code_suggestion.py, tests/unittest/test_review_output_filter.py | Add optional review findings metadata (`confidence`, `evidence_type`), normalize parsed findings and enforce the configured finding cap at runtime, allow bounded full-file context for small touched files to improve review precision, let metadata badges be toggled independently from metadata generation, suppress inline code-suggestion label/importance suffixes when badges are disabled, rename the downstream config keys to `findings_metadata`, `findings_metadata_badges`, and `small_file_context`, and reuse cached local diff files for local review runs. | Not upstreamed yet. | Remove once upstream supports structured review findings metadata, runtime normalization, bounded small-file context enrichment for `/review`, independent metadata-badge rendering control across review output and inline suggestions, the renamed downstream config surface, and local review no longer needs the downstream diff-file cache adjustment. |
## Rebase checklist
diff --git a/pr_agent/algo/context_enrichment.py b/pr_agent/algo/context_enrichment.py
new file mode 100644
index 0000000000..ccc3f0ce9d
--- /dev/null
+++ b/pr_agent/algo/context_enrichment.py
@@ -0,0 +1,87 @@
+import re
+
+from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
+
+
+FILE_HEADER_PATTERN = r'^## File: (?:\'([^\']+)\'|"([^\"]+)")$'
+
+
+def append_small_file_context_to_diff(
+ diff_text: str,
+ diff_files: list[FilePatchInfo],
+ token_handler,
+ *,
+ max_lines: int,
+ max_tokens: int,
+) -> str:
+ context = build_small_file_context(
+ diff_text,
+ diff_files,
+ token_handler,
+ max_lines=max_lines,
+ max_tokens=max_tokens,
+ )
+ if not context:
+ return diff_text
+
+ return f"{diff_text}\n\nAdditional context for small touched files:\n======\n{context}\n======"
+
+
+def build_small_file_context(
+ diff_text: str,
+ diff_files: list[FilePatchInfo],
+ token_handler,
+ *,
+ max_lines: int,
+ max_tokens: int,
+) -> str:
+ if not diff_text or not diff_files or max_lines <= 0 or max_tokens <= 0:
+ return ""
+
+ included_files = extract_files_in_diff(diff_text)
+ if not included_files:
+ return ""
+
+ remaining_tokens = max_tokens
+ context_blocks = []
+ candidate_files = []
+
+ for file in diff_files:
+ if file.filename not in included_files:
+ continue
+ if file.edit_type == EDIT_TYPE.DELETED or not file.patch or not file.head_file:
+ continue
+
+ line_count = len(file.head_file.splitlines())
+ if line_count > max_lines:
+ continue
+
+ candidate_files.append((line_count, file.filename, file))
+
+ for _, _, file in sorted(candidate_files, key=lambda item: (item[0], item[1])):
+ context_block = render_full_file_context(file)
+ context_tokens = token_handler.count_tokens(context_block)
+ if context_tokens > remaining_tokens:
+ continue
+
+ context_blocks.append(context_block)
+ remaining_tokens -= context_tokens
+
+ return "\n\n".join(context_blocks)
+
+
+def extract_files_in_diff(diff_text: str) -> set[str]:
+ matches = re.findall(FILE_HEADER_PATTERN, diff_text, flags=re.MULTILINE)
+ return {single_quoted or double_quoted for single_quoted, double_quoted in matches}
+
+
+def render_full_file_context(file: FilePatchInfo) -> str:
+ numbered_lines = []
+ for line_number, line in enumerate(file.head_file.splitlines(), start=1):
+ numbered_lines.append(f"{line_number} {line}")
+
+ return (
+ f"## Full file context: '{file.filename.strip()}'\n"
+ "### Current file content after PR changes\n"
+ + "\n".join(numbered_lines)
+ )
diff --git a/pr_agent/algo/review_output_filter.py b/pr_agent/algo/review_output_filter.py
new file mode 100644
index 0000000000..d9425383dd
--- /dev/null
+++ b/pr_agent/algo/review_output_filter.py
@@ -0,0 +1,125 @@
+import copy
+import logging
+
+
+VALID_CONFIDENCE_VALUES = {"high", "medium", "low"}
+VALID_EVIDENCE_TYPES = {"diff", "ticket", "inferred"}
+FINDINGS_FILTER_MODE_OFF = "off"
+FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED = "drop_low_confidence_inferred"
+LOGGER = logging.getLogger(__name__)
+
+
+def normalize_review_output(
+ data: dict,
+ max_findings: int,
+ findings_metadata: bool = False,
+ filter_mode: str = FINDINGS_FILTER_MODE_OFF,
+) -> dict:
+ if not isinstance(data, dict) or "review" not in data or not isinstance(data["review"], dict):
+ return data
+
+ if filter_mode != FINDINGS_FILTER_MODE_OFF and not findings_metadata:
+ _log_warning("findings_filter_mode has no effect when findings_metadata is false")
+ filter_mode = FINDINGS_FILTER_MODE_OFF
+ elif filter_mode not in {FINDINGS_FILTER_MODE_OFF, FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED}:
+ _log_warning(f"Unknown findings_filter_mode: {filter_mode!r}, defaulting to off")
+ filter_mode = FINDINGS_FILTER_MODE_OFF
+
+ normalized_data = copy.deepcopy(data)
+ issues = normalized_data["review"].get("key_issues_to_review")
+ if not isinstance(issues, list):
+ return normalized_data
+
+ normalized_issues = []
+ for issue in issues:
+ normalized_issue = normalize_issue(issue, findings_metadata=findings_metadata)
+ if normalized_issue:
+ normalized_issues.append(normalized_issue)
+
+ filtered_issues, suppressed_count = filter_issues(normalized_issues, filter_mode=filter_mode)
+ if suppressed_count:
+ _log_debug("Suppressed low-confidence inferred findings", artifact={"count": suppressed_count})
+ if max_findings > 0:
+ filtered_issues = filtered_issues[:max_findings]
+ normalized_data["review"]["key_issues_to_review"] = filtered_issues
+
+ return normalized_data
+
+
+def normalize_issue(issue: dict, findings_metadata: bool = False) -> dict | None:
+ if not isinstance(issue, dict):
+ return None
+
+ normalized_issue = copy.deepcopy(issue)
+ normalized_issue["relevant_file"] = issue.get("relevant_file", "")
+ normalized_issue["issue_header"] = issue.get("issue_header", "")
+ normalized_issue["issue_content"] = issue.get("issue_content", "")
+
+ if findings_metadata:
+ confidence = normalize_enum_value(issue.get("confidence"), VALID_CONFIDENCE_VALUES)
+ if confidence:
+ normalized_issue["confidence"] = confidence
+ else:
+ normalized_issue.pop("confidence", None)
+
+ evidence_type = normalize_enum_value(issue.get("evidence_type"), VALID_EVIDENCE_TYPES)
+ if evidence_type:
+ normalized_issue["evidence_type"] = evidence_type
+ else:
+ normalized_issue.pop("evidence_type", None)
+ else:
+ normalized_issue.pop("confidence", None)
+ normalized_issue.pop("evidence_type", None)
+
+ normalized_issue["start_line"] = issue.get("start_line", 0)
+ normalized_issue["end_line"] = issue.get("end_line", 0)
+
+ return normalized_issue
+
+
+def normalize_enum_value(value, valid_values: set[str]) -> str | None:
+ if value is None:
+ return None
+
+ normalized_value = str(value).strip().lower()
+ if normalized_value in valid_values:
+ return normalized_value
+
+ return None
+
+
+def filter_issues(issues: list[dict], filter_mode: str = FINDINGS_FILTER_MODE_OFF) -> tuple[list[dict], int]:
+ if filter_mode != FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED:
+ return issues, 0
+
+ filtered_issues = []
+ suppressed_count = 0
+ for issue in issues:
+ if issue.get("confidence") == "low" and issue.get("evidence_type") == "inferred":
+ suppressed_count += 1
+ continue
+ filtered_issues.append(issue)
+
+ return filtered_issues, suppressed_count
+
+
+def _log_warning(message: str) -> None:
+ logger = _resolve_logger()
+ logger.warning(message)
+
+
+def _log_debug(message: str, artifact: dict) -> None:
+ logger = _resolve_logger()
+ try:
+ logger.debug(message, artifact=artifact)
+ except TypeError:
+ logger.debug(message, extra={"artifact": artifact})
+
+
+def _resolve_logger():
+ try:
+ from pr_agent.log import get_logger
+
+ return get_logger()
+ except Exception:
+ return LOGGER
diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index 49411a2d0a..3164631c35 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -83,8 +83,6 @@ def get_setting(key: str) -> Any:
return context.get("settings", global_settings).get(key, global_settings.get(key, None))
except Exception:
return global_settings.get(key, None)
-
-
def emphasize_header(text: str, only_markdown=False, reference_link=None) -> str:
try:
# Finding the position of the first occurrence of ": "
@@ -130,7 +128,8 @@ def convert_to_markdown_v2(output_data: dict,
incremental_review=None,
git_provider=None,
files=None,
- ticket_note: str = '') -> str:
+ ticket_note: str = '',
+ findings_metadata_badges: bool | None = None) -> str:
"""
Convert a dictionary of data into markdown format.
Args:
@@ -139,6 +138,9 @@ def convert_to_markdown_v2(output_data: dict,
str: The markdown formatted text generated from the input dictionary.
"""
+ if findings_metadata_badges is None:
+ findings_metadata_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False)
+
emojis = {
"Can be split": "🔀",
"Key issues to review": "âš¡",
@@ -287,6 +289,16 @@ def convert_to_markdown_v2(output_data: dict,
if issue_header.lower() == 'possible bug':
issue_header = 'Possible Issue' # Make the header less frightening
issue_content = issue.get('issue_content', '').strip()
+ issue_metadata = format_review_issue_metadata(
+ issue,
+ gfm_supported,
+ findings_metadata_badges,
+ )
+ if issue_metadata:
+ if gfm_supported:
+                issue_content = f"{issue_metadata}<br>{issue_content}" if issue_content else issue_metadata
+ else:
+ issue_content = f"{issue_metadata}\n\n{issue_content}" if issue_content else issue_metadata
start_line = int(str(issue.get('start_line', 0)).strip())
end_line = int(str(issue.get('end_line', 0)).strip())
@@ -339,6 +351,52 @@ def convert_to_markdown_v2(output_data: dict,
return markdown_text
+
+def format_review_issue_metadata(issue: dict, gfm_supported: bool, enable_badges: bool = False) -> str:
+ if not isinstance(issue, dict):
+ return ""
+ if not enable_badges:
+ return ""
+
+ metadata_labels = []
+ confidence = str(issue.get("confidence", "")).strip().lower()
+ if confidence in {"high", "medium", "low"}:
+ metadata_labels.append(f"{confidence.capitalize()} confidence")
+
+ evidence_type = str(issue.get("evidence_type", "")).strip().lower()
+ evidence_labels = {
+ "diff": "Diff evidence",
+ "ticket": "Ticket evidence",
+ "inferred": "Inferred evidence",
+ }
+ if evidence_type in evidence_labels:
+ metadata_labels.append(evidence_labels[evidence_type])
+
+ if not metadata_labels:
+ return ""
+
+ if gfm_supported:
+ return " ".join([f"{label}" for label in metadata_labels])
+ return " ".join([f"`{label}`" for label in metadata_labels])
+
+
+def format_code_suggestion_metadata(label: str, score=None, enable_badges: bool | None = None) -> str:
+ if enable_badges is None:
+ enable_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False)
+ if not enable_badges:
+ return ""
+
+ label = str(label).strip()
+ if not label:
+ return ""
+
+ score_value = "" if score is None else str(score).strip()
+ if score_value:
+ return f" [{label}, importance: {score_value}]"
+
+ return f" [{label}]"
+
+
def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:
"""
Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content.
diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py
index 7f5937343a..6fc9cdd7d9 100644
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@@ -16,6 +16,7 @@
from ..algo.git_patch_processing import decode_if_bytes
from ..algo.language_handler import is_valid_file
from ..algo.utils import (clip_tokens,
+ format_code_suggestion_metadata,
find_line_number_of_relevant_line_in_file,
load_large_diff)
from ..config_loader import get_settings
@@ -606,7 +607,13 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f
else:
language = ''
link = self.get_line_link(relevant_file, line_start, line_end)
- body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n"
+ findings_metadata_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False)
+ suggestion_metadata = format_code_suggestion_metadata(
+ label,
+ score,
+ enable_badges=findings_metadata_badges,
+ )
+ body_fallback = f"**Suggestion:** {content}{suggestion_metadata}\n\n"
         body_fallback +=f"\n\n<details><summary>[{target_file.filename} [{line_start}-{line_end}]]({link}):</summary>\n\n"
         body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`"
         body_fallback+="</details> \n\n"
diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py
index 420289761c..bab2e9f50b 100644
--- a/pr_agent/git_providers/local_git_provider.py
+++ b/pr_agent/git_providers/local_git_provider.py
@@ -63,6 +63,9 @@ def is_supported(self, capability: str) -> bool:
return True
def get_diff_files(self) -> list[FilePatchInfo]:
+ if self.diff_files is not None:
+ return self.diff_files
+
diffs = self.repo.head.commit.diff(
self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]),
create_patch=True,
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 12d8b5d0ce..05190b9c42 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -79,6 +79,12 @@ persistent_comment=true
extra_instructions = ""
num_max_findings = 3
final_update_message = true
+findings_metadata = false
+findings_metadata_badges = false
+small_file_context = false
+small_file_context_max_lines = 150
+small_file_context_max_tokens = 3500
+findings_filter_mode = "off"
# review labels
enable_review_labels_security=true
enable_review_labels_effort=true
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 477f8553de..95e473ac18 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -46,6 +46,13 @@ __new hunk__
- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote (').
- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or questioning code elements (like variables declarations or import statements) that may be defined elsewhere in the codebase.
- Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. Instead, acknowledge the visible scope boundary and analyze only the code shown.
+{%- if small_file_context %}
+- Some files may include a `## Full file context:` section with the complete post-change contents of a small touched file. Use this only to validate issues related to the changed file, and do not report issues from untouched regions unless they are necessary to explain a defect introduced by the PR.
+{%- endif %}
+{%- if findings_metadata %}
+- For each issue, set `confidence` from `high`, `medium`, or `low` based only on the visible evidence.
+- Set `evidence_type` to `diff` when changed lines directly support the issue, `ticket` for a clear ticket mismatch, or `inferred` when the issue depends on indirect clues.
+{%- endif %}
{%- if extra_instructions %}
@@ -69,6 +76,10 @@ class KeyIssuesComponentLink(BaseModel):
relevant_file: str = Field(description="The full file path of the relevant file")
issue_header: str = Field(description="One or two word title for the issue. For example: 'Possible Bug', etc.")
issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not mention line numbers in this field.")
+{%- if findings_metadata %}
+ confidence: str = Field(description="Your confidence in this issue based only on the visible evidence. Allowed values: high, medium, low")
+ evidence_type: str = Field(description="What directly supports this issue. Allowed values: diff, ticket, inferred")
+{%- endif %}
start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")
@@ -164,6 +175,12 @@ review:
Possible Bug
issue_content: |
...
+{%- if findings_metadata %}
+ confidence: |
+ high
+ evidence_type: |
+ diff
+{%- endif %}
start_line: 12
end_line: 14
- ...
@@ -302,6 +319,12 @@ review:
...
issue_content: |
...
+{%- if findings_metadata %}
+ confidence: |
+ high
+ evidence_type: |
+ diff
+{%- endif %}
start_line: ...
end_line: ...
- ...
diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py
index 7bdf809760..b6e7d75edd 100644
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@@ -18,8 +18,11 @@
get_pr_diff, get_pr_multi_diffs,
retry_with_fallback_models)
from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import (ModelType, load_yaml, replace_code_tags,
- show_relevant_configurations, get_max_tokens, clip_tokens, get_model)
+from pr_agent.algo.utils import (ModelType, clip_tokens,
+ format_code_suggestion_metadata, get_max_tokens,
+ get_model, load_yaml,
+ replace_code_tags,
+ show_relevant_configurations)
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import (AzureDevopsProvider, GithubProvider,
GitLabProvider, get_git_provider,
@@ -547,6 +550,8 @@ async def push_inline_code_suggestions(self, data):
else:
return self.git_provider.publish_comment('No suggestions found to improve this PR.')
+ findings_metadata_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False)
+
for d in data['code_suggestions']:
try:
if get_settings().config.verbosity_level >= 2:
@@ -561,10 +566,12 @@ async def push_inline_code_suggestions(self, data):
if new_code_snippet:
new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet)
- if d.get('score'):
- body = f"**Suggestion:** {content} [{label}, importance: {d.get('score')}]\n```suggestion\n" + new_code_snippet + "\n```"
- else:
- body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```"
+ suggestion_metadata = format_code_suggestion_metadata(
+ label,
+ d.get('score'),
+ enable_badges=findings_metadata_badges,
+ )
+ body = f"**Suggestion:** {content}{suggestion_metadata}\n```suggestion\n" + new_code_snippet + "\n```"
code_suggestions.append({'body': body, 'relevant_file': relevant_file,
'relevant_lines_start': relevant_lines_start,
'relevant_lines_end': relevant_lines_end,
@@ -941,4 +948,4 @@ async def self_reflect_on_suggestions(self,
except Exception as e:
get_logger().info(f"Could not reflect on suggestions, error: {e}")
return ""
- return response_reflect
\ No newline at end of file
+ return response_reflect
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index f4cb563370..f5a7ec9dc9 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -9,12 +9,16 @@
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
+from pr_agent.algo.context_enrichment import append_small_file_context_to_diff
from pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files,
+ OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD,
get_pr_diff,
retry_with_fallback_models)
+from pr_agent.algo.review_output_filter import normalize_review_output
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import (ModelType, PRReviewHeader,
- convert_to_markdown_v2, github_action_output,
+ convert_to_markdown_v2, get_max_tokens,
+ github_action_output,
load_yaml, parse_requirement_items,
show_relevant_configurations)
from pr_agent.config_loader import get_settings
@@ -27,6 +31,9 @@
from pr_agent.tools.ticket_pr_compliance_check import extract_and_cache_pr_tickets
+SMALL_FILE_CONTEXT_PROMPT_OVERHEAD_TOKENS = 64
+
+
def append_ticket_compliance_note(ticket_note: str, extra_note: str) -> str:
ticket_note = (ticket_note or "").strip()
extra_note = (extra_note or "").strip()
@@ -109,6 +116,9 @@ def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False,
get_settings().set("config.enable_ai_metadata", False)
get_logger().debug(f"AI metadata is disabled for this command")
+ self.findings_metadata = get_settings().pr_reviewer.get("findings_metadata", False)
+ self.small_file_context = get_settings().pr_reviewer.get("small_file_context", False)
+
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
@@ -130,6 +140,8 @@ def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False,
"commit_messages_str": self.git_provider.get_commit_messages(),
"custom_labels": "",
"enable_custom_labels": get_settings().config.enable_custom_labels,
+ "findings_metadata": self.findings_metadata,
+ "small_file_context": self.small_file_context,
"is_ai_metadata": get_settings().get("config.enable_ai_metadata", False),
"related_tickets": get_settings().get('related_tickets', []),
"ticket_compliance_note": "",
@@ -228,6 +240,7 @@ async def _prepare_prediction(self, model: str) -> None:
model,
add_line_numbers_to_hunks=True,
disable_extra_lines=False,)
+ self.patches_diff = self._append_small_file_context(self.patches_diff, model)
if self.patches_diff:
get_logger().debug(f"PR diff", diff=self.patches_diff)
@@ -273,12 +286,19 @@ def _prepare_pr_review(self) -> str:
keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
"relevant_file:", "relevant_line:", "suggestion:"],
first_key=first_key, last_key=last_key)
- github_action_output(data, 'review')
if 'review' not in data:
get_logger().exception("Failed to parse review data", artifact={"data": data})
return ""
+ data = normalize_review_output(
+ data,
+ max_findings=get_settings().pr_reviewer.num_max_findings,
+ findings_metadata=self.findings_metadata,
+ filter_mode=get_settings().pr_reviewer.get("findings_filter_mode", "off"),
+ )
+ github_action_output(data, 'review')
+
# move data['review'] 'key_issues_to_review' key to the end of the dictionary
if 'key_issues_to_review' in data['review']:
key_issues_to_review = data['review'].pop('key_issues_to_review')
@@ -320,6 +340,36 @@ def _prepare_pr_review(self) -> str:
return markdown_text
+ def _append_small_file_context(self, diff_text: str, model: str) -> str:
+ if not diff_text or not self.small_file_context:
+ return diff_text
+
+ max_context_lines = int(get_settings().pr_reviewer.get("small_file_context_max_lines", 0))
+ max_context_tokens = int(get_settings().pr_reviewer.get("small_file_context_max_tokens", 0))
+ if max_context_lines <= 0 or max_context_tokens <= 0:
+ return diff_text
+
+ diff_tokens = self.token_handler.count_tokens(diff_text)
+ # prompt_tokens is computed before the runtime diff is rendered into the prompt, so account for diff tokens here.
+ available_tokens = (
+ get_max_tokens(model)
+ - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD
+ - self.token_handler.prompt_tokens
+ - diff_tokens
+ - SMALL_FILE_CONTEXT_PROMPT_OVERHEAD_TOKENS
+ )
+ if available_tokens <= 0:
+ return diff_text
+
+ enriched_diff = append_small_file_context_to_diff(
+ diff_text,
+ self.git_provider.get_diff_files(),
+ self.token_handler,
+ max_lines=max_context_lines,
+ max_tokens=min(max_context_tokens, available_tokens),
+ )
+ return enriched_diff
+
def _get_user_answers(self) -> Tuple[str, str]:
"""
Retrieves the question and answer strings from the discussion messages related to a pull request.
diff --git a/tests/unittest/test_context_enrichment.py b/tests/unittest/test_context_enrichment.py
new file mode 100644
index 0000000000..0cfeedcf29
--- /dev/null
+++ b/tests/unittest/test_context_enrichment.py
@@ -0,0 +1,160 @@
+from pr_agent.algo.context_enrichment import append_small_file_context_to_diff, extract_files_in_diff, render_full_file_context
+from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
+
+
+class MockTokenHandler:
+ def count_tokens(self, text: str) -> int:
+ return len(text)
+
+
+class TestContextEnrichment:
+ def test_append_small_file_context_to_diff_adds_only_small_included_files(self):
+ diff_text = "## File: 'src/small.py'\n\n@@ ... @@\n__new hunk__\n1 +print('hi')\n"
+ diff_files = [
+ FilePatchInfo(
+ base_file="",
+ head_file="print('hi')\nprint('bye')\n",
+ patch="@@ -0,0 +1,2 @@\n+print('hi')\n+print('bye')\n",
+ filename="src/small.py",
+ edit_type=EDIT_TYPE.ADDED,
+ ),
+ FilePatchInfo(
+ base_file="",
+ head_file="\n".join([f"line {i}" for i in range(1, 201)]),
+ patch="@@ -0,0 +1,200 @@\n+...",
+ filename="src/large.py",
+ edit_type=EDIT_TYPE.ADDED,
+ ),
+ ]
+
+ enriched_diff = append_small_file_context_to_diff(
+ diff_text,
+ diff_files,
+ MockTokenHandler(),
+ max_lines=10,
+ max_tokens=500,
+ )
+
+ assert "Additional context for small touched files" in enriched_diff
+ assert "## Full file context: 'src/small.py'" in enriched_diff
+ assert "1 print('hi')" in enriched_diff
+ assert "src/large.py" not in enriched_diff
+
+ def test_append_small_file_context_to_diff_respects_token_budget(self):
+ diff_text = "## File: 'src/one.py'\n\n@@ ... @@\n__new hunk__\n1 +print('hi')\n"
+ diff_files = [
+ FilePatchInfo(
+ base_file="",
+ head_file="print('hi')\nprint('bye')\n",
+ patch="@@ -0,0 +1,2 @@\n+print('hi')\n+print('bye')\n",
+ filename="src/one.py",
+ edit_type=EDIT_TYPE.ADDED,
+ ),
+ ]
+
+ enriched_diff = append_small_file_context_to_diff(
+ diff_text,
+ diff_files,
+ MockTokenHandler(),
+ max_lines=10,
+ max_tokens=10,
+ )
+
+ assert enriched_diff == diff_text
+
+ def test_append_small_file_context_to_diff_skips_deleted_or_missing_head_files(self):
+ diff_text = (
+ "## File: 'src/deleted.py'\n\n@@ ... @@\n__new hunk__\n"
+ "\n## File: 'src/missing.py'\n\n@@ ... @@\n__new hunk__\n1 +print('hi')\n"
+ )
+ diff_files = [
+ FilePatchInfo(
+ base_file="print('bye')\n",
+ head_file="",
+ patch="@@ -1 +0,0 @@\n-print('bye')\n",
+ filename="src/deleted.py",
+ edit_type=EDIT_TYPE.DELETED,
+ ),
+ FilePatchInfo(
+ base_file="",
+ head_file=None,
+ patch="@@ -0,0 +1 @@\n+print('hi')\n",
+ filename="src/missing.py",
+ edit_type=EDIT_TYPE.ADDED,
+ ),
+ ]
+
+ enriched_diff = append_small_file_context_to_diff(
+ diff_text,
+ diff_files,
+ MockTokenHandler(),
+ max_lines=10,
+ max_tokens=500,
+ )
+
+ assert enriched_diff == diff_text
+
+ def test_append_small_file_context_to_diff_prefers_smaller_files_when_budget_is_limited(self):
+ diff_text = (
+ "## File: 'src/medium.py'\n\n@@ ... @@\n__new hunk__\n1 +medium\n"
+ "\n## File: 'src/small_a.py'\n\n@@ ... @@\n__new hunk__\n1 +small_a\n"
+ "\n## File: 'src/small_b.py'\n\n@@ ... @@\n__new hunk__\n1 +small_b\n"
+ )
+ medium_file = FilePatchInfo(
+ base_file="",
+ head_file="\n".join(["medium"] * 3),
+ patch="@@ -0,0 +1,3 @@\n+medium\n+medium\n+medium\n",
+ filename="src/medium.py",
+ edit_type=EDIT_TYPE.ADDED,
+ )
+ small_a_file = FilePatchInfo(
+ base_file="",
+ head_file="small_a\n",
+ patch="@@ -0,0 +1 @@\n+small_a\n",
+ filename="src/small_a.py",
+ edit_type=EDIT_TYPE.ADDED,
+ )
+ small_b_file = FilePatchInfo(
+ base_file="",
+ head_file="small_b\n",
+ patch="@@ -0,0 +1 @@\n+small_b\n",
+ filename="src/small_b.py",
+ edit_type=EDIT_TYPE.ADDED,
+ )
+ max_tokens = len(render_full_file_context(small_a_file)) + len(render_full_file_context(small_b_file))
+
+ enriched_diff = append_small_file_context_to_diff(
+ diff_text,
+ [medium_file, small_a_file, small_b_file],
+ MockTokenHandler(),
+ max_lines=10,
+ max_tokens=max_tokens,
+ )
+
+ assert "## Full file context: 'src/medium.py'" not in enriched_diff
+ assert "## Full file context: 'src/small_a.py'" in enriched_diff
+ assert "## Full file context: 'src/small_b.py'" in enriched_diff
+
+ def test_render_full_file_context_keeps_blank_line_numbers(self):
+ file = FilePatchInfo(
+ base_file="",
+ head_file="alpha\n\nomega\n",
+ patch="@@ -0,0 +1,3 @@\n+alpha\n+\n+omega\n",
+ filename="src/blank_lines.py",
+ edit_type=EDIT_TYPE.ADDED,
+ )
+
+ rendered_context = render_full_file_context(file)
+
+ assert "1 alpha\n2 \n3 omega" in rendered_context
+
+ def test_extract_files_in_diff_supports_matching_quotes_only(self):
+ diff_text = (
+ "## File: 'src/single.py'\n"
+ '## File: "src/double.py"\n'
+ "## File: 'src/mismatch.py\"\n"
+ )
+
+ extracted_files = extract_files_in_diff(diff_text)
+
+ assert extracted_files == {"src/single.py", "src/double.py"}
diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py
index 73f9882a78..06d46a9318 100644
--- a/tests/unittest/test_convert_to_markdown.py
+++ b/tests/unittest/test_convert_to_markdown.py
@@ -126,6 +126,78 @@ def test_key_issues_to_review(self):
assert convert_to_markdown_v2(input_data, git_provider=mock_git_provider).strip() == expected_output.strip()
mock_git_provider.get_line_link.assert_called_with('src/utils.py', 30, 50)
+ def test_key_issues_to_review_hides_metadata_badges_by_default(self):
+ input_data = {'review': {
+ 'key_issues_to_review': [
+ {
+ 'relevant_file': 'src/utils.py',
+ 'issue_header': 'Code Smell',
+ 'issue_content': 'The function is too long and complex.',
+ 'confidence': 'high',
+ 'evidence_type': 'diff',
+ 'start_line': 30,
+ 'end_line': 50,
+ }
+ ]
+ }}
+ mock_git_provider = Mock()
+ mock_git_provider.get_line_link.return_value = ''
+
+ result = convert_to_markdown_v2(input_data, git_provider=mock_git_provider)
+
+ assert 'High confidence' not in result
+ assert 'Diff evidence' not in result
+ assert 'The function is too long and complex.' in result
+
+ def test_key_issues_to_review_renders_metadata_badges_when_enabled(self):
+ input_data = {'review': {
+ 'key_issues_to_review': [
+ {
+ 'relevant_file': 'src/utils.py',
+ 'issue_header': 'Code Smell',
+ 'issue_content': 'The function is too long and complex.',
+ 'confidence': 'high',
+ 'evidence_type': 'diff',
+ 'start_line': 30,
+ 'end_line': 50,
+ }
+ ]
+ }}
+
+ mock_git_provider = Mock()
+ mock_git_provider.get_line_link.return_value = ''
+
+ result = convert_to_markdown_v2(
+ input_data,
+ git_provider=mock_git_provider,
+ findings_metadata_badges=True,
+ )
+
+        assert 'High confidence Diff evidence\nThe function is too long and complex.' in result
+
+ def test_key_issues_to_review_renders_metadata_badges_without_gfm_when_enabled(self):
+ input_data = {'review': {
+ 'key_issues_to_review': [
+ {
+ 'relevant_file': 'src/utils.py',
+ 'issue_header': 'Code Smell',
+ 'issue_content': 'The function is too long and complex.',
+ 'confidence': 'medium',
+ 'evidence_type': 'ticket',
+ 'start_line': 30,
+ 'end_line': 50,
+ }
+ ]
+ }}
+
+ result = convert_to_markdown_v2(
+ input_data,
+ gfm_supported=False,
+ findings_metadata_badges=True,
+ )
+
+ assert '`Medium confidence` `Ticket evidence`' in result
+
def test_ticket_compliance(self):
input_data = {'review': {
'ticket_compliance_check': [
diff --git a/tests/unittest/test_parse_code_suggestion.py b/tests/unittest/test_parse_code_suggestion.py
index c959a2dcb6..367f61ce40 100644
--- a/tests/unittest/test_parse_code_suggestion.py
+++ b/tests/unittest/test_parse_code_suggestion.py
@@ -1,6 +1,7 @@
# Generated by CodiumAI
-from pr_agent.algo.utils import parse_code_suggestion
+from pr_agent.algo.utils import (format_code_suggestion_metadata,
+ parse_code_suggestion)
"""
Code Analysis
@@ -35,6 +36,15 @@
class TestParseCodeSuggestion:
+ def test_format_code_suggestion_metadata_hidden_by_default(self):
+ assert format_code_suggestion_metadata("possible issue", 7) == ""
+
+ def test_format_code_suggestion_metadata_renders_label_and_score_when_enabled(self):
+ assert format_code_suggestion_metadata("possible issue", 7, enable_badges=True) == " [possible issue, importance: 7]"
+
+ def test_format_code_suggestion_metadata_renders_label_only_when_enabled(self):
+ assert format_code_suggestion_metadata("possible issue", enable_badges=True) == " [possible issue]"
+
# Tests that function returns empty string when input is an empty dictionary
def test_empty_dict(self):
input_data = {}
diff --git a/tests/unittest/test_review_output_filter.py b/tests/unittest/test_review_output_filter.py
new file mode 100644
index 0000000000..69c9b93e3d
--- /dev/null
+++ b/tests/unittest/test_review_output_filter.py
@@ -0,0 +1,199 @@
+import pr_agent.algo.review_output_filter as review_output_filter
+
+
+class TestReviewOutputFilter:
+ def test_normalize_review_output_caps_findings_and_keeps_valid_metadata(self):
+ data = {
+ "review": {
+ "key_issues_to_review": [
+ {
+ "relevant_file": "src/one.py",
+ "issue_header": "Possible Bug",
+ "issue_content": "Issue one",
+ "confidence": "HIGH",
+ "evidence_type": "DIFF",
+ "start_line": 10,
+ "end_line": 12,
+ "unknown": "preserved value",
+ },
+ {
+ "relevant_file": "src/two.py",
+ "issue_header": "Possible Issue",
+ "issue_content": "Issue two",
+ "confidence": "unsupported",
+ "evidence_type": "ticket",
+ "start_line": 20,
+ "end_line": 21,
+ },
+ {
+ "relevant_file": "src/three.py",
+ "issue_header": "Possible Issue",
+ "issue_content": "Issue three",
+ "confidence": "low",
+ "evidence_type": "inferred",
+ "start_line": 30,
+ "end_line": 31,
+ },
+ ]
+ }
+ }
+
+ normalized = review_output_filter.normalize_review_output(
+ data,
+ max_findings=2,
+ findings_metadata=True,
+ filter_mode="off",
+ )
+
+ issues = normalized["review"]["key_issues_to_review"]
+ assert len(issues) == 2
+ assert issues[0]["confidence"] == "high"
+ assert issues[0]["evidence_type"] == "diff"
+ assert issues[0]["unknown"] == "preserved value"
+ assert "confidence" not in issues[1]
+ assert issues[1]["evidence_type"] == "ticket"
+
+ def test_normalize_review_output_can_drop_low_confidence_inferred_findings(self):
+ data = {
+ "review": {
+ "key_issues_to_review": [
+ {
+ "relevant_file": "src/one.py",
+ "issue_header": "Possible Bug",
+ "issue_content": "Keep me",
+ "confidence": "medium",
+ "evidence_type": "diff",
+ "start_line": 10,
+ "end_line": 12,
+ },
+ {
+ "relevant_file": "src/two.py",
+ "issue_header": "Possible Issue",
+ "issue_content": "Drop me",
+ "confidence": "low",
+ "evidence_type": "inferred",
+ "start_line": 20,
+ "end_line": 21,
+ },
+ ]
+ }
+ }
+
+ normalized = review_output_filter.normalize_review_output(
+ data,
+ max_findings=5,
+ findings_metadata=True,
+ filter_mode="drop_low_confidence_inferred",
+ )
+
+ issues = normalized["review"]["key_issues_to_review"]
+ assert len(issues) == 1
+ assert issues[0]["relevant_file"] == "src/one.py"
+
+ def test_normalize_review_output_keeps_all_findings_when_max_findings_is_zero(self):
+ data = {
+ "review": {
+ "key_issues_to_review": [
+ {
+ "relevant_file": "src/one.py",
+ "issue_header": "Possible Bug",
+ "issue_content": "Issue one",
+ "start_line": 10,
+ "end_line": 12,
+ },
+ {
+ "relevant_file": "src/two.py",
+ "issue_header": "Possible Issue",
+ "issue_content": "Issue two",
+ "start_line": 20,
+ "end_line": 21,
+ },
+ ]
+ }
+ }
+
+ normalized = review_output_filter.normalize_review_output(data, max_findings=0)
+
+ issues = normalized["review"]["key_issues_to_review"]
+ assert len(issues) == 2
+
+ def test_normalize_review_output_strips_metadata_when_feature_is_disabled(self):
+ data = {
+ "review": {
+ "key_issues_to_review": [
+ {
+ "relevant_file": "src/one.py",
+ "issue_header": "Possible Bug",
+ "issue_content": "Issue one",
+ "confidence": "high",
+ "evidence_type": "diff",
+ "start_line": 10,
+ "end_line": 12,
+ }
+ ]
+ }
+ }
+
+ normalized = review_output_filter.normalize_review_output(data, max_findings=5, findings_metadata=False)
+
+ issue = normalized["review"]["key_issues_to_review"][0]
+ assert "confidence" not in issue
+ assert "evidence_type" not in issue
+
+ def test_normalize_review_output_disables_filter_when_metadata_is_disabled(self, monkeypatch):
+ warnings = []
+ data = {
+ "review": {
+ "key_issues_to_review": [
+ {
+ "relevant_file": "src/one.py",
+ "issue_header": "Possible Bug",
+ "issue_content": "Keep me",
+ "confidence": "low",
+ "evidence_type": "inferred",
+ "start_line": 10,
+ "end_line": 12,
+ }
+ ]
+ }
+ }
+ monkeypatch.setattr(review_output_filter, "_log_warning", warnings.append)
+
+ normalized = review_output_filter.normalize_review_output(
+ data,
+ max_findings=5,
+ findings_metadata=False,
+ filter_mode="drop_low_confidence_inferred",
+ )
+
+ assert warnings == ["findings_filter_mode has no effect when findings_metadata is false"]
+ assert len(normalized["review"]["key_issues_to_review"]) == 1
+
+ def test_normalize_review_output_warns_on_unknown_filter_mode(self, monkeypatch):
+ warnings = []
+ data = {
+ "review": {
+ "key_issues_to_review": [
+ {
+ "relevant_file": "src/one.py",
+ "issue_header": "Possible Bug",
+ "issue_content": "Keep me",
+ "confidence": "low",
+ "evidence_type": "inferred",
+ "start_line": 10,
+ "end_line": 12,
+ }
+ ]
+ }
+ }
+ monkeypatch.setattr(review_output_filter, "_log_warning", warnings.append)
+
+ normalized = review_output_filter.normalize_review_output(
+ data,
+ max_findings=5,
+ findings_metadata=True,
+ filter_mode="unknown_mode",
+ )
+
+ assert warnings == ["Unknown findings_filter_mode: 'unknown_mode', defaulting to off"]
+ assert len(normalized["review"]["key_issues_to_review"]) == 1