Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTO1_PATCHES.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Keep this list minimal to ease upstream rebases.
| improve-reflect-fallback-score | OPS-24090 | Fix | pr_agent/tools/pr_code_suggestions.py | Prevent inline suggestion spam when self-reflection fails by defaulting all suggestion scores to 0. | Not upstreamed yet. | Remove once upstream handles reflect failure without inflating scores. |
| describe-mermaid-sanitize | OPS-24090 | Fix | pr_agent/tools/pr_description.py | Strip backticks from Mermaid diagrams to avoid GitHub render failures. | Not upstreamed yet. | Remove once upstream sanitizes Mermaid diagrams or fixes prompt output. |
| jira-cloud-ticket-context | OPS-24091 | Enhancement | pr_agent/algo/utils.py, pr_agent/tickets/jira_cloud.py, pr_agent/tools/ticket_pr_compliance_check.py, pr_agent/tools/pr_reviewer.py, pr_agent/tools/pr_description.py, pr_agent/tools/pr_config.py, tests/unittest/test_convert_to_markdown.py, tests/unittest/test_jira_cloud_ticket_context.py, tests/unittest/test_pr_reviewer_ticket_note.py, tests/unittest/test_ticket_pr_compliance_check.py | Fetch Jira Cloud ticket context using PR title first and branch name only as fallback, support both basic auth and scoped-token Jira Cloud API access, ignore placeholder compliance bullets and placeholder text like `None.`, show review notes when the Jira ticket cannot be fetched, and normalize empty discovered tickets before prompt rendering. | Not upstreamed yet. | Remove once upstream supports Jira Cloud ticket context from PR metadata with title-first precedence, scoped-token auth, placeholder-safe compliance rendering, comment-level fetch-failure notes, and prompt-safe normalization for empty ticket payloads. |
| review-metadata-context | OPS-24224 | Enhancement | pr_agent/algo/context_enrichment.py, pr_agent/algo/review_output_filter.py, pr_agent/algo/utils.py, pr_agent/git_providers/gitlab_provider.py, pr_agent/git_providers/local_git_provider.py, pr_agent/settings/configuration.toml, pr_agent/settings/pr_reviewer_prompts.toml, pr_agent/tools/pr_code_suggestions.py, pr_agent/tools/pr_reviewer.py, tests/unittest/test_context_enrichment.py, tests/unittest/test_convert_to_markdown.py, tests/unittest/test_parse_code_suggestion.py, tests/unittest/test_review_output_filter.py | Add optional review findings metadata (`confidence`, `evidence_type`), normalize parsed findings and enforce the configured finding cap at runtime, allow bounded full-file context for small touched files to improve review precision, let metadata badges be toggled independently from metadata generation, suppress inline code-suggestion label/importance suffixes when badges are disabled, rename the downstream config keys to `findings_metadata`, `findings_metadata_badges`, and `small_file_context`, and reuse cached local diff files for local review runs. | Not upstreamed yet. | Remove once upstream supports structured review findings metadata, runtime normalization, bounded small-file context enrichment for `/review`, independent metadata-badge rendering control across review output and inline suggestions, the renamed downstream config surface, and local review no longer needs the downstream diff-file cache adjustment. |

## Rebase checklist

Expand Down
87 changes: 87 additions & 0 deletions pr_agent/algo/context_enrichment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import re

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo


FILE_HEADER_PATTERN = r'^## File: (?:\'([^\']+)\'|"([^\"]+)")$'


def append_small_file_context_to_diff(
    diff_text: str,
    diff_files: list[FilePatchInfo],
    token_handler,
    *,
    max_lines: int,
    max_tokens: int,
) -> str:
    """Return ``diff_text`` with a bounded small-file context section appended.

    Delegates selection/budgeting to :func:`build_small_file_context`; when no
    file qualifies, the diff text is returned unchanged.
    """
    extra_context = build_small_file_context(
        diff_text,
        diff_files,
        token_handler,
        max_lines=max_lines,
        max_tokens=max_tokens,
    )
    if not extra_context:
        return diff_text

    return (
        f"{diff_text}\n\n"
        "Additional context for small touched files:\n"
        f"======\n{extra_context}\n======"
    )


def build_small_file_context(
    diff_text: str,
    diff_files: list[FilePatchInfo],
    token_handler,
    *,
    max_lines: int,
    max_tokens: int,
) -> str:
    """Render full-file context blocks for small files touched by the diff.

    Only files that appear in ``diff_text``, were not deleted, have both a
    patch and head content, and are at most ``max_lines`` long are eligible.
    Blocks are added smallest-first (ties broken by filename) while the
    ``max_tokens`` budget allows; oversized blocks are skipped, not truncated.
    Returns an empty string when nothing qualifies.
    """
    if not diff_text or not diff_files or max_lines <= 0 or max_tokens <= 0:
        return ""

    files_in_diff = extract_files_in_diff(diff_text)
    if not files_in_diff:
        return ""

    candidates = []
    for patch_file in diff_files:
        if patch_file.filename not in files_in_diff:
            continue
        if patch_file.edit_type == EDIT_TYPE.DELETED:
            continue
        if not patch_file.patch or not patch_file.head_file:
            continue

        total_lines = len(patch_file.head_file.splitlines())
        if total_lines <= max_lines:
            candidates.append((total_lines, patch_file.filename, patch_file))

    # Smallest files first so the token budget covers as many files as possible.
    candidates.sort(key=lambda entry: (entry[0], entry[1]))

    budget = max_tokens
    rendered_blocks = []
    for _, _, patch_file in candidates:
        block = render_full_file_context(patch_file)
        cost = token_handler.count_tokens(block)
        if cost > budget:
            continue
        rendered_blocks.append(block)
        budget -= cost

    return "\n\n".join(rendered_blocks)


def extract_files_in_diff(diff_text: str) -> set[str]:
    """Collect the filenames named by ``## File: '...'`` (or double-quoted) headers."""
    header_re = re.compile(r'^## File: (?:\'([^\']+)\'|"([^\"]+)")$', re.MULTILINE)
    found = set()
    for match in header_re.finditer(diff_text):
        # Exactly one of the two alternation groups matches per header.
        found.add(match.group(1) or match.group(2))
    return found


def render_full_file_context(file: FilePatchInfo) -> str:
    """Render a file's post-PR content as a line-numbered context block."""
    numbered = "\n".join(
        f"{line_number} {text}"
        for line_number, text in enumerate(file.head_file.splitlines(), start=1)
    )
    return (
        f"## Full file context: '{file.filename.strip()}'\n"
        "### Current file content after PR changes\n"
        f"{numbered}"
    )
125 changes: 125 additions & 0 deletions pr_agent/algo/review_output_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import copy
import logging


VALID_CONFIDENCE_VALUES = {"high", "medium", "low"}
VALID_EVIDENCE_TYPES = {"diff", "ticket", "inferred"}
FINDINGS_FILTER_MODE_OFF = "off"
FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED = "drop_low_confidence_inferred"
LOGGER = logging.getLogger(__name__)


def normalize_review_output(
    data: dict,
    max_findings: int,
    findings_metadata: bool = False,
    filter_mode: str = FINDINGS_FILTER_MODE_OFF,
) -> dict:
    """Normalize a model review payload before rendering.

    Validates ``filter_mode``, normalizes each issue under
    ``review.key_issues_to_review``, applies the findings filter, and enforces
    the ``max_findings`` cap at runtime. Inputs that do not look like a review
    payload are returned unchanged; the input dict is never mutated.
    """
    if not isinstance(data, dict) or not isinstance(data.get("review"), dict):
        return data

    # Validate the mode value FIRST: previously an unknown mode combined with
    # findings_metadata=False hit the "no effect" branch and the invalid value
    # (e.g. a config typo) was never reported.
    if filter_mode not in {FINDINGS_FILTER_MODE_OFF, FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED}:
        _log_warning(f"Unknown findings_filter_mode: {filter_mode!r}, defaulting to off")
        filter_mode = FINDINGS_FILTER_MODE_OFF
    if filter_mode != FINDINGS_FILTER_MODE_OFF and not findings_metadata:
        # Filtering keys off confidence/evidence_type, which only exist when
        # findings_metadata is enabled.
        _log_warning("findings_filter_mode has no effect when findings_metadata is false")
        filter_mode = FINDINGS_FILTER_MODE_OFF

    normalized_data = copy.deepcopy(data)
    issues = normalized_data["review"].get("key_issues_to_review")
    if not isinstance(issues, list):
        return normalized_data

    # normalize_issue returns None for malformed entries; drop those.
    normalized_issues = []
    for issue in issues:
        normalized_issue = normalize_issue(issue, findings_metadata=findings_metadata)
        if normalized_issue:
            normalized_issues.append(normalized_issue)

    filtered_issues, suppressed_count = filter_issues(normalized_issues, filter_mode=filter_mode)
    if suppressed_count:
        _log_debug("Suppressed low-confidence inferred findings", artifact={"count": suppressed_count})
    if max_findings > 0:
        # Enforce the configured cap even when the model over-produces findings.
        filtered_issues = filtered_issues[:max_findings]
    normalized_data["review"]["key_issues_to_review"] = filtered_issues

    return normalized_data


def normalize_issue(issue: dict, findings_metadata: bool = False) -> dict | None:
    """Return a defensively-copied issue with required keys present.

    Non-dict entries yield ``None``. Metadata keys (``confidence``,
    ``evidence_type``) are kept only when ``findings_metadata`` is enabled and
    the values normalize to an allowed choice; otherwise they are removed.
    """
    if not isinstance(issue, dict):
        return None

    result = copy.deepcopy(issue)
    for text_key in ("relevant_file", "issue_header", "issue_content"):
        result[text_key] = issue.get(text_key, "")

    if findings_metadata:
        for meta_key, allowed in (
            ("confidence", VALID_CONFIDENCE_VALUES),
            ("evidence_type", VALID_EVIDENCE_TYPES),
        ):
            normalized_value = normalize_enum_value(issue.get(meta_key), allowed)
            if normalized_value:
                result[meta_key] = normalized_value
            else:
                result.pop(meta_key, None)
    else:
        # Metadata disabled: strip any model-emitted metadata keys entirely.
        result.pop("confidence", None)
        result.pop("evidence_type", None)

    for line_key in ("start_line", "end_line"):
        result[line_key] = issue.get(line_key, 0)

    return result


def normalize_enum_value(value, valid_values: set[str]) -> str | None:
if value is None:
return None

normalized_value = str(value).strip().lower()
if normalized_value in valid_values:
return normalized_value

return None


def filter_issues(issues: list[dict], filter_mode: str = FINDINGS_FILTER_MODE_OFF) -> tuple[list[dict], int]:
    """Apply the configured findings filter; return ``(kept, suppressed_count)``."""
    if filter_mode != FINDINGS_FILTER_MODE_DROP_LOW_CONFIDENCE_INFERRED:
        return issues, 0

    def _suppressed(candidate: dict) -> bool:
        # Only low-confidence findings with no direct diff/ticket evidence are dropped.
        return candidate.get("confidence") == "low" and candidate.get("evidence_type") == "inferred"

    kept = [issue for issue in issues if not _suppressed(issue)]
    return kept, len(issues) - len(kept)


def _log_warning(message: str) -> None:
    """Emit a warning through whichever logger is currently resolvable."""
    _resolve_logger().warning(message)


def _log_debug(message: str, artifact: dict) -> None:
    """Debug-log with an artifact payload, adapting to the logger's API."""
    debug = _resolve_logger().debug
    try:
        # pr_agent's logger accepts an ``artifact`` keyword directly.
        debug(message, artifact=artifact)
    except TypeError:
        # stdlib logging rejects unknown kwargs; route through ``extra`` instead.
        debug(message, extra={"artifact": artifact})


def _resolve_logger():
    """Prefer the pr_agent project logger; fall back to this module's logger."""
    try:
        from pr_agent.log import get_logger
    except Exception:
        return LOGGER
    try:
        return get_logger()
    except Exception:
        return LOGGER
64 changes: 61 additions & 3 deletions pr_agent/algo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ def get_setting(key: str) -> Any:
return context.get("settings", global_settings).get(key, global_settings.get(key, None))
except Exception:
return global_settings.get(key, None)


def emphasize_header(text: str, only_markdown=False, reference_link=None) -> str:
try:
# Finding the position of the first occurrence of ": "
Expand Down Expand Up @@ -130,7 +128,8 @@ def convert_to_markdown_v2(output_data: dict,
incremental_review=None,
git_provider=None,
files=None,
ticket_note: str = '') -> str:
ticket_note: str = '',
findings_metadata_badges: bool | None = None) -> str:
"""
Convert a dictionary of data into markdown format.
Args:
Expand All @@ -139,6 +138,9 @@ def convert_to_markdown_v2(output_data: dict,
str: The markdown formatted text generated from the input dictionary.
"""

if findings_metadata_badges is None:
findings_metadata_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False)

emojis = {
"Can be split": "🔀",
"Key issues to review": "⚡",
Expand Down Expand Up @@ -287,6 +289,16 @@ def convert_to_markdown_v2(output_data: dict,
if issue_header.lower() == 'possible bug':
issue_header = 'Possible Issue' # Make the header less frightening
issue_content = issue.get('issue_content', '').strip()
issue_metadata = format_review_issue_metadata(
issue,
gfm_supported,
findings_metadata_badges,
)
if issue_metadata:
if gfm_supported:
issue_content = f"{issue_metadata}<br>{issue_content}" if issue_content else issue_metadata
else:
issue_content = f"{issue_metadata}\n\n{issue_content}" if issue_content else issue_metadata
start_line = int(str(issue.get('start_line', 0)).strip())
end_line = int(str(issue.get('end_line', 0)).strip())

Expand Down Expand Up @@ -339,6 +351,52 @@ def convert_to_markdown_v2(output_data: dict,

return markdown_text


def format_review_issue_metadata(issue: dict, gfm_supported: bool, enable_badges: bool = False) -> str:
    """Build the confidence/evidence badge prefix for a review finding.

    Returns an empty string when badges are disabled, the issue is not a dict,
    or no recognized metadata values are present. Badges are wrapped in
    ``<code>`` tags for GFM-capable providers, backticks otherwise.
    """
    if not enable_badges or not isinstance(issue, dict):
        return ""

    badges = []

    confidence = str(issue.get("confidence", "")).strip().lower()
    if confidence in ("high", "medium", "low"):
        badges.append(f"{confidence.capitalize()} confidence")

    evidence_names = {
        "diff": "Diff evidence",
        "ticket": "Ticket evidence",
        "inferred": "Inferred evidence",
    }
    evidence = str(issue.get("evidence_type", "")).strip().lower()
    if evidence in evidence_names:
        badges.append(evidence_names[evidence])

    if not badges:
        return ""

    template = "<code>{}</code>" if gfm_supported else "`{}`"
    return " ".join(template.format(badge) for badge in badges)


def format_code_suggestion_metadata(label: str, score=None, enable_badges: bool | None = None) -> str:
    """Render the ``" [label, importance: score]"`` suffix for an inline suggestion.

    When ``enable_badges`` is None, the ``findings_metadata_badges`` setting
    decides. Disabled badges, or a blank label, yield an empty string; the
    importance part is included only when a non-blank score is given.
    """
    if enable_badges is None:
        enable_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False)
    if not enable_badges:
        return ""

    clean_label = str(label).strip()
    if not clean_label:
        return ""

    clean_score = str(score).strip() if score is not None else ""
    if clean_score:
        return f" [{clean_label}, importance: {clean_score}]"
    return f" [{clean_label}]"


def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:
"""
Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content.
Expand Down
9 changes: 8 additions & 1 deletion pr_agent/git_providers/gitlab_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ..algo.git_patch_processing import decode_if_bytes
from ..algo.language_handler import is_valid_file
from ..algo.utils import (clip_tokens,
format_code_suggestion_metadata,
find_line_number_of_relevant_line_in_file,
load_large_diff)
from ..config_loader import get_settings
Expand Down Expand Up @@ -606,7 +607,13 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f
else:
language = ''
link = self.get_line_link(relevant_file, line_start, line_end)
body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n"
findings_metadata_badges = get_settings().pr_reviewer.get("findings_metadata_badges", False)
suggestion_metadata = format_code_suggestion_metadata(
label,
score,
enable_badges=findings_metadata_badges,
)
body_fallback = f"**Suggestion:** {content}{suggestion_metadata}\n\n"
body_fallback +=f"\n\n<details><summary>[{target_file.filename} [{line_start}-{line_end}]]({link}):</summary>\n\n"
body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`"
body_fallback+="</details>\n\n"
Expand Down
3 changes: 3 additions & 0 deletions pr_agent/git_providers/local_git_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ def is_supported(self, capability: str) -> bool:
return True

def get_diff_files(self) -> list[FilePatchInfo]:
if self.diff_files is not None:
return self.diff_files

diffs = self.repo.head.commit.diff(
self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]),
create_patch=True,
Expand Down
6 changes: 6 additions & 0 deletions pr_agent/settings/configuration.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ persistent_comment=true
extra_instructions = ""
num_max_findings = 3
final_update_message = true
findings_metadata = false
findings_metadata_badges = false
small_file_context = false
small_file_context_max_lines = 150
small_file_context_max_tokens = 3500
findings_filter_mode = "off"
# review labels
enable_review_labels_security=true
enable_review_labels_effort=true
Expand Down
Loading