diff --git a/AUTO1_PATCHES.md b/AUTO1_PATCHES.md
index cf2d50b610..f03adbf9f8 100644
--- a/AUTO1_PATCHES.md
+++ b/AUTO1_PATCHES.md
@@ -20,6 +20,7 @@ Keep this list minimal to ease upstream rebases.
 | describe-mermaid-sanitize | OPS-24090 | Fix | pr_agent/tools/pr_description.py | Strip backticks from Mermaid diagrams to avoid GitHub render failures. | Not upstreamed yet. | Remove once upstream sanitizes Mermaid diagrams or fixes prompt output. |
 | jira-cloud-ticket-context | OPS-24091 | Enhancement | pr_agent/algo/utils.py, pr_agent/tickets/jira_cloud.py, pr_agent/tools/ticket_pr_compliance_check.py, pr_agent/tools/pr_reviewer.py, pr_agent/tools/pr_description.py, pr_agent/tools/pr_config.py, tests/unittest/test_convert_to_markdown.py, tests/unittest/test_jira_cloud_ticket_context.py, tests/unittest/test_pr_reviewer_ticket_note.py, tests/unittest/test_ticket_pr_compliance_check.py | Fetch Jira Cloud ticket context using PR title first and branch name only as fallback, support both basic auth and scoped-token Jira Cloud API access, ignore placeholder compliance bullets and placeholder text like `None.`, show review notes when the Jira ticket cannot be fetched, and normalize empty discovered tickets before prompt rendering. | Not upstreamed yet. | Remove once upstream supports Jira Cloud ticket context from PR metadata with title-first precedence, scoped-token auth, placeholder-safe compliance rendering, comment-level fetch-failure notes, and prompt-safe normalization for empty ticket payloads. |
 | review-metadata-context | OPS-24224 | Enhancement | pr_agent/algo/context_enrichment.py, pr_agent/algo/review_output_filter.py, pr_agent/algo/utils.py, pr_agent/git_providers/gitlab_provider.py, pr_agent/git_providers/local_git_provider.py, pr_agent/settings/configuration.toml, pr_agent/settings/pr_reviewer_prompts.toml, pr_agent/tools/pr_code_suggestions.py, pr_agent/tools/pr_reviewer.py, tests/unittest/test_context_enrichment.py, tests/unittest/test_convert_to_markdown.py, tests/unittest/test_parse_code_suggestion.py, tests/unittest/test_review_output_filter.py | Add optional review findings metadata (`confidence`, `evidence_type`), normalize parsed findings and enforce the configured finding cap at runtime, allow bounded full-file context for small touched files to improve review precision, let metadata badges be toggled independently from metadata generation, suppress inline code-suggestion label/importance suffixes when badges are disabled, rename the downstream config keys to `findings_metadata`, `findings_metadata_badges`, and `small_file_context`, and reuse cached local diff files for local review runs. | Not upstreamed yet. | Remove once upstream supports structured review findings metadata, runtime normalization, bounded small-file context enrichment for `/review`, independent metadata-badge rendering control across review output and inline suggestions, the renamed downstream config surface, and local review no longer needs the downstream diff-file cache adjustment. |
+| github-review-resilience | OPS-24224 | Fix | pr_agent/algo/git_patch_processing.py, pr_agent/git_providers/github_provider.py, pr_agent/tools/pr_description.py, pr_agent/tools/pr_reviewer.py, tests/unittest/test_github_review_resilience.py | Treat missing unified-diff hunk sizes as single-line hunks, keep `/review` token estimation from touching stale cached ticket payloads, make transient GitHub language lookups non-fatal, and improve inline-comment and empty-diff diagnostics. | Not upstreamed yet. | Remove once upstream handles single-line hunks correctly, avoids prompt rendering against stale cached ticket data during reviewer initialization, degrades gracefully on transient GitHub language API failures, and logs actionable inline-comment and empty-diff failures. |
 
 ## Rebase checklist
 
diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
index 81e05d500d..b18826e25c 100644
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -212,16 +212,11 @@ def check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1):
 
 
 def extract_hunk_headers(match):
-    res = list(match.groups())
-    for i in range(len(res)):
-        if res[i] is None:
-            res[i] = 0
-    try:
-        start1, size1, start2, size2 = map(int, res[:4])
-    except:  # '@@ -0,0 +1 @@' case
-        start1, size1, size2 = map(int, res[:3])
-        start2 = 0
-    section_header = res[4]
+    start1, size1, start2, size2, section_header = match.groups()
+    start1 = int(start1)
+    start2 = int(start2)
+    size1 = int(size1) if size1 is not None else 1
+    size2 = int(size2) if size2 is not None else 1
     return section_header, size1, size2, start1, start2
 
 
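A minimal sketch of the new hunk-header semantics (not part of the patch; the pattern mirrors the regex exercised in the new tests below, and the `HUNK_HEADER_RE` constant name exists only for this example):

```python
import re

from pr_agent.algo.git_patch_processing import extract_hunk_headers

# Unified-diff headers omit a range size when it covers exactly one line:
# "@@ -12 +34 @@" means one old line at 12 and one new line at 34.
HUNK_HEADER_RE = r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)"

match = re.match(HUNK_HEADER_RE, "@@ -12 +34 @@ def foo():")
section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
assert (size1, size2) == (1, 1)      # missing sizes now default to 1, not 0
assert (start1, start2) == (12, 34)
assert section_header == "def foo():"
```
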
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index fa52b7dc05..05f204d6e8 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -12,6 +12,7 @@ from github.Issue import Issue
 
 from github import AppAuthentication, Auth, Github, GithubException
+from requests.exceptions import RequestException
 from retry import retry
 from starlette_context import context
 
@@ -416,7 +417,14 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool =
             # publish all comments in a single message
             self.pr.create_review(commit=self.last_commit_id, comments=comments)
         except Exception as e:
-            get_logger().info(f"Initially failed to publish inline comments as committable")
+            get_logger().warning(
+                "Initially failed to publish inline comments as committable",
+                artifact={
+                    "error": str(e),
+                    "status": getattr(e, "status", None),
+                    "comments_count": len(comments),
+                },
+            )
             if (getattr(e, "status", None) == 422 and not disable_fallback):
                 pass  # continue to try _publish_inline_comments_fallback_with_verification
 
@@ -475,8 +483,15 @@ def _publish_inline_comments_fallback_with_verification(self, comments: list[dic
         if verified_comments:
             try:
                 self.pr.create_review(commit=self.last_commit_id, comments=verified_comments)
-            except:
-                pass
+            except Exception as e:
+                get_logger().warning(
+                    "Failed to publish verified inline comments as a grouped review",
+                    artifact={
+                        "error": str(e),
+                        "status": getattr(e, "status", None),
+                        "comments_count": len(verified_comments),
+                    },
+                )
 
         # try to publish one by one the invalid comments as a one-line code comment
         if invalid_comments and get_settings().github.try_fix_invalid_inline_comments:
@@ -486,8 +501,15 @@ def _publish_inline_comments_fallback_with_verification(self, comments: list[dic
             try:
                 self.publish_inline_comments([comment], disable_fallback=True)
                 get_logger().info(f"Published invalid comment as a single line comment: {comment}")
-            except:
-                get_logger().error(f"Failed to publish invalid comment as a single line comment: {comment}")
+            except Exception as e:
+                get_logger().error(
+                    f"Failed to publish invalid comment as a single line comment: {comment}",
+                    artifact={
+                        "error": str(e),
+                        "status": getattr(e, "status", None),
+                        "comment": comment,
+                    },
+                )
 
     def _verify_code_comment(self, comment: dict):
         is_verified = False
@@ -695,8 +717,23 @@ def get_title(self):
         return self.pr.title
 
     def get_languages(self):
-        languages = self._get_repo().get_languages()
-        return languages
+        try:
+            languages = self._get_repo().get_languages()
+            return languages
+        except GithubException as e:
+            if getattr(e, "status", None) in {500, 502, 503, 504}:
+                get_logger().warning(
+                    "Failed to fetch repository languages from GitHub, using empty language map",
+                    artifact={"error": str(e), "status": e.status, "repo": self.repo},
+                )
+                return {}
+            raise
+        except RequestException as e:
+            get_logger().warning(
+                "Failed to fetch repository languages from GitHub, using empty language map",
+                artifact={"error": str(e), "repo": self.repo},
+            )
+            return {}
 
     def get_pr_branch(self):
         return self.pr.head.ref
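The `getattr(e, "status", None)` reads above assume PyGithub's exception shape; a quick illustration of that attribute (the `GithubException` constructor call matches the one used in the new tests, and the `ValueError` is only a stand-in for a non-PyGithub error):

```python
from github import GithubException

# PyGithub attaches the HTTP status of the failed API call to the exception.
e = GithubException(503, {"message": "Service unavailable"}, None)
assert e.status == 503

# getattr(..., "status", None) degrades safely for errors without a status.
assert getattr(ValueError("boom"), "status", None) is None
```
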
diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py
index 2db2c6ba08..36d9d774a6 100644
--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@@ -225,7 +225,11 @@ async def _prepare_prediction(self, model: str) -> None:
                 self.prediction = await self.extend_uncovered_files(self.prediction)
             else:
                 get_logger().error(f"Error getting PR diff {self.pr_id}",
-                                   artifact={"traceback": traceback.format_exc()})
+                                   artifact={
+                                       "large_pr_handling": large_pr_handling,
+                                       "patches_diff_type": type(patches_diff).__name__,
+                                       "remaining_files": len(remaining_files_list),
+                                   })
                 self.prediction = None
         else:
             # get the diff in multiple patches, with the token handler only for the files prompt
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index f5a7ec9dc9..024787d222 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -143,7 +143,7 @@ def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False,
             "findings_metadata": self.findings_metadata,
             "small_file_context": self.small_file_context,
             "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False),
-            "related_tickets": get_settings().get('related_tickets', []),
+            "related_tickets": [],
             "ticket_compliance_note": "",
             'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),
             "date": datetime.datetime.now().strftime('%Y-%m-%d'),
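Why `related_tickets` is now seeded empty: the settings object returned by `get_settings()` is effectively process-global, so a ticket payload cached by an earlier run could otherwise leak into this reviewer's prompt variables. A sketch of the failure mode, using the same settings API the new test exercises:

```python
from pr_agent.config_loader import get_settings

settings = get_settings()

# A previous run in the same process may have left ticket data behind.
settings.set("related_tickets", [{"title": "stale ticket"}])

# The old reviewer init re-read that global state into its prompt vars:
assert settings.get("related_tickets", []) == [{"title": "stale ticket"}]

# The patched PRReviewer starts from an empty list instead and lets the
# explicit ticket-fetch step populate it later.
```
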
@@[ ]?(.*)", "@@ -0,0 +1 @@") + + assert extract_hunk_headers(match) == ("", 0, 1, 0, 1) + + +def test_validate_comments_inside_hunks_handles_single_line_hunks(): + provider = GithubProvider.__new__(GithubProvider) + provider.get_diff_files = lambda: [ + FilePatchInfo( + base_file="old\n", + head_file="new\n", + patch="@@ -1 +1 @@\n-old\n+new\n", + filename="foo.py", + ) + ] + + suggestions = [ + { + "body": "```suggestion\nnew\n```", + "relevant_file": "foo.py", + "relevant_lines_start": 1, + "relevant_lines_end": 1, + "original_suggestion": {"existing_code": "old", "improved_code": "new"}, + } + ] + + validated = provider.validate_comments_inside_hunks(suggestions) + + assert validated[0]["relevant_lines_start"] == 1 + assert validated[0]["relevant_lines_end"] == 1 + + +def test_get_languages_returns_empty_map_for_transient_github_errors(): + provider = GithubProvider.__new__(GithubProvider) + provider.repo = "owner/repo" + + class FailingRepo: + def get_languages(self): + raise GithubException(503, {"message": "Service unavailable"}, None) + + provider._get_repo = lambda: FailingRepo() + + assert provider.get_languages() == {} + + +def test_get_languages_reraises_non_transient_github_errors(): + provider = GithubProvider.__new__(GithubProvider) + + class FailingRepo: + def get_languages(self): + raise GithubException(404, {"message": "Not found"}, None) + + provider._get_repo = lambda: FailingRepo() + + with pytest.raises(GithubException): + provider.get_languages() + + +def test_get_languages_returns_empty_map_for_request_errors(): + provider = GithubProvider.__new__(GithubProvider) + provider.repo = "owner/repo" + + class FailingRepo: + def get_languages(self): + raise RequestException("connection dropped") + + provider._get_repo = lambda: FailingRepo() + + assert provider.get_languages() == {} + + +def test_pr_reviewer_initial_vars_ignore_cached_related_tickets(monkeypatch): + class FakeGitProvider: + def __init__(self): + self.pr = type("PR", (), {"title": "review resilience test"})() + + def get_languages(self): + return {} + + def get_files(self): + return [] + + def get_incremental_commits(self, _incremental): + return None + + def is_supported(self, _capability): + return True + + def get_pr_description(self, split_changes_walkthrough=False): + if split_changes_walkthrough: + return "", [] + return "" + + def get_pr_branch(self): + return "review-resilience-test" + + def get_num_of_files(self): + return 0 + + def get_commit_messages(self): + return "" + + captured_vars = {} + + class FakeTokenHandler: + def __init__(self, _pr, vars, _system, _user): + captured_vars.update(vars) + + class FakeAIHandler: + def __init__(self): + self.main_pr_language = None + + from pr_agent.config_loader import get_settings + import pr_agent.tools.pr_reviewer as pr_reviewer_module + + settings = get_settings() + previous_related_tickets = settings.get("related_tickets", []) + + monkeypatch.setattr(pr_reviewer_module, "get_git_provider_with_context", lambda _pr_url: FakeGitProvider()) + monkeypatch.setattr(pr_reviewer_module, "get_main_pr_language", lambda _languages, _files: "python") + monkeypatch.setattr(pr_reviewer_module, "TokenHandler", FakeTokenHandler) + + try: + settings.set("related_tickets", [{"title": "stale", "labels": ["bad-shape"]}]) + + reviewer = PRReviewer("https://example.com/org/repo/pull/1", ai_handler=FakeAIHandler) + + assert reviewer.vars["related_tickets"] == [] + assert captured_vars["related_tickets"] == [] + finally: + settings.set("related_tickets", 
previous_related_tickets)
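To run only the new suite, `pytest tests/unittest/test_github_review_resilience.py -v` from the repo root works; a programmatic equivalent, as a sketch:

```python
import sys

import pytest

# pytest.main returns an exit code suitable for sys.exit.
sys.exit(pytest.main(["tests/unittest/test_github_review_resilience.py", "-v"]))
```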