From 5c0a4c979142e35d4db50c9f11e5d322a1380663 Mon Sep 17 00:00:00 2001
From: Aryn <63111101+arynyklas@users.noreply.github.com>
Date: Wed, 25 Mar 2026 02:19:42 +0500
Subject: [PATCH 1/4] feat(ollama): Add API key support for Ollama Cloud
(#2278)
Add support for OLLAMA.API_KEY configuration to enable authentication
with Ollama Cloud (ollama.com). Previously only local Ollama instances
were supported without authentication.
- Pass api_key to litellm completion calls when configured
- Update secrets template with documentation for the new api_key field
- Clarify api_base comment to distinguish between Ollama Cloud and local
---
pr_agent/algo/ai_handlers/litellm_ai_handler.py | 4 ++++
pr_agent/settings/.secrets_template.toml | 3 ++-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 9fb9d8add3..f51dcc8285 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -82,6 +82,8 @@ def __init__(self):
if get_settings().get("OLLAMA.API_BASE", None):
litellm.api_base = get_settings().ollama.api_base
self.api_base = get_settings().ollama.api_base
+ if get_settings().get("OLLAMA.API_KEY", None):
+ litellm.api_key = get_settings().ollama.api_key
if get_settings().get("HUGGINGFACE.REPETITION_PENALTY", None):
self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)
if get_settings().get("VERTEXAI.VERTEX_PROJECT", None):
@@ -404,6 +406,8 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
get_logger().info(f"\nSystem prompt:\n{system}")
get_logger().info(f"\nUser prompt:\n{user}")
+ kwargs["api_key"] = litellm.api_key
+
# Get completion with automatic streaming detection
resp, finish_reason, response_obj = await self._get_completion(**kwargs)
diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml
index 70238c2d91..e66ffbe632 100644
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@@ -50,7 +50,8 @@ key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acq
api_base = "" # the base url for your huggingface inference endpoint
[ollama]
-api_base = "" # the base url for your local Llama 2, Code Llama, and other models inference endpoint. Acquire through https://ollama.ai/
+api_base = "" # the base url for your Ollama endpoint, e.g. https://ollama.com for Ollama Cloud or http://localhost:11434 for local
+api_key = "" # required for Ollama Cloud (ollama.com); leave empty for local Ollama
[vertexai]
vertex_project = "" # the google cloud platform project name for your vertexai deployment
From 69ff274fa4da0d59fd9f641ca1a96f078555f5b1 Mon Sep 17 00:00:00 2001
From: Peter Dave Hello <3691490+PeterDaveHello@users.noreply.github.com>
Date: Wed, 25 Mar 2026 05:20:54 +0800
Subject: [PATCH 2/4] docs: fix minor text issues in prompts and comments
(#2284)
Fix a few unambiguous spelling, punctuation, and grammar issues
in prompt text, comments, docs, and related configuration help text.
These changes are documentation-only and do not affect runtime behavior.
---
docs/docs/installation/locally.md | 2 +-
pr_agent/algo/git_patch_processing.py | 2 +-
pr_agent/algo/utils.py | 2 +-
pr_agent/custom_merge_loader.py | 2 +-
pr_agent/servers/github_polling.py | 2 +-
pr_agent/settings/.secrets_template.toml | 2 +-
pr_agent/settings/configuration.toml | 4 ++--
pr_agent/settings/pr_help_docs_headings_prompts.toml | 2 +-
pr_agent/settings/pr_help_docs_prompts.toml | 2 +-
pr_agent/settings/pr_help_prompts.toml | 2 +-
pr_agent/settings/pr_reviewer_prompts.toml | 8 ++++----
11 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/docs/docs/installation/locally.md b/docs/docs/installation/locally.md
index 9543fe3274..b2717c5ce5 100644
--- a/docs/docs/installation/locally.md
+++ b/docs/docs/installation/locally.md
@@ -1,7 +1,7 @@
To run PR-Agent locally, you first need to acquire two keys:
1. An OpenAI key from [here](https://platform.openai.com/api-keys){:target="_blank"}, with access to GPT-4 and o4-mini (or a key for other [language models](../usage-guide/changing_a_model.md), if you prefer).
-2. A personal access token from your Git platform (GitHub, GitLab, BitBucket,Gitea) with repo scope. GitHub token, for example, can be issued from [here](https://github.com/settings/tokens){:target="_blank"}
+2. A personal access token from your Git platform (GitHub, GitLab, BitBucket, Gitea) with repo scope. GitHub token, for example, can be issued from [here](https://github.com/settings/tokens){:target="_blank"}
## Using Docker image
diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
index 81e05d500d..3a37e88d2c 100644
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -114,7 +114,7 @@ def _calc_context_limits(patch_lines_before):
found_header = True
section_header = ''
else:
- pass # its ok to be here. We cant apply dynamic context if the lines are different if 'old' and 'new' hunks
+ pass # it's ok to be here. We can't apply dynamic context if the lines are different between 'old' and 'new' hunks
break
if not found_header:
diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index 74ac1272a5..3e8576753f 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -1381,7 +1381,7 @@ def process_description(description_full: str) -> Tuple[str, List]:
pattern_back = r'\s*(.*?)(.*?).*?
\s*
\s*(.*?)\n\n\s*(.*?) '
res = re.search(pattern_back, file_data, re.DOTALL)
if not res or res.lastindex != 4:
- pattern_back = r'\s*(.*?)\s*(.*?).*?
\s*
\s*(.*?)\s*-\s*(.*?)\s* ' # looking for hypen ('- ')
+ pattern_back = r'\s*(.*?)\s*(.*?).*?
\s*
\s*(.*?)\s*-\s*(.*?)\s* ' # looking for hyphen ('- ')
res = re.search(pattern_back, file_data, re.DOTALL)
if res and res.lastindex == 4:
short_filename = res.group(1).strip()
diff --git a/pr_agent/custom_merge_loader.py b/pr_agent/custom_merge_loader.py
index 75b07a7718..abb11e3799 100644
--- a/pr_agent/custom_merge_loader.py
+++ b/pr_agent/custom_merge_loader.py
@@ -23,7 +23,7 @@ def load(obj, env=None, silent=True, key=None, filename=None):
None
"""
- MAX_TOML_SIZE_IN_BYTES = 100 * 1024 * 1024 # Prevent out of mem. exceptions by limiting to 100 MBs which is sufficient for upto 1M lines
+ MAX_TOML_SIZE_IN_BYTES = 100 * 1024 * 1024 # Prevent out of mem. exceptions by limiting to 100 MBs which is sufficient for up to 1M lines
# Get the list of files to load
# TODO: hasattr(obj, 'settings_files') for some reason returns False. Need to use 'settings_file'
diff --git a/pr_agent/servers/github_polling.py b/pr_agent/servers/github_polling.py
index 95f9d911b5..ab02339109 100644
--- a/pr_agent/servers/github_polling.py
+++ b/pr_agent/servers/github_polling.py
@@ -226,7 +226,7 @@ async def polling_loop():
break
task_queue.clear()
- # Dont wait for all processes to complete. Move on to the next iteration
+ # Don't wait for all processes to complete. Move on to the next iteration
# for p in processes:
# p.join()
diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml
index e66ffbe632..b8a4875976 100644
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@@ -108,7 +108,7 @@ pat = ""
[azure_devops_server]
# For Azure devops Server basic auth - configured in the webhook creation
-# Optional, uncomment if you want to use Azure devops webhooks. Value assinged when you create the webhook
+# Optional, uncomment if you want to use Azure devops webhooks. Value assigned when you create the webhook
# webhook_username = ""
# webhook_password = ""
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 27dfc061aa..16ffbcae2a 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -21,7 +21,7 @@ use_wiki_settings_file=true
use_repo_settings_file=true
use_global_settings_file=true
disable_auto_feedback = false
-ai_timeout=120 # 2minutes
+ai_timeout=120 # 2 minutes
skip_keys = []
custom_reasoning_model = false # when true, disables system messages and temperature controls for models that don't support chat-style inputs
response_language="en-US" # Language locales code for PR responses in ISO 3166 and ISO 639 format (e.g., "en-US", "it-IT", "zh-CN", ...)
@@ -130,7 +130,7 @@ use_conversation_history=true
[pr_code_suggestions] # /improve #
commitable_code_suggestions = false
-dual_publishing_score_threshold=-1 # -1 to disable, [0-10] to set the threshold (>=) for publishing a code suggestion both in a table and as commitable
+dual_publishing_score_threshold=-1 # -1 to disable, [0-10] to set the threshold (>=) for publishing a code suggestion both in a table and as committable
focus_only_on_problems=true
#
extra_instructions = ""
diff --git a/pr_agent/settings/pr_help_docs_headings_prompts.toml b/pr_agent/settings/pr_help_docs_headings_prompts.toml
index da9d6e5334..05bc579116 100644
--- a/pr_agent/settings/pr_help_docs_headings_prompts.toml
+++ b/pr_agent/settings/pr_help_docs_headings_prompts.toml
@@ -1,7 +1,7 @@
[pr_help_docs_headings_prompts]
system="""You are Doc-helper, a language model that ranks documentation files based on their relevance to user questions.
-You will receive a question, a repository url and file names along with optional groups of headings extracted from such files from that repository (either as markdown or as restructred text).
+You will receive a question, a repository url and file names along with optional groups of headings extracted from such files from that repository (either as markdown or as restructured text).
Your task is to rank file paths based on how likely they contain the answer to a user's question, using only the headings from each such file and the file name.
======
diff --git a/pr_agent/settings/pr_help_docs_prompts.toml b/pr_agent/settings/pr_help_docs_prompts.toml
index c73e1d958c..16358a5010 100644
--- a/pr_agent/settings/pr_help_docs_prompts.toml
+++ b/pr_agent/settings/pr_help_docs_prompts.toml
@@ -1,6 +1,6 @@
[pr_help_docs_prompts]
system="""You are Doc-helper, a language model designed to answer questions about a documentation website for a given repository.
-You will receive a question, a repository url and the full documentation content for that repository (either as markdown or as restructred text).
+You will receive a question, a repository url and the full documentation content for that repository (either as markdown or as restructured text).
Your goal is to provide the best answer to the question using the documentation provided.
Additional instructions:
diff --git a/pr_agent/settings/pr_help_prompts.toml b/pr_agent/settings/pr_help_prompts.toml
index 8bd182005a..274940fd2a 100644
--- a/pr_agent/settings/pr_help_prompts.toml
+++ b/pr_agent/settings/pr_help_prompts.toml
@@ -1,5 +1,5 @@
[pr_help_prompts]
-system="""You are Doc-helper, a language models designed to answer questions about a documentation website for an open-soure project called "PR-Agent" (recently renamed to "Qodo Merge").
+system="""You are Doc-helper, a language model designed to answer questions about a documentation website for an open-source project called "PR-Agent" (recently renamed to "Qodo Merge").
You will receive a question, and the full documentation website content.
Your goal is to provide the best answer to the question using the documentation provided.
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 2f253199d8..bbe6c6d04c 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -115,7 +115,7 @@ class Review(BaseModel):
ticket_compliance_check: List[TicketCompliance] = Field(description="A list of compliance checks for the related tickets")
{%- endif %}
{%- if require_estimate_effort_to_review %}
- estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.")
+ estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review, 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.")
{%- endif %}
{%- if require_estimate_contribution_time_cost %}
contribution_time_cost_estimate: ContributionTimeCostEstimate = Field(description="An estimate of the time required to implement the changes, based on the quantity, quality, and complexity of the contribution, as well as the context from the PR description and commit messages.")
@@ -124,20 +124,20 @@ class Review(BaseModel):
score: str = Field(description="Rate this PR on a scale of 0-100 (inclusive), where 0 means the worst possible PR code, and 100 means PR code of the highest quality, without any bugs or performance issues, that is ready to be merged immediately and run in production at scale.")
{%- endif %}
{%- if require_tests %}
- relevant_tests: str = Field(description="yes/no question: does this PR have relevant tests added or updated ?")
+ relevant_tests: str = Field(description="yes/no question: does this PR have relevant tests added or updated?")
{%- endif %}
{%- if question_str %}
insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
{%- endif %}
key_issues_to_review: List[KeyIssuesComponentLink] = Field("A concise list (0-{{ num_max_findings }} issues) of bugs, security vulnerabilities, or significant performance concerns introduced in this PR. Only include issues you are confident about. If confidence is limited but the potential impact is high (e.g., data loss, security), you may include it only if you explicitly note what remains uncertain. Each issue must identify a concrete problem with a realistic trigger scenario. An empty list is acceptable if no clear issues are found.")
{%- if require_security_review %}
- security_concerns: str = Field(description="Does this PR code introduce vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...', etc. Explain your answer. Be specific and give examples if possible")
+ security_concerns: str = Field(description="Does this PR code introduce vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...', etc. Explain your answer. Be specific and give examples if possible")
{%- endif %}
{%- if require_todo_scan %}
todo_sections: Union[List[TodoSection], str] = Field(description="A list of TODO comments found in the PR code. Return 'No' (as a string) if there are no TODO comments in the PR")
{%- endif %}
{%- if require_can_be_split_review %}
- can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
+ can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represents a meaningful independent task. Output an empty list if the PR code does not need to be split.")
{%- endif %}
class PRReview(BaseModel):
From d5712acf62a28aa9cafb176ef2f3df7491b678cb Mon Sep 17 00:00:00 2001
From: Takaya Nagai
Date: Wed, 25 Mar 2026 06:22:13 +0900
Subject: [PATCH 3/4] fix: sanitize changes diagram input (#2212)
* feat: sanitize changes diagram input and add unit tests
* fix: add input validation to sanitize_diagram function
* fix: update sanitize_diagram to require mermaid code fence
* fix: refactor sanitize_diagram to improve readability and maintainability
* fix: reorder imports to follow isort convention in test_pr_description
* fix: clean up sanitize_diagram function and remove unnecessary imports in test_pr_description
---
pr_agent/tools/pr_description.py | 34 ++++++++++--
tests/unittest/test_pr_description.py | 79 +++++++++++++++++++++++++++
2 files changed, 108 insertions(+), 5 deletions(-)
create mode 100644 tests/unittest/test_pr_description.py
diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py
index db1492c6bb..26ea5d190a 100644
--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@@ -461,11 +461,9 @@ def _prepare_data(self):
if 'description' in self.data:
self.data['description'] = self.data.pop('description')
if 'changes_diagram' in self.data:
- changes_diagram = self.data.pop('changes_diagram').strip()
- if changes_diagram.startswith('```'):
- if not changes_diagram.endswith('```'): # fallback for missing closing
- changes_diagram += '\n```'
- self.data['changes_diagram'] = '\n'+ changes_diagram
+ sanitized = sanitize_diagram(self.data.pop('changes_diagram'))
+ if sanitized:
+ self.data['changes_diagram'] = sanitized
if 'pr_files' in self.data:
self.data['pr_files'] = self.data.pop('pr_files')
@@ -771,6 +769,32 @@ def add_file_data(self, delta_nbsp, diff_plus_minus, file_change_description_br,
"""
return pr_body
+
+def sanitize_diagram(diagram_raw: str) -> str:
+ """Sanitize a diagram string: fix missing closing fence and remove backticks."""
+ if not isinstance(diagram_raw, str):
+ return ''
+ diagram = diagram_raw.strip()
+ if not diagram.startswith('```mermaid'):
+ return ''
+
+ # fallback missing closing
+ if not diagram.endswith('```'):
+ diagram += '\n```'
+
+
+ # remove backticks inside node labels: ["`label`"] -> ["label"]
+ result = []
+ for line in diagram.split('\n'):
+ line = re.sub(
+ r'\["([^"]*?)"\]',
+ lambda m: '["' + m.group(1).replace('`', '') + '"]',
+ line,
+ )
+ result.append(line)
+ return '\n' + '\n'.join(result)
+
+
def count_chars_without_html(string):
if '<' not in string:
return len(string)
diff --git a/tests/unittest/test_pr_description.py b/tests/unittest/test_pr_description.py
new file mode 100644
index 0000000000..c99f547e7d
--- /dev/null
+++ b/tests/unittest/test_pr_description.py
@@ -0,0 +1,79 @@
+from unittest.mock import MagicMock, patch
+
+import yaml
+
+from pr_agent.tools.pr_description import PRDescription, sanitize_diagram
+
+KEYS_FIX = ["filename:", "language:", "changes_summary:", "changes_title:", "description:", "title:"]
+
+def _make_instance(prediction_yaml: str):
+ """Create a PRDescription instance, bypassing __init__."""
+ with patch.object(PRDescription, '__init__', lambda self, *a, **kw: None):
+ obj = PRDescription.__new__(PRDescription)
+ obj.prediction = prediction_yaml
+ obj.keys_fix = KEYS_FIX
+ obj.user_description = ""
+ return obj
+
+
+def _mock_settings():
+ """Mock get_settings used by _prepare_data."""
+ settings = MagicMock()
+ settings.pr_description.add_original_user_description = False
+ return settings
+
+
+def _prediction_with_diagram(diagram_value: str) -> str:
+ """Build a minimal YAML prediction string that includes changes_diagram."""
+ return yaml.dump({
+ 'title': 'test',
+ 'description': 'test',
+ 'changes_diagram': diagram_value,
+ })
+
+
+class TestPRDescriptionDiagram:
+
+ @patch('pr_agent.tools.pr_description.get_settings')
+ def test_diagram_not_starting_with_fence_is_removed(self, mock_get_settings):
+ mock_get_settings.return_value = _mock_settings()
+ obj = _make_instance(_prediction_with_diagram('graph LR\nA --> B'))
+ obj._prepare_data()
+ assert 'changes_diagram' not in obj.data
+
+ @patch('pr_agent.tools.pr_description.get_settings')
+ def test_diagram_missing_closing_fence_is_appended(self, mock_get_settings):
+ mock_get_settings.return_value = _mock_settings()
+ obj = _make_instance(_prediction_with_diagram('```mermaid\ngraph LR\nA --> B'))
+ obj._prepare_data()
+ assert obj.data['changes_diagram'] == '\n```mermaid\ngraph LR\nA --> B\n```'
+
+ @patch('pr_agent.tools.pr_description.get_settings')
+ def test_backticks_inside_label_are_removed(self, mock_get_settings):
+ mock_get_settings.return_value = _mock_settings()
+ obj = _make_instance(_prediction_with_diagram('```mermaid\ngraph LR\nA["`file`"] --> B\n```'))
+ obj._prepare_data()
+ assert obj.data['changes_diagram'] == '\n```mermaid\ngraph LR\nA["file"] --> B\n```'
+
+ @patch('pr_agent.tools.pr_description.get_settings')
+ def test_backticks_outside_label_are_kept(self, mock_get_settings):
+ mock_get_settings.return_value = _mock_settings()
+ obj = _make_instance(_prediction_with_diagram('```mermaid\ngraph LR\nA["`file`"] -->|`edge`| B\n```'))
+ obj._prepare_data()
+ assert obj.data['changes_diagram'] == '\n```mermaid\ngraph LR\nA["file"] -->|`edge`| B\n```'
+
+ @patch('pr_agent.tools.pr_description.get_settings')
+ def test_normal_diagram_only_adds_newline(self, mock_get_settings):
+ mock_get_settings.return_value = _mock_settings()
+ obj = _make_instance(_prediction_with_diagram('```mermaid\ngraph LR\nA["file.py"] --> B["output"]\n```'))
+ obj._prepare_data()
+ assert obj.data['changes_diagram'] == '\n```mermaid\ngraph LR\nA["file.py"] --> B["output"]\n```'
+
+ def test_none_input_returns_empty(self):
+ assert sanitize_diagram(None) == ''
+
+ def test_non_string_input_returns_empty(self):
+ assert sanitize_diagram(123) == ''
+
+ def test_non_mermaid_fence_returns_empty(self):
+ assert sanitize_diagram('```python\nprint("hello")\n```') == ''
From 0b0c175f6a0ae73f75bfb12a0eecb440a216a891 Mon Sep 17 00:00:00 2001
From: Ryno
Date: Tue, 24 Mar 2026 23:28:46 +0200
Subject: [PATCH 4/4] perf: optimize regex compilation in patch processing
(#2263)
Hoisted `RE_HUNK_HEADER` regex compilation to the module level in `pr_agent/algo/git_patch_processing.py`. This avoids redundant compilation and internal cache lookups on every call to patch processing functions like `extend_patch`, `omit_deletion_hunks`, and `extract_hunk_lines_from_patch`.
Average time per iteration for `extend_patch` showed stable performance.
In a benchmark of 10,000 iterations, the total time was ~21.9 seconds.
Verified that all relevant unit tests pass.
Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
Co-authored-by: rynomster <6912789+rynomster@users.noreply.github.com>
---
pr_agent/algo/git_patch_processing.py | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
index 3a37e88d2c..553914e8d9 100644
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -7,6 +7,11 @@
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger
+# Optimized: Pre-compile the hunk header regex at the module level to avoid redundant compilation
+# in performance-critical patch processing functions.
+RE_HUNK_HEADER = re.compile(
+ r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
+
def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0,
patch_extra_lines_after=0, filename: str = "", new_file_str="") -> str:
@@ -65,8 +70,6 @@ def process_patch_lines(patch_str, original_file_str, patch_extra_lines_before,
is_valid_hunk = True
start1, size1, start2, size2 = -1, -1, -1, -1
- RE_HUNK_HEADER = re.compile(
- r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
try:
for i,line in enumerate(patch_lines):
if line.startswith('@@'):
@@ -238,8 +241,6 @@ def omit_deletion_hunks(patch_lines) -> str:
added_patched = []
add_hunk = False
inside_hunk = False
- RE_HUNK_HEADER = re.compile(
- r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?\ @@[ ]?(.*)")
for line in patch_lines:
if line.startswith('@@'):
@@ -341,8 +342,6 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
patch_with_lines_str = ""
patch_lines = patch.splitlines()
- RE_HUNK_HEADER = re.compile(
- r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
new_content_lines = []
old_content_lines = []
match = None
@@ -417,8 +416,6 @@ def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, s
patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
selected_lines = ""
patch_lines = patch.splitlines()
- RE_HUNK_HEADER = re.compile(
- r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
match = None
start1, size1, start2, size2 = -1, -1, -1, -1
skip_hunk = False