From f0edaf6649630607c6256de4e1bf2b49c86ae84d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 13:42:01 +0000
Subject: [PATCH 1/3] Initial plan


From 7e9dae785a9632e7a4efabb63ad6dc6d73ad2014 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 13:53:25 +0000
Subject: [PATCH 2/3] fix: add dedicated BATCH_TRIAGE_SYSTEM_PROMPT and
 parameterize language in prompts

- Add BATCH_TRIAGE_SYSTEM_PROMPT separate from SYSTEM_PROMPT to avoid
  conflicting output format instructions for batch triage
- Add BATCH_TRIAGE_PROMPT with {language} parameter instead of hardcoded python
- Parameterize {language} in SUGGESTION_PROMPT, SAFETY_CHECK_PROMPT, and
  DOCUMENTATION_PROMPT
- Add evaluate_issues_batch method using the dedicated system prompt
- Update generate_suggestion and generate_documentation to accept language param
- Add comprehensive tests for batch triage functionality

Co-authored-by: omsherikar <180152315+omsherikar@users.noreply.github.com>
---
 refactron/llm/orchestrator.py  | 117 ++++++++++++++++++++-
 refactron/llm/prompts.py       |  42 +++++++-
 tests/test_llm_batch_triage.py | 180 +++++++++++++++++++++++++++++++++
 3 files changed, 330 insertions(+), 9 deletions(-)
 create mode 100644 tests/test_llm_batch_triage.py

diff --git a/refactron/llm/orchestrator.py b/refactron/llm/orchestrator.py
index f4da49d..2608384 100644
--- a/refactron/llm/orchestrator.py
+++ b/refactron/llm/orchestrator.py
@@ -5,13 +5,19 @@
 import os
 import re
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union
 
 from refactron.core.models import CodeIssue, IssueCategory, IssueLevel
 from refactron.llm.backend_client import BackendLLMClient
 from refactron.llm.client import GroqClient
 from refactron.llm.models import RefactoringSuggestion, SuggestionStatus
-from refactron.llm.prompts import DOCUMENTATION_PROMPT, SUGGESTION_PROMPT, SYSTEM_PROMPT
+from refactron.llm.prompts import (
+    BATCH_TRIAGE_PROMPT,
+    BATCH_TRIAGE_SYSTEM_PROMPT,
+    DOCUMENTATION_PROMPT,
+    SUGGESTION_PROMPT,
+    SYSTEM_PROMPT,
+)
 from refactron.llm.safety import SafetyGate
 from refactron.rag.retriever import ContextRetriever
 
@@ -43,12 +49,15 @@ def __init__(
 
         self.safety_gate = safety_gate or SafetyGate()
 
-    def generate_suggestion(self, issue: CodeIssue, original_code: str) -> RefactoringSuggestion:
+    def generate_suggestion(
+        self, issue: CodeIssue, original_code: str, language: str = "python"
+    ) -> RefactoringSuggestion:
         """Generate a refactoring suggestion for a code issue.
 
         Args:
             issue: The code issue to fix
             original_code: The failing code snippet
+            language: The programming language of the code (default: "python")
 
         Returns:
             A validated refactoring suggestion
@@ -75,6 +84,7 @@ def generate_suggestion(self, issue: CodeIssue, original_code: str) -> Refactori
             severity=issue.level.value,
             original_code=original_code,
             rag_context=rag_context,
+            language=language,
         )
 
         # 3. Call LLM
@@ -156,13 +166,14 @@ def generate_suggestion(self, issue: CodeIssue, original_code: str) -> Refactori
         return suggestion
 
     def generate_documentation(
-        self, code: str, file_path: str = "unknown"
+        self, code: str, file_path: str = "unknown", language: str = "python"
     ) -> RefactoringSuggestion:
         """Generate documentation for the provided code.
 
         Args:
             code: The code to document
             file_path: Optional file path for context
+            language: The programming language of the code (default: "python")
 
         Returns:
             A suggestion containing the documented code
@@ -189,7 +200,9 @@ def generate_documentation(
         rag_context = "\n\n".join(context_snippets) if context_snippets else "No context available."
 
         # 2. Construct Prompt
-        prompt = DOCUMENTATION_PROMPT.format(original_code=code, rag_context=rag_context)
+        prompt = DOCUMENTATION_PROMPT.format(
+            original_code=code, rag_context=rag_context, language=language
+        )
 
         # 3. Call LLM
         try:
@@ -246,6 +259,100 @@ def generate_documentation(
                 status=SuggestionStatus.FAILED,
             )
 
+    def evaluate_issues_batch(
+        self,
+        issues: List[CodeIssue],
+        source_code: str,
+        language: str = "python",
+    ) -> Dict[str, float]:
+        """Evaluate a batch of issues for a single file to suppress false positives.
+
+        Args:
+            issues: List of CodeIssues found in the file
+            source_code: The full source code of the file
+            language: The programming language of the code (default: "python")
+
+        Returns:
+            Dict mapping issue IDs ("<rule_id or 'issue'>:<line>:<index>") to confidence scores
+        """
+        if not issues:
+            return {}
+
+        # 1. Retrieve Context
+        context_snippets = []
+        if self.retriever:
+            try:
+                results = self.retriever.retrieve_similar(source_code[:1000], top_k=3)
+                context_snippets = [r.content for r in results]
+            except Exception as e:
+                logger.warning(f"Context retrieval failed: {e}")
+
+        rag_context = (
+            "\n\n".join(context_snippets) if context_snippets else "No context available."
+        )
+
+        # 2. Construct JSON for issues
+        issues_data = {}
+        for i, issue in enumerate(issues):
+            base_id = getattr(issue, "rule_id", None) or "issue"
+            line_number = getattr(issue, "line_number", None)
+            id_parts = [str(base_id)]
+            if line_number is not None:
+                id_parts.append(str(line_number))
+            id_parts.append(str(i))
+            issue_id = ":".join(id_parts)
+
+            # Ensure uniqueness in case of unexpected collisions
+            unique_id = issue_id
+            suffix = 1
+            while unique_id in issues_data:
+                suffix += 1
+                unique_id = f"{issue_id}_{suffix}"
+
+            issues_data[unique_id] = {
+                "rule_id": getattr(issue, "rule_id", None),
+                "message": issue.message,
+                "line": issue.line_number,
+                "category": (
+                    issue.category.value
+                    if hasattr(issue.category, "value")
+                    else str(issue.category)
+                ),
+                "severity": (
+                    issue.level.value if hasattr(issue.level, "value") else str(issue.level)
+                ),
+            }
+
+        # 3. Construct Prompt
+        prompt = BATCH_TRIAGE_PROMPT.format(
+            source_code=source_code,
+            rag_context=rag_context,
+            issues_json=json.dumps(issues_data, indent=2),
+            language=language,
+        )
+
+        # 4. Call LLM with dedicated batch triage system prompt
+        try:
+            response_text = self.client.generate(
+                prompt=prompt, system=BATCH_TRIAGE_SYSTEM_PROMPT, temperature=0.1
+            )
+            clean_text = self._clean_json_response(response_text)
+            data = json.loads(clean_text, strict=False)
+
+            # Ensure we return a Dict[str, float]
+            result = {}
+            for k, v in data.items():
+                try:
+                    result[str(k)] = float(v)
+                except (ValueError, TypeError):
+                    result[str(k)] = 0.5  # Fallback for parsing errors
+            return result
+
+        except Exception as e:
+            logger.error(f"Batch triage failed: {e}")
+            # Fallback: return default confidence
+            return {str(k): 0.5 for k in issues_data.keys()}
+
     def _clean_json_response(self, text: str) -> str:
         """Clean LLM response to extract JSON."""
         text = text.strip()
diff --git a/refactron/llm/prompts.py b/refactron/llm/prompts.py
index ade8031..daf1980 100644
--- a/refactron/llm/prompts.py
+++ b/refactron/llm/prompts.py
@@ -27,7 +27,7 @@
 Severity: {severity}
 
 Original Code:
-```python
+```{language}
 {original_code}
 ```
 
@@ -42,7 +42,7 @@
 SAFETY_CHECK_PROMPT = """
 Analyze the following code patch for safety risks:
 
-```python
+```{language}
 {proposed_code}
 ```
 
@@ -62,10 +62,10 @@
 """
 
 DOCUMENTATION_PROMPT = """
-Analyze the following Python code and generate a comprehensive MARKDOWN documentation file.
+Analyze the following {language} code and generate a comprehensive MARKDOWN documentation file.
 
 Original Code:
-```python
+```{language}
 {original_code}
 ```
 
@@ -92,3 +92,37 @@
 The complete Markdown documentation content including the mermaid diagram
 @@@END@@@
 """
+
+BATCH_TRIAGE_SYSTEM_PROMPT = """You are a code triage expert.
+Your goal is to evaluate code issues and determine whether each is a true positive
+(requiring fixing) or a false positive.
+
+RESPONSE FORMAT:
+You must output ONLY valid JSON.
+- Do not output markdown code blocks, just the raw JSON object.
+- The JSON must be a flat map where keys are issue IDs (strings) and values are
+  confidence scores (floats between 0.0 and 1.0).
+- A score of 0.0 means the issue is very likely a false positive.
+- A score of 1.0 means the issue is very likely a true positive requiring a fix.
+"""
+
+BATCH_TRIAGE_PROMPT = """
+Evaluate the following list of code issues found in a single file and determine
+the confidence that each is a true positive (requiring fixing) rather than a
+false positive.
+
+File Source Code:
+```{language}
+{source_code}
+```
+
+Relevant Context (RAG):
+{rag_context}
+
+Issues to evaluate:
+{issues_json}
+
+Return ONLY a JSON map where the keys are the issue IDs and the values are the
+confidence scores (float between 0.0 and 1.0).
+Do NOT return anything except the JSON object.
+"""
diff --git a/tests/test_llm_batch_triage.py b/tests/test_llm_batch_triage.py
new file mode 100644
index 0000000..db0debf
--- /dev/null
+++ b/tests/test_llm_batch_triage.py
@@ -0,0 +1,180 @@
+"""Tests for the Batched Triage & RAG Context in LLMOrchestrator."""
+
+from pathlib import Path
+from unittest.mock import MagicMock
+
+import pytest
+
+from refactron.core.models import CodeIssue, IssueCategory, IssueLevel
+from refactron.llm.orchestrator import LLMOrchestrator
+from refactron.llm.prompts import BATCH_TRIAGE_SYSTEM_PROMPT
+from refactron.rag.retriever import ContextRetriever
+
+
+@pytest.fixture
+def mock_retriever():
+    retriever = MagicMock(spec=ContextRetriever)
+    mock_result = MagicMock()
+    mock_result.content = "Some context snippet"
+    retriever.retrieve_similar.return_value = [mock_result]
+    return retriever
+
+
+@pytest.fixture
+def mock_llm_client():
+    client = MagicMock()
+    client.model = "mock-model"
+    # Provide a mock JSON response for evaluate_issues_batch
+    client.generate.return_value = """```json
+{
+  "issue:10:0": 0.85,
+  "issue:20:1": 0.12,
+  "E101:30:2": 0.95
+}
+```"""
+    return client
+
+
+def test_evaluate_issues_batch(mock_llm_client, mock_retriever):
+    """Test that batch evaluation correctly parses JSON map from the LLM."""
+    orchestrator = LLMOrchestrator(retriever=mock_retriever, llm_client=mock_llm_client)
+
+    issues = [
+        CodeIssue(
+            category=IssueCategory.COMPLEXITY,
+            level=IssueLevel.WARNING,
+            message="Too complex",
+            file_path=Path("test.py"),
+            line_number=10,
+        ),
+        CodeIssue(
+            category=IssueCategory.STYLE,
+            level=IssueLevel.INFO,
+            message="Line too long",
+            file_path=Path("test.py"),
+            line_number=20,
+        ),
+        CodeIssue(
+            category=IssueCategory.CODE_SMELL,
+            level=IssueLevel.WARNING,
+            message="Bad smell",
+            file_path=Path("test.py"),
+            line_number=30,
+            rule_id="E101",
+        ),
+    ]
+
+    source_code = "def complex_function():\n    pass\n" * 10
+
+    result = orchestrator.evaluate_issues_batch(issues, source_code)
+
+    # Check that ContextRetriever was called for RAG Context
+    mock_retriever.retrieve_similar.assert_called_once()
+    assert "def complex_function" in mock_retriever.retrieve_similar.call_args[0][0]
+
+    # Check JSON map parsing
+    assert isinstance(result, dict)
+    assert result.get("issue:10:0") == 0.85
+    assert result.get("issue:20:1") == 0.12
+    assert result.get("E101:30:2") == 0.95
+
+    # Ensure there are exactly 3 keys, one per entry in the returned mapping
+    assert len(result) == 3
+
+
+def test_evaluate_issues_batch_uses_dedicated_system_prompt(mock_llm_client, mock_retriever):
+    """Test that batch evaluation uses BATCH_TRIAGE_SYSTEM_PROMPT, not SYSTEM_PROMPT."""
+    orchestrator = LLMOrchestrator(retriever=mock_retriever, llm_client=mock_llm_client)
+
+    issues = [
+        CodeIssue(
+            category=IssueCategory.STYLE,
+            level=IssueLevel.INFO,
+            message="Style issue",
+            file_path=Path("test.py"),
+            line_number=1,
+        ),
+    ]
+
+    orchestrator.evaluate_issues_batch(issues, "x = 1")
+
+    # Verify that the system prompt used is BATCH_TRIAGE_SYSTEM_PROMPT
+    call_kwargs = mock_llm_client.generate.call_args.kwargs
+    assert call_kwargs["system"] == BATCH_TRIAGE_SYSTEM_PROMPT
+    assert "code triage expert" in call_kwargs["system"]
+    # Ensure it does NOT contain the refactoring system prompt instructions
+    assert "proposed_code" not in call_kwargs["system"]
+
+
+def test_evaluate_issues_batch_language_parameter(mock_llm_client, mock_retriever):
+    """Test that the language parameter is passed through to the prompt."""
+    orchestrator = LLMOrchestrator(retriever=mock_retriever, llm_client=mock_llm_client)
+
+    issues = [
+        CodeIssue(
+            category=IssueCategory.STYLE,
+            level=IssueLevel.INFO,
+            message="Style issue",
+            file_path=Path("test.js"),
+            line_number=1,
+        ),
+    ]
+
+    orchestrator.evaluate_issues_batch(issues, "const x = 1;", language="javascript")
+
+    # Verify the prompt contains the language
+    call_kwargs = mock_llm_client.generate.call_args.kwargs
+    assert "```javascript" in call_kwargs["prompt"]
+
+
+def test_evaluate_issues_batch_empty_issues(mock_llm_client, mock_retriever):
+    """Test batch evaluation handles empty issues correctly."""
+    orchestrator = LLMOrchestrator(retriever=mock_retriever, llm_client=mock_llm_client)
+
+    result = orchestrator.evaluate_issues_batch([], "source")
+    assert result == {}
+    mock_llm_client.generate.assert_not_called()
+
+
+def test_evaluate_issues_batch_fallback_on_error(mock_llm_client, mock_retriever):
+    """Test batch evaluation handles LLM errors using a fallback mechanism."""
+    mock_llm_client.generate.side_effect = Exception("LLM Error")
+
+    orchestrator = LLMOrchestrator(retriever=mock_retriever, llm_client=mock_llm_client)
+
+    issues = [
+        CodeIssue(
+            category=IssueCategory.STYLE,
+            level=IssueLevel.INFO,
+            message="Line too long",
+            file_path=Path("test.py"),
+            line_number=20,
+        )
+    ]
+
+    result = orchestrator.evaluate_issues_batch(issues, "source")
+
+    # It should fallback to 0.5 confidence
+    assert len(result) == 1
+    assert all(v == 0.5 for v in result.values())
+
+
+def test_evaluate_issues_batch_fallback_on_bad_json(mock_llm_client, mock_retriever):
+    """Test batch evaluation handles invalid JSON appropriately."""
+    mock_llm_client.generate.return_value = "not a json string at all"
+    orchestrator = LLMOrchestrator(retriever=mock_retriever, llm_client=mock_llm_client)
+
+    issues = [
+        CodeIssue(
+            category=IssueCategory.STYLE,
+            level=IssueLevel.INFO,
+            message="Line too long",
+            file_path=Path("test.py"),
+            line_number=20,
+        )
+    ]
+
+    result = orchestrator.evaluate_issues_batch(issues, "source")
+
+    assert len(result) == 1
+    assert all(v == 0.5 for v in result.values())

From f429e6a0415ee73419a61a751a83c041b6236422 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 13:55:48 +0000
Subject: [PATCH 3/3] fix: add warning log for issue ID collisions in
 evaluate_issues_batch

Co-authored-by: omsherikar <180152315+omsherikar@users.noreply.github.com>
---
 refactron/llm/orchestrator.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/refactron/llm/orchestrator.py b/refactron/llm/orchestrator.py
index 2608384..42072b2 100644
--- a/refactron/llm/orchestrator.py
+++ b/refactron/llm/orchestrator.py
@@ -308,6 +308,9 @@ def evaluate_issues_batch(
             while unique_id in issues_data:
                 suffix += 1
                 unique_id = f"{issue_id}_{suffix}"
+                logger.warning(
+                    f"Issue ID collision detected for '{issue_id}', using '{unique_id}'"
+                )
 
             issues_data[unique_id] = {
                 "rule_id": getattr(issue, "rule_id", None),