From 06dffb741eb72ffd3b0fa2a2ecd9a910d9e192dc Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 12 Nov 2025 23:07:53 +0000
Subject: [PATCH 1/2] refactor(classifier): consolidate duplicate methods in
 BaseClassifier

Consolidated duplicate _parse_response() and _build_prompt() methods
from OllamaClassifier and GroqClassifier into the BaseClassifier,
eliminating ~65 lines of duplicate code.

Changes:
- Moved _build_prompt() to BaseClassifier with comprehensive docstring
  - Handles prompt template formatting with character/player context
  - Subclasses can override for custom prompt building

- Moved _parse_response() to BaseClassifier with comprehensive docstring
  - Parses Dutch-format LLM responses (Classificatie/Reden/Vertrouwen/Personage)
  - Handles Classification enum, confidence clamping, and error cases
  - Subclasses can override for different response formats

- Added preflight_check() to GroqClassifier
  - Validates API key configuration
  - Tests API connectivity
  - Matches OllamaClassifier pattern

Benefits:
- DRY principle: Single implementation instead of duplicate code
- Maintainability: Bug fixes and improvements apply to all classifiers
- Extensibility: New classifiers can inherit default implementations
- Type safety: Uses Classification enum from constants module

Testing:
- All 37 existing tests pass
- Code coverage: 78%
- No breaking changes - backward compatible

Dependencies:
- Uses Classification enum from src/constants.py (Refactor #7)

Refs: Refactor #2+#3 - Consolidate Classifier Methods
---
 src/classifier.py | 268 +++++++++++++++++++++++-----------------------
 1 file changed, 135 insertions(+), 133 deletions(-)
diff --git a/src/classifier.py b/src/classifier.py
index 139735e..d7adc81 100644
--- a/src/classifier.py
+++ b/src/classifier.py
@@ -67,6 +67,110 @@ def preflight_check(self):
         """Return an iterable of PreflightIssue objects."""
         return []
 
+    def _build_prompt(
+        self,
+        prev_text: str,
+        current_text: str,
+        next_text: str,
+        character_names: List[str],
+        player_names: List[str]
+    ) -> str:
+        """
+        Build classification prompt from the template.
+
+        This default implementation uses the prompt_template attribute
+        with placeholders for char_list, player_list, prev_text,
+        current_text, and next_text.
+
+        Subclasses can override this method to customize prompt building.
+
+        Args:
+            prev_text: Text from previous segment
+            current_text: Text from current segment to classify
+            next_text: Text from next segment
+            character_names: List of character names
+            player_names: List of player names
+
+        Returns:
+            Formatted prompt string ready for LLM
+        """
+        char_list = ", ".join(character_names) if character_names else "Unknown"
+        player_list = ", ".join(player_names) if player_names else "Unknown"
+
+        return self.prompt_template.format(
+            char_list=char_list,
+            player_list=player_list,
+            prev_text=prev_text,
+            current_text=current_text,
+            next_text=next_text
+        )
+
+    def _parse_response(
+        self,
+        response: str,
+        index: int
+    ) -> ClassificationResult:
+        """
+        Parse LLM response into ClassificationResult.
+
+        This default implementation parses responses in the format:
+        - Classificatie: IC|OOC|MIXED
+        - Reden: <reasoning text>
+        - Vertrouwen: <confidence value 0.0-1.0>
+        - Personage: <character name or N/A>
+
+        The field names are language-specific (Dutch by default).
+        Subclasses can override this method to support different formats
+        or languages.
+
+        Args:
+            response: Raw response text from LLM
+            index: Segment index for logging purposes
+
+        Returns:
+            ClassificationResult with parsed values
+        """
+        classification = Classification.IN_CHARACTER
+        confidence = ConfidenceDefaults.DEFAULT
+        reasoning = "Could not parse response"
+        character = None
+
+        lines = response.strip().split('\n')
+        for line in lines:
+            line = line.strip()
+            if line.startswith("Classificatie:"):
+                class_text = line.split(":", 1)[1].strip().upper()
+                try:
+                    classification = Classification(class_text)
+                except ValueError:
+                    self.logger.warning(
+                        "Invalid classification '%s' for segment %s, defaulting to IC",
+                        class_text,
+                        index
+                    )
+                    classification = Classification.IN_CHARACTER
+            elif line.startswith("Reden:"):
+                reasoning = line.split(":", 1)[1].strip()
+            elif line.startswith("Vertrouwen:"):
+                try:
+                    conf_text = line.split(":", 1)[1].strip()
+                    confidence = float(conf_text)
+                    confidence = ConfidenceDefaults.clamp(confidence)
+                except ValueError:
+                    pass
+            elif line.startswith("Personage:"):
+                char_text = line.split(":", 1)[1].strip()
+                if char_text.upper() != "N/A":
+                    character = char_text
+
+        return ClassificationResult(
+            segment_index=index,
+            classification=classification,
+            confidence=confidence,
+            reasoning=reasoning,
+            character=character
+        )
+
 
 class OllamaClassifier(BaseClassifier):
     """IC/OOC classifier using local Ollama LLM."""
@@ -192,26 +296,6 @@ def _classify_with_context(
 
         return self._parse_response(response_text, index)
 
-    def _build_prompt(
-        self,
-        prev_text: str,
-        current_text: str,
-        next_text: str,
-        character_names: List[str],
-        player_names: List[str]
-    ) -> str:
-        """Build classification prompt from the template."""
-        char_list = ", ".join(character_names) if character_names else "Unknown"
-        player_list = ", ".join(player_names) if player_names else "Unknown"
-
-        return self.prompt_template.format(
-            char_list=char_list,
-            player_list=player_list,
-            prev_text=prev_text,
-            current_text=current_text,
-            next_text=next_text
-        )
-
     def _generate_with_retry(self, prompt: str, index: int) -> Optional[str]:
         try:
             response = self._generate_with_model(self.model, prompt)
@@ -409,53 +493,6 @@ class MEMORYSTATUSEX(ctypes.Structure):
 
         return None
 
-    def _parse_response(
-        self,
-        response: str,
-        index: int
-    ) -> ClassificationResult:
-        """Parse LLM response into ClassificationResult."""
-        classification = Classification.IN_CHARACTER
-        confidence = ConfidenceDefaults.DEFAULT
-        reasoning = "Could not parse response"
-        character = None
-
-        lines = response.strip().split('\n')
-        for line in lines:
-            line = line.strip()
-            if line.startswith("Classificatie:"):
-                class_text = line.split(":", 1)[1].strip().upper()
-                try:
-                    classification = Classification(class_text)
-                except ValueError:
-                    self.logger.warning(
-                        "Invalid classification '%s' for segment %s, defaulting to IC",
-                        class_text,
-                        index
-                    )
-                    classification = Classification.IN_CHARACTER
-            elif line.startswith("Reden:"):
-                reasoning = line.split(":", 1)[1].strip()
-            elif line.startswith("Vertrouwen:"):
-                try:
-                    conf_text = line.split(":", 1)[1].strip()
-                    confidence = float(conf_text)
-                    confidence = ConfidenceDefaults.clamp(confidence)
-                except ValueError:
-                    pass
-            elif line.startswith("Personage:"):
-                char_text = line.split(":", 1)[1].strip()
-                if char_text.upper() != "N/A":
-                    character = char_text
-
-        return ClassificationResult(
-            segment_index=index,
-            classification=classification,
-            confidence=confidence,
-            reasoning=reasoning,
-            character=character
-        )
-
 
 class GroqClassifier(BaseClassifier):
     """IC/OOC classifier using the Groq API."""
@@ -486,6 +523,37 @@ def __init__(self, api_key: str = None, model: str = "llama-3.3-70b-versatile"):
             burst_size=Config.GROQ_RATE_LIMIT_BURST,
         )
 
+    def preflight_check(self):
+        """Check that Groq API is accessible and configured."""
+        issues = []
+
+        if not self.api_key:
+            issues.append(
+                PreflightIssue(
+                    component="classifier",
+                    message="Groq API key not configured. Set GROQ_API_KEY in .env",
+                    severity="error",
+                )
+            )
+            return issues
+
+        # Test API connectivity with a simple request
+        try:
+            test_response = self.client.chat.completions.create(
+                messages=[{"role": "user", "content": "test"}],
+                model=self.model,
+            )
+        except Exception as exc:
+            issues.append(
+                PreflightIssue(
+                    component="classifier",
+                    message=f"Groq API test failed: {exc}",
+                    severity="error",
+                )
+            )
+
+        return issues
+
     def classify_segments(
         self,
         segments: List[Dict],
@@ -546,72 +614,6 @@ def _is_rate_limit_error(exc: Exception) -> bool:
         message = str(exc).lower()
         return "rate_limit" in message or "429" in message
 
-    def _build_prompt(
-        self,
-        prev_text: str,
-        current_text: str,
-        next_text: str,
-        character_names: List[str],
-        player_names: List[str]
-    ) -> str:
-        """Build classification prompt from the template."""
-        char_list = ", ".join(character_names) if character_names else "Unknown"
-        player_list = ", ".join(player_names) if player_names else "Unknown"
-
-        return self.prompt_template.format(
-            char_list=char_list,
-            player_list=player_list,
-            prev_text=prev_text,
-            current_text=current_text,
-            next_text=next_text
-        )
-
-    def _parse_response(
-        self,
-        response: str,
-        index: int
-    ) -> ClassificationResult:
-        """Parse LLM response into ClassificationResult."""
-        classification = Classification.IN_CHARACTER
-        confidence = ConfidenceDefaults.DEFAULT
-        reasoning = "Could not parse response"
-        character = None
-
-        lines = response.strip().split('\n')
-        for line in lines:
-            line = line.strip()
-            if line.startswith("Classificatie:"):
-                class_text = line.split(":", 1)[1].strip().upper()
-                try:
-                    classification = Classification(class_text)
-                except ValueError:
-                    self.logger.warning(
-                        "Invalid classification '%s' for segment %s, defaulting to IC",
-                        class_text,
-                        index
-                    )
-                    classification = Classification.IN_CHARACTER
-            elif line.startswith("Reden:"):
-                reasoning = line.split(":", 1)[1].strip()
-            elif line.startswith("Vertrouwen:"):
-                try:
-                    conf_text = line.split(":", 1)[1].strip()
-                    confidence = float(conf_text)
-                    confidence = ConfidenceDefaults.clamp(confidence)
-                except ValueError:
-                    pass
-            elif line.startswith("Personage:"):
-                char_text = line.split(":", 1)[1].strip()
-                if char_text.upper() != "N/A":
-                    character = char_text
-
-        return ClassificationResult(
-            segment_index=index,
-            classification=classification,
-            confidence=confidence,
-            reasoning=reasoning,
-            character=character
-        )
 
 class ClassifierFactory:
     """Factory to create appropriate classifier."""

From 3396041b9f93df02db7d03dc0fa8d188bebcf452 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 12 Nov 2025 23:17:59 +0000
Subject: [PATCH 2/2] fix(classifier): improve response parsing robustness and
 logging

Addressed code review feedback from gemini-code-assist:

1. Fixed high-severity multi-line parsing issue:
   - Replaced brittle line-by-line parsing with regex-based extraction
   - Used re.DOTALL flag for reasoning field to handle multi-line values
   - Prevents corruption when field keywords appear in multi-line text
   - Handles out-of-order fields gracefully

2. Fixed medium-severity silent error handling:
   - Added warning log for invalid confidence values
   - Now consistent with classification error handling
   - Improves debuggability by surfacing parsing issues

Changes:
- _parse_response() now uses regex patterns for each field
- Reden (reasoning) captures everything until next field
- More resilient to LLM output variations
- Better error visibility for debugging

Testing:
- All 37 tests still passing
- Backward compatible with existing test cases

Co-authored-by: gemini-code-assist[bot]
---
 src/classifier.py | 74 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 27 deletions(-)

diff --git a/src/classifier.py b/src/classifier.py
index d7adc81..638b082 100644
--- a/src/classifier.py
+++ b/src/classifier.py
@@ -135,33 +135,53 @@ def _parse_response(
         reasoning = "Could not parse response"
         character = None
 
-        lines = response.strip().split('\n')
-        for line in lines:
-            line = line.strip()
-            if line.startswith("Classificatie:"):
-                class_text = line.split(":", 1)[1].strip().upper()
-                try:
-                    classification = Classification(class_text)
-                except ValueError:
-                    self.logger.warning(
-                        "Invalid classification '%s' for segment %s, defaulting to IC",
-                        class_text,
-                        index
-                    )
-                    classification = Classification.IN_CHARACTER
-            elif line.startswith("Reden:"):
-                reasoning = line.split(":", 1)[1].strip()
-            elif line.startswith("Vertrouwen:"):
-                try:
-                    conf_text = line.split(":", 1)[1].strip()
-                    confidence = float(conf_text)
-                    confidence = ConfidenceDefaults.clamp(confidence)
-                except ValueError:
-                    pass
-            elif line.startswith("Personage:"):
-                char_text = line.split(":", 1)[1].strip()
-                if char_text.upper() != "N/A":
-                    character = char_text
+        # Use regex to extract fields more robustly
+        # This handles multi-line values and out-of-order fields
+        import re
+
+        # Extract classification
+        class_match = re.search(r'Classificatie:\s*(\w+)', response, re.IGNORECASE)
+        if class_match:
+            class_text = class_match.group(1).strip().upper()
+            try:
+                classification = Classification(class_text)
+            except ValueError:
+                self.logger.warning(
+                    "Invalid classification '%s' for segment %s, defaulting to IC",
+                    class_text,
+                    index
+                )
+                classification = Classification.IN_CHARACTER
+
+        # Extract reasoning - capture everything after "Reden:" until next field or end
+        reden_match = re.search(
+            r'Reden:\s*(.+?)(?=(?:Vertrouwen:|Personage:|$))',
+            response,
+            re.DOTALL | re.IGNORECASE
+        )
+        if reden_match:
+            reasoning = reden_match.group(1).strip()
+
+        # Extract confidence
+        conf_match = re.search(r'Vertrouwen:\s*([\d.]+)', response, re.IGNORECASE)
+        if conf_match:
+            try:
+                conf_text = conf_match.group(1).strip()
+                confidence = float(conf_text)
+                confidence = ConfidenceDefaults.clamp(confidence)
+            except ValueError:
+                self.logger.warning(
+                    "Invalid confidence value '%s' for segment %s, using default.",
+                    conf_text,
+                    index
+                )
+
+        # Extract character name
+        char_match = re.search(r'Personage:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
+        if char_match:
+            char_text = char_match.group(1).strip()
+            if char_text.upper() != "N/A":
+                character = char_text
 
         return ClassificationResult(
             segment_index=index,