From cebf1c8686a164066582c38bb6f26698e5f9171f Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sat, 28 Mar 2026 12:37:25 +0000
Subject: [PATCH 1/3] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Combine=20keyword=20del?=
 =?UTF-8?q?imiters=20into=20single=20regex=20pattern?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com>
---
 src/codeweaver/engine/chunker/delimiter.py | 78 ++++++++++++----------
 1 file changed, 42 insertions(+), 36 deletions(-)

diff --git a/src/codeweaver/engine/chunker/delimiter.py b/src/codeweaver/engine/chunker/delimiter.py
index 07b00994..46a08f60 100644
--- a/src/codeweaver/engine/chunker/delimiter.py
+++ b/src/codeweaver/engine/chunker/delimiter.py
@@ -451,6 +451,9 @@ def _match_keyword_delimiters(
         # Filter out delimiters with empty start strings - they match everywhere!
         keyword_delimiters = [d for d in keyword_delimiters if d.start]
 
+        if not keyword_delimiters:
+            return matches
+
         # Define structural delimiters that can complete keywords
         # Map opening structural chars to their closing counterparts
         structural_pairs = {
@@ -459,49 +462,52 @@ def _match_keyword_delimiters(
             "=>": "",  # Arrow functions often have expression bodies
         }
 
-        for delimiter in keyword_delimiters:
-            # Find all keyword occurrences using word boundary matching
-            pattern = rf"\b{re.escape(delimiter.start)}\b"
+        # Optimization: combining multiple keyword delimiters into a single compiled regex pattern
+        # using alternation (?:...) avoids looping over keywords with individual re.finditer calls.
+        delimiter_map = {d.start: d for d in keyword_delimiters}
+        combined_pattern = rf"\b(?:{'|'.join(re.escape(d.start) for d in keyword_delimiters)})\b"
 
-            for match in re.finditer(pattern, content):
-                keyword_pos = match.start()
+        for match in re.finditer(combined_pattern, content):
+            keyword_pos = match.start()
+            matched_text = match.group(0)
+            delimiter = delimiter_map[matched_text]
 
-                # Skip if keyword is inside a string or comment
-                if self._is_inside_string_or_comment(content, keyword_pos):
-                    continue
+            # Skip if keyword is inside a string or comment
+            if self._is_inside_string_or_comment(content, keyword_pos):
+                continue
 
-                # Find the next structural opening after the keyword
-                struct_start, struct_char = self._find_next_structural_with_char(
-                    content,
-                    start=keyword_pos + len(delimiter.start),
-                    allowed=set(structural_pairs.keys()),
-                )
+            # Find the next structural opening after the keyword
+            struct_start, struct_char = self._find_next_structural_with_char(
+                content,
+                start=keyword_pos + len(delimiter.start),
+                allowed=set(structural_pairs.keys()),
+            )
 
-                if struct_start is None:
-                    continue
+            if struct_start is None:
+                continue
 
-                # Find the matching closing delimiter for the structural character
-                struct_end = self._find_matching_close(
-                    content,
-                    struct_start,
-                    struct_char or "",
-                    structural_pairs.get(cast(str, struct_char), ""),
-                )
+            # Find the matching closing delimiter for the structural character
+            struct_end = self._find_matching_close(
+                content,
+                struct_start,
+                struct_char or "",
+                structural_pairs.get(cast(str, struct_char), ""),
+            )
 
-                if struct_end is not None:
-                    # Calculate nesting level by counting parent structures
-                    nesting_level = self._calculate_nesting_level(content, keyword_pos)
+            if struct_end is not None:
+                # Calculate nesting level by counting parent structures
+                nesting_level = self._calculate_nesting_level(content, keyword_pos)
 
-                    # Create a complete match from keyword to closing structure
-                    # This represents the entire construct (e.g., function...})
-                    matches.append(
-                        DelimiterMatch(
-                            delimiter=delimiter,
-                            start_pos=keyword_pos,
-                            end_pos=struct_end,
-                            nesting_level=nesting_level,
-                        )
+                # Create a complete match from keyword to closing structure
+                # This represents the entire construct (e.g., function...})
+                matches.append(
+                    DelimiterMatch(
+                        delimiter=delimiter,
+                        start_pos=keyword_pos,
+                        end_pos=struct_end,
+                        nesting_level=nesting_level,
                     )
+                )
 
         return matches
 
@@ -1280,7 +1286,7 @@ def _load_custom_delimiters(
         patterns when merged with the full delimiter list.
 
         Args:
-            normalized_language: Snake-case normalised language identifier.
+            normalized_language: Snake-case normalized language identifier.
             language: Original language string (used for logging only).
 
         Returns:

From 33c7ffa623e3b2fb16764df057a8949c7129e564 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sat, 28 Mar 2026 19:53:09 +0000
Subject: [PATCH 2/3] Fix delimiter_map collisions for shared start strings

Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com>
---
 src/codeweaver/engine/chunker/delimiter.py | 73 ++++++++++++----------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/src/codeweaver/engine/chunker/delimiter.py b/src/codeweaver/engine/chunker/delimiter.py
index 46a08f60..e318fd5c 100644
--- a/src/codeweaver/engine/chunker/delimiter.py
+++ b/src/codeweaver/engine/chunker/delimiter.py
@@ -464,50 +464,57 @@ def _match_keyword_delimiters(
 
         # Optimization: combining multiple keyword delimiters into a single compiled regex pattern
         # using alternation (?:...) avoids looping over keywords with individual re.finditer calls.
-        delimiter_map = {d.start: d for d in keyword_delimiters}
-        combined_pattern = rf"\b(?:{'|'.join(re.escape(d.start) for d in keyword_delimiters)})\b"
+
+        # Build map of start strings to lists of delimiters to handle collisions
+        # e.g. 'type' or 'extension' being used for multiple structures
+        from collections import defaultdict
+        delimiter_map = defaultdict(list)
+        for d in keyword_delimiters:
+            delimiter_map[d.start].append(d)
+
+        combined_pattern = rf"\b(?:{'|'.join(re.escape(start) for start in delimiter_map)})\b"
 
         for match in re.finditer(combined_pattern, content):
             keyword_pos = match.start()
             matched_text = match.group(0)
-            delimiter = delimiter_map[matched_text]
 
-            # Skip if keyword is inside a string or comment
-            if self._is_inside_string_or_comment(content, keyword_pos):
-                continue
+            for delimiter in delimiter_map[matched_text]:
+                # Skip if keyword is inside a string or comment
+                if self._is_inside_string_or_comment(content, keyword_pos):
+                    continue
 
-            # Find the next structural opening after the keyword
-            struct_start, struct_char = self._find_next_structural_with_char(
-                content,
-                start=keyword_pos + len(delimiter.start),
-                allowed=set(structural_pairs.keys()),
-            )
+                # Find the next structural opening after the keyword
+                struct_start, struct_char = self._find_next_structural_with_char(
+                    content,
+                    start=keyword_pos + len(delimiter.start),
+                    allowed=set(structural_pairs.keys()),
+                )
 
-            if struct_start is None:
-                continue
+                if struct_start is None:
+                    continue
 
-            # Find the matching closing delimiter for the structural character
-            struct_end = self._find_matching_close(
-                content,
-                struct_start,
-                struct_char or "",
-                structural_pairs.get(cast(str, struct_char), ""),
-            )
+                # Find the matching closing delimiter for the structural character
+                struct_end = self._find_matching_close(
+                    content,
+                    struct_start,
+                    struct_char or "",
+                    structural_pairs.get(cast(str, struct_char), ""),
+                )
 
-            if struct_end is not None:
-                # Calculate nesting level by counting parent structures
-                nesting_level = self._calculate_nesting_level(content, keyword_pos)
+                if struct_end is not None:
+                    # Calculate nesting level by counting parent structures
+                    nesting_level = self._calculate_nesting_level(content, keyword_pos)
 
-                # Create a complete match from keyword to closing structure
-                # This represents the entire construct (e.g., function...})
-                matches.append(
-                    DelimiterMatch(
-                        delimiter=delimiter,
-                        start_pos=keyword_pos,
-                        end_pos=struct_end,
-                        nesting_level=nesting_level,
+                    # Create a complete match from keyword to closing structure
+                    # This represents the entire construct (e.g., function...})
+                    matches.append(
+                        DelimiterMatch(
+                            delimiter=delimiter,
+                            start_pos=keyword_pos,
+                            end_pos=struct_end,
+                            nesting_level=nesting_level,
+                        )
                     )
-                )
 
         return matches
 

From 1a98b3914e84058ee065909b82c62d1e6772c9e9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 28 Mar 2026 20:00:39 +0000
Subject: [PATCH 3/3] refactor: move defaultdict import to module level and
 optimize per-position lookups

- Move 'from collections import defaultdict' from inline function body to module-level imports
- Add type annotation: defaultdict[str, list[Delimiter]]
- Check _is_inside_string_or_comment once per match position (not once per delimiter)
- Call _find_next_structural_with_char and _find_matching_close once per position
- Use matches.extend() with a generator instead of an append loop
---
 src/codeweaver/engine/chunker/delimiter.py | 72 +++++++++++-----------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/codeweaver/engine/chunker/delimiter.py b/src/codeweaver/engine/chunker/delimiter.py
index e318fd5c..a13390a2 100644
--- a/src/codeweaver/engine/chunker/delimiter.py
+++ b/src/codeweaver/engine/chunker/delimiter.py
@@ -14,6 +14,7 @@
 import logging
 import re
 
+from collections import defaultdict
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, NamedTuple, cast
@@ -464,11 +465,9 @@ def _match_keyword_delimiters(
 
         # Optimization: combining multiple keyword delimiters into a single compiled regex pattern
         # using alternation (?:...) avoids looping over keywords with individual re.finditer calls.
-
-        # Build map of start strings to lists of delimiters to handle collisions
-        # e.g. 'type' or 'extension' being used for multiple structures
-        from collections import defaultdict
-        delimiter_map = defaultdict(list)
+        # Use defaultdict(list) to support multiple delimiters sharing the same start string
+        # (e.g., "type" matches both STRUCT and TYPE_ALIAS patterns).
+        delimiter_map: defaultdict[str, list[Delimiter]] = defaultdict(list)
         for d in keyword_delimiters:
             delimiter_map[d.start].append(d)
 
@@ -478,43 +477,44 @@ def _match_keyword_delimiters(
             keyword_pos = match.start()
             matched_text = match.group(0)
 
-            for delimiter in delimiter_map[matched_text]:
-                # Skip if keyword is inside a string or comment
-                if self._is_inside_string_or_comment(content, keyword_pos):
-                    continue
+            # Skip if keyword is inside a string or comment (same position for all delimiters)
+            if self._is_inside_string_or_comment(content, keyword_pos):
+                continue
 
-                # Find the next structural opening after the keyword
-                struct_start, struct_char = self._find_next_structural_with_char(
-                    content,
-                    start=keyword_pos + len(delimiter.start),
-                    allowed=set(structural_pairs.keys()),
-                )
+            # Find the next structural opening after the keyword. The offset depends only on
+            # the matched start string, so one search serves every delimiter that shares it.
+            struct_start, struct_char = self._find_next_structural_with_char(
+                content,
+                start=keyword_pos + len(matched_text),
+                allowed=set(structural_pairs.keys()),
+            )
 
-                if struct_start is None:
-                    continue
+            if struct_start is None:
+                continue
 
-                # Find the matching closing delimiter for the structural character
-                struct_end = self._find_matching_close(
-                    content,
-                    struct_start,
-                    struct_char or "",
-                    structural_pairs.get(cast(str, struct_char), ""),
-                )
+            # Find the matching closing delimiter for the structural character
+            struct_end = self._find_matching_close(
+                content,
+                struct_start,
+                struct_char or "",
+                structural_pairs.get(cast(str, struct_char), ""),
+            )
 
-                if struct_end is not None:
-                    # Calculate nesting level by counting parent structures
-                    nesting_level = self._calculate_nesting_level(content, keyword_pos)
+            if struct_end is not None:
+                # Calculate nesting level by counting parent structures
+                nesting_level = self._calculate_nesting_level(content, keyword_pos)
 
-                    # Create a complete match from keyword to closing structure
-                    # This represents the entire construct (e.g., function...})
-                    matches.append(
-                        DelimiterMatch(
-                            delimiter=delimiter,
-                            start_pos=keyword_pos,
-                            end_pos=struct_end,
-                            nesting_level=nesting_level,
-                        )
+                # Create a complete match for every delimiter that shares this start string
+                # (e.g., both STRUCT and TYPE_ALIAS for "type")
+                matches.extend(
+                    DelimiterMatch(
+                        delimiter=delimiter,
+                        start_pos=keyword_pos,
+                        end_pos=struct_end,
+                        nesting_level=nesting_level,
                     )
+                    for delimiter in delimiter_map[matched_text]
+                )
 
         return matches