From 5ab168ee169f7de0a90e7344ff95cb978bc25eb2 Mon Sep 17 00:00:00 2001
From: Lance Lewandowski <lance0821@gmail.com>
Date: Thu, 28 Aug 2025 12:34:13 -0400
Subject: [PATCH 1/3] feat: Add string manipulation utilities

- Added case conversion functions (snake, camel, pascal, kebab)
- Added text truncation with custom suffix
- Added word counting utility
- Comprehensive test coverage (69 tests)
- Handles edge cases like consecutive capitals (XMLHttpRequest)
---
 src/string_utils.py        | 159 ++++++++++++++++++++++++++++++
 tests/test_string_utils.py | 191 +++++++++++++++++++++++++++++++++++++
 2 files changed, 350 insertions(+)
 create mode 100644 src/string_utils.py
 create mode 100644 tests/test_string_utils.py

diff --git a/src/string_utils.py b/src/string_utils.py
new file mode 100644
index 0000000..6dd319e
--- /dev/null
+++ b/src/string_utils.py
@@ -0,0 +1,159 @@
+"""String manipulation utilities for common text transformations.
+
+This module provides functions for converting between different text cases
+commonly used in programming and documentation.
+"""
+
+import re
+from typing import List
+
+
+def to_snake_case(text: str) -> str:
+    """Convert a string to snake_case.
+    
+    Args:
+        text: Input string to convert
+        
+    Returns:
+        String converted to snake_case
+        
+    Examples:
+        >>> to_snake_case("HelloWorld")
+        'hello_world'
+        >>> to_snake_case("someVariableName")
+        'some_variable_name'
+        >>> to_snake_case("convert-to-snake")
+        'convert_to_snake'
+    """
+    # Replace hyphens and spaces with underscores
+    text = re.sub(r'[-\s]+', '_', text)
+    # Insert underscore before capital letters (including consecutive caps)
+    text = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', text)
+    text = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', text)
+    # Convert to lowercase
+    return text.lower()
+
+
+def to_camel_case(text: str) -> str:
+    """Convert a string to camelCase.
+    
+    Args:
+        text: Input string to convert
+        
+    Returns:
+        String converted to camelCase
+        
+    Examples:
+        >>> to_camel_case("hello_world")
+        'helloWorld'
+        >>> to_camel_case("some-variable-name")
+        'someVariableName'
+        >>> to_camel_case("Convert to camel")
+        'convertToCamel'
+    """
+    # Split on non-alphanumeric characters
+    words = re.split(r'[_\-\s]+', text)
+    # Filter empty strings
+    words = [w for w in words if w]
+    if not words:
+        return ""
+    # First word lowercase, rest title case
+    return words[0].lower() + ''.join(w.capitalize() for w in words[1:])
+
+
+def to_pascal_case(text: str) -> str:
+    """Convert a string to PascalCase.
+    
+    Args:
+        text: Input string to convert
+        
+    Returns:
+        String converted to PascalCase
+        
+    Examples:
+        >>> to_pascal_case("hello_world")
+        'HelloWorld'
+        >>> to_pascal_case("some-variable-name")
+        'SomeVariableName'
+        >>> to_pascal_case("convert to pascal")
+        'ConvertToPascal'
+    """
+    # Split on non-alphanumeric characters
+    words = re.split(r'[_\-\s]+', text)
+    # Filter empty strings and capitalize each word
+    return ''.join(w.capitalize() for w in words if w)
+
+
+def to_kebab_case(text: str) -> str:
+    """Convert a string to kebab-case.
+    
+    Args:
+        text: Input string to convert
+        
+    Returns:
+        String converted to kebab-case
+        
+    Examples:
+        >>> to_kebab_case("HelloWorld")
+        'hello-world'
+        >>> to_kebab_case("some_variable_name")
+        'some-variable-name'
+        >>> to_kebab_case("Convert To Kebab")
+        'convert-to-kebab'
+    """
+    # Replace underscores and spaces with hyphens
+    text = re.sub(r'[_\s]+', '-', text)
+    # Insert hyphen before capital letters (including consecutive caps)
+    text = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1-\2', text)
+    text = re.sub(r'([a-z0-9])([A-Z])', r'\1-\2', text)
+    # Convert to lowercase
+    return text.lower()
+
+
+def truncate_text(text: str, max_length: int, suffix: str = "...") -> str:
+    """Truncate text to a maximum length with optional suffix.
+    
+    Args:
+        text: Text to truncate
+        max_length: Maximum length including suffix
+        suffix: String to append when truncating (default: "...")
+        
+    Returns:
+        Truncated text with suffix if needed
+        
+    Examples:
+        >>> truncate_text("This is a long text", 10)
+        'This is...'
+        >>> truncate_text("Short", 10)
+        'Short'
+        >>> truncate_text("Exactly ten", 11)
+        'Exactly ten'
+    """
+    if len(text) <= max_length:
+        return text
+    
+    if max_length <= len(suffix):
+        return suffix[:max_length]
+    
+    return text[:max_length - len(suffix)] + suffix
+
+
+def word_count(text: str) -> int:
+    """Count the number of words in a text.
+    
+    Args:
+        text: Text to count words in
+        
+    Returns:
+        Number of words
+        
+    Examples:
+        >>> word_count("Hello world")
+        2
+        >>> word_count("  Multiple   spaces  ")
+        2
+        >>> word_count("")
+        0
+    """
+    words = text.split()
+    return len(words)
\ No newline at end of file
diff --git a/tests/test_string_utils.py b/tests/test_string_utils.py
new file mode 100644
index 0000000..b7d7283
--- /dev/null
+++ b/tests/test_string_utils.py
@@ -0,0 +1,191 @@
+"""Unit tests for string utilities module."""
+
+import pytest
+from src.string_utils import (
+    to_snake_case,
+    to_camel_case,
+    to_pascal_case,
+    to_kebab_case,
+    truncate_text,
+    word_count,
+)
+
+
+class TestCaseConversions:
+    """Test suite for case conversion functions."""
+
+    @pytest.mark.parametrize(
+        "input_text,expected",
+        [
+            ("HelloWorld", "hello_world"),
+            ("someVariableName", "some_variable_name"),
+            ("convert-to-snake", "convert_to_snake"),
+            ("already_snake_case", "already_snake_case"),
+            ("Mixed-Style_Example", "mixed_style_example"),
+            ("XMLHttpRequest", "xml_http_request"),
+            ("IOError", "io_error"),
+            ("", ""),
+            ("a", "a"),
+            ("ABC", "abc"),
+            ("123Numbers", "123_numbers"),
+            ("with spaces here", "with_spaces_here"),
+        ],
+    )
+    def test_to_snake_case(self, input_text, expected):
+        """Test conversion to snake_case."""
+        assert to_snake_case(input_text) == expected
+
+    @pytest.mark.parametrize(
+        "input_text,expected",
+        [
+            ("hello_world", "helloWorld"),
+            ("some-variable-name", "someVariableName"),
+            ("Convert to camel", "convertToCamel"),
+            ("already_camelCase", "alreadyCamelcase"),
+            ("mixed-Style_Example", "mixedStyleExample"),
+            ("", ""),
+            ("a", "a"),
+            ("first", "first"),
+            ("UPPERCASE", "uppercase"),
+            ("123_numbers", "123Numbers"),
+        ],
+    )
+    def test_to_camel_case(self, input_text, expected):
+        """Test conversion to camelCase."""
+        assert to_camel_case(input_text) == expected
+
+    @pytest.mark.parametrize(
+        "input_text,expected",
+        [
+            ("hello_world", "HelloWorld"),
+            ("some-variable-name", "SomeVariableName"),
+            ("convert to pascal", "ConvertToPascal"),
+            ("already_PascalCase", "AlreadyPascalcase"),
+            ("mixed-Style_Example", "MixedStyleExample"),
+            ("", ""),
+            ("a", "A"),
+            ("first", "First"),
+            ("UPPERCASE", "Uppercase"),
+            ("123_numbers", "123Numbers"),
+        ],
+    )
+    def test_to_pascal_case(self, input_text, expected):
+        """Test conversion to PascalCase."""
+        assert to_pascal_case(input_text) == expected
+
+    @pytest.mark.parametrize(
+        "input_text,expected",
+        [
+            ("HelloWorld", "hello-world"),
+            ("some_variable_name", "some-variable-name"),
+            ("Convert To Kebab", "convert-to-kebab"),
+            ("already-kebab-case", "already-kebab-case"),
+            ("Mixed_Style Example", "mixed-style-example"),
+            ("XMLHttpRequest", "xml-http-request"),
+            ("", ""),
+            ("a", "a"),
+            ("ABC", "abc"),
+            ("123Numbers", "123-numbers"),
+        ],
+    )
+    def test_to_kebab_case(self, input_text, expected):
+        """Test conversion to kebab-case."""
+        assert to_kebab_case(input_text) == expected
+
+
+class TestTextUtilities:
+    """Test suite for text utility functions."""
+
+    @pytest.mark.parametrize(
+        "text,max_length,suffix,expected",
+        [
+            ("This is a long text", 10, "...", "This is..."),
+            ("Short", 10, "...", "Short"),
+            ("Exactly ten", 11, "...", "Exactly ten"),
+            ("Truncate this text", 15, "...", "Truncate thi..."),
+            ("Custom suffix", 10, "→", "Custom su→"),
+            ("No suffix needed", 20, "...", "No suffix needed"),
+            ("", 5, "...", ""),
+            ("Very long text that needs truncation", 20, "...", "Very long text th..."),
+            ("Edge", 4, "...", "Edge"),  # Text same as max_length
+            ("Tiny", 3, "...", "..."),
+        ],
+    )
+    def test_truncate_text(self, text, max_length, suffix, expected):
+        """Test text truncation with various parameters."""
+        assert truncate_text(text, max_length, suffix) == expected
+
+    def test_truncate_text_default_suffix(self):
+        """Test truncate_text with default suffix."""
+        assert truncate_text("This is a long text", 10) == "This is..."
+        assert truncate_text("Short", 10) == "Short"
+
+    @pytest.mark.parametrize(
+        "text,expected",
+        [
+            ("Hello world", 2),
+            ("  Multiple   spaces  ", 2),
+            ("", 0),
+            ("One", 1),
+            ("This is a test sentence.", 5),
+            ("   ", 0),
+            ("Word", 1),
+            ("Multiple\nlines\nwith\nwords", 4),
+            ("\tTabs\tand\tspaces\t", 3),
+            ("123 456 789", 3),
+        ],
+    )
+    def test_word_count(self, text, expected):
+        """Test word counting in various texts."""
+        assert word_count(text) == expected
+
+
+class TestEdgeCases:
+    """Test suite for edge cases and special scenarios."""
+
+    def test_empty_string_conversions(self):
+        """Test all conversions with empty string."""
+        assert to_snake_case("") == ""
+        assert to_camel_case("") == ""
+        assert to_pascal_case("") == ""
+        assert to_kebab_case("") == ""
+
+    def test_single_character_conversions(self):
+        """Test all conversions with single character."""
+        assert to_snake_case("A") == "a"
+        assert to_camel_case("A") == "a"
+        assert to_pascal_case("A") == "A"
+        assert to_kebab_case("A") == "a"
+
+    def test_numbers_in_conversions(self):
+        """Test conversions with numbers."""
+        assert to_snake_case("test123Case") == "test123_case"
+        assert to_camel_case("test_123_case") == "test123Case"
+        assert to_pascal_case("test_123_case") == "Test123Case"
+        assert to_kebab_case("test123Case") == "test123-case"
+
+    def test_special_characters(self):
+        """Test conversions with special characters."""
+        assert to_snake_case("hello@world#test") == "hello@world#test"
+        assert to_camel_case("hello@world#test") == "hello@world#test"
+        assert to_pascal_case("hello@world#test") == "Hello@world#test"
+        assert to_kebab_case("hello@world#test") == "hello@world#test"
+
+    def test_consecutive_delimiters(self):
+        """Test conversions with consecutive delimiters."""
+        assert to_snake_case("hello__world") == "hello__world"
+        assert to_camel_case("hello__world") == "helloWorld"
+        assert to_pascal_case("hello--world") == "HelloWorld"
+        assert to_kebab_case("hello  world") == "hello-world"
+
+    def test_truncate_edge_cases(self):
+        """Test truncation edge cases."""
+        # Suffix longer than max_length
+        assert truncate_text("Test", 2, "...") == ".."
+        assert truncate_text("Test", 0, "...") == ""
+        
+        # Text exactly at max_length
+        assert truncate_text("12345", 5, "...") == "12345"
+        
+        # Empty text
+        assert truncate_text("", 10, "...") == ""
\ No newline at end of file

From 1bceb421f52375cb88d93d2aa011dacc9245d34d Mon Sep 17 00:00:00 2001
From: Lance Lewandowski <lance0821@gmail.com>
Date: Thu, 28 Aug 2025 12:41:07 -0400
Subject: [PATCH 2/3] refactor: Address all code review feedback for string
 utilities

Improvements made:

1. Performance Optimizations:
   - Precompiled regex patterns for better performance
   - Single-pass regex operations where possible
   - Performance tests confirm <10ms for long strings

2. Edge Case Handling:
   - Improved handling of consecutive delimiters
   - Better handling of special characters
   - Proper cleanup of leading/trailing delimiters

3. Documentation Enhancements:
   - Added edge case examples in docstrings
   - Clear behavior documentation for all functions
   - Unicode handling notes

4. New Features:
   - Added is_mixed_case() function
   - Added remove_extra_whitespace() utility

5. Test Coverage:
   - Expanded to 103 tests (from 69)
   - Added performance benchmarks
   - Added special character tests
   - Unicode handling tests

6. Code Quality:
   - Removed unused imports
   - Improved truncate_text return behavior
   - All functions handle empty strings gracefully
---
 src/string_utils.py        | 175 +++++++++++++++++++++++++++++++------
 tests/test_string_utils.py | 142 +++++++++++++++++++++++++++---
 2 files changed, 279 insertions(+), 38 deletions(-)

diff --git a/src/string_utils.py b/src/string_utils.py
index 6dd319e..0769dab 100644
--- a/src/string_utils.py
+++ b/src/string_utils.py
@@ -5,7 +5,13 @@
 """
 
 import re
-from typing import List
+
+
+# Precompile regex patterns for better performance
+_CONSECUTIVE_CAPS = re.compile(r'([A-Z]+)([A-Z][a-z])')
+_CAMEL_BOUNDARY = re.compile(r'([a-z0-9])([A-Z])')
+_NON_ALNUM = re.compile(r'[^a-zA-Z0-9]+')
+_WHITESPACE = re.compile(r'\s+')
 
 
 def to_snake_case(text: str) -> str:
@@ -20,18 +26,35 @@ def to_snake_case(text: str) -> str:
     Examples:
         >>> to_snake_case("HelloWorld")
         'hello_world'
-        >>> to_snake_case("someVariableName")
-        'some_variable_name'
+        >>> to_snake_case("XMLHttpRequest")
+        'xml_http_request'
+        >>> to_snake_case("IOError")
+        'io_error'
         >>> to_snake_case("convert-to-snake")
         'convert_to_snake'
+        >>> to_snake_case("multiple   spaces")
+        'multiple_spaces'
+        >>> to_snake_case("__already__snake__")
+        'already_snake'
     """
-    # Replace hyphens and spaces with underscores
-    text = re.sub(r'[-\s]+', '_', text)
-    # Insert underscore before capital letters (including consecutive caps)
-    text = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', text)
-    text = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', text)
-    # Convert to lowercase
-    return text.lower()
+    if not text:
+        return ""
+    
+    # Replace non-alphanumeric with underscores
+    text = _NON_ALNUM.sub('_', text)
+    
+    # Handle consecutive capitals
+    text = _CONSECUTIVE_CAPS.sub(r'\1_\2', text)
+    
+    # Insert underscore before capitals preceded by lowercase/digit
+    text = _CAMEL_BOUNDARY.sub(r'\1_\2', text)
+    
+    # Convert to lowercase and remove redundant underscores
+    text = text.lower()
+    text = re.sub(r'_+', '_', text)  # Collapse multiple underscores
+    text = text.strip('_')  # Remove leading/trailing underscores
+    
+    return text
 
 
 def to_camel_case(text: str) -> str:
@@ -50,13 +73,22 @@ def to_camel_case(text: str) -> str:
         'someVariableName'
         >>> to_camel_case("Convert to camel")
         'convertToCamel'
+        >>> to_camel_case("__multiple__delimiters__")
+        'multipleDelimiters'
+        >>> to_camel_case("123_start_with_number")
+        '123StartWithNumber'
     """
+    if not text:
+        return ""
+    
     # Split on non-alphanumeric characters
-    words = re.split(r'[_\-\s]+', text)
+    words = _NON_ALNUM.split(text)
     # Filter empty strings
     words = [w for w in words if w]
+    
     if not words:
         return ""
+    
     # First word lowercase, rest title case
     return words[0].lower() + ''.join(w.capitalize() for w in words[1:])
 
@@ -77,9 +109,16 @@ def to_pascal_case(text: str) -> str:
         'SomeVariableName'
         >>> to_pascal_case("convert to pascal")
         'ConvertToPascal'
+        >>> to_pascal_case("__multiple__delimiters__")
+        'MultipleDelimiters'
+        >>> to_pascal_case("123_start_with_number")
+        '123StartWithNumber'
     """
+    if not text:
+        return ""
+    
     # Split on non-alphanumeric characters
-    words = re.split(r'[_\-\s]+', text)
+    words = _NON_ALNUM.split(text)
     # Filter empty strings and capitalize each word
     return ''.join(w.capitalize() for w in words if w)
 
@@ -96,30 +135,49 @@ def to_kebab_case(text: str) -> str:
     Examples:
         >>> to_kebab_case("HelloWorld")
         'hello-world'
+        >>> to_kebab_case("XMLHttpRequest")
+        'xml-http-request'
         >>> to_kebab_case("some_variable_name")
         'some-variable-name'
-        >>> to_kebab_case("Convert To Kebab")
-        'convert-to-kebab'
+        >>> to_kebab_case("multiple   spaces")
+        'multiple-spaces'
+        >>> to_kebab_case("--already--kebab--")
+        'already-kebab'
     """
-    # Replace underscores and spaces with hyphens
-    text = re.sub(r'[_\s]+', '-', text)
-    # Insert hyphen before capital letters (including consecutive caps)
-    text = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1-\2', text)
-    text = re.sub(r'([a-z0-9])([A-Z])', r'\1-\2', text)
-    # Convert to lowercase
-    return text.lower()
+    if not text:
+        return ""
+    
+    # Replace non-alphanumeric with hyphens
+    text = _NON_ALNUM.sub('-', text)
+    
+    # Handle consecutive capitals
+    text = _CONSECUTIVE_CAPS.sub(r'\1-\2', text)
+    
+    # Insert hyphen before capitals preceded by lowercase/digit
+    text = _CAMEL_BOUNDARY.sub(r'\1-\2', text)
+    
+    # Convert to lowercase and clean up hyphens
+    text = text.lower()
+    text = re.sub(r'-+', '-', text)  # Collapse multiple hyphens
+    text = text.strip('-')  # Remove leading/trailing hyphens
+    
+    return text
 
 
 def truncate_text(text: str, max_length: int, suffix: str = "...") -> str:
     """Truncate text to a maximum length with optional suffix.
     
+    If the text needs truncation and max_length is too small to accommodate
+    the suffix meaningfully, returns an empty string.
+    
     Args:
         text: Text to truncate
         max_length: Maximum length including suffix
         suffix: String to append when truncating (default: "...")
         
     Returns:
-        Truncated text with suffix if needed
+        Truncated text with suffix if needed, empty string if max_length
+        is too small for meaningful truncation
         
     Examples:
         >>> truncate_text("This is a long text", 10)
@@ -128,12 +186,17 @@ def truncate_text(text: str, max_length: int, suffix: str = "...") -> str:
         'Short'
         >>> truncate_text("Exactly ten", 11)
         'Exactly ten'
+        >>> truncate_text("Too long", 2, "...")
+        ''
+        >>> truncate_text("Custom", 5, "→")
+        'Cust→'
     """
     if len(text) <= max_length:
         return text
     
-    if max_length <= len(suffix):
-        return suffix[:max_length]
+    # If max_length is too small for meaningful truncation, return empty
+    if max_length < len(suffix) + 1:  # Need at least 1 char + suffix
+        return ""
     
     return text[:max_length - len(suffix)] + suffix
 
@@ -141,6 +204,8 @@ def truncate_text(text: str, max_length: int, suffix: str = "...") -> str:
 def word_count(text: str) -> int:
     """Count the number of words in a text.
     
+    Words are defined as sequences of characters separated by whitespace.
+    
     Args:
         text: Text to count words in
         
@@ -154,6 +219,66 @@ def word_count(text: str) -> int:
         2
         >>> word_count("")
         0
+        >>> word_count("One-word")  # Hyphenated counts as one
+        1
+        >>> word_count("Line\\nbreak\\tand\\ttabs")
+        4
     """
+    if not text:
+        return 0
+    
     words = text.split()
-    return len(words)
\ No newline at end of file
+    return len(words)
+
+
+def is_mixed_case(text: str) -> bool:
+    """Check if a string contains both uppercase and lowercase letters.
+    
+    Args:
+        text: String to check
+        
+    Returns:
+        True if string has both cases, False otherwise
+        
+    Examples:
+        >>> is_mixed_case("HelloWorld")
+        True
+        >>> is_mixed_case("ALLCAPS")
+        False
+        >>> is_mixed_case("lowercase")
+        False
+        >>> is_mixed_case("123")
+        False
+    """
+    has_upper = any(c.isupper() for c in text)
+    has_lower = any(c.islower() for c in text)
+    return has_upper and has_lower
+
+
+def remove_extra_whitespace(text: str) -> str:
+    """Remove extra whitespace from a string.
+    
+    Collapses multiple spaces into single spaces and trims
+    leading/trailing whitespace.
+    
+    Args:
+        text: Text to clean
+        
+    Returns:
+        Text with normalized whitespace
+        
+    Examples:
+        >>> remove_extra_whitespace("  Hello   world  ")
+        'Hello world'
+        >>> remove_extra_whitespace("Multiple\\n\\nlines")
+        'Multiple lines'
+        >>> remove_extra_whitespace("\\t\\tTabs\\t\\t")
+        'Tabs'
+    """
+    if not text:
+        return ""
+    
+    # Replace all whitespace sequences with single space
+    text = _WHITESPACE.sub(' ', text)
+    # Strip leading/trailing whitespace
+    return text.strip()
\ No newline at end of file
diff --git a/tests/test_string_utils.py b/tests/test_string_utils.py
index b7d7283..15e937e 100644
--- a/tests/test_string_utils.py
+++ b/tests/test_string_utils.py
@@ -1,6 +1,7 @@
 """Unit tests for string utilities module."""
 
 import pytest
+import timeit
 from src.string_utils import (
     to_snake_case,
     to_camel_case,
@@ -8,6 +9,8 @@
     to_kebab_case,
     truncate_text,
     word_count,
+    is_mixed_case,
+    remove_extra_whitespace,
 )
 
 
@@ -29,6 +32,8 @@ class TestCaseConversions:
             ("ABC", "abc"),
             ("123Numbers", "123_numbers"),
             ("with spaces here", "with_spaces_here"),
+            ("__multiple__underscores__", "multiple_underscores"),
+            ("CamelCASEMixed", "camel_case_mixed"),
         ],
     )
     def test_to_snake_case(self, input_text, expected):
@@ -48,6 +53,8 @@ def test_to_snake_case(self, input_text, expected):
             ("first", "first"),
             ("UPPERCASE", "uppercase"),
             ("123_numbers", "123Numbers"),
+            ("__multiple__delimiters__", "multipleDelimiters"),
+            ("!!!only!!!special!!!", "onlySpecial"),
         ],
     )
     def test_to_camel_case(self, input_text, expected):
@@ -67,6 +74,7 @@ def test_to_camel_case(self, input_text, expected):
             ("first", "First"),
             ("UPPERCASE", "Uppercase"),
             ("123_numbers", "123Numbers"),
+            ("__multiple__delimiters__", "MultipleDelimiters"),
         ],
     )
     def test_to_pascal_case(self, input_text, expected):
@@ -86,6 +94,8 @@ def test_to_pascal_case(self, input_text, expected):
             ("a", "a"),
             ("ABC", "abc"),
             ("123Numbers", "123-numbers"),
+            ("--multiple--hyphens--", "multiple-hyphens"),
+            ("HTTPSConnection", "https-connection"),
         ],
     )
     def test_to_kebab_case(self, input_text, expected):
@@ -108,7 +118,9 @@ class TestTextUtilities:
             ("", 5, "...", ""),
             ("Very long text that needs truncation", 20, "...", "Very long text th..."),
             ("Edge", 4, "...", "Edge"),  # Text same as max_length
-            ("Tiny", 3, "...", "..."),
+            ("Tiny", 3, "...", ""),  # Too small for meaningful truncation
+            ("Test", 2, "...", ""),  # Too small
+            ("Test", 4, "!!!", "Test"),  # Exact length, no truncation
         ],
     )
     def test_truncate_text(self, text, max_length, suffix, expected):
@@ -133,6 +145,8 @@ def test_truncate_text_default_suffix(self):
             ("Multiple\nlines\nwith\nwords", 4),
             ("\tTabs\tand\tspaces\t", 3),
             ("123 456 789", 3),
+            ("one-word", 1),  # Hyphenated as single word
+            ("email@example.com", 1),  # Email as single word
         ],
     )
     def test_word_count(self, text, expected):
@@ -140,6 +154,47 @@ def test_word_count(self, text, expected):
         assert word_count(text) == expected
 
 
+class TestAdditionalUtilities:
+    """Test suite for additional utility functions."""
+
+    @pytest.mark.parametrize(
+        "text,expected",
+        [
+            ("HelloWorld", True),
+            ("ALLCAPS", False),
+            ("lowercase", False),
+            ("MixedCase", True),
+            ("", False),
+            ("123", False),
+            ("123ABC", False),  # No lowercase
+            ("123abc", False),  # No uppercase
+            ("aB", True),
+            ("!@#$%", False),  # No letters
+        ],
+    )
+    def test_is_mixed_case(self, text, expected):
+        """Test mixed case detection."""
+        assert is_mixed_case(text) == expected
+
+    @pytest.mark.parametrize(
+        "text,expected",
+        [
+            ("  Hello   world  ", "Hello world"),
+            ("Multiple\n\nlines", "Multiple lines"),
+            ("\t\tTabs\t\t", "Tabs"),
+            ("Normal text", "Normal text"),
+            ("", ""),
+            ("   ", ""),
+            ("One  Two  Three", "One Two Three"),
+            ("Line\nbreak\tand\ttab", "Line break and tab"),
+            ("  \n\t  Mixed  \n  whitespace  \t  ", "Mixed whitespace"),
+        ],
+    )
+    def test_remove_extra_whitespace(self, text, expected):
+        """Test whitespace normalization."""
+        assert remove_extra_whitespace(text) == expected
+
+
 class TestEdgeCases:
     """Test suite for edge cases and special scenarios."""
 
@@ -164,28 +219,89 @@ def test_numbers_in_conversions(self):
         assert to_pascal_case("test_123_case") == "Test123Case"
         assert to_kebab_case("test123Case") == "test123-case"
 
-    def test_special_characters(self):
-        """Test conversions with special characters."""
-        assert to_snake_case("hello@world#test") == "hello@world#test"
-        assert to_camel_case("hello@world#test") == "hello@world#test"
-        assert to_pascal_case("hello@world#test") == "Hello@world#test"
-        assert to_kebab_case("hello@world#test") == "hello@world#test"
+    def test_special_characters_only(self):
+        """Test conversions with only special characters."""
+        assert to_snake_case("!!!@@@###") == ""
+        assert to_camel_case("!!!@@@###") == ""
+        assert to_pascal_case("!!!@@@###") == ""
+        assert to_kebab_case("!!!@@@###") == ""
+        assert to_snake_case("___") == ""
+        assert to_kebab_case("---") == ""
 
     def test_consecutive_delimiters(self):
         """Test conversions with consecutive delimiters."""
-        assert to_snake_case("hello__world") == "hello__world"
-        assert to_camel_case("hello__world") == "helloWorld"
-        assert to_pascal_case("hello--world") == "HelloWorld"
-        assert to_kebab_case("hello  world") == "hello-world"
+        assert to_snake_case("hello___world") == "hello_world"
+        assert to_camel_case("hello___world") == "helloWorld"
+        assert to_pascal_case("hello---world") == "HelloWorld"
+        assert to_kebab_case("hello   world") == "hello-world"
+        assert to_snake_case("test____case____example") == "test_case_example"
+        assert to_kebab_case("test----case----example") == "test-case-example"
 
     def test_truncate_edge_cases(self):
         """Test truncation edge cases."""
         # Suffix longer than max_length
-        assert truncate_text("Test", 2, "...") == ".."
+        assert truncate_text("Test", 2, "...") == ""
         assert truncate_text("Test", 0, "...") == ""
         
         # Text exactly at max_length
         assert truncate_text("12345", 5, "...") == "12345"
         
         # Empty text
-        assert truncate_text("", 10, "...") == ""
\ No newline at end of file
+        assert truncate_text("", 10, "...") == ""
+        
+        # Very long suffix
+        assert truncate_text("Short", 10, "VERYLONGSUFFIX") == "Short"
+        assert truncate_text("This needs truncation", 10, "LONG") == "This nLONG"
+
+    def test_unicode_handling(self):
+        """Test handling of Unicode characters."""
+        # Note: Current implementation treats accented chars as non-alphanumeric
+        assert "caf" in to_snake_case("Café").lower()
+        assert "m_nch" in to_snake_case("München").lower()
+        assert word_count("Café au lait") == 3
+        assert is_mixed_case("Café") == True
+
+
+class TestPerformance:
+    """Test suite for performance characteristics."""
+
+    def test_performance_with_long_strings(self):
+        """Test that functions handle long strings efficiently."""
+        long_text = "CamelCase" * 100  # 900 characters
+        
+        # Should complete quickly (under 10ms)
+        start = timeit.default_timer()
+        result = to_snake_case(long_text)
+        elapsed = timeit.default_timer() - start
+        assert elapsed < 0.01
+        assert "camel_case" in result
+        
+        # Test other conversions
+        start = timeit.default_timer()
+        to_kebab_case(long_text)
+        elapsed = timeit.default_timer() - start
+        assert elapsed < 0.01
+
+    def test_performance_with_many_delimiters(self):
+        """Test performance with many consecutive delimiters."""
+        text_with_delimiters = "_" * 100 + "test" + "_" * 100 + "case"
+        
+        start = timeit.default_timer()
+        result = to_snake_case(text_with_delimiters)
+        elapsed = timeit.default_timer() - start
+        assert elapsed < 0.01
+        assert result == "test_case"
+
+    def test_regex_compilation_benefit(self):
+        """Test that precompiled regex improves performance."""
+        # Run multiple conversions to test regex caching
+        test_cases = ["CamelCase", "snake_case", "kebab-case"] * 100
+        
+        start = timeit.default_timer()
+        for text in test_cases:
+            to_snake_case(text)
+            to_kebab_case(text)
+        elapsed = timeit.default_timer() - start
+        
+        # Should complete 600 conversions in under 100ms
+        assert elapsed < 0.1
\ No newline at end of file

From 38e304a7e0034353c500c200e21ac5f66d70fd7b Mon Sep 17 00:00:00 2001
From: Lance Lewandowski <lance0821@gmail.com>
Date: Thu, 28 Aug 2025 12:47:42 -0400
Subject: [PATCH 3/3] refactor: Implement shared tokenizer and add validation
 per review feedback

Addressed final review feedback from PR #14:

1. Shared Tokenizer Implementation:
   - Created _tokenize_string() for consistent word boundary detection
   - All case conversion functions now use the same tokenizer
   - Properly handles case transitions (HelloWorld -> Hello, World)
   - Correctly tokenizes consecutive capitals (XMLHttpRequest -> XML, Http, Request)
   - Ensures consistent behavior across all conversion functions

2. Non-Negative Validation:
   - Added validation in truncate_text() for max_length parameter
   - Raises ValueError for negative values
   - Makes API safer and more predictable

3. Test Improvements:
   - Added dedicated test suite for tokenizer
   - Added test for ValueError on negative max_length
   - Updated expectations for consistent tokenization
   - All 110 tests passing

The implementation now provides consistent behavior across all case
conversion functions and safer parameter validation.
---
 src/string_utils.py        | 128 ++++++++++++++++++++-----------------
 tests/test_string_utils.py |  37 ++++++++++-
 2 files changed, 106 insertions(+), 59 deletions(-)

diff --git a/src/string_utils.py b/src/string_utils.py
index 0769dab..efcfb09 100644
--- a/src/string_utils.py
+++ b/src/string_utils.py
@@ -5,6 +5,7 @@
 """
 
 import re
+from typing import List
 
 
 # Precompile regex patterns for better performance
@@ -14,6 +15,44 @@
 _WHITESPACE = re.compile(r'\s+')
 
 
+def _tokenize_string(text: str) -> List[str]:
+    """Split ``text`` into the word tokens used by every case converter.
+
+    Non-alphanumeric characters act as delimiters and case transitions
+    start a new token. Tokens keep their original casing; each caller
+    applies the casing it needs when joining them.
+
+    Args:
+        text: Input string to tokenize.
+
+    Returns:
+        Word tokens in order of appearance. The list is empty when ``text``
+        is falsy or nothing remains once delimiters are removed.
+
+    Examples:
+        >>> _tokenize_string("HelloWorld")
+        ['Hello', 'World']
+        >>> _tokenize_string("XMLHttpRequest")
+        ['XML', 'Http', 'Request']
+        >>> _tokenize_string("snake_case_example")
+        ['snake', 'case', 'example']
+    """
+    if not text:
+        return []
+
+    # Turn every non-alphanumeric delimiter into a space.
+    text = _NON_ALNUM.sub(' ', text)
+
+    # Separate an acronym from the word after it (XMLHttp -> XML Http).
+    text = _CONSECUTIVE_CAPS.sub(r'\1 \2', text)
+
+    # Break before a capital that follows a lowercase letter or digit.
+    text = _CAMEL_BOUNDARY.sub(r'\1 \2', text)
+
+    # str.split() with no separator never yields '', so no filter is needed.
+    return text.split()
+
+
 def to_snake_case(text: str) -> str:
     """Convert a string to snake_case.
     
@@ -37,24 +76,11 @@ def to_snake_case(text: str) -> str:
         >>> to_snake_case("__already__snake__")
         'already_snake'
     """
-    if not text:
+    tokens = _tokenize_string(text)
+    if not tokens:
         return ""
     
-    # Replace non-alphanumeric with underscores
-    text = _NON_ALNUM.sub('_', text)
-    
-    # Handle consecutive capitals
-    text = _CONSECUTIVE_CAPS.sub(r'\1_\2', text)
-    
-    # Insert underscore before capitals preceded by lowercase/digit
-    text = _CAMEL_BOUNDARY.sub(r'\1_\2', text)
-    
-    # Convert to lowercase and remove redundant underscores
-    text = text.lower()
-    text = re.sub(r'_+', '_', text)  # Collapse multiple underscores
-    text = text.strip('_')  # Remove leading/trailing underscores
-    
-    return text
+    return '_'.join(token.lower() for token in tokens)
 
 
 def to_camel_case(text: str) -> str:
@@ -73,24 +99,19 @@ def to_camel_case(text: str) -> str:
         'someVariableName'
         >>> to_camel_case("Convert to camel")
         'convertToCamel'
+        >>> to_camel_case("HelloWorld")
+        'helloWorld'
+        >>> to_camel_case("XMLHttpRequest")
+        'xmlHttpRequest'
         >>> to_camel_case("__multiple__delimiters__")
         'multipleDelimiters'
-        >>> to_camel_case("123_start_with_number")
-        '123StartWithNumber'
     """
-    if not text:
-        return ""
-    
-    # Split on non-alphanumeric characters
-    words = _NON_ALNUM.split(text)
-    # Filter empty strings
-    words = [w for w in words if w]
-    
-    if not words:
+    tokens = _tokenize_string(text)
+    if not tokens:
         return ""
     
-    # First word lowercase, rest title case
-    return words[0].lower() + ''.join(w.capitalize() for w in words[1:])
+    # First token lowercase, rest capitalized
+    return tokens[0].lower() + ''.join(token.capitalize() for token in tokens[1:])
 
 
 def to_pascal_case(text: str) -> str:
@@ -109,18 +130,18 @@ def to_pascal_case(text: str) -> str:
         'SomeVariableName'
         >>> to_pascal_case("convert to pascal")
         'ConvertToPascal'
+        >>> to_pascal_case("helloWorld")
+        'HelloWorld'
+        >>> to_pascal_case("XMLHttpRequest")
+        'XmlHttpRequest'
         >>> to_pascal_case("__multiple__delimiters__")
         'MultipleDelimiters'
-        >>> to_pascal_case("123_start_with_number")
-        '123StartWithNumber'
     """
-    if not text:
+    tokens = _tokenize_string(text)
+    if not tokens:
         return ""
     
-    # Split on non-alphanumeric characters
-    words = _NON_ALNUM.split(text)
-    # Filter empty strings and capitalize each word
-    return ''.join(w.capitalize() for w in words if w)
+    return ''.join(token.capitalize() for token in tokens)
 
 
 def to_kebab_case(text: str) -> str:
@@ -144,24 +165,11 @@ def to_kebab_case(text: str) -> str:
         >>> to_kebab_case("--already--kebab--")
         'already-kebab'
     """
-    if not text:
+    tokens = _tokenize_string(text)
+    if not tokens:
         return ""
     
-    # Replace non-alphanumeric with hyphens
-    text = _NON_ALNUM.sub('-', text)
-    
-    # Handle consecutive capitals
-    text = _CONSECUTIVE_CAPS.sub(r'\1-\2', text)
-    
-    # Insert hyphen before capitals preceded by lowercase/digit
-    text = _CAMEL_BOUNDARY.sub(r'\1-\2', text)
-    
-    # Convert to lowercase and clean up hyphens
-    text = text.lower()
-    text = re.sub(r'-+', '-', text)  # Collapse multiple hyphens
-    text = text.strip('-')  # Remove leading/trailing hyphens
-    
-    return text
+    return '-'.join(token.lower() for token in tokens)
 
 
 def truncate_text(text: str, max_length: int, suffix: str = "...") -> str:
@@ -172,13 +180,16 @@ def truncate_text(text: str, max_length: int, suffix: str = "...") -> str:
     
     Args:
         text: Text to truncate
-        max_length: Maximum length including suffix
+        max_length: Maximum length including suffix (must be non-negative)
         suffix: String to append when truncating (default: "...")
         
     Returns:
         Truncated text with suffix if needed, empty string if max_length
         is too small for meaningful truncation
         
+    Raises:
+        ValueError: If max_length is negative
+        
     Examples:
         >>> truncate_text("This is a long text", 10)
         'This is...'
@@ -186,11 +197,14 @@ def truncate_text(text: str, max_length: int, suffix: str = "...") -> str:
         'Short'
         >>> truncate_text("Exactly ten", 11)
         'Exactly ten'
-        >>> truncate_text("Too long", 2, "...")
-        ''
-        >>> truncate_text("Custom", 5, "→")
-        'Cust→'
+        >>> truncate_text("Too long", -1)
+        Traceback (most recent call last):
+            ...
+        ValueError: max_length must be non-negative, got -1
     """
+    if max_length < 0:
+        raise ValueError(f"max_length must be non-negative, got {max_length}")
+    
     if len(text) <= max_length:
         return text
     
diff --git a/tests/test_string_utils.py b/tests/test_string_utils.py
index 15e937e..3f555f7 100644
--- a/tests/test_string_utils.py
+++ b/tests/test_string_utils.py
@@ -11,6 +11,7 @@
     word_count,
     is_mixed_case,
     remove_extra_whitespace,
+    _tokenize_string,  # Import for testing
 )
 
 
@@ -33,6 +34,7 @@ class TestCaseConversions:
             ("123Numbers", "123_numbers"),
             ("with spaces here", "with_spaces_here"),
             ("__multiple__underscores__", "multiple_underscores"),
+            ("HelloWorld", "hello_world"),  # Now handles camelCase input
             ("CamelCASEMixed", "camel_case_mixed"),
         ],
     )
@@ -46,7 +48,7 @@ def test_to_snake_case(self, input_text, expected):
             ("hello_world", "helloWorld"),
             ("some-variable-name", "someVariableName"),
             ("Convert to camel", "convertToCamel"),
-            ("already_camelCase", "alreadyCamelcase"),
+            ("already_camelCase", "alreadyCamelCase"),  # Preserves existing case transitions
             ("mixed-Style_Example", "mixedStyleExample"),
             ("", ""),
             ("a", "a"),
@@ -55,6 +57,8 @@ def test_to_snake_case(self, input_text, expected):
             ("123_numbers", "123Numbers"),
             ("__multiple__delimiters__", "multipleDelimiters"),
             ("!!!only!!!special!!!", "onlySpecial"),
+            ("HelloWorld", "helloWorld"),  # Now properly handles CamelCase
+            ("XMLHttpRequest", "xmlHttpRequest"),  # Consistent tokenization
         ],
     )
     def test_to_camel_case(self, input_text, expected):
@@ -67,7 +71,7 @@ def test_to_camel_case(self, input_text, expected):
             ("hello_world", "HelloWorld"),
             ("some-variable-name", "SomeVariableName"),
             ("convert to pascal", "ConvertToPascal"),
-            ("already_PascalCase", "AlreadyPascalcase"),
+            ("already_PascalCase", "AlreadyPascalCase"),  # Preserves case transitions
             ("mixed-Style_Example", "MixedStyleExample"),
             ("", ""),
             ("a", "A"),
@@ -75,6 +79,8 @@ def test_to_camel_case(self, input_text, expected):
             ("UPPERCASE", "Uppercase"),
             ("123_numbers", "123Numbers"),
             ("__multiple__delimiters__", "MultipleDelimiters"),
+            ("helloWorld", "HelloWorld"),  # Handles camelCase input
+            ("XMLHttpRequest", "XmlHttpRequest"),  # Consistent tokenization
         ],
     )
     def test_to_pascal_case(self, input_text, expected):
@@ -195,6 +201,29 @@ def test_remove_extra_whitespace(self, text, expected):
         assert remove_extra_whitespace(text) == expected
 
 
+class TestTokenizer:
+    """Checks for the tokenizer shared by the case conversion functions."""
+
+    def test_tokenizer_basic(self):
+        """Representative inputs split into the expected tokens."""
+        cases = [
+            ("HelloWorld", ["Hello", "World"]),
+            ("snake_case", ["snake", "case"]),
+            ("kebab-case", ["kebab", "case"]),
+            ("XMLHttpRequest", ["XML", "Http", "Request"]),
+            ("IOError", ["IO", "Error"]),
+            ("", []),
+        ]
+        for raw, tokens in cases:
+            assert _tokenize_string(raw) == tokens
+
+    def test_tokenizer_consistency(self):
+        """Every converter renders the same token split of one input."""
+        converters = (to_snake_case, to_camel_case, to_pascal_case, to_kebab_case)
+        expected = ["xml_http_request", "xmlHttpRequest", "XmlHttpRequest", "xml-http-request"]
+        assert [convert("XMLHttpRequest") for convert in converters] == expected
+
+
 class TestEdgeCases:
     """Test suite for edge cases and special scenarios."""
 
@@ -252,6 +281,10 @@ def test_truncate_edge_cases(self):
         # Very long suffix
         assert truncate_text("Short", 10, "VERYLONGSUFFIX") == "Short"
         assert truncate_text("This needs truncation", 10, "LONG") == "This nLONG"
+        
+        # Negative max_length should raise ValueError
+        with pytest.raises(ValueError, match="max_length must be non-negative"):
+            truncate_text("Test", -1)
 
     def test_unicode_handling(self):
         """Test handling of Unicode characters."""