wwcohen · Joshh99 · Jan 9, 2026
diff --git a/examples/test_type_parsing.py b/examples/test_type_parsing.py
@@ -0,0 +1,168 @@
+"""
+Test cases demonstrating type parsing improvements in parse_llm_output.
+
+Fixes:
+1. Boolean parsing: bool("False") no longer returns True
+2. Tuple parsing: tuple(string) no longer converts character-by-character
+3. Complex type handling: list, dict, set now use ast.literal_eval
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+import secretagent as sec
+import ast
+import re
+
+def mock_llm(prompt, service='null', model=None, echo_service=False):
+    """Stand-in for the real LLM: return a canned <answer> picked by prompt keywords."""
+    text = prompt.lower()  # lowercase once so every keyword check is case-insensitive
+    if 'dangerous' in text:
+        # a 'sunny meadow' mention outranks 'poison gas'; with neither keyword -> False
+        is_gas = 'poison gas' in text and 'sunny meadow' not in text
+        return '<answer>True</answer>' if is_gas else '<answer>False</answer>'
+    if 'analyze_sentence' in text:
+        return '<answer>("Santi Cazorla", "scored a touchdown")</answer>'
+    if 'get_coords' in text:
+        return '<answer>[10, 20, 30]</answer>'
+    return '<answer>False</answer>'
+
+# Swap secretagent's LLM entry point for the canned mock, then configure the 'null' service
+sec.llm_util.llm = mock_llm
+sec.configure(service='null', model='test')
+
+@sec.subagent()
+def is_dangerous(room_desc: str) -> bool:
+    """Determine if a room description sounds dangerous."""
+    pass  # no body: @sec.subagent marks this function as implemented via an LLM prompt
+
+@sec.subagent()
+def analyze_sentence(sentence: str) -> tuple:
+    """Extract player name and action from sports sentence."""
+    pass  # no body: @sec.subagent marks this function as implemented via an LLM prompt
+
+@sec.subagent()
+def get_coords(location: str) -> list:
+    """Get coordinates for a location."""
+    pass  # no body: @sec.subagent marks this function as implemented via an LLM prompt
+
+def test_boolean_with_llm():
+    """Check that bool-returning @subagent calls yield the real True/False objects."""
+    print("Testing boolean parsing with @subagent calls...")
+    # each entry: (label used in the log line, room description, expected bool)
+    for label, room, expected in [
+        ("sunny meadow", "A sunny meadow with butterflies", False),
+        ("poison gas", "A room filled with poison gas", True),
+    ]:
+        verdict = is_dangerous(room)
+        assert verdict is expected, f"Expected {expected}, got {verdict}"
+        print(f"  {label} -> {verdict}")
+
+def test_tuple_with_llm():
+    """Check that a tuple-returning @subagent call yields the parsed 2-tuple."""
+    print("\nTesting tuple parsing with @subagent calls...")
+    expected_pair = ("Santi Cazorla", "scored a touchdown")
+    parsed = analyze_sentence("Santi Cazorla scored a touchdown")
+    assert isinstance(parsed, tuple), f"Expected tuple, got {type(parsed)}"
+    assert len(parsed) == 2, f"Expected 2 elements, got {len(parsed)}"
+    assert parsed == expected_pair, f"Got {parsed}"
+    print(f"  Tuple parsing: {parsed}")
+
+def test_list_with_llm():
+    """Check that a list-returning @subagent call yields the parsed list."""
+    print("\nTesting list parsing with @subagent calls...")
+    expected_coords = [10, 20, 30]
+    coords = get_coords("home")
+    assert isinstance(coords, list), f"Expected list, got {type(coords)}"
+    assert coords == expected_coords, f"Got {coords}"
+    print(f"  List parsing: {coords}")
+
+def test_boolean_parsing_variations():
+    """Check that each accepted boolean spelling parses to the actual True/False object."""
+    print("\nTesting boolean parsing variations...")
+
+    @sec.subagent()
+    def dummy_bool(x: str) -> bool:
+        """Dummy function for testing."""
+        pass
+
+    test_cases = [
+        ("<answer>True</answer>", True),
+        ("<answer>False</answer>", False),
+        ("<answer>true</answer>", True),
+        ("<answer>false</answer>", False),
+        ("<answer>yes</answer>", True),
+        ("<answer>no</answer>", False),
+        ("<answer>1</answer>", True),
+        ("<answer>0</answer>", False),
+    ]
+    # Identity, not equality: 1 == True and 0 == False, so '==' would accept raw ints.
+    for llm_output, expected in test_cases:
+        result = sec.parse_llm_output(dummy_bool, llm_output)
+        assert result is expected, f"Failed: {llm_output} -> {result}, expected {expected}"
+        print(f"  {llm_output} -> {result}")
+
+def test_complex_type_parsing():
+    """Call parse_llm_output directly for tuple, list and dict return annotations."""
+    print("\nTesting complex type parsing...")
+
+    @sec.subagent()
+    def dummy_tuple(x: str) -> tuple:
+        pass
+
+    @sec.subagent()
+    def dummy_list(x: str) -> list:
+        pass
+
+    @sec.subagent()
+    def dummy_dict(x: str) -> dict:
+        pass
+
+    # each row: (annotated stub, raw LLM text, value the parser should produce)
+    scenarios = (
+        (dummy_tuple, '<answer>("hello", "world")</answer>', ("hello", "world")),
+        (dummy_list, '<answer>[1, 2, 3]</answer>', [1, 2, 3]),
+        (dummy_dict, '<answer>{"key": "value"}</answer>', {"key": "value"}),
+    )
+    for stub, raw_text, wanted in scenarios:
+        parsed = sec.parse_llm_output(stub, raw_text)
+        assert parsed == wanted, f"Failed: {raw_text} -> {parsed}, expected {wanted}"
+        print(f"  {type(wanted).__name__}: {parsed}")
+
+def test_other_types():
+    """Check that int and str return annotations are still parsed as before."""
+    print("\nTesting other return types...")
+
+    @sec.subagent()
+    def dummy_int(x: str) -> int:
+        pass
+
+    @sec.subagent()
+    def dummy_str(x: str) -> str:
+        pass
+
+    # each row: (annotated stub, raw LLM text, value the parser should produce)
+    scenarios = (
+        (dummy_int, "<answer>42</answer>", 42),
+        (dummy_str, "<answer>hello</answer>", "hello"),
+    )
+    for stub, raw_text, wanted in scenarios:
+        parsed = sec.parse_llm_output(stub, raw_text)
+        assert parsed == wanted, f"Failed: {raw_text} -> {parsed}, expected {wanted}"
+        print(f"  {stub.__annotations__['return'].__name__}: {parsed}")
+
+if __name__ == '__main__':
+    # Run the suite in order; the first failed assertion aborts with exit code 1.
+    suite = (test_boolean_with_llm, test_tuple_with_llm, test_list_with_llm,
+             test_boolean_parsing_variations, test_complex_type_parsing,
+             test_other_types)
+    try:
+        for run_test in suite:
+            run_test()
+        print("\n" + "="*60)
+        print("All tests passed! Type parsing improvements verified.")
+        print("="*60)
+    except AssertionError as failure:
+        print(f"\nTest failed: {failure}")
+        sys.exit(1)
diff --git a/secretagent.py b/secretagent.py
@@ -106,14 +106,51 @@ def parse_llm_output(func, text):
         raise AttributeError('cannot find final answer')
 
     return_type = func.__annotations__.get('return', str)
+
+    # SPECIAL HANDLING FOR BOOLEANS
+    if return_type is bool:
+        # Handle common boolean representations
+        final_answer_lower = final_answer.lower().strip()
+        if final_answer_lower in ('true', 'yes', '1', 'y'):
+            return True
+        elif final_answer_lower in ('false', 'no', '0', 'n'):
+            return False
+        else:
+            # Fallback to ast.literal_eval for "True"/"False" strings
+            try:
+                return ast.literal_eval(final_answer.capitalize())
+            except:
+                raise ValueError(f"Cannot parse '{final_answer}' as boolean")
+
+    # SPECIAL HANDLING FOR TUPLES AND OTHER COMPLEX TYPES
+    # Check if return_type is a complex type (tuple, list, dict, etc.)
+    # For Python 3.9+, these are typically typing.* or have __origin__
+    is_complex_type = (
+        hasattr(return_type, '__origin__') or  # typing.Tuple[str, str]
+        return_type in (tuple, list, dict, set)  # plain tuple, list, etc.
+    )
+
+    if is_complex_type:
+        # For complex types, always use ast.literal_eval
+        try:
+            result = ast.literal_eval(final_answer)
+        except (ValueError, SyntaxError) as e:
+            raise ValueError(f"Cannot parse '{final_answer}' as {return_type}: {e}")
+        return result
+
     try:
-        # type is something simple like 'str', 'int'
+        # type is something simple like 'str', 'int', 'float'
         result = return_type(final_answer)
-    except TypeError:
-        # type is complex - for now don't both validating it
-        result = ast.literal_eval(final_answer)
+    except (TypeError, ValueError):
+        # type is complex - use ast.literal_eval
+        # Also handles strings that look like Python literals
+        try:
+            result = ast.literal_eval(final_answer)
+        except (ValueError, SyntaxError):
+            # If all else fails, return as string
+            result = final_answer
     return result
-
+ 
 def subagent(**subagent_kw):
     """Decorator to mark a function as implemented via an LLM prompt.
     """