Reviewer note: this patch was re-flowed from a whitespace-collapsed copy, so
indentation is reconstructed (4 spaces assumed) -- verify the context lines
against secretagent.py before applying. The trailing-whitespace-only change
before `def subagent` is omitted because its exact bytes are not recoverable.

diff --git a/examples/test_type_parsing.py b/examples/test_type_parsing.py
new file mode 100644
index 0000000..0d300c5
--- /dev/null
+++ b/examples/test_type_parsing.py
@@ -0,0 +1,201 @@
+"""
+Test cases demonstrating type parsing improvements in parse_llm_output.
+
+Fixes:
+1. Boolean parsing: bool("False") no longer returns True
+2. Tuple parsing: tuple(string) no longer converts character-by-character
+3. Complex type handling: list, dict, set now use ast.literal_eval
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+import secretagent as sec
+import ast
+import re
+
+
+def mock_llm(prompt, service='null', model=None, echo_service=False):
+    """Mock LLM that returns a canned answer chosen by keywords in the prompt."""
+    text = prompt.lower()
+    if 'dangerous' in text:
+        if 'sunny meadow' in text:
+            return 'False'
+        elif 'poison gas' in text:
+            return 'True'
+    elif 'analyze_sentence' in text:
+        return '("Santi Cazorla", "scored a touchdown")'
+    elif 'get_coords' in text:
+        return '[10, 20, 30]'
+    return 'False'
+
+
+# Patch LLM for testing
+sec.llm_util.llm = mock_llm
+sec.configure(service='null', model='test')
+
+
+@sec.subagent()
+def is_dangerous(room_desc: str) -> bool:
+    """Determine if a room description sounds dangerous."""
+    pass
+
+
+@sec.subagent()
+def analyze_sentence(sentence: str) -> tuple:
+    """Extract player name and action from sports sentence."""
+    pass
+
+
+@sec.subagent()
+def get_coords(location: str) -> list:
+    """Get coordinates for a location."""
+    pass
+
+
+def test_boolean_with_llm():
+    """Test boolean parsing with actual @subagent calls."""
+    print("Testing boolean parsing with @subagent calls...")
+
+    result1 = is_dangerous("A sunny meadow with butterflies")
+    assert result1 is False, f"Expected False, got {result1}"
+    print(f" sunny meadow -> {result1}")
+
+    result2 = is_dangerous("A room filled with poison gas")
+    assert result2 is True, f"Expected True, got {result2}"
+    print(f" poison gas -> {result2}")
+
+
+def test_tuple_with_llm():
+    """Test tuple parsing with actual @subagent calls."""
+    print("\nTesting tuple parsing with @subagent calls...")
+
+    result = analyze_sentence("Santi Cazorla scored a touchdown")
+    assert isinstance(result, tuple), f"Expected tuple, got {type(result)}"
+    assert len(result) == 2, f"Expected 2 elements, got {len(result)}"
+    assert result == ("Santi Cazorla", "scored a touchdown"), f"Got {result}"
+    print(f" Tuple parsing: {result}")
+
+
+def test_list_with_llm():
+    """Test list parsing with actual @subagent calls."""
+    print("\nTesting list parsing with @subagent calls...")
+
+    result = get_coords("home")
+    assert isinstance(result, list), f"Expected list, got {type(result)}"
+    assert result == [10, 20, 30], f"Got {result}"
+    print(f" List parsing: {result}")
+
+
+def test_boolean_parsing_variations():
+    """Test all boolean string variations, plus rejection of unknown text."""
+    print("\nTesting boolean parsing variations...")
+
+    @sec.subagent()
+    def dummy_bool(x: str) -> bool:
+        """Dummy function for testing."""
+        pass
+
+    test_cases = [
+        ("True", True),
+        ("False", False),
+        ("true", True),
+        ("false", False),
+        ("yes", True),
+        ("no", False),
+        ("y", True),
+        ("n", False),
+        ("1", True),
+        ("0", False),
+    ]
+
+    for llm_output, expected in test_cases:
+        result = sec.parse_llm_output(dummy_bool, llm_output)
+        # Identity, not equality: 1 == True, so == would accept an int.
+        assert result is expected, f"Failed: {llm_output} -> {result!r}, expected {expected}"
+        print(f" {llm_output} -> {result}")
+
+    # Text outside the boolean vocabulary must be rejected, not guessed.
+    try:
+        result = sec.parse_llm_output(dummy_bool, "maybe")
+    except ValueError:
+        print(" maybe -> ValueError")
+    else:
+        raise AssertionError(f"Failed: maybe -> {result!r}, expected ValueError")
+
+
+def test_complex_type_parsing():
+    """Test tuple, list, dict, set parsing directly."""
+    print("\nTesting complex type parsing...")
+
+    @sec.subagent()
+    def dummy_tuple(x: str) -> tuple:
+        pass
+
+    @sec.subagent()
+    def dummy_list(x: str) -> list:
+        pass
+
+    @sec.subagent()
+    def dummy_dict(x: str) -> dict:
+        pass
+
+    @sec.subagent()
+    def dummy_set(x: str) -> set:
+        pass
+
+    test_cases = [
+        (dummy_tuple, '("hello", "world")', ("hello", "world")),
+        (dummy_list, '[1, 2, 3]', [1, 2, 3]),
+        (dummy_dict, '{"key": "value"}', {"key": "value"}),
+        (dummy_set, '{1, 2, 3}', {1, 2, 3}),
+    ]
+
+    for func, llm_output, expected in test_cases:
+        result = sec.parse_llm_output(func, llm_output)
+        assert type(result) is type(expected), (
+            f"Failed: {llm_output} -> {type(result).__name__}, "
+            f"expected {type(expected).__name__}"
+        )
+        assert result == expected, f"Failed: {llm_output} -> {result}, expected {expected}"
+        print(f" {type(expected).__name__}: {result}")
+
+
+def test_other_types():
+    """Test that other return types still work correctly."""
+    print("\nTesting other return types...")
+
+    @sec.subagent()
+    def dummy_int(x: str) -> int:
+        pass
+
+    @sec.subagent()
+    def dummy_str(x: str) -> str:
+        pass
+
+    test_cases = [
+        (dummy_int, "42", 42),
+        (dummy_str, "hello", "hello"),
+    ]
+
+    for func, llm_output, expected in test_cases:
+        result = sec.parse_llm_output(func, llm_output)
+        assert result == expected, f"Failed: {llm_output} -> {result}, expected {expected}"
+        print(f" {func.__annotations__['return'].__name__}: {result}")
+
+
+if __name__ == '__main__':
+    try:
+        test_boolean_with_llm()
+        test_tuple_with_llm()
+        test_list_with_llm()
+        test_boolean_parsing_variations()
+        test_complex_type_parsing()
+        test_other_types()
+        print("\n" + "="*60)
+        print("All tests passed! Type parsing improvements verified.")
+        print("="*60)
+    except AssertionError as e:
+        print(f"\nTest failed: {e}")
+        sys.exit(1)
diff --git a/secretagent.py b/secretagent.py
index c76822c..3c81526 100644
--- a/secretagent.py
+++ b/secretagent.py
@@ -108,8 +108,41 @@ def parse_llm_output(func, text):
     return_type = func.__annotations__.get('return', str)
+
+    # bool("False") is True because any non-empty string is truthy, so
+    # booleans are matched against an explicit vocabulary instead of cast.
+    if return_type is bool:
+        normalized = final_answer.strip().lower()
+        if normalized in ('true', 'yes', '1', 'y'):
+            return True
+        if normalized in ('false', 'no', '0', 'n'):
+            return False
+        # No ast.literal_eval fallback: for text outside the vocabulary it
+        # could only produce non-boolean values (2, 1.5, ...), never a bool.
+        raise ValueError(f"Cannot parse '{final_answer}' as boolean")
+
+    # Container types -- plain tuple/list/dict/set, or generic aliases such as
+    # typing.Tuple[str, str] that expose __origin__ -- are read as Python
+    # literals; tuple("...") would split the text into single characters.
+    is_complex_type = (
+        hasattr(return_type, '__origin__')
+        or return_type in (tuple, list, dict, set)
+    )
+    if is_complex_type:
+        try:
+            return ast.literal_eval(final_answer)
+        except (ValueError, SyntaxError) as e:
+            raise ValueError(
+                f"Cannot parse '{final_answer}' as {return_type}: {e}"
+            ) from e
+
     try:
-        # type is something simple like 'str', 'int'
+        # type is something simple like 'str', 'int', 'float'
         result = return_type(final_answer)
-    except TypeError:
-        # type is complex - for now don't both validating it
-        result = ast.literal_eval(final_answer)
+    except (TypeError, ValueError):
+        # Not constructible from this text; try reading it as a Python
+        # literal before giving up.
+        try:
+            result = ast.literal_eval(final_answer)
+        except (ValueError, SyntaxError):
+            # Best effort: return the raw text unchanged.
+            result = final_answer
     return result