Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions examples/test_type_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""
Test cases demonstrating type parsing improvements in parse_llm_output.

Fixes:
1. Boolean parsing: bool("False") no longer returns True
2. Tuple parsing: tuple(string) no longer converts character-by-character
3. Complex type handling: list, dict, set now use ast.literal_eval
"""

import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import secretagent as sec
import ast
import re

def mock_llm(prompt, service='null', model=None, echo_service=False):
    """Mock LLM that returns specific answers based on test context.

    The canned reply is selected by case-insensitive keyword matching on
    ``prompt``; no model is contacted.

    Args:
        prompt: Prompt text to inspect for keywords.
        service: Unused by the mock.
        model: Unused by the mock.
        echo_service: Unused by the mock.

    Returns:
        A string of the form ``<answer>...</answer>``. Prompts that match
        no known keyword get ``<answer>False</answer>``.
    """
    # Lower-case once instead of re-normalising the prompt in every branch.
    lowered = prompt.lower()

    if 'dangerous' in lowered:
        if 'sunny meadow' in lowered:
            return '<answer>False</answer>'
        if 'poison gas' in lowered:
            return '<answer>True</answer>'
        # An unrecognised "dangerous" prompt falls through to the default.
    elif 'analyze_sentence' in lowered:
        return '<answer>("Santi Cazorla", "scored a touchdown")</answer>'
    elif 'get_coords' in lowered:
        return '<answer>[10, 20, 30]</answer>'

    return '<answer>False</answer>'

# Patch LLM for testing: replace secretagent's `llm_util.llm` with the local
# mock_llm, then configure the library with service='null' and model='test'.
# NOTE(review): presumably this keeps the tests from contacting a real model
# service -- confirm that every call path goes through `llm_util.llm`.
sec.llm_util.llm = mock_llm
sec.configure(service='null', model='test')

# Stub exercised by the boolean tests; the body is intentionally empty.
# NOTE(review): presumably the decorator builds the prompt from the signature
# and docstring, and mock_llm matches on the word "dangerous" -- keep both
# in sync if either is edited.
@sec.subagent()
def is_dangerous(room_desc: str) -> bool:
    """Determine if a room description sounds dangerous."""
    pass

# Stub exercised by the tuple test; the body is intentionally empty.
# NOTE(review): presumably the decorator puts the function name into the
# prompt, which is what mock_llm matches ("analyze_sentence") -- confirm
# before renaming.
@sec.subagent()
def analyze_sentence(sentence: str) -> tuple:
    """Extract player name and action from sports sentence."""
    pass

# Stub exercised by the list test; the body is intentionally empty.
# NOTE(review): presumably the decorator puts the function name into the
# prompt, which is what mock_llm matches ("get_coords") -- confirm before
# renaming.
@sec.subagent()
def get_coords(location: str) -> list:
    """Get coordinates for a location."""
    pass

def test_boolean_with_llm():
    """Check that is_dangerous yields real ``bool`` objects end to end.

    Each scenario calls the decorated stub and requires the result to be the
    exact ``True``/``False`` singleton, not merely a truthy or falsy value.
    """
    print("Testing boolean parsing with @subagent calls...")

    # (room description, label printed on success, required singleton)
    scenarios = (
        ("A sunny meadow with butterflies", "sunny meadow", False),
        ("A room filled with poison gas", "poison gas", True),
    )

    for room, label, wanted in scenarios:
        verdict = is_dangerous(room)
        assert verdict is wanted, f"Expected {wanted}, got {verdict}"
        print(f" {label} -> {verdict}")

def test_tuple_with_llm():
    """Check that analyze_sentence returns a proper two-element tuple.

    Verifies the container type, its length, and its exact contents.
    """
    print("\nTesting tuple parsing with @subagent calls...")

    wanted = ("Santi Cazorla", "scored a touchdown")
    parsed = analyze_sentence("Santi Cazorla scored a touchdown")

    assert isinstance(parsed, tuple), f"Expected tuple, got {type(parsed)}"
    assert len(parsed) == 2, f"Expected 2 elements, got {len(parsed)}"
    assert parsed == wanted, f"Got {parsed}"

    print(f" Tuple parsing: {parsed}")

def test_list_with_llm():
    """Check that get_coords returns a real list with the expected items."""
    print("\nTesting list parsing with @subagent calls...")

    wanted = [10, 20, 30]
    coords = get_coords("home")

    assert isinstance(coords, list), f"Expected list, got {type(coords)}"
    assert coords == wanted, f"Got {coords}"

    print(f" List parsing: {coords}")

def test_boolean_parsing_variations():
    """Test all boolean string variations.

    Feeds raw ``<answer>...</answer>`` strings straight into
    ``sec.parse_llm_output`` for a ``-> bool`` function and requires the
    exact ``True``/``False`` singleton back.
    """
    print("\nTesting boolean parsing variations...")

    @sec.subagent()
    def dummy_bool(x: str) -> bool:
        """Dummy function for testing."""
        pass

    test_cases = [
        ("<answer>True</answer>", True),
        ("<answer>False</answer>", False),
        ("<answer>true</answer>", True),
        ("<answer>false</answer>", False),
        ("<answer>yes</answer>", True),
        ("<answer>no</answer>", False),
        ("<answer>y</answer>", True),
        ("<answer>n</answer>", False),
        ("<answer>1</answer>", True),
        ("<answer>0</answer>", False),
    ]

    for llm_output, expected in test_cases:
        result = sec.parse_llm_output(dummy_bool, llm_output)
        # Identity, not equality: `1 == True` and `0 == False` hold in Python,
        # so `==` would accept a parser that returns an int for "1"/"0".
        assert result is expected, f"Failed: {llm_output} -> {result}, expected {expected}"
        print(f" {llm_output} -> {result}")

def test_complex_type_parsing():
    """Test tuple, list, dict and set parsing directly.

    Each case passes a raw ``<answer>...</answer>`` string to
    ``sec.parse_llm_output`` and checks both the value and the concrete
    container type of the result.
    """
    print("\nTesting complex type parsing...")

    @sec.subagent()
    def dummy_tuple(x: str) -> tuple:
        pass

    @sec.subagent()
    def dummy_list(x: str) -> list:
        pass

    @sec.subagent()
    def dummy_dict(x: str) -> dict:
        pass

    @sec.subagent()
    def dummy_set(x: str) -> set:
        pass

    test_cases = [
        (dummy_tuple, '<answer>("hello", "world")</answer>', ("hello", "world")),
        (dummy_list, '<answer>[1, 2, 3]</answer>', [1, 2, 3]),
        (dummy_dict, '<answer>{"key": "value"}</answer>', {"key": "value"}),
        # Set support is advertised alongside list and dict, so cover it too.
        (dummy_set, '<answer>{1, 2, 3}</answer>', {1, 2, 3}),
    ]

    for func, llm_output, expected in test_cases:
        result = sec.parse_llm_output(func, llm_output)
        assert type(result) is type(expected), (
            f"Failed: {llm_output} -> {type(result).__name__}, "
            f"expected {type(expected).__name__}"
        )
        assert result == expected, f"Failed: {llm_output} -> {result}, expected {expected}"
        print(f" {type(expected).__name__}: {result}")

def test_other_types():
    """Confirm that plain scalar return types (int, str) still parse.

    Each case passes a raw ``<answer>...</answer>`` string to
    ``sec.parse_llm_output`` and compares the converted value.
    """
    print("\nTesting other return types...")

    @sec.subagent()
    def dummy_int(x: str) -> int:
        pass

    @sec.subagent()
    def dummy_str(x: str) -> str:
        pass

    # (decorated stub, raw model output, value expected after conversion)
    for agent, raw, wanted in (
        (dummy_int, "<answer>42</answer>", 42),
        (dummy_str, "<answer>hello</answer>", "hello"),
    ):
        parsed = sec.parse_llm_output(agent, raw)
        assert parsed == wanted, f"Failed: {raw} -> {parsed}, expected {wanted}"
        type_name = agent.__annotations__['return'].__name__
        print(f" {type_name}: {parsed}")

if __name__ == '__main__':
    # Script entry point: run every test in order, stop at the first failed
    # assertion, and exit with status 1 in that case.
    suite = (
        test_boolean_with_llm,
        test_tuple_with_llm,
        test_list_with_llm,
        test_boolean_parsing_variations,
        test_complex_type_parsing,
        test_other_types,
    )
    try:
        for run_case in suite:
            run_case()
        rule = "=" * 60
        print("\n" + rule)
        print("All tests passed! Type parsing improvements verified.")
        print(rule)
    except AssertionError as failure:
        print(f"\nTest failed: {failure}")
        sys.exit(1)
47 changes: 42 additions & 5 deletions secretagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,51 @@ def parse_llm_output(func, text):
raise AttributeError('cannot find final answer')

return_type = func.__annotations__.get('return', str)

# SPECIAL HANDLING FOR BOOLEANS
if return_type is bool:
# Handle common boolean representations
final_answer_lower = final_answer.lower().strip()
if final_answer_lower in ('true', 'yes', '1', 'y'):
return True
elif final_answer_lower in ('false', 'no', '0', 'n'):
return False
else:
# Fallback to ast.literal_eval for "True"/"False" strings
try:
return ast.literal_eval(final_answer.capitalize())
except:
raise ValueError(f"Cannot parse '{final_answer}' as boolean")

# SPECIAL HANDLING FOR TUPLES AND OTHER COMPLEX TYPES
# Check if return_type is a complex type (tuple, list, dict, etc.)
# For Python 3.9+, these are typically typing.* or have __origin__
is_complex_type = (
hasattr(return_type, '__origin__') or # typing.Tuple[str, str]
return_type in (tuple, list, dict, set) # plain tuple, list, etc.
)

if is_complex_type:
# For complex types, always use ast.literal_eval
try:
result = ast.literal_eval(final_answer)
except (ValueError, SyntaxError) as e:
raise ValueError(f"Cannot parse '{final_answer}' as {return_type}: {e}")
return result

try:
# type is something simple like 'str', 'int'
# type is something simple like 'str', 'int', 'float'
result = return_type(final_answer)
except TypeError:
# type is complex - for now don't both validating it
result = ast.literal_eval(final_answer)
except (TypeError, ValueError):
# type is complex - use ast.literal_eval
# Also handles strings that look like Python literals
try:
result = ast.literal_eval(final_answer)
except (ValueError, SyntaxError):
# If all else fails, return as string
result = final_answer
return result

def subagent(**subagent_kw):
"""Decorator to mark a function as implemented via an LLM prompt.
"""
Expand Down