Skip to content

Commit 1d80ca0

Browse files
committed
Fix answer extraction to handle the list of matches returned by math-verify
1 parent bf6c6ec commit 1d80ca0

File tree

3 files changed

+62
-15
lines changed

3 files changed

+62
-15
lines changed

optillm/mars/mars.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -348,22 +348,36 @@ def _synthesize_final_solution(
348348
if extracted_answer is not None:
349349
logger.info(f"🗳️ VOTING: Agent {solution.agent_id} extracted answer '{extracted_answer}' via unified extraction (confidence: {solution.confidence:.2f})")
350350

351-
# Handle both numeric and non-numeric answers
352-
if isinstance(extracted_answer, (int, float)):
353-
# Numeric answer - add to numerical voting
354-
numerical_answers.append((int(extracted_answer), solution))
355-
extracted_answers_info.append((str(int(extracted_answer)), solution, "unified_numeric"))
356-
elif isinstance(extracted_answer, str):
357-
# Non-numeric answer (formulas, sets, etc.) - store for synthesis
358-
extracted_answers_info.append((extracted_answer, solution, "unified_formula"))
359-
logger.info(f"🗳️ VOTING: Non-numeric answer stored for synthesis: '{extracted_answer}'")
360-
elif isinstance(extracted_answer, set):
361-
# Set answers (e.g., for Problem 1) - convert to string for synthesis
362-
set_str = "{" + ", ".join(map(str, sorted(extracted_answer))) + "}"
363-
extracted_answers_info.append((set_str, solution, "unified_set"))
364-
logger.info(f"🗳️ VOTING: Set answer stored for synthesis: '{set_str}'")
351+
# Math-verify returns a list of all possible matches
352+
# Iterate through list to find first valid answer
353+
answers_to_process = []
354+
if isinstance(extracted_answer, list):
355+
answers_to_process = extracted_answer
365356
else:
366-
# Other types - convert to string
357+
answers_to_process = [extracted_answer]
358+
359+
# Process each answer in the list
360+
for ans in answers_to_process:
361+
# Handle both numeric and non-numeric answers
362+
if isinstance(ans, (int, float)):
363+
# Numeric answer - add to numerical voting
364+
numerical_answers.append((int(ans), solution))
365+
extracted_answers_info.append((str(int(ans)), solution, "unified_numeric"))
366+
break # Use first numeric answer found
367+
elif isinstance(ans, str) and ans.strip():
368+
# Non-numeric answer (formulas, sets, etc.) - store for synthesis
369+
extracted_answers_info.append((ans, solution, "unified_formula"))
370+
logger.info(f"🗳️ VOTING: Non-numeric answer stored for synthesis: '{ans}'")
371+
break # Use first valid string
372+
elif isinstance(ans, set):
373+
# Set answers (e.g., for Problem 1) - convert to string for synthesis
374+
set_str = "{" + ", ".join(map(str, sorted(ans))) + "}"
375+
extracted_answers_info.append((set_str, solution, "unified_set"))
376+
logger.info(f"🗳️ VOTING: Set answer stored for synthesis: '{set_str}'")
377+
break # Use first set found
378+
379+
# If no valid answer found after iterating list, log as other type
380+
if not any(isinstance(ans, (int, float, str, set)) for ans in answers_to_process if isinstance(ans, str) and ans.strip()):
367381
extracted_answers_info.append((str(extracted_answer), solution, "unified_other"))
368382
logger.info(f"🗳️ VOTING: Other answer type stored for synthesis: '{extracted_answer}'")
369383
else:

scripts/eval_aime_benchmark.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,20 @@ def extract_answer(response: str) -> Optional[int]:
105105
if extracted_answer is None:
106106
return None
107107

108+
# Math-verify returns a list of all possible matches
109+
# Check if extracted_answer is a list and find first valid integer
110+
if isinstance(extracted_answer, list):
111+
for item in extracted_answer:
112+
if isinstance(item, (int, float)):
113+
answer = int(item)
114+
if 0 <= answer <= 999:
115+
return answer
116+
elif isinstance(item, str) and item.isdigit():
117+
answer = int(item)
118+
if 0 <= answer <= 999:
119+
return answer
120+
return None
121+
108122
# Convert to integer if needed - AIME answers are always integers
109123
if isinstance(extracted_answer, (int, float)):
110124
answer = int(extracted_answer)

scripts/eval_imo25_benchmark.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,25 @@ def extract_answer_from_solution(solution: str, problem_id: int) -> str:
118118
if extracted_answer is None:
119119
return None
120120

121+
# Math-verify returns a list of all possible matches
122+
# Iterate through list to find first valid format for this problem
123+
if isinstance(extracted_answer, list):
124+
for item in extracted_answer:
125+
# Try each type conversion
126+
if isinstance(item, set):
127+
sorted_elements = sorted(list(item))
128+
return "{" + ", ".join(map(str, sorted_elements)) + "}"
129+
elif isinstance(item, (int, float)):
130+
if problem_id == 3:
131+
return f"c = {int(item)}"
132+
else:
133+
return str(int(item))
134+
elif isinstance(item, str) and item.strip():
135+
# Skip empty strings, return first non-empty string
136+
return item
137+
# If no valid item found in list, convert list to string
138+
return str(extracted_answer)
139+
121140
# Convert extracted answer to string format expected by evaluation
122141
if isinstance(extracted_answer, set):
123142
# Convert set to string format: {0, 1, 2, 3}

0 commit comments

Comments
 (0)