Skip to content

Commit 5b48bdf

Browse files
committed
fixes
1 parent 5cd17d6 commit 5b48bdf

File tree

9 files changed

+1246
-183
lines changed

9 files changed

+1246
-183
lines changed

optillm/mars/agent.py

Lines changed: 76 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,10 @@ def _get_reasoning_effort(self) -> str:
4242

4343
def generate_solution(self, problem: str, request_id: str = None) -> Tuple[AgentSolution, int]:
4444
"""Generate a solution for the given problem using reasoning API"""
45-
logger.info(f"Agent {self.agent_id} generating solution with temperature {self.temperature}")
45+
import time
46+
start_time = time.time()
47+
logger.info(f"🤖 AGENT {self.agent_id}: Starting solution generation (temp: {self.temperature}, effort: {self._get_reasoning_effort()})")
48+
logger.info(f"🤖 AGENT {self.agent_id}: Problem length: {len(problem)} characters")
4649

4750
# Prepare the prompt
4851
exploration_prompt = AGENT_EXPLORATION_PROMPT.format(
@@ -54,13 +57,16 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
5457
# Configure reasoning parameters - simplified with effort only
5558
reasoning_effort = self._get_reasoning_effort()
5659
max_tokens = self.config['max_tokens']
60+
logger.info(f"🤖 AGENT {self.agent_id}: Using max_tokens={max_tokens}, reasoning_effort={reasoning_effort}")
5761

5862
reasoning_config = {
5963
"effort": reasoning_effort
6064
}
6165

6266
try:
6367
# Make API call with reasoning via extra_body for OpenRouter compatibility
68+
api_start = time.time()
69+
logger.info(f"🤖 AGENT {self.agent_id}: Making API call to {self.model}...")
6470
response = self.client.chat.completions.create(
6571
model=self.model,
6672
messages=[
@@ -74,15 +80,23 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
7480
"reasoning": reasoning_config
7581
}
7682
)
83+
api_duration = time.time() - api_start
84+
logger.info(f"🤖 AGENT {self.agent_id}: API call completed in {api_duration:.2f}s")
7785

7886
solution_text = response.choices[0].message.content.strip()
7987

8088
# ENHANCED LOGGING: Log solution details
8189
solution_length = len(solution_text)
90+
word_count = len(solution_text.split())
91+
has_boxed = "\\boxed{" in solution_text
92+
has_proof_words = any(word in solution_text.lower() for word in ['therefore', 'thus', 'proof', 'qed'])
8293

83-
logger.info(f"Agent {self.agent_id} solution details:")
84-
logger.info(f" - Length: {solution_length} characters")
85-
logger.info(f" - Last 100 chars: ...{solution_text[-100:] if solution_length > 100 else solution_text}")
94+
logger.info(f"🤖 AGENT {self.agent_id}: Solution analysis:")
95+
logger.info(f" 📝 Length: {solution_length:,} chars, {word_count:,} words")
96+
logger.info(f" 📦 Has boxed answer: {has_boxed}")
97+
logger.info(f" 🔍 Has proof indicators: {has_proof_words}")
98+
logger.info(f" 📄 Preview: {solution_text[:200]}{'...' if len(solution_text) > 200 else ''}")
99+
logger.info(f" 📄 Last 100 chars: ...{solution_text[-100:] if solution_length > 100 else solution_text}")
86100

87101
# Extract reasoning tokens from the correct nested structure
88102
reasoning_tokens = 0
@@ -97,10 +111,12 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
97111
if reasoning_tokens == 0:
98112
reasoning_tokens = getattr(response.usage, 'reasoning_tokens', 0)
99113

100-
logger.info(f"Agent {self.agent_id} token usage: reasoning={reasoning_tokens}, total={total_tokens}")
114+
reasoning_ratio = (reasoning_tokens / total_tokens * 100) if total_tokens > 0 else 0
115+
logger.info(f"🤖 AGENT {self.agent_id}: Token usage: reasoning={reasoning_tokens:,}, total={total_tokens:,} ({reasoning_ratio:.1f}% reasoning)")
101116

102117
# Extract confidence from solution (heuristic based on response characteristics)
103118
confidence = self._estimate_confidence(solution_text)
119+
logger.info(f"🤖 AGENT {self.agent_id}: Estimated confidence: {confidence:.3f}")
104120

105121
# Create agent solution object with enhanced metadata
106122
agent_solution = AgentSolution(
@@ -113,11 +129,14 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
113129
temperature=self.temperature
114130
)
115131

116-
logger.info(f"Agent {self.agent_id} generated solution with {reasoning_tokens} reasoning tokens")
132+
total_duration = time.time() - start_time
133+
logger.info(f"🤖 AGENT {self.agent_id}: ✅ Solution generated in {total_duration:.2f}s (API: {api_duration:.2f}s, processing: {total_duration-api_duration:.2f}s)")
117134
return agent_solution, reasoning_tokens
118135

119136
except Exception as e:
120-
logger.error(f"Agent {self.agent_id} error generating solution: {str(e)}")
137+
error_duration = time.time() - start_time
138+
logger.error(f"🤖 AGENT {self.agent_id}: ❌ Error generating solution after {error_duration:.2f}s: {str(e)}")
139+
logger.error(f"🤖 AGENT {self.agent_id}: Model: {self.model}, Temperature: {self.temperature}, Max tokens: {max_tokens}")
121140
# Return empty solution with error indication
122141
error_message = f"Error generating solution: {str(e)}"
123142
error_solution = AgentSolution(
@@ -133,7 +152,10 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
133152

134153
def verify_solution(self, problem: str, solution: str, verifier_id: int, solution_agent_id: int, request_id: str = None) -> VerificationResult:
135154
"""Verify a solution using mathematical reasoning"""
136-
logger.info(f"Agent {self.agent_id} verifying solution (verifier_id: {verifier_id})")
155+
import time
156+
start_time = time.time()
157+
logger.info(f"🔍 VERIFIER {self.agent_id}: Starting verification (target: Agent {solution_agent_id}, verifier_id: {verifier_id})")
158+
logger.info(f"🔍 VERIFIER {self.agent_id}: Solution length: {len(solution):,} chars")
137159

138160
verification_prompt = VERIFICATION_PROMPT.format(
139161
problem=problem,
@@ -144,6 +166,8 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
144166
max_tokens = self.config['max_tokens']
145167

146168
try:
169+
api_start = time.time()
170+
logger.info(f"🔍 VERIFIER {self.agent_id}: Making verification API call...")
147171
response = self.client.chat.completions.create(
148172
model=self.model,
149173
messages=[
@@ -159,13 +183,19 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
159183
}
160184
}
161185
)
186+
api_duration = time.time() - api_start
187+
logger.info(f"🔍 VERIFIER {self.agent_id}: Verification API call completed in {api_duration:.2f}s")
162188

163189
verification_text = response.choices[0].message.content.strip()
164190

165191
# Parse verification result
166192
assessment, confidence, issues, suggestions = self._parse_verification(verification_text)
193+
logger.info(f"🔍 VERIFIER {self.agent_id}: Assessment: {assessment}, Confidence: {confidence:.3f}")
194+
logger.info(f"🔍 VERIFIER {self.agent_id}: Issues found: {len(issues)}, Suggestions: {len(suggestions)}")
195+
if issues:
196+
logger.info(f"🔍 VERIFIER {self.agent_id}: Key issues: {issues[:2]}")
167197

168-
return VerificationResult(
198+
result = VerificationResult(
169199
verifier_id=verifier_id,
170200
solution_id=f"agent_{solution_agent_id}_iter_0", # Use the solution's agent_id
171201
assessment=assessment,
@@ -176,8 +206,13 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
176206
timestamp=datetime.now()
177207
)
178208

209+
total_duration = time.time() - start_time
210+
logger.info(f"🔍 VERIFIER {self.agent_id}: ✅ Verification completed in {total_duration:.2f}s")
211+
return result
212+
179213
except Exception as e:
180-
logger.error(f"Agent {self.agent_id} error in verification: {str(e)}")
214+
error_duration = time.time() - start_time
215+
logger.error(f"🔍 VERIFIER {self.agent_id}: ❌ Verification error after {error_duration:.2f}s: {str(e)}")
181216
return VerificationResult(
182217
verifier_id=verifier_id,
183218
solution_id=f"agent_{solution_agent_id}_iter_0",
@@ -191,7 +226,11 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
191226

192227
def improve_solution(self, problem: str, current_solution: str, feedback: str, issues: list, request_id: str = None) -> Tuple[str, int]:
193228
"""Improve a solution based on verification feedback"""
194-
logger.info(f"Agent {self.agent_id} improving solution based on feedback")
229+
import time
230+
start_time = time.time()
231+
logger.info(f"🔧 IMPROVER {self.agent_id}: Starting solution improvement")
232+
logger.info(f"🔧 IMPROVER {self.agent_id}: Current solution: {len(current_solution):,} chars")
233+
logger.info(f"🔧 IMPROVER {self.agent_id}: Issues to address: {len(issues)}")
195234

196235
improvement_prompt = IMPROVEMENT_PROMPT.format(
197236
problem=problem,
@@ -204,6 +243,8 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
204243
max_tokens = self.config['max_tokens']
205244

206245
try:
246+
api_start = time.time()
247+
logger.info(f"🔧 IMPROVER {self.agent_id}: Making improvement API call...")
207248
response = self.client.chat.completions.create(
208249
model=self.model,
209250
messages=[
@@ -219,40 +260,61 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
219260
}
220261
}
221262
)
263+
api_duration = time.time() - api_start
264+
logger.info(f"🔧 IMPROVER {self.agent_id}: Improvement API call completed in {api_duration:.2f}s")
222265

223266
improved_solution = response.choices[0].message.content.strip()
224267
reasoning_tokens = getattr(response.usage, 'reasoning_tokens', 0)
225268

226-
logger.info(f"Agent {self.agent_id} improved solution with {reasoning_tokens} reasoning tokens")
269+
# Log improvement analysis
270+
length_change = len(improved_solution) - len(current_solution)
271+
logger.info(f"🔧 IMPROVER {self.agent_id}: Solution length change: {length_change:+,} chars")
272+
logger.info(f"🔧 IMPROVER {self.agent_id}: Improved solution preview: {improved_solution[:200]}{'...' if len(improved_solution) > 200 else ''}")
273+
274+
total_duration = time.time() - start_time
275+
logger.info(f"🔧 IMPROVER {self.agent_id}: ✅ Solution improved in {total_duration:.2f}s with {reasoning_tokens:,} reasoning tokens")
227276
return improved_solution, reasoning_tokens
228277

229278
except Exception as e:
230-
logger.error(f"Agent {self.agent_id} error improving solution: {str(e)}")
279+
error_duration = time.time() - start_time
280+
logger.error(f"🔧 IMPROVER {self.agent_id}: ❌ Improvement error after {error_duration:.2f}s: {str(e)}")
281+
logger.warning(f"🔧 IMPROVER {self.agent_id}: Returning original solution due to error")
231282
return current_solution, 0 # Return original solution if improvement fails
232283

233284
def _estimate_confidence(self, solution: str) -> float:
234285
"""Estimate confidence based on solution characteristics"""
235286
confidence = 0.5 # Base confidence
287+
confidence_factors = []
236288

237289
# Check for mathematical rigor indicators
238290
if "\\boxed{" in solution:
239291
confidence += 0.2
292+
confidence_factors.append("boxed_answer")
240293
if "therefore" in solution.lower() or "thus" in solution.lower():
241294
confidence += 0.1
295+
confidence_factors.append("logical_connectors")
242296
if "proof" in solution.lower():
243297
confidence += 0.1
298+
confidence_factors.append("proof_structure")
244299
if len(solution.split()) > 200: # Detailed solutions tend to be more confident
245300
confidence += 0.1
301+
confidence_factors.append("detailed_solution")
246302
if "let" in solution.lower() and "assume" in solution.lower():
247303
confidence += 0.1
304+
confidence_factors.append("formal_approach")
248305

249306
# Check for uncertainty indicators
307+
uncertainty_factors = []
250308
if "might" in solution.lower() or "possibly" in solution.lower():
251309
confidence -= 0.1
310+
uncertainty_factors.append("hedging_language")
252311
if "unsure" in solution.lower() or "not sure" in solution.lower():
253312
confidence -= 0.2
313+
uncertainty_factors.append("explicit_uncertainty")
254314

255-
return max(0.1, min(1.0, confidence))
315+
final_confidence = max(0.1, min(1.0, confidence))
316+
logger.debug(f"🤖 AGENT {self.agent_id}: Confidence factors: +{confidence_factors}, -{uncertainty_factors} → final={final_confidence:.3f}")
317+
return final_confidence
256318

257319
def _parse_verification(self, verification_text: str) -> Tuple[str, float, list, list]:
258320
"""Parse verification result to extract structured information"""

0 commit comments

Comments
 (0)