From 48d5f8f93e3df2628eace789750f3b57ae8a7dca Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 01:59:04 +0500 Subject: [PATCH 01/13] Refactor react_agent.py to improve security and maintainability --- WebAgent/WebSailor/src/react_agent.py | 97 ++++++++++++++------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index 9e5e1db2..2517a182 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -38,8 +38,8 @@ def __init__(self, def call_server(self, msgs, max_tries=10): # Set OpenAI API key and base URL using vLLM API server - openai_api_key = "EMPTY" - openai_api_base = "http://127.0.0.1:6001/v1" + openai_api_key = os.getenv("OPENAI_API_KEY", "EMPTY") + openai_api_base = os.getenv("OPENAI_API_BASE", "http://127.0.0.1:6001/v1") client = OpenAI( api_key=openai_api_key, @@ -77,15 +77,53 @@ def count_tokens(self, messages, model="gpt-4o"): return len(tokenizer.encode(full_prompt)) + def _process_tool_call(self, content, messages): + if '' in content and '' in content: + tool_call = content.split('')[1].split('')[0] + try: + tool_call = json.loads(tool_call) + tool_name = tool_call.get('name', '') + tool_args = tool_call.get('arguments', {}) + result = self._call_tool(tool_name, tool_args) + except: + result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' + result = "\n" + result + "\n" + messages.append({"role": "user", "content": result}) + return messages + + def _handle_token_limit(self, messages, question, answer, rollout_id): + print(f"Token count exceeds limit") + + messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" + content = self.call_server(messages) + messages.append({"role": "assistant", "content": content.strip()}) + if '' in content and '' in content: + prediction = messages[-1]['content'].split('')[1].split('')[0] + termination = 'generate an answer as token limit reached' + else: + prediction = messages[-1]['content'] + termination = 'format error: generate an answer as token limit reached' + return self._generate_result(question, answer, rollout_id, messages, prediction, termination) + + def _generate_result(self, question, answer, rollout_id, messages, prediction, termination): + return { + "question": question, + "answer": answer, + "rollout_id": rollout_id, + "messages": messages, + "prediction": prediction, + "termination": termination + } + def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[Message]]: self.model=model - try: - question = data['item']['question'] - except: - raw_msg = data['item']['messages'][1]["content"] - question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg + question = data.get('item', {}).get('question', '') + if not question: + raw_msg = data.get('item', {}).get('messages', [{}, {}])[1].get("content", "") + question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg - answer = data['item']['answer'] + answer = data.get('item', {}).get('answer', '') + rollout_id = data.get('rollout_id', '') self.user_prompt = user_prompt self.user_prompt = self.user_prompt + question messages = [{"role": "system", "content": self.system_message}, {"role": "user", "content": self.user_prompt}] @@ -100,17 +138,7 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M pos = content.find('') content = content[:pos] messages.append({"role": "assistant", "content": content.strip()}) - if '' in content and '' in content: - tool_call = content.split('')[1].split('')[0] - try: - tool_call = json.loads(tool_call) - tool_name = tool_call.get('name', '') - tool_args = tool_call.get('arguments', {}) - result = self._call_tool(tool_name, tool_args) - except: - result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' - result = "\n" + result + "\n" - messages.append({"role": "user", "content": result}) + messages = self._process_tool_call(content, messages) if '' in content and '' in content: termination = 'answer' break @@ -122,26 +150,7 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M print(f"round: {round}, token count: {token_count}") if token_count > max_tokens: - print(f"Token count exceeds limit: {token_count} > {max_tokens}") - - messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" - content = self.call_server(messages) - messages.append({"role": "assistant", "content": content.strip()}) - if '' in content and '' in content: - prediction = messages[-1]['content'].split('')[1].split('')[0] - termination = 'generate an answer as token limit reached' - else: - prediction = messages[-1]['content'] - termination = 'format error: generate an answer as token limit reached' - result = { - "question": question, - "answer": answer, - "rollout_id": data['rollout_id'], - "messages": messages, - "prediction": prediction, - "termination": termination - } - return result + return self._handle_token_limit(messages, question, answer, rollout_id) if '' in messages[-1]['content']: prediction = messages[-1]['content'].split('')[1].split('')[0] @@ -151,12 +160,4 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M termination = 'answer not found' if num_llm_calls_available == 0: termination = 'exceed available llm calls' - result = { - "question": question, - "answer": answer, - "rollout_id": data['rollout_id'], - "messages": messages, - "prediction": prediction, - "termination": termination - } - return result + return self._generate_result(question, answer, rollout_id, messages, prediction, termination) \ No newline at end of file From a9256b7dfa1355f42d04cfef79f08c5c592758ad Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 11:34:35 +0500 Subject: [PATCH 02/13] chore(ci): add basic Ruff lint workflow --- .github/workflows/ci-lint.yml | 20 ++++++++++++++++++++ commit_message.txt | 1 + 2 files changed, 21 insertions(+) create mode 100644 .github/workflows/ci-lint.yml create mode 100644 commit_message.txt diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml new file mode 100644 index 00000000..864e85b7 --- /dev/null +++ b/.github/workflows/ci-lint.yml @@ -0,0 +1,20 @@ +name: CI - Lint + +on: + pull_request: + branches: ["main"] + push: + branches: ["chore/add-ci-lint"] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install ruff + run: pip install ruff==0.5.6 + - name: Lint + run: ruff check --output-format=github . \ No newline at end of file diff --git a/commit_message.txt b/commit_message.txt new file mode 100644 index 00000000..0e40afe0 --- /dev/null +++ b/commit_message.txt @@ -0,0 +1 @@ +Refactor react_agent.py to improve security and maintainability \ No newline at end of file From ee250218b8b6acd6e596bae47ba0f23bf24dfb84 Mon Sep 17 00:00:00 2001 From: MirzaSamadAhmedBaig <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com> Date: Sun, 21 Sep 2025 11:37:06 +0500 Subject: [PATCH 03/13] deleting cm --- commit_message.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 commit_message.txt diff --git a/commit_message.txt b/commit_message.txt deleted file mode 100644 index 0e40afe0..00000000 --- a/commit_message.txt +++ /dev/null @@ -1 +0,0 @@ -Refactor react_agent.py to improve security and maintainability \ No newline at end of file From d60f2545369347a48f8d4aa1253f6abb9c021f6c Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 11:48:06 +0500 Subject: [PATCH 04/13] Fix E722 ruff errors --- .../WebDancer/demos/assistant_qwq_chat.py | 8 ++----- WebAgent/WebDancer/demos/llm/oai.py | 3 +-- .../WebDancer/demos/tools/private/search.py | 6 ++--- .../WebDancer/demos/tools/private/visit.py | 4 ++-- WebAgent/WebDancer/demos/utils/logs.py | 1 - WebAgent/WebSailor/src/evaluate.py | 4 ++-- WebAgent/WebSailor/src/react_agent.py | 8 +++---- WebAgent/WebSailor/src/tool_search.py | 7 +++--- WebAgent/WebSailor/src/tool_visit.py | 11 +++++----- WebAgent/WebWalker/src/agent.py | 4 ++-- WebAgent/WebWalker/src/app.py | 1 - evaluation/evaluate_deepsearch_official.py | 21 ++++++++---------- inference/file_tools/file_parser.py | 7 +++--- inference/file_tools/idp.py | 6 ++--- inference/file_tools/video_agent.py | 11 ---------- inference/react_agent.py | 15 +++++-------- inference/run_multi_react.py | 2 -- inference/tool_file.py | 22 ++++--------------- inference/tool_python.py | 15 ++++++------- inference/tool_scholar.py | 7 +++--- inference/tool_search.py | 9 ++------ inference/tool_visit.py | 10 ++------- 22 files changed, 62 insertions(+), 120 deletions(-) diff --git a/WebAgent/WebDancer/demos/assistant_qwq_chat.py b/WebAgent/WebDancer/demos/assistant_qwq_chat.py index 3fa38ff3..b0a48a69 100644 --- a/WebAgent/WebDancer/demos/assistant_qwq_chat.py +++ b/WebAgent/WebDancer/demos/assistant_qwq_chat.py @@ -2,15 +2,11 @@ import os -from qwen_agent.agents import Assistant -from qwen_agent.utils.output_beautify import typewriter_print from demos.agents.search_agent import SearchAgent from demos.llm.oai import TextChatAtOAI -from demos.llm.qwen_dashscope import QwenChatAtDS from demos.gui.web_ui import WebUI from demos.utils.date import date2str, get_date_now -from demos.tools import Visit, Search ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource') @@ -50,8 +46,8 @@ def make_system_prompt(): llm=llm_cfg, function_list=tools, system_message="", - name=f'WebDancer', - description=f"I am WebDancer, a web information seeking agent, welcome to try!", + name='WebDancer', + description="I am WebDancer, a web information seeking agent, welcome to try!", extra={ 'reasoning': reasoning, 'max_llm_calls': max_llm_calls, diff --git a/WebAgent/WebDancer/demos/llm/oai.py b/WebAgent/WebDancer/demos/llm/oai.py index 0df27fe9..53a261da 100644 --- a/WebAgent/WebDancer/demos/llm/oai.py +++ b/WebAgent/WebDancer/demos/llm/oai.py @@ -2,7 +2,6 @@ import json import logging import os -from http import HTTPStatus from pprint import pformat from typing import Dict, Iterator, List, Optional, Literal, Union @@ -15,7 +14,7 @@ from qwen_agent.llm.base import ModelServiceError, register_llm from qwen_agent.llm.function_calling import BaseFnCallModel, simulate_response_completion_with_chat -from qwen_agent.llm.schema import ASSISTANT, Message, FunctionCall +from qwen_agent.llm.schema import ASSISTANT, Message from qwen_agent.log import logger diff --git a/WebAgent/WebDancer/demos/tools/private/search.py b/WebAgent/WebDancer/demos/tools/private/search.py index 29278ec7..7f994ee5 100644 --- a/WebAgent/WebDancer/demos/tools/private/search.py +++ b/WebAgent/WebDancer/demos/tools/private/search.py @@ -30,7 +30,7 @@ def call(self, params: str, **kwargs) -> str: try: params = self._verify_json_format_args(params) query = params["query"][:MAX_MULTIQUERY_NUM] - except: + except Exception: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): @@ -57,9 +57,9 @@ def google_search(self, query: str) -> str: response = requests.post(url, headers=headers, data=json.dumps(data)) results = response.json() break - except Exception as e: + except Exception: if i == 4: - return f"Google search Timeout, return None, Please try again later." + return "Google search Timeout, return None, Please try again later." continue if response.status_code != 200: diff --git a/WebAgent/WebDancer/demos/tools/private/visit.py b/WebAgent/WebDancer/demos/tools/private/visit.py index b911cc60..fef4abe4 100644 --- a/WebAgent/WebDancer/demos/tools/private/visit.py +++ b/WebAgent/WebDancer/demos/tools/private/visit.py @@ -62,7 +62,7 @@ def jina_readpage(url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception as e: + except Exception: if attempt == max_retries - 1: return "[visit] Failed to read page." @@ -97,7 +97,7 @@ def call(self, params: str, **kwargs) -> str: params = self._verify_json_format_args(params) url = params["url"] goal = params["goal"] - except: + except Exception: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" if isinstance(url, str): response = self.readpage(url, goal) diff --git a/WebAgent/WebDancer/demos/utils/logs.py b/WebAgent/WebDancer/demos/utils/logs.py index c8cd4329..629814de 100644 --- a/WebAgent/WebDancer/demos/utils/logs.py +++ b/WebAgent/WebDancer/demos/utils/logs.py @@ -1,6 +1,5 @@ # coding=utf-8 import os -import sys import logging diff --git a/WebAgent/WebSailor/src/evaluate.py b/WebAgent/WebSailor/src/evaluate.py index 6b3a22c2..0386a54e 100644 --- a/WebAgent/WebSailor/src/evaluate.py +++ b/WebAgent/WebSailor/src/evaluate.py @@ -289,7 +289,7 @@ def main(): for i in [1, 2, 3] } - print(f"===========") + print("===========") print(f"Avg. Pass@3 {avg_pass_at_3}%") print(f"Best Pass@1 {best_pass_at_1}%") print(f"Pass@3 {pass_at_3}%") @@ -297,7 +297,7 @@ def main(): print(f"# Invalid {aggr_statistics['num_invalid']} # Extra Length {aggr_statistics['extra_length']}") print(f"Avg. Action {aggr_statistics['avg_action']:.2f} Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f} Avg. Search Action {aggr_statistics['avg_search_action']:.2f} Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f} Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}") - print(f"===========" ) + print("===========" ) overall_eval_dict = { "dataset": dataset, diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index 2517a182..79df9acd 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -61,7 +61,7 @@ def call_server(self, msgs, max_tries=10): except Exception as e: if attempt == (max_tries - 1): print(f"SGLang server error {e}") - return f"SGLang server error" + return "SGLang server error" continue return "SGLang server empty response" @@ -69,7 +69,7 @@ def call_server(self, msgs, max_tries=10): def count_tokens(self, messages, model="gpt-4o"): try: tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) - except Exception as e: + except Exception: tokenizer = tiktoken.encoding_for_model(model) full_message = [Message(**x) for x in messages] @@ -85,14 +85,14 @@ def _process_tool_call(self, content, messages): tool_name = tool_call.get('name', '') tool_args = tool_call.get('arguments', {}) result = self._call_tool(tool_name, tool_args) - except: + except Exception: result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' result = "\n" + result + "\n" messages.append({"role": "user", "content": result}) return messages def _handle_token_limit(self, messages, question, answer, rollout_id): - print(f"Token count exceeds limit") + print("Token count exceeds limit") messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" content = self.call_server(messages) diff --git a/WebAgent/WebSailor/src/tool_search.py b/WebAgent/WebSailor/src/tool_search.py index 3643c53a..ba0da496 100644 --- a/WebAgent/WebSailor/src/tool_search.py +++ b/WebAgent/WebSailor/src/tool_search.py @@ -3,7 +3,6 @@ from concurrent.futures import ThreadPoolExecutor from typing import List, Union import requests -from qwen_agent.tools.base import BaseTool, register_tool import os SEARCH_API_URL = os.getenv("SEARCH_API_URL") @@ -50,7 +49,7 @@ def google_search(self, query: str): except Exception as e: print(e) if i == 4: - return f"Google search Timeout, return None, Please try again later." + return "Google search Timeout, return None, Please try again later." if response.status_code != 200: raise Exception(f"Error: {response.status_code} - {response.text}") @@ -82,7 +81,7 @@ def google_search(self, query: str): content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets) return content - except: + except Exception: return f"No results found for '{query}'. Try with a more general query, or remove the year filter." @@ -90,7 +89,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: assert GOOGLE_SEARCH_KEY is not None, "Please set the GOOGLE_SEARCH_KEY environment variable." try: query = params["query"] - except: + except Exception: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/WebAgent/WebSailor/src/tool_visit.py b/WebAgent/WebSailor/src/tool_visit.py index ac8e5e61..fbae5e9b 100644 --- a/WebAgent/WebSailor/src/tool_visit.py +++ b/WebAgent/WebSailor/src/tool_visit.py @@ -6,7 +6,6 @@ from prompt import EXTRACTOR_PROMPT import os from openai import OpenAI -import random WEBCONTENT_MAXLENGTH = int(os.getenv("WEBCONTENT_MAXLENGTH", 150000)) @@ -46,7 +45,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: url = params["url"] goal = params["goal"] - except: + except Exception: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" if isinstance(url, str): @@ -87,14 +86,14 @@ def call_server(self, msgs, max_tries=10): if content: try: json.loads(content) - except: + except Exception: # extract json from string left = content.find('{') right = content.rfind('}') if left != -1 and right != -1 and left <= right: content = content[left:right+1] return content - except: + except Exception: if attempt == (max_tries - 1): return "" continue @@ -129,7 +128,7 @@ def jina_readpage(self, url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception as e: + except Exception: if attempt == max_retries - 1: return "[visit] Failed to read page." @@ -191,7 +190,7 @@ def readpage(self, url: str, goal: str) -> str: # 尝试 parse json raw = json.loads(raw) break - except: + except Exception: raw = self.call_server(messages) parse_retry_times += 1 # parse 失败 diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py index c8e0421e..c5274e7e 100644 --- a/WebAgent/WebWalker/src/agent.py +++ b/WebAgent/WebWalker/src/agent.py @@ -63,7 +63,7 @@ def observation_information_extraction(self, query, observation): if "true" in response.choices[0].message.content: try: return json.loads(response.choices[0].message.content)["information"] - except: + except Exception: return response.choices[0].message.content else: return None @@ -97,7 +97,7 @@ def critic_information(self, query, memory): if "true" in response.choices[0].message.content: try: return json.loads(response.choices[0].message.content)["answer"] - except: + except Exception: return response.choices[0].message.content else: return None diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py index 73bc5571..73513686 100644 --- a/WebAgent/WebWalker/src/app.py +++ b/WebAgent/WebWalker/src/app.py @@ -3,7 +3,6 @@ import json5 from agent import WebWalker from qwen_agent.tools.base import BaseTool, register_tool -import os import re import json import asyncio diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py index 036973b1..aaf384cb 100644 --- a/evaluation/evaluate_deepsearch_official.py +++ b/evaluation/evaluate_deepsearch_official.py @@ -1,7 +1,5 @@ -from pydantic import BaseModel from openai import OpenAI import concurrent.futures -from typing import Literal import litellm import os import argparse @@ -9,7 +7,6 @@ import concurrent from tqdm import tqdm from transformers import AutoTokenizer -import re from prompt import * import traceback import tiktoken @@ -178,7 +175,7 @@ def count_tokens_with_tokenizer(text, tokenizer): return len(tokenizer.encode(text)) else: return len(tokenizer.encode(text)) - except: + except Exception: return len(text) // 4 @@ -224,7 +221,7 @@ def single_round_statistics(input_file): try: tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", "")) - except Exception as e: + except Exception: tokenizer = tiktoken.encoding_for_model("gpt-4o") for item in contents: @@ -303,7 +300,7 @@ def single_round_statistics(input_file): try: if len(tokenizer.encode("".join([msg["content"] for msg in messages]))) > 30000: num_extra += 1 - except: + except Exception: pass total_questions = len(contents) @@ -329,7 +326,7 @@ def calculate_enhanced_statistics(round_results, round_items): try: tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", "")) - except Exception as e: + except Exception: tokenizer = tiktoken.encoding_for_model("gpt-4o") enhanced_stats = {} @@ -345,7 +342,7 @@ def calculate_enhanced_statistics(round_results, round_items): continue try: matching_item = [item for item in items if item['messages'][1]['content'] == result['question']] - except: + except Exception: items = [item for item in items if len(item['messages'])>0] matching_item = [item for item in items if item['messages'][1]['content'] == result['question']] if not matching_item: @@ -530,7 +527,7 @@ def main(): for i in [1, 2, 3] } - print(f"===========") + print("===========") print(f"Avg. Pass@3 {avg_pass_at_3}%") print(f"Best Pass@1 {best_pass_at_1}%") print(f"Pass@3 {pass_at_3}%") @@ -541,18 +538,18 @@ def main(): print(f"Avg. Action {aggr_statistics['avg_action']:.2f} Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f} Avg. Search Action {aggr_statistics['avg_search_action']:.2f} Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f} Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}") enhanced_statistics = calculate_enhanced_statistics(round_results, round_items) - print(f"\n=== ADDITIONAL STATISTICS ===") + print("\n=== ADDITIONAL STATISTICS ===") print(f"Avg. Tool Calls per Question: {aggr_statistics['avg_tool_calls_per_question']:.2f}") print(f"Avg. Tool Calls per Question (Correctly Solved): {enhanced_statistics['avg_tool_calls_per_question_correctly_solved']:.2f}") print(f"Avg. Assistant Tokens per Question: {aggr_statistics['avg_assistant_tokens_per_question']:.2f}") print(f"Avg. Assistant Tokens per Question (Correctly Solved): {enhanced_statistics['avg_assistant_tokens_per_question_correctly_solved']:.2f}") print(f"Avg. Assistant Tokens per Message: {aggr_statistics['avg_assistant_tokens_per_message']:.2f}") - print(f"\n=== TERMINATION FREQUENCIES ===") + print("\n=== TERMINATION FREQUENCIES ===") for termination_type, frequency in aggr_statistics['termination_freq'].items(): print(f"{termination_type}: {frequency:.3f}") - print(f"===========" ) + print("===========" ) overall_eval_dict = { "dataset": dataset, diff --git a/inference/file_tools/file_parser.py b/inference/file_tools/file_parser.py index ecbace9b..3410da72 100644 --- a/inference/file_tools/file_parser.py +++ b/inference/file_tools/file_parser.py @@ -4,7 +4,6 @@ import time import zipfile import math -from pathlib import Path from typing import Any, Dict, List, Optional, Union from collections import Counter @@ -17,7 +16,7 @@ from tabulate import tabulate from qwen_agent.log import logger from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS -from qwen_agent.tools.base import BaseTool, register_tool +from qwen_agent.tools.base import BaseTool from qwen_agent.tools.storage import KeyNotExistsError, Storage from file_tools.utils import (get_file_type, hash_sha256, is_http_url, get_basename_from_url, sanitize_chrome_file_path, save_url_to_local_work_dir) @@ -522,7 +521,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]: if USE_IDP and file_type in idp_types: try: results = parse_file_by_idp(file_path=file_path) - except Exception as e: + except Exception: results = self.parsers[file_type](file_path) else: results = self.parsers[file_type](file_path) @@ -536,7 +535,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]: tokens += para['token'] if not results or not tokens: - logger.error(f"Parsing failed: No information was parsed") + logger.error("Parsing failed: No information was parsed") raise FileParserError("Document parsing failed") else: self._cache_result(file_path, results) diff --git a/inference/file_tools/idp.py b/inference/file_tools/idp.py index 71199cbf..b77872ba 100644 --- a/inference/file_tools/idp.py +++ b/inference/file_tools/idp.py @@ -1,12 +1,10 @@ import os -import json from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client from alibabacloud_tea_openapi import models as open_api_models from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models from alibabacloud_tea_util.client import Client as UtilClient from alibabacloud_tea_util import models as util_models -from alibabacloud_credentials.client import Client as CredClient key = os.environ.get('IDP_KEY_ID') secret = os.environ.get('IDP_KEY_SECRET') @@ -18,7 +16,7 @@ def __init__(self): access_key_id=key, access_key_secret=secret ) - config.endpoint = f'docmind-api.cn-hangzhou.aliyuncs.com' + config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com' self.client = docmind_api20220711Client(config) def file_submit_with_url(self, file_url): @@ -84,7 +82,7 @@ def file_parser_query(self,fid): responses = result else: responses['layouts'].extend(result['layouts']) - except Exception as error: + except Exception: return None,status_parse return responses,status_parse \ No newline at end of file diff --git a/inference/file_tools/video_agent.py b/inference/file_tools/video_agent.py index 7d9b709c..37857d98 100644 --- a/inference/file_tools/video_agent.py +++ b/inference/file_tools/video_agent.py @@ -9,22 +9,11 @@ """ import sys import os -import re -import copy import json -from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional -import json5 import asyncio -from openai import OpenAI from qwen_agent.tools.base import BaseTool, register_tool -from qwen_agent.agents import Assistant -from qwen_agent.llm import BaseChatModel -from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE from qwen_agent.tools import BaseTool -from qwen_agent.log import logger -from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer -from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) diff --git a/inference/react_agent.py b/inference/react_agent.py index 1824666c..6c6e57be 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -1,7 +1,6 @@ -import json import json5 import os -from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union +from typing import Dict, List, Optional, Union from qwen_agent.llm.schema import Message from qwen_agent.utils.utils import build_text_completion_prompt from openai import OpenAI, APIError, APIConnectionError, APITimeoutError @@ -10,10 +9,8 @@ from datetime import datetime from qwen_agent.agents.fncall_agent import FnCallAgent from qwen_agent.llm import BaseChatModel -from qwen_agent.llm.schema import ASSISTANT, DEFAULT_SYSTEM_MESSAGE, Message from qwen_agent.settings import MAX_LLM_CALL_PER_RUN from qwen_agent.tools import BaseTool -from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs from prompt import * import time import asyncio @@ -109,12 +106,12 @@ def call_server(self, msgs, planning_port, max_tries=10): else: print("Error: All retry attempts have been exhausted. The call has failed.") - return f"vllm server error!!!" + return "vllm server error!!!" def count_tokens(self, messages, model="gpt-4o"): try: tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) - except Exception as e: + except Exception: tokenizer = tiktoken.encoding_for_model(model) full_message = [Message(**x) for x in messages] @@ -126,7 +123,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: self.model=model try: question = data['item']['question'] - except: + except Exception: raw_msg = data['item']['messages'][1]["content"] question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg @@ -168,7 +165,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: try: code_raw=content.split('')[1].split('')[0].split('')[1].split('')[0].strip() result = TOOL_MAP['PythonInterpreter'].call(code_raw) - except: + except Exception: result = "[Python Interpreter Error]: Formatting error." else: @@ -177,7 +174,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: tool_args = tool_call.get('arguments', {}) result = self.custom_call_tool(tool_name, tool_args) - except: + except Exception: result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' result = "\n" + result + "\n" # print(result) diff --git a/inference/run_multi_react.py b/inference/run_multi_react.py index 1056a0a7..4a517eaf 100644 --- a/inference/run_multi_react.py +++ b/inference/run_multi_react.py @@ -5,9 +5,7 @@ import concurrent.futures from tqdm import tqdm import threading -from datetime import datetime from react_agent import MultiTurnReactAgent -import time import math if __name__ == "__main__": diff --git a/inference/tool_file.py b/inference/tool_file.py index 77c4960f..7fb1ce91 100644 --- a/inference/tool_file.py +++ b/inference/tool_file.py @@ -9,26 +9,12 @@ """ import sys import os -import re -import time -import copy import json -from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional -import json5 -import asyncio -from openai import OpenAI, AsyncOpenAI -import pdb -import bdb - -from qwen_agent.tools.base import BaseTool, register_tool -from qwen_agent.agents import Assistant -from qwen_agent.llm import BaseChatModel -from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS -from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE + +from qwen_agent.tools.base import BaseTool +from qwen_agent.settings import DEFAULT_MAX_INPUT_TOKENS from qwen_agent.tools import BaseTool -from qwen_agent.log import logger -from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer -from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS +from qwen_agent.utils.tokenization_qwen import count_tokens current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) diff --git a/inference/tool_python.py b/inference/tool_python.py index e8e55223..d851ee84 100644 --- a/inference/tool_python.py +++ b/inference/tool_python.py @@ -1,14 +1,13 @@ import re -from typing import Dict, List, Optional, Union +from typing import Dict, Optional, Union import json5 from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool from qwen_agent.utils.utils import extract_code -from sandbox_fusion import run_code, RunCodeRequest, RunStatus +from sandbox_fusion import run_code, RunCodeRequest from requests.exceptions import Timeout import os import random import time -from concurrent.futures import ThreadPoolExecutor, as_completed # Array of sandbox fusion endpoints SANDBOX_FUSION_ENDPOINTS = [] @@ -80,12 +79,12 @@ def call(self, params, files= None, timeout = 50, **kwargs) -> str: if code_result.run_result.stderr: result.append(f"stderr:\n{code_result.run_result.stderr}") if code_result.run_result.execution_time >= timeout-1: - result.append(f"[PythonInterpreter Error] TimeoutError: Execution timed out.") + result.append("[PythonInterpreter Error] TimeoutError: Execution timed out.") result = '\n'.join(result) print('SUCCESS RUNNING TOOL') return result if result.strip() else 'Finished execution.' - except Timeout as e: + except Timeout: last_error = f'[Python Interpreter Error] TimeoutError: Execution timed out on endpoint {endpoint}.' print(f"Timeout on attempt {attempt + 1}: {last_error}") if attempt == 4: # Last attempt @@ -137,7 +136,7 @@ def call_specific_endpoint(self, params: Union[str, dict], endpoint: str, timeou execution_time = end_time - start_time return True, result if result.strip() else 'Finished execution.', execution_time - except Timeout as e: - return False, f'[Python Interpreter Error] TimeoutError: Execution timed out.', None + except Timeout: + return False, '[Python Interpreter Error] TimeoutError: Execution timed out.', None except Exception as e: - return False, f'[Python Interpreter Error]: {str(e)}', None + return False, f'[Python Interpreter Error]: {str(e)}', None \ No newline at end of file diff --git a/inference/tool_scholar.py b/inference/tool_scholar.py index ae021b38..90e97c57 100644 --- a/inference/tool_scholar.py +++ b/inference/tool_scholar.py @@ -1,6 +1,5 @@ import os import json -import requests from typing import Union, List from qwen_agent.tools.base import BaseTool, register_tool from concurrent.futures import ThreadPoolExecutor @@ -44,7 +43,7 @@ def google_scholar_with_serp(self, query: str): except Exception as e: print(e) if i == 4: - return f"Google Scholar Timeout, return None, Please try again later." + return "Google Scholar Timeout, return None, Please try again later." continue @@ -87,7 +86,7 @@ def google_scholar_with_serp(self, query: str): content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets) return content - except: + except Exception: return f"No results found for '{query}'. Try with a more general query." @@ -96,7 +95,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: params = self._verify_json_format_args(params) query = params["query"] - except: + except Exception: return "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/inference/tool_search.py b/inference/tool_search.py index 1a3f7b53..ea693406 100644 --- a/inference/tool_search.py +++ b/inference/tool_search.py @@ -1,13 +1,8 @@ import json -from concurrent.futures import ThreadPoolExecutor from typing import List, Union -import requests from qwen_agent.tools.base import BaseTool, register_tool -import asyncio -from typing import Dict, List, Optional, Union -import uuid +from typing import Optional import http.client -import json import os @@ -68,7 +63,7 @@ def contains_chinese_basic(text: str) -> bool: except Exception as e: print(e) if i == 4: - return f"Google search Timeout, return None, Please try again later." + return "Google search Timeout, return None, Please try again later." continue data = res.read() diff --git a/inference/tool_visit.py b/inference/tool_visit.py index 92e4e3af..cdee8bfa 100644 --- a/inference/tool_visit.py +++ b/inference/tool_visit.py @@ -1,17 +1,11 @@ import json import os -import signal -import threading -from concurrent.futures import ThreadPoolExecutor, as_completed from typing import List, Union import requests from qwen_agent.tools.base import BaseTool, register_tool from prompt import EXTRACTOR_PROMPT from openai import OpenAI -import random -from urllib.parse import urlparse, unquote import time -from transformers import AutoTokenizer import tiktoken VISIT_SERVER_TIMEOUT = int(os.getenv("VISIT_SERVER_TIMEOUT", 200)) @@ -122,7 +116,7 @@ def call_server(self, msgs, max_retries=2): if left != -1 and right != -1 and left <= right: content = content[left:right+1] return content - except Exception as e: + except Exception: # print(e) if attempt == (max_retries - 1): return "" @@ -159,7 +153,7 @@ def jina_readpage(self, url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception as e: + except Exception: time.sleep(0.5) if attempt == max_retries - 1: return "[visit] Failed to read page." From ff94125461682fed4c23b3527559d4eb2466c54d Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 11:49:20 +0500 Subject: [PATCH 05/13] Fix remaining E722 ruff errors --- commit_message.txt | 1 + inference/tool_search.py | 4 ++-- inference/tool_visit.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 commit_message.txt diff --git a/commit_message.txt b/commit_message.txt new file mode 100644 index 00000000..8f1b83e5 --- /dev/null +++ b/commit_message.txt @@ -0,0 +1 @@ +Fix remaining E722 ruff errors \ No newline at end of file diff --git a/inference/tool_search.py b/inference/tool_search.py index ea693406..499ff0cb 100644 --- a/inference/tool_search.py +++ b/inference/tool_search.py @@ -96,7 +96,7 @@ def contains_chinese_basic(text: str) -> bool: content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets) return content - except: + except Exception: return f"No results found for '{query}'. Try with a more general query." @@ -108,7 +108,7 @@ def search_with_serp(self, query: str): def call(self, params: Union[str, dict], **kwargs) -> str: try: query = params["query"] - except: + except Exception: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/inference/tool_visit.py b/inference/tool_visit.py index cdee8bfa..97284ff7 100644 --- a/inference/tool_visit.py +++ b/inference/tool_visit.py @@ -59,7 +59,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: url = params["url"] goal = params["goal"] - except: + except Exception: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" start_time = time.time() @@ -109,7 +109,7 @@ def call_server(self, msgs, max_retries=2): if content: try: json.loads(content) - except: + except Exception: # extract json from string left = content.find('{') right = content.rfind('}') @@ -221,7 +221,7 @@ def readpage_jina(self, url: str, goal: str) -> str: try: raw = json.loads(raw) break - except: + except Exception: raw = summary_page_func(messages, max_retries=max_retries) parse_retry_times += 1 From 3f517c712e752ef444dd5127f7f82e98b30ba93e Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 11:51:19 +0500 Subject: [PATCH 06/13] Fix F403 and F405 ruff errors --- WebAgent/WebSailor/src/react_agent.py | 2 +- WebAgent/WebSailor/src/run_multi_react.py | 2 +- WebAgent/WebWalker/src/agent.py | 2 +- WebAgent/WebWalker/src/app.py | 2 +- commit_message.txt | 2 +- evaluation/evaluate_deepsearch_official.py | 2 +- inference/react_agent.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index 79df9acd..f6f1fc63 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -12,7 +12,7 @@ from qwen_agent.tools import BaseTool -MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 40)) + MAX_TOKEN_LENGTH = int(os.getenv('MAX_LENGTH', 31 * 1024 - 500)) print(f'Running with MAX_LLM_CALL_PER_RUN = {MAX_LLM_CALL_PER_RUN}') diff --git a/WebAgent/WebSailor/src/run_multi_react.py b/WebAgent/WebSailor/src/run_multi_react.py index d86489ae..7057a079 100644 --- a/WebAgent/WebSailor/src/run_multi_react.py +++ b/WebAgent/WebSailor/src/run_multi_react.py @@ -8,7 +8,7 @@ from react_agent import MultiTurnReactAgent from prompt import SYSTEM_PROMPT_MULTI, USER_PROMPT from tool_search import * -from tool_visit import * +from tool_visit import Visit if __name__ == "__main__": diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py index c5274e7e..5ec7a826 100644 --- a/WebAgent/WebWalker/src/agent.py +++ b/WebAgent/WebWalker/src/agent.py @@ -9,7 +9,7 @@ from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs from openai import OpenAI import time -from prompts import * +from prompts import STSTEM_CRITIIC_INFORMATION, STSTEM_CRITIIC_ANSWER, SYSTEM_EXPLORER TOOL_DESC = ( diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py index 73513686..5b122e25 100644 --- a/WebAgent/WebWalker/src/app.py +++ b/WebAgent/WebWalker/src/app.py @@ -6,7 +6,7 @@ import re import json import asyncio -from utils import * +from utils import process_url, get_info, get_content_between_a_b import base64 from PIL import Image from bs4 import BeautifulSoup diff --git a/commit_message.txt b/commit_message.txt index 8f1b83e5..baecec51 100644 --- a/commit_message.txt +++ b/commit_message.txt @@ -1 +1 @@ -Fix remaining E722 ruff errors \ No newline at end of file +Fix F403 and F405 ruff errors \ No newline at end of file diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py index aaf384cb..5b3e6d92 100644 --- a/evaluation/evaluate_deepsearch_official.py +++ b/evaluation/evaluate_deepsearch_official.py @@ -7,7 +7,7 @@ import concurrent from tqdm import tqdm from transformers import AutoTokenizer -from prompt import * +from prompt import JUDGE_PROMPT_GAIA, JUDGE_PROMPT_XBENCH, JUDGE_PROMPT_BROWSECOMP_OFFICIAL import traceback import tiktoken import time diff --git a/inference/react_agent.py b/inference/react_agent.py index 6c6e57be..bd541923 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -19,7 +19,7 @@ from tool_scholar import * from tool_python import * from tool_search import * -from tool_visit import * +from tool_visit import Visit OBS_START = '' OBS_END = '\n' From d9c0bd453fb7355fe7a26be49e642e837a3d514d Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:01:47 +0500 Subject: [PATCH 07/13] Revert "Refactor react_agent.py to improve security and maintainability" This reverts commit 48d5f8f93e3df2628eace789750f3b57ae8a7dca. --- WebAgent/WebSailor/src/react_agent.py | 99 +++++++++++++-------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index f6f1fc63..1ec9352a 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -12,7 +12,7 @@ from qwen_agent.tools import BaseTool - +MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 40)) MAX_TOKEN_LENGTH = int(os.getenv('MAX_LENGTH', 31 * 1024 - 500)) print(f'Running with MAX_LLM_CALL_PER_RUN = {MAX_LLM_CALL_PER_RUN}') @@ -38,8 +38,8 @@ def __init__(self, def call_server(self, msgs, max_tries=10): # Set OpenAI API key and base URL using vLLM API server - openai_api_key = os.getenv("OPENAI_API_KEY", "EMPTY") - openai_api_base = os.getenv("OPENAI_API_BASE", "http://127.0.0.1:6001/v1") + openai_api_key = "EMPTY" + openai_api_base = "http://127.0.0.1:6001/v1" client = OpenAI( api_key=openai_api_key, @@ -77,53 +77,15 @@ def count_tokens(self, messages, model="gpt-4o"): return len(tokenizer.encode(full_prompt)) - def _process_tool_call(self, content, messages): - if '' in content and '' in content: - tool_call = content.split('')[1].split('')[0] - try: - tool_call = json.loads(tool_call) - tool_name = tool_call.get('name', '') - tool_args = tool_call.get('arguments', {}) - result = self._call_tool(tool_name, tool_args) - except Exception: - result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' - result = "\n" + result + "\n" - messages.append({"role": "user", "content": result}) - return messages - - def _handle_token_limit(self, messages, question, answer, rollout_id): - print("Token count exceeds limit") - - messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" - content = self.call_server(messages) - messages.append({"role": "assistant", "content": content.strip()}) - if '' in content and '' in content: - prediction = messages[-1]['content'].split('')[1].split('')[0] - termination = 'generate an answer as token limit reached' - else: - prediction = messages[-1]['content'] - termination = 'format error: generate an answer as token limit reached' - return self._generate_result(question, answer, rollout_id, messages, prediction, termination) - - def _generate_result(self, question, answer, rollout_id, messages, prediction, termination): - return { - "question": question, - "answer": answer, - "rollout_id": rollout_id, - "messages": messages, - "prediction": prediction, - "termination": termination - } - def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[Message]]: self.model=model - question = data.get('item', {}).get('question', '') - if not question: - raw_msg = data.get('item', {}).get('messages', [{}, {}])[1].get("content", "") - question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg + try: + question = data['item']['question'] + except: + raw_msg = data['item']['messages'][1]["content"] + question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg - answer = data.get('item', {}).get('answer', '') - rollout_id = data.get('rollout_id', '') + answer = data['item']['answer'] self.user_prompt = user_prompt self.user_prompt = self.user_prompt + question messages = [{"role": "system", "content": self.system_message}, {"role": "user", "content": self.user_prompt}] @@ -138,7 +100,17 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M pos = content.find('') content = content[:pos] messages.append({"role": "assistant", "content": content.strip()}) - messages = self._process_tool_call(content, messages) + if '' in content and '' in content: + tool_call = content.split('')[1].split('')[0] + try: + tool_call = json.loads(tool_call) + tool_name = tool_call.get('name', '') + tool_args = tool_call.get('arguments', {}) + result = self._call_tool(tool_name, tool_args) + except: + result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' + result = "\n" + result + "\n" + messages.append({"role": "user", "content": result}) if '' in content and '' in content: termination = 'answer' break @@ -150,7 +122,26 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M print(f"round: {round}, token count: {token_count}") if token_count > max_tokens: - return self._handle_token_limit(messages, question, answer, rollout_id) + print(f"Token count exceeds limit: {token_count} > {max_tokens}") + + messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" + content = self.call_server(messages) + messages.append({"role": "assistant", "content": content.strip()}) + if '' in content and '' in content: + prediction = messages[-1]['content'].split('')[1].split('')[0] + termination = 'generate an answer as token limit reached' + else: + prediction = messages[-1]['content'] + termination = 'format error: generate an answer as token limit reached' + result = { + "question": question, + "answer": answer, + "rollout_id": data['rollout_id'], + "messages": messages, + "prediction": prediction, + "termination": termination + } + return result if '' in messages[-1]['content']: prediction = messages[-1]['content'].split('')[1].split('')[0] @@ -160,4 +151,12 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M termination = 'answer not found' if num_llm_calls_available == 0: termination = 'exceed available llm calls' - return self._generate_result(question, answer, rollout_id, messages, prediction, termination) \ No newline at end of file + result = { + "question": question, + "answer": answer, + "rollout_id": data['rollout_id'], + "messages": messages, + "prediction": prediction, + "termination": termination + } + return result \ No newline at end of file From 2c82bf9850c0455f2953f6f109fb96ed66cba978 Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:02:18 +0500 Subject: [PATCH 08/13] Revert "chore(ci): add basic Ruff lint workflow" This reverts commit a9256b7dfa1355f42d04cfef79f08c5c592758ad. --- .github/workflows/ci-lint.yml | 20 -------------------- commit_message.txt | 1 - 2 files changed, 21 deletions(-) delete mode 100644 .github/workflows/ci-lint.yml delete mode 100644 commit_message.txt diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml deleted file mode 100644 index 864e85b7..00000000 --- a/.github/workflows/ci-lint.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: CI - Lint - -on: - pull_request: - branches: ["main"] - push: - branches: ["chore/add-ci-lint"] - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install ruff - run: pip install ruff==0.5.6 - - name: Lint - run: ruff check --output-format=github . \ No newline at end of file diff --git a/commit_message.txt b/commit_message.txt deleted file mode 100644 index baecec51..00000000 --- a/commit_message.txt +++ /dev/null @@ -1 +0,0 @@ -Fix F403 and F405 ruff errors \ No newline at end of file From 1348ac5b28344b6dea29e577e6ae3905c535818b Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:07:59 +0500 Subject: [PATCH 09/13] Revert "Fix E722 ruff errors" This reverts commit d60f2545369347a48f8d4aa1253f6abb9c021f6c. --- .../WebDancer/demos/assistant_qwq_chat.py | 8 +++++-- WebAgent/WebDancer/demos/llm/oai.py | 3 ++- .../WebDancer/demos/tools/private/search.py | 6 ++--- .../WebDancer/demos/tools/private/visit.py | 4 ++-- WebAgent/WebDancer/demos/utils/logs.py | 1 + WebAgent/WebSailor/src/evaluate.py | 4 ++-- WebAgent/WebSailor/src/react_agent.py | 4 ++-- WebAgent/WebSailor/src/tool_search.py | 7 +++--- WebAgent/WebSailor/src/tool_visit.py | 11 +++++---- WebAgent/WebWalker/src/agent.py | 10 ++++---- WebAgent/WebWalker/src/app.py | 5 ++-- evaluation/evaluate_deepsearch_official.py | 23 ++++++++++--------- inference/file_tools/file_parser.py | 7 +++--- inference/file_tools/idp.py | 6 +++-- inference/file_tools/video_agent.py | 11 +++++++++ inference/react_agent.py | 15 +++++++----- inference/run_multi_react.py | 2 ++ inference/tool_file.py | 22 ++++++++++++++---- inference/tool_python.py | 15 ++++++------ inference/tool_scholar.py | 7 +++--- inference/tool_search.py | 9 ++++++-- inference/tool_visit.py | 10 ++++++-- 22 files changed, 123 insertions(+), 67 deletions(-) diff --git a/WebAgent/WebDancer/demos/assistant_qwq_chat.py b/WebAgent/WebDancer/demos/assistant_qwq_chat.py index b0a48a69..3fa38ff3 100644 --- a/WebAgent/WebDancer/demos/assistant_qwq_chat.py +++ b/WebAgent/WebDancer/demos/assistant_qwq_chat.py @@ -2,11 +2,15 @@ import os +from qwen_agent.agents import Assistant +from qwen_agent.utils.output_beautify import typewriter_print from demos.agents.search_agent import SearchAgent from demos.llm.oai import TextChatAtOAI +from demos.llm.qwen_dashscope import QwenChatAtDS from demos.gui.web_ui import WebUI from demos.utils.date import date2str, get_date_now +from demos.tools import Visit, Search ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource') @@ -46,8 +50,8 @@ def make_system_prompt(): llm=llm_cfg, function_list=tools, system_message="", - name='WebDancer', - description="I am WebDancer, a web information seeking agent, welcome to try!", + name=f'WebDancer', + description=f"I am WebDancer, a web information seeking agent, welcome to try!", extra={ 'reasoning': reasoning, 'max_llm_calls': max_llm_calls, diff --git a/WebAgent/WebDancer/demos/llm/oai.py b/WebAgent/WebDancer/demos/llm/oai.py index 53a261da..0df27fe9 100644 --- a/WebAgent/WebDancer/demos/llm/oai.py +++ b/WebAgent/WebDancer/demos/llm/oai.py @@ -2,6 +2,7 @@ import json import logging import os +from http import HTTPStatus from pprint import pformat from typing import Dict, Iterator, List, Optional, Literal, Union @@ -14,7 +15,7 @@ from qwen_agent.llm.base import ModelServiceError, register_llm from qwen_agent.llm.function_calling import BaseFnCallModel, simulate_response_completion_with_chat -from qwen_agent.llm.schema import ASSISTANT, Message +from qwen_agent.llm.schema import ASSISTANT, Message, FunctionCall from qwen_agent.log import logger diff --git a/WebAgent/WebDancer/demos/tools/private/search.py b/WebAgent/WebDancer/demos/tools/private/search.py index 7f994ee5..29278ec7 100644 --- a/WebAgent/WebDancer/demos/tools/private/search.py +++ b/WebAgent/WebDancer/demos/tools/private/search.py @@ -30,7 +30,7 @@ def call(self, params: str, **kwargs) -> str: try: params = self._verify_json_format_args(params) query = params["query"][:MAX_MULTIQUERY_NUM] - except Exception: + except: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): @@ -57,9 +57,9 @@ def google_search(self, query: str) -> str: response = requests.post(url, headers=headers, data=json.dumps(data)) results = response.json() break - except Exception: + except Exception as e: if i == 4: - return "Google search Timeout, return None, Please try again later." + return f"Google search Timeout, return None, Please try again later." continue if response.status_code != 200: diff --git a/WebAgent/WebDancer/demos/tools/private/visit.py b/WebAgent/WebDancer/demos/tools/private/visit.py index fef4abe4..b911cc60 100644 --- a/WebAgent/WebDancer/demos/tools/private/visit.py +++ b/WebAgent/WebDancer/demos/tools/private/visit.py @@ -62,7 +62,7 @@ def jina_readpage(url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception: + except Exception as e: if attempt == max_retries - 1: return "[visit] Failed to read page." @@ -97,7 +97,7 @@ def call(self, params: str, **kwargs) -> str: params = self._verify_json_format_args(params) url = params["url"] goal = params["goal"] - except Exception: + except: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" if isinstance(url, str): response = self.readpage(url, goal) diff --git a/WebAgent/WebDancer/demos/utils/logs.py b/WebAgent/WebDancer/demos/utils/logs.py index 629814de..c8cd4329 100644 --- a/WebAgent/WebDancer/demos/utils/logs.py +++ b/WebAgent/WebDancer/demos/utils/logs.py @@ -1,5 +1,6 @@ # coding=utf-8 import os +import sys import logging diff --git a/WebAgent/WebSailor/src/evaluate.py b/WebAgent/WebSailor/src/evaluate.py index 0386a54e..6b3a22c2 100644 --- a/WebAgent/WebSailor/src/evaluate.py +++ b/WebAgent/WebSailor/src/evaluate.py @@ -289,7 +289,7 @@ def main(): for i in [1, 2, 3] } - print("===========") + print(f"===========") print(f"Avg. Pass@3 {avg_pass_at_3}%") print(f"Best Pass@1 {best_pass_at_1}%") print(f"Pass@3 {pass_at_3}%") @@ -297,7 +297,7 @@ def main(): print(f"# Invalid {aggr_statistics['num_invalid']} # Extra Length {aggr_statistics['extra_length']}") print(f"Avg. Action {aggr_statistics['avg_action']:.2f} Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f} Avg. Search Action {aggr_statistics['avg_search_action']:.2f} Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f} Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}") - print("===========" ) + print(f"===========" ) overall_eval_dict = { "dataset": dataset, diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index 1ec9352a..7a95051c 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -61,7 +61,7 @@ def call_server(self, msgs, max_tries=10): except Exception as e: if attempt == (max_tries - 1): print(f"SGLang server error {e}") - return "SGLang server error" + return f"SGLang server error" continue return "SGLang server empty response" @@ -69,7 +69,7 @@ def call_server(self, msgs, max_tries=10): def count_tokens(self, messages, model="gpt-4o"): try: tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) - except Exception: + except: tokenizer = tiktoken.encoding_for_model(model) full_message = [Message(**x) for x in messages] diff --git a/WebAgent/WebSailor/src/tool_search.py b/WebAgent/WebSailor/src/tool_search.py index ba0da496..3643c53a 100644 --- a/WebAgent/WebSailor/src/tool_search.py +++ b/WebAgent/WebSailor/src/tool_search.py @@ -3,6 +3,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import List, Union import requests +from qwen_agent.tools.base import BaseTool, register_tool import os SEARCH_API_URL = os.getenv("SEARCH_API_URL") @@ -49,7 +50,7 @@ def google_search(self, query: str): except Exception as e: print(e) if i == 4: - return "Google search Timeout, return None, Please try again later." + return f"Google search Timeout, return None, Please try again later." if response.status_code != 200: raise Exception(f"Error: {response.status_code} - {response.text}") @@ -81,7 +82,7 @@ def google_search(self, query: str): content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets) return content - except Exception: + except: return f"No results found for '{query}'. Try with a more general query, or remove the year filter." @@ -89,7 +90,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: assert GOOGLE_SEARCH_KEY is not None, "Please set the GOOGLE_SEARCH_KEY environment variable." try: query = params["query"] - except Exception: + except: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/WebAgent/WebSailor/src/tool_visit.py b/WebAgent/WebSailor/src/tool_visit.py index fbae5e9b..ac8e5e61 100644 --- a/WebAgent/WebSailor/src/tool_visit.py +++ b/WebAgent/WebSailor/src/tool_visit.py @@ -6,6 +6,7 @@ from prompt import EXTRACTOR_PROMPT import os from openai import OpenAI +import random WEBCONTENT_MAXLENGTH = int(os.getenv("WEBCONTENT_MAXLENGTH", 150000)) @@ -45,7 +46,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: url = params["url"] goal = params["goal"] - except Exception: + except: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" if isinstance(url, str): @@ -86,14 +87,14 @@ def call_server(self, msgs, max_tries=10): if content: try: json.loads(content) - except Exception: + except: # extract json from string left = content.find('{') right = content.rfind('}') if left != -1 and right != -1 and left <= right: content = content[left:right+1] return content - except Exception: + except: if attempt == (max_tries - 1): return "" continue @@ -128,7 +129,7 @@ def jina_readpage(self, url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception: + except Exception as e: if attempt == max_retries - 1: return "[visit] Failed to read page." @@ -190,7 +191,7 @@ def readpage(self, url: str, goal: str) -> str: # 尝试 parse json raw = json.loads(raw) break - except Exception: + except: raw = self.call_server(messages) parse_retry_times += 1 # parse 失败 diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py index 5ec7a826..02ffeb25 100644 --- a/WebAgent/WebWalker/src/agent.py +++ b/WebAgent/WebWalker/src/agent.py @@ -63,7 +63,7 @@ def observation_information_extraction(self, query, observation): if "true" in response.choices[0].message.content: try: return json.loads(response.choices[0].message.content)["information"] - except Exception: + except: return response.choices[0].message.content else: return None @@ -97,7 +97,7 @@ def critic_information(self, query, memory): if "true" in response.choices[0].message.content: try: return json.loads(response.choices[0].message.content)["answer"] - except Exception: + except: return response.choices[0].message.content else: return None @@ -140,9 +140,9 @@ def _run(self, messages: List[Message], lang: Literal['en', 'zh'] = 'en', **kwar if stage1: self.momery.append(stage1+"\n") if len(self.momery) > 1: - yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"}")] + yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")}] else: - yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"}")] + yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")}] stage2 = self.critic_information(query, self.momery) if stage2: response = f'Final Answer: {stage2}' @@ -205,4 +205,4 @@ def _detect_tool(self, text: str) -> Tuple[bool, str, str, str]: func_name = text[i + len(special_func_token):j].strip() func_args = text[j + len(special_args_token):k].strip() text = text[:i] # Return the response before tool call, i.e., `Thought` - return (func_name is not None), func_name, func_args, text + return (func_name is not None), func_name, func_args, text \ No newline at end of file diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py index 5b122e25..f7fba538 100644 --- a/WebAgent/WebWalker/src/app.py +++ b/WebAgent/WebWalker/src/app.py @@ -3,10 +3,11 @@ import json5 from agent import WebWalker from qwen_agent.tools.base import BaseTool, register_tool +import os import re import json import asyncio -from utils import process_url, get_info, get_content_between_a_b +from utils import * import base64 from PIL import Image from bs4 import BeautifulSoup @@ -267,4 +268,4 @@ def call(self, params: str, **kwargs) -> str: else: return "The button can not be clicked, please retry a new botton!" else: - return "Your input is invalid, plase output the action input correctly!" + return "Your input is invalid, plase output the action input correctly!"} \ No newline at end of file diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py index 5b3e6d92..b348f660 100644 --- a/evaluation/evaluate_deepsearch_official.py +++ b/evaluation/evaluate_deepsearch_official.py @@ -7,7 +7,8 @@ import concurrent from tqdm import tqdm from transformers import AutoTokenizer -from prompt import JUDGE_PROMPT_GAIA, JUDGE_PROMPT_XBENCH, JUDGE_PROMPT_BROWSECOMP_OFFICIAL +import re +from prompt import * import traceback import tiktoken import time @@ -175,7 +176,7 @@ def count_tokens_with_tokenizer(text, tokenizer): return len(tokenizer.encode(text)) else: return len(tokenizer.encode(text)) - except Exception: + except: return len(text) // 4 @@ -186,7 +187,7 @@ def aggregate_statistics(round1_file, round2_file, round3_file): round3_stats = single_round_statistics(round3_file) keys = round1_stats.keys() - avg_stats = {} + avg_stats = {} for key in keys: if isinstance(round1_stats[key], dict): @@ -300,7 +301,7 @@ def single_round_statistics(input_file): try: if len(tokenizer.encode("".join([msg["content"] for msg in messages]))) > 30000: num_extra += 1 - except Exception: + except: pass total_questions = len(contents) @@ -342,7 +343,7 @@ def calculate_enhanced_statistics(round_results, round_items): continue try: matching_item = [item for item in items if item['messages'][1]['content'] == result['question']] - except Exception: + except: items = [item for item in items if len(item['messages'])>0] matching_item = [item for item in items if item['messages'][1]['content'] == result['question']] if not matching_item: @@ -416,7 +417,7 @@ def calculate_best_pass_at_1(query_results): round_correct = {round_name: 0 for round_name in ["round1", "round2", "round3"]} for query, results in query_results.items(): - for round_name in ["round1", "round2", "round3"]: + for round_name in ["round1", "round2", "round3"]: if results[round_name] == "Correct": round_correct[round_name] += 1 @@ -527,7 +528,7 @@ def main(): for i in [1, 2, 3] } - print("===========") + print(f"===========") print(f"Avg. Pass@3 {avg_pass_at_3}%") print(f"Best Pass@1 {best_pass_at_1}%") print(f"Pass@3 {pass_at_3}%") @@ -538,18 +539,18 @@ def main(): print(f"Avg. Action {aggr_statistics['avg_action']:.2f} Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f} Avg. Search Action {aggr_statistics['avg_search_action']:.2f} Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f} Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}") enhanced_statistics = calculate_enhanced_statistics(round_results, round_items) - print("\n=== ADDITIONAL STATISTICS ===") + print(f"\n=== ADDITIONAL STATISTICS ===") print(f"Avg. Tool Calls per Question: {aggr_statistics['avg_tool_calls_per_question']:.2f}") print(f"Avg. Tool Calls per Question (Correctly Solved): {enhanced_statistics['avg_tool_calls_per_question_correctly_solved']:.2f}") print(f"Avg. Assistant Tokens per Question: {aggr_statistics['avg_assistant_tokens_per_question']:.2f}") print(f"Avg. Assistant Tokens per Question (Correctly Solved): {enhanced_statistics['avg_assistant_tokens_per_question_correctly_solved']:.2f}") print(f"Avg. Assistant Tokens per Message: {aggr_statistics['avg_assistant_tokens_per_message']:.2f}") - print("\n=== TERMINATION FREQUENCIES ===") + print(f"\n=== TERMINATION FREQUENCIES ===") for termination_type, frequency in aggr_statistics['termination_freq'].items(): print(f"{termination_type}: {frequency:.3f}") - print("===========" ) + print(f"===========" ) overall_eval_dict = { "dataset": dataset, @@ -578,4 +579,4 @@ def main(): except Exception as e: error_str = traceback.format_exc() print(f"Evaluation Failed: {e}") - print("Trace Back", error_str) + print("Trace Back", error_str) \ No newline at end of file diff --git a/inference/file_tools/file_parser.py b/inference/file_tools/file_parser.py index 3410da72..ecbace9b 100644 --- a/inference/file_tools/file_parser.py +++ b/inference/file_tools/file_parser.py @@ -4,6 +4,7 @@ import time import zipfile import math +from pathlib import Path from typing import Any, Dict, List, Optional, Union from collections import Counter @@ -16,7 +17,7 @@ from tabulate import tabulate from qwen_agent.log import logger from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS -from qwen_agent.tools.base import BaseTool +from qwen_agent.tools.base import BaseTool, register_tool from qwen_agent.tools.storage import KeyNotExistsError, Storage from file_tools.utils import (get_file_type, hash_sha256, is_http_url, get_basename_from_url, sanitize_chrome_file_path, save_url_to_local_work_dir) @@ -521,7 +522,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]: if USE_IDP and file_type in idp_types: try: results = parse_file_by_idp(file_path=file_path) - except Exception: + except Exception as e: results = self.parsers[file_type](file_path) else: results = self.parsers[file_type](file_path) @@ -535,7 +536,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]: tokens += para['token'] if not results or not tokens: - logger.error("Parsing failed: No information was parsed") + logger.error(f"Parsing failed: No information was parsed") raise FileParserError("Document parsing failed") else: self._cache_result(file_path, results) diff --git a/inference/file_tools/idp.py b/inference/file_tools/idp.py index b77872ba..71199cbf 100644 --- a/inference/file_tools/idp.py +++ b/inference/file_tools/idp.py @@ -1,10 +1,12 @@ import os +import json from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client from alibabacloud_tea_openapi import models as open_api_models from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models from alibabacloud_tea_util.client import Client as UtilClient from alibabacloud_tea_util import models as util_models +from alibabacloud_credentials.client import Client as CredClient key = os.environ.get('IDP_KEY_ID') secret = os.environ.get('IDP_KEY_SECRET') @@ -16,7 +18,7 @@ def __init__(self): access_key_id=key, access_key_secret=secret ) - config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com' + config.endpoint = f'docmind-api.cn-hangzhou.aliyuncs.com' self.client = docmind_api20220711Client(config) def file_submit_with_url(self, file_url): @@ -82,7 +84,7 @@ def file_parser_query(self,fid): responses = result else: responses['layouts'].extend(result['layouts']) - except Exception: + except Exception as error: return None,status_parse return responses,status_parse \ No newline at end of file diff --git a/inference/file_tools/video_agent.py b/inference/file_tools/video_agent.py index 37857d98..7d9b709c 100644 --- a/inference/file_tools/video_agent.py +++ b/inference/file_tools/video_agent.py @@ -9,11 +9,22 @@ """ import sys import os +import re +import copy import json +from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional +import json5 import asyncio +from openai import OpenAI from qwen_agent.tools.base import BaseTool, register_tool +from qwen_agent.agents import Assistant +from qwen_agent.llm import BaseChatModel +from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE from qwen_agent.tools import BaseTool +from qwen_agent.log import logger +from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer +from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) diff --git a/inference/react_agent.py b/inference/react_agent.py index bd541923..8c26a35a 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -1,6 +1,7 @@ +import json import json5 import os -from typing import Dict, List, Optional, Union +from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union from qwen_agent.llm.schema import Message from qwen_agent.utils.utils import build_text_completion_prompt from openai import OpenAI, APIError, APIConnectionError, APITimeoutError @@ -9,8 +10,10 @@ from datetime import datetime from qwen_agent.agents.fncall_agent import FnCallAgent from qwen_agent.llm import BaseChatModel +from qwen_agent.llm.schema import ASSISTANT, DEFAULT_SYSTEM_MESSAGE, Message from qwen_agent.settings import MAX_LLM_CALL_PER_RUN from qwen_agent.tools import BaseTool +from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs from prompt import * import time import asyncio @@ -106,12 +109,12 @@ def call_server(self, msgs, planning_port, max_tries=10): else: print("Error: All retry attempts have been exhausted. The call has failed.") - return "vllm server error!!!" + return f"vllm server error!!!" def count_tokens(self, messages, model="gpt-4o"): try: tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) - except Exception: + except Exception as e: tokenizer = tiktoken.encoding_for_model(model) full_message = [Message(**x) for x in messages] @@ -123,7 +126,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: self.model=model try: question = data['item']['question'] - except Exception: + except: raw_msg = data['item']['messages'][1]["content"] question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg @@ -165,7 +168,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: try: code_raw=content.split('')[1].split('')[0].split('')[1].split('')[0].strip() result = TOOL_MAP['PythonInterpreter'].call(code_raw) - except Exception: + except: result = "[Python Interpreter Error]: Formatting error." else: @@ -174,7 +177,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: tool_args = tool_call.get('arguments', {}) result = self.custom_call_tool(tool_name, tool_args) - except Exception: + except: result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' result = "\n" + result + "\n" # print(result) diff --git a/inference/run_multi_react.py b/inference/run_multi_react.py index 4a517eaf..1056a0a7 100644 --- a/inference/run_multi_react.py +++ b/inference/run_multi_react.py @@ -5,7 +5,9 @@ import concurrent.futures from tqdm import tqdm import threading +from datetime import datetime from react_agent import MultiTurnReactAgent +import time import math if __name__ == "__main__": diff --git a/inference/tool_file.py b/inference/tool_file.py index 7fb1ce91..77c4960f 100644 --- a/inference/tool_file.py +++ b/inference/tool_file.py @@ -9,12 +9,26 @@ """ import sys import os +import re +import time +import copy import json - -from qwen_agent.tools.base import BaseTool -from qwen_agent.settings import DEFAULT_MAX_INPUT_TOKENS +from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional +import json5 +import asyncio +from openai import OpenAI, AsyncOpenAI +import pdb +import bdb + +from qwen_agent.tools.base import BaseTool, register_tool +from qwen_agent.agents import Assistant +from qwen_agent.llm import BaseChatModel +from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS +from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE from qwen_agent.tools import BaseTool -from qwen_agent.utils.tokenization_qwen import count_tokens +from qwen_agent.log import logger +from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer +from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) diff --git a/inference/tool_python.py b/inference/tool_python.py index d851ee84..e8e55223 100644 --- a/inference/tool_python.py +++ b/inference/tool_python.py @@ -1,13 +1,14 @@ import re -from typing import Dict, Optional, Union +from typing import Dict, List, Optional, Union import json5 from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool from qwen_agent.utils.utils import extract_code -from sandbox_fusion import run_code, RunCodeRequest +from sandbox_fusion import run_code, RunCodeRequest, RunStatus from requests.exceptions import Timeout import os import random import time +from concurrent.futures import ThreadPoolExecutor, as_completed # Array of sandbox fusion endpoints SANDBOX_FUSION_ENDPOINTS = [] @@ -79,12 +80,12 @@ def call(self, params, files= None, timeout = 50, **kwargs) -> str: if code_result.run_result.stderr: result.append(f"stderr:\n{code_result.run_result.stderr}") if code_result.run_result.execution_time >= timeout-1: - result.append("[PythonInterpreter Error] TimeoutError: Execution timed out.") + result.append(f"[PythonInterpreter Error] TimeoutError: Execution timed out.") result = '\n'.join(result) print('SUCCESS RUNNING TOOL') return result if result.strip() else 'Finished execution.' - except Timeout: + except Timeout as e: last_error = f'[Python Interpreter Error] TimeoutError: Execution timed out on endpoint {endpoint}.' print(f"Timeout on attempt {attempt + 1}: {last_error}") if attempt == 4: # Last attempt @@ -136,7 +137,7 @@ def call_specific_endpoint(self, params: Union[str, dict], endpoint: str, timeou execution_time = end_time - start_time return True, result if result.strip() else 'Finished execution.', execution_time - except Timeout: - return False, '[Python Interpreter Error] TimeoutError: Execution timed out.', None + except Timeout as e: + return False, f'[Python Interpreter Error] TimeoutError: Execution timed out.', None except Exception as e: - return False, f'[Python Interpreter Error]: {str(e)}', None \ No newline at end of file + return False, f'[Python Interpreter Error]: {str(e)}', None diff --git a/inference/tool_scholar.py b/inference/tool_scholar.py index 90e97c57..ae021b38 100644 --- a/inference/tool_scholar.py +++ b/inference/tool_scholar.py @@ -1,5 +1,6 @@ import os import json +import requests from typing import Union, List from qwen_agent.tools.base import BaseTool, register_tool from concurrent.futures import ThreadPoolExecutor @@ -43,7 +44,7 @@ def google_scholar_with_serp(self, query: str): except Exception as e: print(e) if i == 4: - return "Google Scholar Timeout, return None, Please try again later." + return f"Google Scholar Timeout, return None, Please try again later." continue @@ -86,7 +87,7 @@ def google_scholar_with_serp(self, query: str): content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets) return content - except Exception: + except: return f"No results found for '{query}'. Try with a more general query." @@ -95,7 +96,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: params = self._verify_json_format_args(params) query = params["query"] - except Exception: + except: return "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/inference/tool_search.py b/inference/tool_search.py index 499ff0cb..d2289df0 100644 --- a/inference/tool_search.py +++ b/inference/tool_search.py @@ -1,8 +1,13 @@ import json +from concurrent.futures import ThreadPoolExecutor from typing import List, Union +import requests from qwen_agent.tools.base import BaseTool, register_tool -from typing import Optional +import asyncio +from typing import Dict, List, Optional, Union +import uuid import http.client +import json import os @@ -63,7 +68,7 @@ def contains_chinese_basic(text: str) -> bool: except Exception as e: print(e) if i == 4: - return "Google search Timeout, return None, Please try again later." + return f"Google search Timeout, return None, Please try again later." continue data = res.read() diff --git a/inference/tool_visit.py b/inference/tool_visit.py index 97284ff7..4981a2c2 100644 --- a/inference/tool_visit.py +++ b/inference/tool_visit.py @@ -1,11 +1,17 @@ import json import os +import signal +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed from typing import List, Union import requests from qwen_agent.tools.base import BaseTool, register_tool from prompt import EXTRACTOR_PROMPT from openai import OpenAI +import random +from urllib.parse import urlparse, unquote import time +from transformers import AutoTokenizer import tiktoken VISIT_SERVER_TIMEOUT = int(os.getenv("VISIT_SERVER_TIMEOUT", 200)) @@ -116,7 +122,7 @@ def call_server(self, msgs, max_retries=2): if left != -1 and right != -1 and left <= right: content = content[left:right+1] return content - except Exception: + except Exception as e: # print(e) if attempt == (max_retries - 1): return "" @@ -153,7 +159,7 @@ def jina_readpage(self, url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception: + except Exception as e: time.sleep(0.5) if attempt == max_retries - 1: return "[visit] Failed to read page." From fac9a0725a9855bc565de6f43e7fdd5167107133 Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:08:11 +0500 Subject: [PATCH 10/13] Revert "Fix remaining E722 ruff errors" This reverts commit ff94125461682fed4c23b3527559d4eb2466c54d. --- inference/tool_search.py | 4 ++-- inference/tool_visit.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/inference/tool_search.py b/inference/tool_search.py index d2289df0..1a3f7b53 100644 --- a/inference/tool_search.py +++ b/inference/tool_search.py @@ -101,7 +101,7 @@ def contains_chinese_basic(text: str) -> bool: content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets) return content - except Exception: + except: return f"No results found for '{query}'. Try with a more general query." @@ -113,7 +113,7 @@ def search_with_serp(self, query: str): def call(self, params: Union[str, dict], **kwargs) -> str: try: query = params["query"] - except Exception: + except: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/inference/tool_visit.py b/inference/tool_visit.py index 4981a2c2..92e4e3af 100644 --- a/inference/tool_visit.py +++ b/inference/tool_visit.py @@ -65,7 +65,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: url = params["url"] goal = params["goal"] - except Exception: + except: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" start_time = time.time() @@ -115,7 +115,7 @@ def call_server(self, msgs, max_retries=2): if content: try: json.loads(content) - except Exception: + except: # extract json from string left = content.find('{') right = content.rfind('}') @@ -227,7 +227,7 @@ def readpage_jina(self, url: str, goal: str) -> str: try: raw = json.loads(raw) break - except Exception: + except: raw = summary_page_func(messages, max_retries=max_retries) parse_retry_times += 1 From 33278d416578654d9b9660b27fe2483bd7369297 Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:15:38 +0500 Subject: [PATCH 11/13] Revert "Fix F403 and F405 ruff errors" This reverts commit 3f517c712e752ef444dd5127f7f82e98b30ba93e. --- WebAgent/WebSailor/src/run_multi_react.py | 2 +- WebAgent/WebWalker/src/agent.py | 6 +++--- evaluation/evaluate_deepsearch_official.py | 6 +++--- inference/react_agent.py | 7 ++++--- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/WebAgent/WebSailor/src/run_multi_react.py b/WebAgent/WebSailor/src/run_multi_react.py index 7057a079..d86489ae 100644 --- a/WebAgent/WebSailor/src/run_multi_react.py +++ b/WebAgent/WebSailor/src/run_multi_react.py @@ -8,7 +8,7 @@ from react_agent import MultiTurnReactAgent from prompt import SYSTEM_PROMPT_MULTI, USER_PROMPT from tool_search import * -from tool_visit import Visit +from tool_visit import * if __name__ == "__main__": diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py index 02ffeb25..fc035ee3 100644 --- a/WebAgent/WebWalker/src/agent.py +++ b/WebAgent/WebWalker/src/agent.py @@ -9,7 +9,7 @@ from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs from openai import OpenAI import time -from prompts import STSTEM_CRITIIC_INFORMATION, STSTEM_CRITIIC_ANSWER, SYSTEM_EXPLORER +from prompts import * TOOL_DESC = ( @@ -140,9 +140,9 @@ def _run(self, messages: List[Message], lang: Literal['en', 'zh'] = 'en', **kwar if stage1: self.momery.append(stage1+"\n") if len(self.momery) > 1: - yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")}] + yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")] else: - yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")}] + yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")] stage2 = self.critic_information(query, self.momery) if stage2: response = f'Final Answer: {stage2}' diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py index b348f660..d5aed588 100644 --- a/evaluation/evaluate_deepsearch_official.py +++ b/evaluation/evaluate_deepsearch_official.py @@ -457,10 +457,10 @@ def main(): args = parser.parse_args() dataset = args.dataset - if dataset in ["gaia", "webwalker"]: + if dataset in ["gaia", "webwalker"]: judge_model = "openai/qwen2.5-72b-instruct" judge_prompt = JUDGE_PROMPT_GAIA - elif dataset in ["xbench-deepsearch"]: + elif dataset in ["xbench-deepsearch"]: judge_prompt = JUDGE_PROMPT_XBENCH judge_model = "google/gemini-2.0-flash-001" elif dataset.startswith("browsecomp_zh"): @@ -579,4 +579,4 @@ def main(): except Exception as e: error_str = traceback.format_exc() print(f"Evaluation Failed: {e}") - print("Trace Back", error_str) \ No newline at end of file + print("Trace Back", error_str) diff --git a/inference/react_agent.py b/inference/react_agent.py index 8c26a35a..ec3aa26c 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -22,10 +22,11 @@ from tool_scholar import * from tool_python import * from tool_search import * -from tool_visit import Visit +from tool_visit import * OBS_START = '' -OBS_END = '\n' +OBS_END = ' +' MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 100)) @@ -249,4 +250,4 @@ def custom_call_tool(self, tool_name: str, tool_args: dict, **kwargs): return result else: - return f"Error: Tool {tool_name} not found" + return f"Error: Tool {tool_name} not found"} \ No newline at end of file From 3ff09e330bc22513031c111db7e10336d0eef557 Mon Sep 17 00:00:00 2001 From: MirzaSamadAhmedBaig <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com> Date: Mon, 22 Sep 2025 14:45:19 +0500 Subject: [PATCH 12/13] Update app.py --- WebAgent/WebWalker/src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py index f7fba538..73bc5571 100644 --- a/WebAgent/WebWalker/src/app.py +++ b/WebAgent/WebWalker/src/app.py @@ -268,4 +268,4 @@ def call(self, params: str, **kwargs) -> str: else: return "The button can not be clicked, please retry a new botton!" else: - return "Your input is invalid, plase output the action input correctly!"} \ No newline at end of file + return "Your input is invalid, plase output the action input correctly!" From 68cb1dec596872cf98d20309b2543fd140b5aa49 Mon Sep 17 00:00:00 2001 From: MirzaSamadAhmedBaig <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com> Date: Mon, 22 Sep 2025 15:31:21 +0500 Subject: [PATCH 13/13] Update react_agent.py --- inference/react_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/react_agent.py b/inference/react_agent.py index ec3aa26c..2e1dee85 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -250,4 +250,4 @@ def custom_call_tool(self, tool_name: str, tool_args: dict, **kwargs): return result else: - return f"Error: Tool {tool_name} not found"} \ No newline at end of file + return f"Error: Tool {tool_name} not found"