# CAMEL agent using Terminal Toolkit
import os
import logging
import tempfile
from pathlib import Path
from typing import List, Tuple
from terminal_bench.agents.base_agent import BaseAgent, AgentResult
from terminal_bench.agents.failure_mode import FailureMode
from terminal_bench.terminal.tmux_session import TmuxSession

from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType

logger = logging.getLogger(__name__)


class CamelTerminalAgent(BaseAgent):
    """Terminal-Bench agent that delegates a task to a CAMEL developer agent.

    The CAMEL agent is built by ``util.agent_factory.developer_agent_factory``
    with a ``TerminalToolkit`` configured to use the Docker backend against
    the container that backs the Terminal-Bench session.
    """

    # Directory inside the task container in which commands are executed;
    # the task tests expect files to be created there.
    _CONTAINER_WORKDIR = "/app"

    @staticmethod
    def name() -> str:
        """Return the agent name reported to the harness."""
        return "CamelTerminalAgent"

    @staticmethod
    def _markers_from_memory(records: List[dict]) -> List[Tuple[float, str]]:
        """Build ``(timestamp, description)`` markers from memory records.

        Only records whose ``message`` dict carries a ``func_name`` key
        produce a marker; the description names the function and the
        ``command`` argument (empty string when there is none).
        """
        logger.debug("Total records: %d", len(records))
        markers: List[Tuple[float, str]] = []
        for record in records:
            message = record["message"]
            if "func_name" not in message:
                continue
            # ``args`` may be absent, None or empty; all mean "no command".
            call_args = message.get("args") or {}
            command = call_args.get("command", "")
            markers.append(
                (
                    record["timestamp"],
                    f"Called tool: {message['func_name']} "
                    f"with args: {command}",
                )
            )
        return markers

    def perform_task(
        self,
        instruction: str,
        session: TmuxSession,
        logging_dir: Path | None = None,
    ) -> AgentResult:
        """Execute a task using the Terminal Bench harness.

        Args:
            instruction: The task instruction to execute
            session: TmuxSession object for command execution
            logging_dir: Optional directory for logging. When omitted, a
                fresh temporary directory is created and used instead.

        Returns:
            AgentResult with token counts, failure mode and one timestamped
            marker per recorded tool call.

        Raises:
            ValueError: If the session's container has no name.
        """
        container_name = session.container.name
        if not container_name:
            raise ValueError("Container name is required for DockerExecutor")

        if logging_dir is None:
            # The signature allows None, but every path below is derived
            # from logging_dir, so fall back to a throwaway directory.
            logging_dir = Path(tempfile.mkdtemp(prefix="camel_terminal_agent_"))
            logger.warning(
                "No logging_dir supplied; using temporary directory %s",
                logging_dir,
            )

        # Set up CAMEL working directory for logs and internal operations
        camel_workdir = logging_dir / "CAMEL_WORKDIR"
        camel_workdir.mkdir(parents=True, exist_ok=True)
        os.environ["CAMEL_WORKDIR"] = str(camel_workdir.resolve())
        logger.info("Set CAMEL_WORKDIR to: %s", os.environ["CAMEL_WORKDIR"])

        session_logs_dir = logging_dir / "session" / "logs"
        session_logs_dir.mkdir(parents=True, exist_ok=True)
        logger.info("Session logs directory: %s", session_logs_dir)

        # Imported here rather than at module level: util.agent_factory reads
        # CAMEL_WORKDIR when it is imported, so the variable must be set first.
        from util.agent_factory import developer_agent_factory

        # Use Terminal-Bench's Docker container with TerminalToolkit's
        # Docker backend
        terminal_toolkit_kwargs = {
            "working_directory": self._CONTAINER_WORKDIR,
            "use_docker_backend": True,  # Enable Docker backend
            "docker_container_name": container_name,  # Terminal-Bench's container
            "session_logs_dir": str(session_logs_dir),
            "safe_mode": False,  # Allow more commands in Docker environment
        }
        logger.info("Using Docker container: %s", container_name)
        logger.info("Docker working directory: %s", self._CONTAINER_WORKDIR)

        model_backend_reason = ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4_1,
            model_config_dict={
                "stream": False,
            },
        )

        task_id = "workforce_task"
        camel_agent = developer_agent_factory(
            model_backend_reason,
            task_id,
            terminal_toolkit_kwargs,
        )
        camel_agent.reset()

        response = camel_agent.step(f"{instruction}\n")

        # Usage data may be missing from the response info; report zero
        # tokens in that case instead of failing a run that already finished.
        usage = (response.info or {}).get("usage") or {}
        total_input_tokens = usage.get("prompt_tokens") or 0
        total_output_tokens = usage.get("completion_tokens") or 0

        # NOTE: reaches into private ChatAgent internals to read the raw
        # chat-history records.
        memory_list = camel_agent._memory._chat_history_block.storage.memory_list
        timestamped_markers = self._markers_from_memory(memory_list)

        return AgentResult(
            total_input_tokens=total_input_tokens,
            total_output_tokens=total_output_tokens,
            failure_mode=FailureMode.NONE,
            timestamped_markers=timestamped_markers,
        )
f0bc01c..2092670 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,5 +7,5 @@ requires-python = "==3.12.*" dependencies = [ "terminal-bench>=0.2.16", "openai>=1.0.0", - "camel-ai @ git+https://github.com/camel-ai/camel.git@agent-summarize", + "camel-ai @ git+https://github.com/camel-ai/camel.git@7bde8ec", ] diff --git a/test.sh b/test.sh index f569785..482bf5f 100755 --- a/test.sh +++ b/test.sh @@ -11,4 +11,10 @@ uv run tb run \ uv run tb run \ --dataset terminal-bench-core==head \ --agent-import-path agents.camel_agent:CamelTerminus \ + --task-id hello-world + + +uv run tb run \ + --dataset terminal-bench-core==head \ + --agent-import-path agents.camel_terminal_agent:CamelTerminalAgent \ --task-id hello-world \ No newline at end of file diff --git a/util/agent_factory.py b/util/agent_factory.py new file mode 100644 index 0000000..0533ef7 --- /dev/null +++ b/util/agent_factory.py @@ -0,0 +1,180 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
import asyncio
import datetime
import os
import platform
import uuid

from camel.agents.chat_agent import ChatAgent
from camel.logger import get_logger
from camel.messages.base import BaseMessage
from camel.models import BaseModelBackend, ModelFactory
from camel.societies.workforce import Workforce
from camel.tasks.task import Task
from camel.toolkits import (
    AgentCommunicationToolkit,
    NoteTakingToolkit,
    TerminalToolkit,
    ToolkitMessageIntegration,
)
from camel.types import ModelPlatformType, ModelType
from camel.utils.commons import api_keys_required

logger = get_logger(__name__)

# Snapshot of CAMEL_WORKDIR taken when this module is first imported.
WORKING_DIRECTORY = os.environ.get("CAMEL_WORKDIR")

print(f"Using working directory: {WORKING_DIRECTORY}")


def send_message_to_user(
    message_title: str,
    message_description: str,
    message_attachment: str = "",
) -> str:
    r"""Use this tool to send a tidy message to the user, including a
    short title, a one-sentence description, and an optional attachment.

    This one-way tool keeps the user informed about your progress,
    decisions, or actions. It does not require a response.
    You should use it to:
    - Announce what you are about to do.
      For example:
      message_title="Starting Task"
      message_description="Searching for papers on GUI Agents."
    - Report the result of an action.
      For example:
      message_title="Search Complete"
      message_description="Found 15 relevant papers."
    - Report a created file.
      For example:
      message_title="File Ready"
      message_description="The report is ready for your review."
      message_attachment="report.pdf"
    - State a decision.
      For example:
      message_title="Next Step"
      message_description="Analyzing the top 10 papers."
    - Give a status update during a long-running task.

    Args:
        message_title (str): The title of the message.
        message_description (str): The short description.
        message_attachment (str): The attachment of the message,
            which can be a file path or a URL.

    Returns:
        str: Confirmation that the message was successfully sent.
    """
    print(f"\nAgent Message:\n{message_title} \n{message_description}\n")
    if message_attachment:
        print(message_attachment)
    # Lazy %-style arguments render the same text as the original f-string.
    logger.info(
        "\nAgent Message:\n%s %s %s",
        message_title,
        message_description,
        message_attachment,
    )
    return (
        f"Message successfully sent to user: '{message_title} "
        f"{message_description} {message_attachment}'"
    )


def developer_agent_factory(
    model: BaseModelBackend,
    task_id: str,
    terminal_toolkit_kwargs: dict | None = None,
) -> ChatAgent:
    r"""Factory for creating a developer agent.

    Args:
        model (BaseModelBackend): Model backend the agent runs on.
        task_id (str): Identifier of the task. Currently unused; kept so
            existing callers keep working.
        terminal_toolkit_kwargs (dict | None): Keyword arguments forwarded
            to ``TerminalToolkit``. ``None`` (the default) means no extra
            arguments. A truthy ``use_docker_backend`` entry switches the
            system prompt to its Docker wording.

    Returns:
        ChatAgent: An agent equipped with the terminal and note-taking
        tools, both wrapped so that they can message the user.
    """
    # The default is None, which cannot be unpacked with ``**``; normalise
    # to a private copy so the caller's dict is never touched.
    toolkit_kwargs = dict(terminal_toolkit_kwargs or {})

    # Read CAMEL_WORKDIR at call time: the module-level WORKING_DIRECTORY is
    # frozen at first import and goes stale if the variable is changed
    # afterwards. It remains the fallback when the variable is unset.
    working_directory = os.environ.get("CAMEL_WORKDIR", WORKING_DIRECTORY)

    # Initialize message integration
    message_integration = ToolkitMessageIntegration(
        message_handler=send_message_to_user
    )

    # Initialize toolkits and add messaging to them
    terminal_toolkit = message_integration.register_toolkits(
        TerminalToolkit(**toolkit_kwargs)
    )
    note_toolkit = message_integration.register_toolkits(
        NoteTakingToolkit(working_directory=working_directory)
    )

    # Get enhanced tools
    tools = [
        *terminal_toolkit.get_tools(),
        *note_toolkit.get_tools(),
    ]

    # Determine environment info based on Docker usage
    if toolkit_kwargs.get("use_docker_backend"):
        # Use Docker container environment
        system_info = "Linux (Docker Container)"
        working_dir = toolkit_kwargs.get("working_directory", "/app")
        env_note = "You are running inside a Docker container. All commands execute within the containerized environment."
        file_ops_note = "Create files directly in the working directory using simple paths like './filename' or 'filename'."
    else:
        # Use host system environment
        system_info = f"{platform.system()} ({platform.machine()})"
        working_dir = working_directory
        env_note = "You are running on the host system."
        file_ops_note = "You can access files from any place in the file system."

    # NOTE(review): the runs of blank lines between sections look like places
    # where section markers used to be -- confirm against the upstream prompt.
    system_message = f"""

You are a Lead Software Engineer, a master-level coding assistant with a
powerful terminal. Your role is to solve technical tasks by writing and
executing code, installing necessary libraries, interacting with the operating
system, and deploying applications.



- **System**: {system_info}
- **Working Directory**: `{working_dir}`. {env_note}
- **Current Date**: {datetime.date.today()}.
- **IMPORTANT**: When working with files, use paths relative to the working directory above.
  Do NOT use host system paths like /Users/... when in a Docker container.



- When you complete your task, provide a clear summary of what you accomplished.
- Focus on creating files in the correct location as specified by the task.



- **Code Execution**: You can write and execute code in any language to solve tasks.
- **Terminal Control**: You have access to the terminal and can run command-line tools,
  manage files, and interact with the OS. Install missing tools with package managers
  like `pip3`, `uv`, or `apt-get`.
- **File Operations**: {file_ops_note}
- **Verification**: Test and verify your solutions by executing them.




- Take action to solve problems. Don't just suggest solutions—implement them.
- Use the terminal effectively to execute commands and manage files.
- Verify your solutions by testing them.

    """

    return ChatAgent(
        system_message=BaseMessage.make_assistant_message(
            role_name="Developer Agent",
            content=system_message,
        ),
        model=model,
        tools=tools,
    )