diff --git a/agents/camel_terminal_agent.py b/agents/camel_terminal_agent.py
new file mode 100644
index 0000000..6e0f46c
--- /dev/null
+++ b/agents/camel_terminal_agent.py
@@ -0,0 +1,120 @@
+# CAMEL agent using Terminal Toolkit
+import os
+import logging
+from pathlib import Path
+from typing import List, Tuple
+from terminal_bench.agents.base_agent import BaseAgent, AgentResult
+from terminal_bench.agents.failure_mode import FailureMode
+from terminal_bench.terminal.tmux_session import TmuxSession
+
+from camel.models import ModelFactory
+from camel.types import ModelPlatformType, ModelType
+
+logger = logging.getLogger(__name__)
+
+
+class CamelTerminalAgent(BaseAgent):
+ @staticmethod
+ def name() -> str:
+ return "CamelTerminalAgent"
+
+ def perform_task(
+ self,
+ instruction: str,
+ session: TmuxSession,
+ logging_dir: Path | None = None,
+ ) -> AgentResult:
+ """Execute a task using the Terminal Bench harness.
+
+ Args:
+ instruction: The task instruction to execute
+ session: TmuxSession object for command execution
+ logging_dir: Optional directory for logging
+
+ Returns:
+ AgentResult with token counts and failure mode
+ """
+
+ container_name = session.container.name
+ if not container_name:
+ raise ValueError("Container name is required for DockerExecutor")
+
+ # Set up CAMEL working directory for logs and internal operations
+ camel_workdir = logging_dir / "CAMEL_WORKDIR"
+ if not camel_workdir.exists():
+ camel_workdir.mkdir(parents=True)
+
+ absolute_camel_workdir = str(camel_workdir.resolve())
+ os.environ["CAMEL_WORKDIR"] = absolute_camel_workdir
+ print(f"Set CAMEL_WORKDIR to: {os.environ['CAMEL_WORKDIR']}")
+
+ session_logs_dir = logging_dir / "session" / 'logs'
+ if not session_logs_dir.exists():
+ session_logs_dir.mkdir(parents=True)
+ print(f"Session logs directory: {session_logs_dir}")
+
+ from util.agent_factory import developer_agent_factory
+
+ # Use Terminal-Bench's Docker container with TerminalToolkit's Docker backend
+ terminal_toolkit_kwargs = {
+ "working_directory": "/app", # Work in container's /app directory where tests expect files
+ "use_docker_backend": True, # Enable Docker backend
+ "docker_container_name": container_name, # Use Terminal-Bench's container
+ "session_logs_dir": str(session_logs_dir),
+ "safe_mode": False, # Allow more commands in Docker environment
+ }
+
+ print(f"Using Docker container: {container_name}")
+ print(f"Docker working directory: /app")
+ model_backend_reason = ModelFactory.create(
+ model_platform=ModelPlatformType.OPENAI,
+ model_type=ModelType.GPT_4_1,
+ model_config_dict={
+ "stream": False,
+ },
+ )
+
+ task_id = 'workforce_task'
+ camel_agent = developer_agent_factory(
+ model_backend_reason,
+ task_id,
+ terminal_toolkit_kwargs
+ )
+ camel_agent.reset()
+
+ usr_msg = f"{instruction}\n"
+
+ # Get response information
+ response = camel_agent.step(usr_msg)
+
+ total_input_tokens = response.info['usage']['prompt_tokens']
+ total_output_tokens = response.info['usage']['completion_tokens']
+
+ memory_list = camel_agent._memory._chat_history_block.storage.memory_list
+
+ def create_timestamped_marker_from_memory(records: List[dict]) -> Tuple[float, str]:
+ """Create a timestamped marker from memory records."""
+ results = []
+ print(f"Total records: {len(records)}")
+ for record in records:
+
+ if 'func_name' in record['message'].keys():
+ timestamp = record['timestamp']
+ func_name = record['message']['func_name']
+ args = record['message'].get('args', {})
+ if args:
+ command = args.get('command', '')
+ else:
+ command = ''
+ results.append((timestamp, f"Called tool: {func_name} with args: {command}"))
+ return results
+
+ timestamped_markers = create_timestamped_marker_from_memory(memory_list)
+
+
+ return AgentResult(
+ total_input_tokens=total_input_tokens,
+ total_output_tokens=total_output_tokens,
+ failure_mode=FailureMode.NONE,
+ timestamped_markers=timestamped_markers,
+ )
diff --git a/pyproject.toml b/pyproject.toml
index f0bc01c..2092670 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,5 +7,5 @@ requires-python = "==3.12.*"
dependencies = [
"terminal-bench>=0.2.16",
"openai>=1.0.0",
- "camel-ai @ git+https://github.com/camel-ai/camel.git@agent-summarize",
+ "camel-ai @ git+https://github.com/camel-ai/camel.git@7bde8ec",
]
diff --git a/test.sh b/test.sh
index f569785..482bf5f 100755
--- a/test.sh
+++ b/test.sh
@@ -11,4 +11,10 @@ uv run tb run \
uv run tb run \
--dataset terminal-bench-core==head \
--agent-import-path agents.camel_agent:CamelTerminus \
+ --task-id hello-world
+
+
+uv run tb run \
+ --dataset terminal-bench-core==head \
+ --agent-import-path agents.camel_terminal_agent:CamelTerminalAgent \
--task-id hello-world
\ No newline at end of file
diff --git a/util/agent_factory.py b/util/agent_factory.py
new file mode 100644
index 0000000..0533ef7
--- /dev/null
+++ b/util/agent_factory.py
@@ -0,0 +1,180 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import asyncio
+import datetime
+import os
+import platform
+import uuid
+
+from camel.agents.chat_agent import ChatAgent
+from camel.logger import get_logger
+from camel.messages.base import BaseMessage
+from camel.models import BaseModelBackend, ModelFactory
+from camel.societies.workforce import Workforce
+from camel.tasks.task import Task
+from camel.toolkits import (
+ AgentCommunicationToolkit,
+ NoteTakingToolkit,
+ TerminalToolkit,
+ ToolkitMessageIntegration,
+)
+from camel.types import ModelPlatformType, ModelType
+from camel.utils.commons import api_keys_required
+
+logger = get_logger(__name__)
+
+WORKING_DIRECTORY = os.environ.get("CAMEL_WORKDIR")
+
+print(f"Using working directory: {WORKING_DIRECTORY}")
+
+
+def send_message_to_user(
+ message_title: str,
+ message_description: str,
+ message_attachment: str = "",
+) -> str:
+ r"""Use this tool to send a tidy message to the user, including a
+ short title, a one-sentence description, and an optional attachment.
+
+ This one-way tool keeps the user informed about your progress,
+ decisions, or actions. It does not require a response.
+ You should use it to:
+ - Announce what you are about to do.
+ For example:
+ message_title="Starting Task"
+ message_description="Searching for papers on GUI Agents."
+ - Report the result of an action.
+ For example:
+ message_title="Search Complete"
+ message_description="Found 15 relevant papers."
+ - Report a created file.
+ For example:
+ message_title="File Ready"
+ message_description="The report is ready for your review."
+ message_attachment="report.pdf"
+ - State a decision.
+ For example:
+ message_title="Next Step"
+ message_description="Analyzing the top 10 papers."
+ - Give a status update during a long-running task.
+
+ Args:
+ message_title (str): The title of the message.
+ message_description (str): The short description.
+ message_attachment (str): The attachment of the message,
+ which can be a file path or a URL.
+
+ Returns:
+ str: Confirmation that the message was successfully sent.
+ """
+ print(f"\nAgent Message:\n{message_title} " f"\n{message_description}\n")
+ if message_attachment:
+ print(message_attachment)
+ logger.info(
+ f"\nAgent Message:\n{message_title} "
+ f"{message_description} {message_attachment}"
+ )
+ return (
+ f"Message successfully sent to user: '{message_title} "
+ f"{message_description} {message_attachment}'"
+ )
+
+
+def developer_agent_factory(
+ model: BaseModelBackend,
+ task_id: str,
+ terminal_toolkit_kwargs: dict = None,
+):
+ r"""Factory for creating a developer agent."""
+ # Initialize message integration
+ message_integration = ToolkitMessageIntegration(
+ message_handler=send_message_to_user
+ )
+
+ # Initialize toolkits
+ # terminal_toolkit = TerminalToolkit(safe_mode=True, clone_current_env=False)
+ terminal_toolkit = TerminalToolkit(**terminal_toolkit_kwargs)
+ note_toolkit = NoteTakingToolkit(working_directory=WORKING_DIRECTORY)
+
+ # Add messaging to toolkits
+ terminal_toolkit = message_integration.register_toolkits(terminal_toolkit)
+ note_toolkit = message_integration.register_toolkits(note_toolkit)
+
+ # Get enhanced tools
+ tools = [
+ *terminal_toolkit.get_tools(),
+ *note_toolkit.get_tools(),
+ ]
+
+ # Determine environment info based on Docker usage
+ if terminal_toolkit_kwargs and terminal_toolkit_kwargs.get('use_docker_backend'):
+ # Use Docker container environment
+ system_info = "Linux (Docker Container)"
+ working_dir = terminal_toolkit_kwargs.get('working_directory', '/app')
+ env_note = "You are running inside a Docker container. All commands execute within the containerized environment."
+ else:
+ # Use host system environment
+ system_info = f"{platform.system()} ({platform.machine()})"
+ working_dir = WORKING_DIRECTORY
+ env_note = "You are running on the host system."
+
+ system_message = f"""
+
+You are a Lead Software Engineer, a master-level coding assistant with a
+powerful terminal. Your role is to solve technical tasks by writing and
+executing code, installing necessary libraries, interacting with the operating
+system, and deploying applications.
+
+
+
+- **System**: {system_info}
+- **Working Directory**: `{working_dir}`. {env_note}
+- **Current Date**: {datetime.date.today()}.
+- **IMPORTANT**: When working with files, use paths relative to the working directory above.
+ Do NOT use host system paths like /Users/... when in a Docker container.
+
+
+
+- When you complete your task, provide a clear summary of what you accomplished.
+- Focus on creating files in the correct location as specified by the task.
+
+
+
+- **Code Execution**: You can write and execute code in any language to solve tasks.
+- **Terminal Control**: You have access to the terminal and can run command-line tools,
+ manage files, and interact with the OS. Install missing tools with package managers
+ like `pip3`, `uv`, or `apt-get`.
+- **File Operations**: {"Create files directly in the working directory using simple paths like './filename' or 'filename'." if terminal_toolkit_kwargs and terminal_toolkit_kwargs.get('use_docker_backend') else "You can access files from any place in the file system."}
+- **Verification**: Test and verify your solutions by executing them.
+
+
+
+
+- Take action to solve problems. Don't just suggest solutions—implement them.
+- Use the terminal effectively to execute commands and manage files.
+- Verify your solutions by testing them.
+
+ """
+
+ return ChatAgent(
+ system_message=BaseMessage.make_assistant_message(
+ role_name="Developer Agent",
+ content=system_message,
+ ),
+ model=model,
+ tools=tools,
+ # toolkits_to_register_agent=[screenshot_toolkit],
+ )
+