From 6a60cbbd72d2e9bcfb0a660ad1c383ce95234884 Mon Sep 17 00:00:00 2001
From: ryuketsukami
Date: Wed, 25 Mar 2026 02:27:54 +0200
Subject: [PATCH] improve: enrich MetaAgent instruction with eval_path and iterations_left
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MetaAgent's forward() receives eval_path and iterations_left as
arguments but the instruction was just "Modify any part of the codebase
at {repo_path}." — seven words with no context. These parameters are
computed in generate_loop.py, passed through the entire call chain via
run_meta_agent.py, and then never used in the prompt. The MetaAgent had
no idea where evaluation results were stored, how many iterations
remained, or what to optimize.

Now the instruction includes:
- A pointer to README.md for orientation
- The eval_path for finding evaluation results
- The remaining iteration budget
- A suggested approach (analyze, identify, implement)

The core creative freedom ("Modify any part of the codebase") is
preserved.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 meta_agent.py                        |  43 +++++-
 tests/__init__.py                    |   0
 tests/conftest.py                    | 188 +++++++++++++++++++++++++++
 tests/test_meta_agent_instruction.py |  69 ++++++++++
 4 files changed, 297 insertions(+), 3 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_meta_agent_instruction.py

diff --git a/meta_agent.py b/meta_agent.py
index 3cd4572..b6e49e6 100644
--- a/meta_agent.py
+++ b/meta_agent.py
@@ -11,8 +11,45 @@ def forward(self, repo_path, eval_path, iterations_left=None):
         Args:
             repo_path (str): The path to the repository.
             eval_path (str): The path to previously generated agents and their evaluation results.
-            iterations_left (int, optional): The number of remaining iterations in which the meta agent will be invoked in future. Defaults to None.
+            iterations_left (int, optional): Number of
+                remaining meta-agent iterations.
+                Defaults to None.
         """
-        instruction = f"Modify any part of the codebase at `{repo_path}`."
+        instruction = (
+            f"Modify any part of the codebase"
+            f" at `{repo_path}`."
+        )
+        instruction += (
+            f"\n\nStart by reading"
+            f" `{repo_path}/README.md`"
+            f" for orientation on the system"
+            f" and file structure."
+        )
+        instruction += (
+            f"\n\nPrevious generations and their"
+            f" evaluation results are at"
+            f" `{eval_path}`. Analyze these to"
+            f" understand current performance."
+        )
+        if iterations_left is not None:
+            instruction += (
+                f"\n\nYou have {iterations_left}"
+                f" iteration(s) remaining."
+                f" Budget your changes accordingly."
+            )
+        instruction += (
+            "\n\nSuggested approach:"
+            " 1) Read evaluation results to"
+            " identify bottlenecks,"
+            " 2) Analyze the relevant code,"
+            " 3) Implement targeted"
+            " improvements."
+        )
 
-        new_msg_history = chat_with_agent(instruction, model=self.model, msg_history=[], logging=self.log, tools_available='all')
+        new_msg_history = chat_with_agent(
+            instruction,
+            model=self.model,
+            msg_history=[],
+            logging=self.log,
+            tools_available='all',
+        )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2708571
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,188 @@
+"""Shared fixtures for HyperAgents test suite."""
+
+import importlib
+import importlib.util
+import os
+import json
+import sys
+import tempfile
+import shutil
+import types
+
+import pytest
+
+# ---- Project root (parent of tests/) on sys.path ----
+_PROJ = os.path.dirname(
+    os.path.dirname(os.path.abspath(__file__))
+)
+if _PROJ not in sys.path:
+    sys.path.insert(0, _PROJ)
+
+
+def _install_lightweight_mocks():
+    """Install minimal mock modules so that project
+    modules can be imported without heavy deps like
+    docker, litellm, backoff, torch, etc.
+
+    Only installs mocks for modules NOT already
+    present -- safe to call multiple times.
+    """
+    def _ensure(name, factory):
+        if name not in sys.modules:
+            sys.modules[name] = factory()
+
+    # docker
+    _ensure("docker", lambda: types.ModuleType("docker"))
+
+    # utils.docker_utils
+    def _make_docker_utils():
+        m = types.ModuleType("utils.docker_utils")
+        m.copy_to_container = lambda *a, **k: None
+        m.log_container_output = lambda *a, **k: None
+        return m
+    _ensure("utils.docker_utils", _make_docker_utils)
+
+    # utils.git_utils
+    def _make_git_utils():
+        m = types.ModuleType("utils.git_utils")
+        m.commit_repo = lambda *a, **k: "abc123"
+        m.get_git_commit_hash = lambda *a, **k: "abc"
+        return m
+    _ensure("utils.git_utils", _make_git_utils)
+
+    # backoff
+    def _make_backoff():
+        m = types.ModuleType("backoff")
+        m.expo = "expo"
+        m.on_exception = (
+            lambda *a, **kw: (lambda f: f)
+        )
+        return m
+    _ensure("backoff", _make_backoff)
+
+    # requests / requests.exceptions
+    def _make_requests():
+        m = types.ModuleType("requests")
+        exc = types.ModuleType("requests.exceptions")
+        exc.RequestException = Exception
+        m.exceptions = exc
+        sys.modules["requests.exceptions"] = exc
+        return m
+    _ensure("requests", _make_requests)
+
+    # litellm
+    def _make_litellm():
+        m = types.ModuleType("litellm")
+        m.drop_params = True
+        m.completion = lambda **kw: None
+        return m
+    _ensure("litellm", _make_litellm)
+
+    # dotenv
+    def _make_dotenv():
+        m = types.ModuleType("dotenv")
+        m.load_dotenv = lambda *a, **kw: None
+        return m
+    _ensure("dotenv", _make_dotenv)
+
+    # utils.thread_logger
+    def _make_thread_logger():
+        m = types.ModuleType("utils.thread_logger")
+        class FakeLM:
+            def __init__(self, **kw):
+                self.log = print
+        m.ThreadLoggerManager = FakeLM
+        return m
+    _ensure(
+        "utils.thread_logger", _make_thread_logger
+    )
+
+    # tqdm (used by genesis evaluator); stub yields the wrapped items
+    def _make_tqdm():
+        m = types.ModuleType("tqdm")
+        m.tqdm = lambda iterable=(), *a, **kw: iter(iterable)
+        return m
+    _ensure("tqdm", _make_tqdm)
+
+    # pandas (used by ensemble.py)
+    def _make_pandas():
+        m = types.ModuleType("pandas")
+        m.read_csv = lambda *a, **kw: None
+        return m
+    _ensure("pandas", _make_pandas)
+    _ensure("pd", _make_pandas)
+
+
+# Install mocks at import time so all test modules
+# benefit.
+_install_lightweight_mocks()
+
+
+def load_module_from_file(module_name, file_path):
+    """Load a Python module directly from a file path,
+    bypassing package __init__.py files.
+
+    Useful for modules whose package __init__ imports
+    heavy deps (e.g., torch).
+    """
+    abs_path = os.path.join(_PROJ, file_path)
+    spec = importlib.util.spec_from_file_location(
+        module_name, abs_path
+    )
+    mod = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = mod
+    spec.loader.exec_module(mod)
+    return mod
+
+
+@pytest.fixture
+def tmp_dir():
+    """Provide a temporary directory, cleaned up after test."""
+    d = tempfile.mkdtemp()
+    yield d
+    shutil.rmtree(d, ignore_errors=True)
+
+
+@pytest.fixture
+def sample_archive_jsonl(tmp_dir):
+    """Create a sample archive.jsonl file with valid data."""
+    path = os.path.join(tmp_dir, "archive.jsonl")
+    entries = [
+        {
+            "current_genid": 0,
+            "archive": [0],
+        },
+        {
+            "current_genid": 1,
+            "archive": [0, 1],
+        },
+        {
+            "current_genid": 2,
+            "archive": [0, 1, 2],
+        },
+    ]
+    with open(path, "w") as f:
+        for entry in entries:
+            f.write(json.dumps(entry) + "\n")
+    return path
+
+
+@pytest.fixture
+def sample_metadata_dir(tmp_dir):
+    """Create gen_X directories with metadata.json files."""
+    for genid in range(3):
+        gen_dir = os.path.join(
+            tmp_dir, f"gen_{genid}"
+        )
+        os.makedirs(gen_dir, exist_ok=True)
+        metadata = {
+            "parent_genid": genid - 1 if genid > 0 else None,
+            "valid_parent": True,
+            "prev_patch_files": [],
+            "curr_patch_files": [],
+        }
+        with open(
+            os.path.join(gen_dir, "metadata.json"), "w"
+        ) as f:
+            json.dump(metadata, f)
+    return tmp_dir
diff --git a/tests/test_meta_agent_instruction.py b/tests/test_meta_agent_instruction.py
new file mode 100644
index 0000000..f4c7631
--- /dev/null
+++ b/tests/test_meta_agent_instruction.py
@@ -0,0 +1,69 @@
+"""Tests for MetaAgent instruction construction (F-04).
+
+Validates that the instruction string built inside
+MetaAgent.forward() contains eval_path, iterations_left,
+and is not trivially short.
+"""
+
+import inspect
+
+import pytest
+
+# conftest.py mocks backoff, litellm, dotenv,
+# thread_logger, etc.
+from meta_agent import MetaAgent
+
+
+class TestMetaAgentInstruction:
+    """F-04: Instruction string is comprehensive."""
+
+    def test_forward_accepts_eval_path(self):
+        """forward() signature includes eval_path."""
+        sig = inspect.signature(MetaAgent.forward)
+        params = list(sig.parameters.keys())
+        assert "eval_path" in params
+
+    def test_forward_accepts_iterations_left(self):
+        """forward() signature includes
+        iterations_left."""
+        sig = inspect.signature(MetaAgent.forward)
+        params = list(sig.parameters.keys())
+        assert "iterations_left" in params
+
+    def test_eval_path_appears_in_instruction(self):
+        """The source of forward() references
+        eval_path in the instruction string."""
+        src = inspect.getsource(MetaAgent.forward)
+        assert "eval_path" in src
+
+    def test_iterations_left_in_instruction(self):
+        """When iterations_left is provided, it
+        appears in the instruction."""
+        src = inspect.getsource(MetaAgent.forward)
+        assert "iterations_left" in src
+
+    def test_instruction_is_substantial(self):
+        """The instruction is built with multiple
+        concatenations, not just a few words."""
+        src = inspect.getsource(MetaAgent.forward)
+        plus_eq_count = src.count("instruction +=")
+        assert plus_eq_count >= 3, (
+            f"Expected 3+ instruction +=, got "
+            f"{plus_eq_count}"
+        )
+
+    def test_instruction_mentions_readme(self):
+        """Instruction tells the agent to read the
+        README for orientation."""
+        src = inspect.getsource(MetaAgent.forward)
+        assert "README" in src
+
+    def test_instruction_mentions_repo_path(self):
+        """Instruction references repo_path."""
+        src = inspect.getsource(MetaAgent.forward)
+        assert "repo_path" in src
+
+    def test_forward_calls_chat_with_agent(self):
+        """forward() delegates to chat_with_agent."""
+        src = inspect.getsource(MetaAgent.forward)
+        assert "chat_with_agent" in src