From 6a60cbbd72d2e9bcfb0a660ad1c383ce95234884 Mon Sep 17 00:00:00 2001
From: ryuketsukami <ryuketsukami@gmail.com>
Date: Wed, 25 Mar 2026 02:27:54 +0200
Subject: [PATCH] improve: enrich MetaAgent instruction with eval_path and
 iterations_left
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MetaAgent's forward() receives eval_path and iterations_left
as arguments but the instruction was just "Modify any part of
the codebase at {repo_path}." — a single sentence with no context.

These parameters are computed in generate_loop.py, passed through
the entire call chain via run_meta_agent.py, and then never used
in the prompt. The MetaAgent had no idea where evaluation results
were stored, how many iterations remained, or what to optimize.

Now the instruction includes:
- A pointer to README.md for orientation
- The eval_path for finding evaluation results
- The remaining iteration budget
- A suggested approach (analyze, identify, implement)

The core creative freedom ("Modify any part of the codebase")
is preserved.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 meta_agent.py                        |  43 +++++-
 tests/__init__.py                    |   0
 tests/conftest.py                    | 188 +++++++++++++++++++++++++++
 tests/test_meta_agent_instruction.py |  69 ++++++++++
 4 files changed, 297 insertions(+), 3 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_meta_agent_instruction.py

diff --git a/meta_agent.py b/meta_agent.py
index 3cd4572..b6e49e6 100644
--- a/meta_agent.py
+++ b/meta_agent.py
@@ -11,8 +11,45 @@ def forward(self, repo_path, eval_path, iterations_left=None):
         Args:
             repo_path (str): The path to the repository.
             eval_path (str): The path to previously generated agents and their evaluation results.
-            iterations_left (int, optional): The number of remaining iterations in which the meta agent will be invoked in future. Defaults to None.
+            iterations_left (int, optional): Number of
+                remaining meta-agent iterations.
+                Defaults to None.
         """
-        instruction = f"Modify any part of the codebase at `{repo_path}`."
+        instruction = (
+            f"Modify any part of the codebase"
+            f" at `{repo_path}`."
+        )
+        instruction += (
+            f"\n\nStart by reading"
+            f" `{repo_path}/README.md`"
+            f" for orientation on the system"
+            f" and file structure."
+        )
+        instruction += (
+            f"\n\nPrevious generations and their"
+            f" evaluation results are at"
+            f" `{eval_path}`. Analyze these to"
+            f" understand current performance."
+        )
+        if iterations_left is not None:
+            instruction += (
+                f"\n\nYou have {iterations_left}"
+                f" iteration(s) remaining."
+                f" Budget your changes accordingly."
+            )
+        instruction += (
+            "\n\nSuggested approach:"
+            " 1) Read evaluation results to"
+            " identify bottlenecks,"
+            " 2) Analyze the relevant code,"
+            " 3) Implement targeted"
+            " improvements."
+        )
 
-        new_msg_history = chat_with_agent(instruction, model=self.model, msg_history=[], logging=self.log, tools_available='all')
+        new_msg_history = chat_with_agent(
+            instruction,
+            model=self.model,
+            msg_history=[],
+            logging=self.log,
+            tools_available='all',
+        )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2708571
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,188 @@
+"""Shared fixtures for HyperAgents test suite."""
+
+import importlib
+import importlib.util
+import os
+import json
+import sys
+import tempfile
+import shutil
+import types
+
+import pytest
+
+# ---- Project root on sys.path ----
+# Derived from this file (tests/conftest.py) so the suite
+# does not depend on one machine's checkout location.
+_PROJ = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if _PROJ not in sys.path:
+    sys.path.insert(0, _PROJ)
+
+
+def _install_lightweight_mocks():
+    """Register stand-in modules in ``sys.modules`` so
+    project modules import without heavy deps (docker,
+    litellm, backoff, requests, pandas, ...).
+
+    A stand-in is added only when the name is not yet a
+    key of ``sys.modules``; repeated calls are no-ops.
+    """
+    def _ensure(name, factory):
+        # Never replace an entry that is already there.
+        if name not in sys.modules:
+            sys.modules[name] = factory()
+
+    # docker
+    _ensure("docker", lambda: types.ModuleType("docker"))
+
+    # utils.docker_utils
+    def _make_docker_utils():
+        m = types.ModuleType("utils.docker_utils")
+        m.copy_to_container = lambda *a, **k: None
+        m.log_container_output = lambda *a, **k: None
+        return m
+    _ensure("utils.docker_utils", _make_docker_utils)
+
+    # utils.git_utils
+    def _make_git_utils():
+        m = types.ModuleType("utils.git_utils")
+        m.commit_repo = lambda *a, **k: "abc123"
+        m.get_git_commit_hash = lambda *a, **k: "abc"
+        return m
+    _ensure("utils.git_utils", _make_git_utils)
+
+    # backoff: on_exception(...) becomes a no-op decorator
+    def _make_backoff():
+        m = types.ModuleType("backoff")
+        m.expo = "expo"
+        m.on_exception = lambda *a, **kw: (lambda f: f)
+        return m
+    _ensure("backoff", _make_backoff)
+
+    # requests / requests.exceptions
+    def _make_requests():
+        m = types.ModuleType("requests")
+        exc = types.ModuleType("requests.exceptions")
+        exc.RequestException = Exception
+        m.exceptions = exc
+        sys.modules["requests.exceptions"] = exc
+        return m
+    _ensure("requests", _make_requests)
+
+    # litellm
+    def _make_litellm():
+        m = types.ModuleType("litellm")
+        m.drop_params = True
+        m.completion = lambda **kw: None
+        return m
+    _ensure("litellm", _make_litellm)
+
+    # dotenv
+    def _make_dotenv():
+        m = types.ModuleType("dotenv")
+        m.load_dotenv = lambda *a, **kw: None
+        return m
+    _ensure("dotenv", _make_dotenv)
+
+    # utils.thread_logger
+    def _make_thread_logger():
+        m = types.ModuleType("utils.thread_logger")
+        class FakeLM:
+            def __init__(self, **kw):
+                self.log = print
+        m.ThreadLoggerManager = FakeLM
+        return m
+    _ensure("utils.thread_logger", _make_thread_logger)
+
+    # tqdm (used by genesis evaluator): yield the wrapped
+    # items so loops under test still run their bodies.
+    def _make_tqdm():
+        m = types.ModuleType("tqdm")
+        def _tqdm(iterable=None, *a, **kw):
+            return iter(() if iterable is None else iterable)
+        m.tqdm = _tqdm
+        return m
+    _ensure("tqdm", _make_tqdm)
+
+    # pandas (used by ensemble.py); `pd` is only an import
+    # alias, not a module name, so no "pd" entry is needed.
+    def _make_pandas():
+        m = types.ModuleType("pandas")
+        m.read_csv = lambda *a, **kw: None
+        return m
+    _ensure("pandas", _make_pandas)
+
+
+# Install mocks when this conftest is imported, so every
+# test module sees them.
+_install_lightweight_mocks()
+
+
+def load_module_from_file(module_name, file_path):
+    """Import ``file_path`` (relative to the project root)
+    as ``module_name`` and register it in ``sys.modules``,
+    bypassing package ``__init__.py`` files.
+
+    Raises ImportError if no loader handles the path.
+    """
+    abs_path = os.path.join(_PROJ, file_path)
+    spec = importlib.util.spec_from_file_location(module_name, abs_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"cannot load {module_name!r} from {abs_path!r}")
+    mod = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = mod
+    spec.loader.exec_module(mod)
+    return mod
+
+
+@pytest.fixture
+def tmp_dir():
+    """Yield a fresh temp directory; removed on teardown."""
+    scratch = tempfile.mkdtemp()
+    yield scratch
+    shutil.rmtree(scratch, ignore_errors=True)
+
+
+@pytest.fixture
+def sample_archive_jsonl(tmp_dir):
+    """Write a three-line ``archive.jsonl`` into tmp_dir.
+
+    Line ``g`` (g = 0, 1, 2) is a JSON object whose
+    ``current_genid`` is g and ``archive`` is [0..g].
+
+    Returns:
+        str: Path of the file that was written.
+    """
+    archive_file = os.path.join(tmp_dir, "archive.jsonl")
+    records = [
+        {
+            "current_genid": genid,
+            "archive": list(range(genid + 1)),
+        }
+        for genid in range(3)
+    ]
+    lines = [json.dumps(rec) + "\n" for rec in records]
+    with open(archive_file, "w") as f:
+        f.writelines(lines)
+    return archive_file
+
+
+@pytest.fixture
+def sample_metadata_dir(tmp_dir):
+    """Create gen_0..gen_2 under tmp_dir, each holding a
+    metadata.json whose parent_genid is the previous
+    generation (None for gen_0); returns tmp_dir."""
+    parents = [None, 0, 1]
+    for genid, parent in enumerate(parents):
+        gen_dir = os.path.join(tmp_dir, f"gen_{genid}")
+        os.makedirs(gen_dir, exist_ok=True)
+        meta_file = os.path.join(gen_dir, "metadata.json")
+        payload = {
+            "parent_genid": parent,
+            "valid_parent": True,
+            "prev_patch_files": [],
+            "curr_patch_files": [],
+        }
+        with open(meta_file, "w") as f:
+            json.dump(payload, f)
+    return tmp_dir
diff --git a/tests/test_meta_agent_instruction.py b/tests/test_meta_agent_instruction.py
new file mode 100644
index 0000000..f4c7631
--- /dev/null
+++ b/tests/test_meta_agent_instruction.py
@@ -0,0 +1,69 @@
+"""Tests for MetaAgent instruction construction (F-04).
+
+Validates that the instruction string built inside
+MetaAgent.forward() contains eval_path, iterations_left,
+and is not trivially short.
+"""
+
+import inspect
+
+import pytest
+
+# conftest.py mocks backoff, litellm, dotenv,
+# thread_logger, etc.
+from meta_agent import MetaAgent
+
+
+class TestMetaAgentInstruction:
+    """F-04: Instruction string is comprehensive.
+
+    Checks the prompt handed to a stubbed chat_with_agent,
+    not the source text of forward()."""
+
+    @pytest.fixture
+    def build(self, monkeypatch):
+        """Return a runner that yields forward()'s prompt."""
+        sent = []
+        monkeypatch.setattr(
+            inspect.getmodule(MetaAgent),
+            "chat_with_agent",
+            lambda instruction, **kw: sent.append(instruction),
+        )
+
+        def _run(iterations_left=None):
+            # Skip __init__; forward() reads model and log.
+            agent = MetaAgent.__new__(MetaAgent)
+            agent.model, agent.log = "stub-model", print
+            agent.forward("/repo", "/evals", iterations_left)
+            return sent[-1]
+        return _run
+
+    def test_forward_signature(self):
+        """forward() accepts eval_path and iterations_left."""
+        params = inspect.signature(MetaAgent.forward).parameters
+        assert "eval_path" in params
+        assert "iterations_left" in params
+
+    def test_eval_path_appears_in_instruction(self, build):
+        """The eval_path value is quoted in the prompt."""
+        assert "`/evals`" in build()
+
+    def test_iterations_left_in_instruction(self, build):
+        """A provided budget is stated in the prompt."""
+        assert "You have 3 iteration(s)" in build(3)
+
+    def test_iterations_left_omitted_when_none(self, build):
+        """No budget sentence without iterations_left."""
+        assert "iteration(s)" not in build()
+
+    def test_instruction_is_substantial(self, build):
+        """Far longer than the old one-sentence prompt."""
+        assert len(build().split()) >= 30
+
+    def test_instruction_mentions_readme(self, build):
+        """Prompt points at the repository README."""
+        assert "`/repo/README.md`" in build()
+
+    def test_instruction_mentions_repo_path(self, build):
+        """Prompt names the repo_path to modify."""
+        assert "at `/repo`." in build()