From 41a03d8558b393b953251303e3f4def1c5adc2ea Mon Sep 17 00:00:00 2001
From: ryuketsukami
Date: Wed, 25 Mar 2026 02:28:21 +0200
Subject: [PATCH] fix: return LLM response metadata (finish_reason, usage, model)

get_response_from_llm() returned an empty dict {} as the third
element, discarding finish_reason, token usage, and model info.
Now returns a populated info dict with finish_reason, usage, and
model from the litellm ModelResponse.

Also adds system_msg parameter for proper role-based message
separation.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
Review notes (text in this area is not part of the commit message):
 * tests/conftest.py: the project root is derived from the location of
   conftest.py instead of a hard-coded absolute Windows path, so the
   suite can be imported on any machine.
 * agent/llm.py: only the system message prepended for this call is
   removed from the returned history; system-role entries supplied by
   the caller in msg_history are kept, as they were before this patch.
 * Both edits keep the added-line counts unchanged, so the hunk headers
   and the diffstat below are still accurate. The blob ids on the
   "index" lines of those two files were not recomputed.

 agent/llm.py               |  41 +++++++-
 tests/__init__.py          |   0
 tests/conftest.py          | 188 +++++++++++++++++++++++++++++++++++++
 tests/test_llm_metadata.py |  93 ++++++++++++++++++
 4 files changed, 318 insertions(+), 4 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_llm_metadata.py

diff --git a/agent/llm.py b/agent/llm.py
index b8ba343..e11893a 100644
--- a/agent/llm.py
+++ b/agent/llm.py
@@ -39,6 +39,7 @@ def get_response_from_llm(
     temperature: float = 0.0,
     max_tokens: int = MAX_TOKENS,
     msg_history=None,
+    system_msg: str = None,
 ) -> Tuple[str, list, dict]:
     if msg_history is None:
         msg_history = []
@@ -49,7 +50,11 @@ def get_response_from_llm(
         for msg in msg_history
     ]
 
-    new_msg_history = msg_history + [{"role": "user", "content": msg}]
+    new_msg_history = []
+    # Prepend system message if provided
+    if system_msg is not None:
+        new_msg_history.append({"role": "system", "content": system_msg})
+    new_msg_history += msg_history + [{"role": "user", "content": msg}]
 
     # Build kwargs - handle model-specific requirements
     completion_kwargs = {
@@ -75,8 +80,16 @@ def get_response_from_llm(
         completion_kwargs["max_tokens"] = max_tokens
 
     response = litellm.completion(**completion_kwargs)
-    response_text = response['choices'][0]['message']['content']  # pyright: ignore
-    new_msg_history.append({"role": "assistant", "content": response['choices'][0]['message']['content']})
+    msg_content = response['choices'][0]['message']  # pyright: ignore
+    response_text = msg_content['content']
+    new_msg_history.append({
+        "role": "assistant",
+        "content": response_text,
+    })
+
+    # Drop only the system message prepended above (API-call only).
+    if system_msg is not None:
+        new_msg_history = new_msg_history[1:]
 
     # Convert content to text, compatible with MetaGen API
     new_msg_history = [
@@ -84,7 +97,27 @@ def get_response_from_llm(
         for msg in new_msg_history
     ]
 
-    return response_text, new_msg_history, {}
+    has_choices = (
+        hasattr(response, 'choices') and response.choices
+    )
+    has_usage = (
+        hasattr(response, 'usage') and response.usage
+    )
+    info = {
+        "finish_reason": (
+            response.choices[0].finish_reason
+            if has_choices else None
+        ),
+        "usage": (
+            dict(response.usage)
+            if has_usage else {}
+        ),
+        "model": (
+            response.model
+            if hasattr(response, 'model') else None
+        ),
+    }
+    return response_text, new_msg_history, info
 
 
 if __name__ == "__main__":
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2708571
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,188 @@
+"""Shared fixtures for HyperAgents test suite."""
+
+import importlib
+import importlib.util
+import os
+import json
+import sys
+import tempfile
+import shutil
+import types
+
+import pytest
+
+# ---- Project root on sys.path ----
+_PROJ = os.path.dirname(
+    os.path.dirname(os.path.abspath(__file__))
+)
+if _PROJ not in sys.path:
+    sys.path.insert(0, _PROJ)
+
+
+def _install_lightweight_mocks():
+    """Install minimal mock modules so that project
+    modules can be imported without heavy deps like
+    docker, litellm, backoff, torch, etc.
+
+    Only installs mocks for modules NOT already
+    present -- safe to call multiple times.
+    """
+    def _ensure(name, factory):
+        if name not in sys.modules:
+            sys.modules[name] = factory()
+
+    # docker
+    _ensure("docker", lambda: types.ModuleType("docker"))
+
+    # utils.docker_utils
+    def _make_docker_utils():
+        m = types.ModuleType("utils.docker_utils")
+        m.copy_to_container = lambda *a, **k: None
+        m.log_container_output = lambda *a, **k: None
+        return m
+    _ensure("utils.docker_utils", _make_docker_utils)
+
+    # utils.git_utils
+    def _make_git_utils():
+        m = types.ModuleType("utils.git_utils")
+        m.commit_repo = lambda *a, **k: "abc123"
+        m.get_git_commit_hash = lambda *a, **k: "abc"
+        return m
+    _ensure("utils.git_utils", _make_git_utils)
+
+    # backoff
+    def _make_backoff():
+        m = types.ModuleType("backoff")
+        m.expo = "expo"
+        m.on_exception = (
+            lambda *a, **kw: (lambda f: f)
+        )
+        return m
+    _ensure("backoff", _make_backoff)
+
+    # requests / requests.exceptions
+    def _make_requests():
+        m = types.ModuleType("requests")
+        exc = types.ModuleType("requests.exceptions")
+        exc.RequestException = Exception
+        m.exceptions = exc
+        sys.modules["requests.exceptions"] = exc
+        return m
+    _ensure("requests", _make_requests)
+
+    # litellm
+    def _make_litellm():
+        m = types.ModuleType("litellm")
+        m.drop_params = True
+        m.completion = lambda **kw: None
+        return m
+    _ensure("litellm", _make_litellm)
+
+    # dotenv
+    def _make_dotenv():
+        m = types.ModuleType("dotenv")
+        m.load_dotenv = lambda *a, **kw: None
+        return m
+    _ensure("dotenv", _make_dotenv)
+
+    # utils.thread_logger
+    def _make_thread_logger():
+        m = types.ModuleType("utils.thread_logger")
+        class FakeLM:
+            def __init__(self, **kw):
+                self.log = print
+        m.ThreadLoggerManager = FakeLM
+        return m
+    _ensure(
+        "utils.thread_logger", _make_thread_logger
+    )
+
+    # tqdm (used by genesis evaluator)
+    def _make_tqdm():
+        m = types.ModuleType("tqdm")
+        m.tqdm = lambda *a, **kw: iter([])
+        return m
+    _ensure("tqdm", _make_tqdm)
+
+    # pandas (used by ensemble.py)
+    def _make_pandas():
+        m = types.ModuleType("pandas")
+        m.read_csv = lambda *a, **kw: None
+        return m
+    _ensure("pandas", _make_pandas)
+    _ensure("pd", _make_pandas)
+
+
+# Install mocks at import time so all test modules
+# benefit.
+_install_lightweight_mocks()
+
+
+def load_module_from_file(module_name, file_path):
+    """Load a Python module directly from a file path,
+    bypassing package __init__.py files.
+
+    Useful for modules whose package __init__ imports
+    heavy deps (e.g., torch).
+    """
+    abs_path = os.path.join(_PROJ, file_path)
+    spec = importlib.util.spec_from_file_location(
+        module_name, abs_path
+    )
+    mod = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = mod
+    spec.loader.exec_module(mod)
+    return mod
+
+
+@pytest.fixture
+def tmp_dir():
+    """Provide a temporary directory, cleaned up after test."""
+    d = tempfile.mkdtemp()
+    yield d
+    shutil.rmtree(d, ignore_errors=True)
+
+
+@pytest.fixture
+def sample_archive_jsonl(tmp_dir):
+    """Create a sample archive.jsonl file with valid data."""
+    path = os.path.join(tmp_dir, "archive.jsonl")
+    entries = [
+        {
+            "current_genid": 0,
+            "archive": [0],
+        },
+        {
+            "current_genid": 1,
+            "archive": [0, 1],
+        },
+        {
+            "current_genid": 2,
+            "archive": [0, 1, 2],
+        },
+    ]
+    with open(path, "w") as f:
+        for entry in entries:
+            f.write(json.dumps(entry) + "\n")
+    return path
+
+
+@pytest.fixture
+def sample_metadata_dir(tmp_dir):
+    """Create gen_X directories with metadata.json files."""
+    for genid in range(3):
+        gen_dir = os.path.join(
+            tmp_dir, f"gen_{genid}"
+        )
+        os.makedirs(gen_dir, exist_ok=True)
+        metadata = {
+            "parent_genid": genid - 1 if genid > 0 else None,
+            "valid_parent": True,
+            "prev_patch_files": [],
+            "curr_patch_files": [],
+        }
+        with open(
+            os.path.join(gen_dir, "metadata.json"), "w"
+        ) as f:
+            json.dump(metadata, f)
+    return tmp_dir
diff --git a/tests/test_llm_metadata.py b/tests/test_llm_metadata.py
new file mode 100644
index 0000000..759f9cd
--- /dev/null
+++ b/tests/test_llm_metadata.py
@@ -0,0 +1,93 @@
+"""Tests for LLM response metadata (F-10).
+
+Validates that get_response_from_llm() returns an
+info dict with expected keys: finish_reason, usage,
+model.
+"""
+
+import inspect
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# conftest.py mocks backoff, litellm, dotenv, etc.
+from agent.llm import get_response_from_llm
+
+
+class TestLlmMetadataKeys:
+    """F-10: info dict contains expected keys."""
+
+    def test_return_annotation_is_tuple(self):
+        """get_response_from_llm returns a 3-tuple
+        (text, history, info)."""
+        sig = inspect.signature(
+            get_response_from_llm
+        )
+        ret = sig.return_annotation
+        assert "Tuple" in str(ret)
+
+    def test_info_dict_constructed_in_source(self):
+        """Source constructs info with finish_reason,
+        usage, model keys."""
+        src = inspect.getsource(
+            get_response_from_llm
+        )
+        assert '"finish_reason"' in src
+        assert '"usage"' in src
+        assert '"model"' in src
+
+    def test_info_dict_is_returned(self):
+        """The function returns (response_text,
+        new_msg_history, info)."""
+        src = inspect.getsource(
+            get_response_from_llm
+        )
+        assert (
+            "return response_text, "
+            "new_msg_history, info"
+        ) in src
+
+    def test_info_structure_via_mock(self):
+        """Mock litellm.completion and verify the
+        returned info dict shape."""
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = (
+            "hello"
+        )
+        mock_response.choices[0].finish_reason = (
+            "stop"
+        )
+        mock_response.usage = MagicMock()
+        mock_response.model = "test-model"
+
+        # Make response subscriptable for the
+        # response['choices'][0]['message']['content']
+        # pattern used in the source.
+        choice_msg = {"content": "hello"}
+        choice = {"message": choice_msg}
+
+        def getitem(self, key):
+            if key == "choices":
+                return [choice]
+            return None
+
+        type(mock_response).__getitem__ = getitem
+
+        with patch(
+            "agent.llm.litellm.completion",
+            return_value=mock_response,
+        ):
+            text, history, info = (
+                get_response_from_llm(
+                    msg="test", model="test-model"
+                )
+            )
+
+        assert isinstance(info, dict)
+        assert "finish_reason" in info
+        assert "usage" in info
+        assert "model" in info
+        assert info["finish_reason"] == "stop"
+        assert info["model"] == "test-model"
+        assert text == "hello"