Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 37 additions & 4 deletions agent/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def get_response_from_llm(
temperature: float = 0.0,
max_tokens: int = MAX_TOKENS,
msg_history=None,
system_msg: str = None,
) -> Tuple[str, list, dict]:
if msg_history is None:
msg_history = []
Expand All @@ -49,7 +50,11 @@ def get_response_from_llm(
for msg in msg_history
]

new_msg_history = msg_history + [{"role": "user", "content": msg}]
new_msg_history = []
# Prepend system message if provided
if system_msg is not None:
new_msg_history.append({"role": "system", "content": system_msg})
new_msg_history += msg_history + [{"role": "user", "content": msg}]

# Build kwargs - handle model-specific requirements
completion_kwargs = {
Expand All @@ -75,16 +80,44 @@ def get_response_from_llm(
completion_kwargs["max_tokens"] = max_tokens

response = litellm.completion(**completion_kwargs)
response_text = response['choices'][0]['message']['content'] # pyright: ignore
new_msg_history.append({"role": "assistant", "content": response['choices'][0]['message']['content']})
msg_content = response['choices'][0]['message'] # pyright: ignore
response_text = msg_content['content']
new_msg_history.append({
"role": "assistant",
"content": response_text,
})

# Strip system message from returned history
# (only needed for the API call)
new_msg_history = [m for m in new_msg_history if m.get("role") != "system"]

# Convert content to text, compatible with MetaGen API
new_msg_history = [
{**msg, "text": msg.pop("content")} if "content" in msg else msg
for msg in new_msg_history
]

return response_text, new_msg_history, {}
has_choices = (
hasattr(response, 'choices') and response.choices
)
has_usage = (
hasattr(response, 'usage') and response.usage
)
info = {
"finish_reason": (
response.choices[0].finish_reason
if has_choices else None
),
"usage": (
dict(response.usage)
if has_usage else {}
),
"model": (
response.model
if hasattr(response, 'model') else None
),
}
return response_text, new_msg_history, info


if __name__ == "__main__":
Expand Down
Empty file added tests/__init__.py
Empty file.
188 changes: 188 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""Shared fixtures for HyperAgents test suite."""

import importlib
import importlib.util
import os
import json
import sys
import tempfile
import shutil
import types

import pytest

# ---- Project root on sys.path ----
# Derive the repository root relative to this conftest.py
# (tests/ lives one level below the project root) instead of
# hard-coding a developer-specific absolute path, so the test
# suite runs on any checkout location and any OS.
_PROJ = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
)
if _PROJ not in sys.path:
    sys.path.insert(0, _PROJ)


def _install_lightweight_mocks():
    """Register stub modules in ``sys.modules`` so project
    modules import without heavy dependencies (docker,
    litellm, backoff, torch, etc.).

    A stub is installed only when the module name is not
    already registered, so calling this repeatedly is safe.
    """

    def _stub_docker():
        return types.ModuleType("docker")

    def _stub_docker_utils():
        mod = types.ModuleType("utils.docker_utils")
        mod.copy_to_container = lambda *args, **kwargs: None
        mod.log_container_output = lambda *args, **kwargs: None
        return mod

    def _stub_git_utils():
        mod = types.ModuleType("utils.git_utils")
        mod.commit_repo = lambda *args, **kwargs: "abc123"
        mod.get_git_commit_hash = lambda *args, **kwargs: "abc"
        return mod

    def _stub_backoff():
        mod = types.ModuleType("backoff")
        mod.expo = "expo"
        # Decorator factory that yields an identity decorator.
        mod.on_exception = lambda *args, **kwargs: (lambda fn: fn)
        return mod

    def _stub_requests():
        mod = types.ModuleType("requests")
        exc_mod = types.ModuleType("requests.exceptions")
        exc_mod.RequestException = Exception
        mod.exceptions = exc_mod
        # Register the submodule too so that
        # "import requests.exceptions" resolves.
        sys.modules["requests.exceptions"] = exc_mod
        return mod

    def _stub_litellm():
        mod = types.ModuleType("litellm")
        mod.drop_params = True
        mod.completion = lambda **kwargs: None
        return mod

    def _stub_dotenv():
        mod = types.ModuleType("dotenv")
        mod.load_dotenv = lambda *args, **kwargs: None
        return mod

    def _stub_thread_logger():
        mod = types.ModuleType("utils.thread_logger")

        class FakeLM:
            def __init__(self, **kwargs):
                self.log = print

        mod.ThreadLoggerManager = FakeLM
        return mod

    def _stub_tqdm():
        # tqdm(...) is only iterated by the genesis evaluator.
        mod = types.ModuleType("tqdm")
        mod.tqdm = lambda *args, **kwargs: iter([])
        return mod

    def _stub_pandas():
        # Used by ensemble.py; read_csv is a no-op stand-in.
        mod = types.ModuleType("pandas")
        mod.read_csv = lambda *args, **kwargs: None
        return mod

    stubs = (
        ("docker", _stub_docker),
        ("utils.docker_utils", _stub_docker_utils),
        ("utils.git_utils", _stub_git_utils),
        ("backoff", _stub_backoff),
        ("requests", _stub_requests),
        ("litellm", _stub_litellm),
        ("dotenv", _stub_dotenv),
        ("utils.thread_logger", _stub_thread_logger),
        ("tqdm", _stub_tqdm),
        ("pandas", _stub_pandas),
        ("pd", _stub_pandas),
    )
    # Build each stub lazily, only when it actually needs to
    # be installed; never overwrite a real module.
    for name, factory in stubs:
        if name not in sys.modules:
            sys.modules[name] = factory()


# Install the stubs at conftest import time so every test
# module collected in this session can import project code
# without the heavy real dependencies.
_install_lightweight_mocks()


def load_module_from_file(module_name, file_path):
    """Load a Python module directly from a file path,
    bypassing package __init__.py files.

    Useful for modules whose package __init__ imports
    heavy deps (e.g., torch).

    Args:
        module_name: Name to register in ``sys.modules``.
        file_path: Path to the module, relative to the
            project root.

    Returns:
        The fully executed module object.

    Raises:
        ImportError: If no import spec can be built for the
            file (e.g., the path does not exist).
    """
    abs_path = os.path.join(_PROJ, file_path)
    spec = importlib.util.spec_from_file_location(
        module_name, abs_path
    )
    # spec_from_file_location returns None for unloadable
    # paths; fail with a clear error instead of an
    # AttributeError on spec.loader below.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot build import spec for {abs_path!r}"
        )
    mod = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Don't leave a half-initialized module registered.
        sys.modules.pop(module_name, None)
        raise
    return mod


@pytest.fixture
def tmp_dir():
    """Provide a temporary directory, cleaned up after test.

    Yields:
        Path to a fresh, empty temporary directory.
    """
    path = tempfile.mkdtemp()
    try:
        yield path
    finally:
        # try/finally guarantees removal even if the fixture
        # generator is closed abnormally; ignore_errors copes
        # with files still held open (e.g., on Windows).
        shutil.rmtree(path, ignore_errors=True)


@pytest.fixture
def sample_archive_jsonl(tmp_dir):
    """Create a sample archive.jsonl file with valid data.

    Each line mimics the generation-archive format:
    {"current_genid": N, "archive": [0..N]}.

    Returns:
        Path to the generated JSONL file.
    """
    path = os.path.join(tmp_dir, "archive.jsonl")
    entries = [
        {
            "current_genid": genid,
            "archive": list(range(genid + 1)),
        }
        for genid in range(3)
    ]
    # Explicit UTF-8: never depend on the platform default
    # encoding (e.g., cp1252 on Windows) for test fixtures.
    with open(path, "w", encoding="utf-8") as f:
        for entry in entries:
            f.write(json.dumps(entry) + "\n")
    return path


@pytest.fixture
def sample_metadata_dir(tmp_dir):
    """Create gen_X directories with metadata.json files.

    Builds gen_0..gen_2; each metadata.json links the
    generation to its parent (gen_0 has parent_genid=None).

    Returns:
        The directory containing the gen_X subdirectories.
    """
    for genid in range(3):
        gen_dir = os.path.join(tmp_dir, f"gen_{genid}")
        os.makedirs(gen_dir, exist_ok=True)
        metadata = {
            "parent_genid": genid - 1 if genid > 0 else None,
            "valid_parent": True,
            "prev_patch_files": [],
            "curr_patch_files": [],
        }
        meta_path = os.path.join(gen_dir, "metadata.json")
        # Explicit UTF-8 keeps fixtures platform-independent.
        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump(metadata, f)
    return tmp_dir
93 changes: 93 additions & 0 deletions tests/test_llm_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Tests for LLM response metadata (F-10).

Validates that get_response_from_llm() returns an
info dict with expected keys: finish_reason, usage,
model.
"""

import inspect
from unittest.mock import MagicMock, patch

import pytest

# conftest.py mocks backoff, litellm, dotenv, etc.
from agent.llm import get_response_from_llm


class TestLlmMetadataKeys:
    """F-10: info dict contains expected keys."""

    def test_return_annotation_is_tuple(self):
        """get_response_from_llm returns a 3-tuple
        (text, history, info)."""
        sig = inspect.signature(get_response_from_llm)
        ret = sig.return_annotation
        assert "Tuple" in str(ret)

    def test_info_dict_constructed_in_source(self):
        """Source constructs info with finish_reason,
        usage, model keys."""
        src = inspect.getsource(get_response_from_llm)
        assert '"finish_reason"' in src
        assert '"usage"' in src
        assert '"model"' in src

    def test_info_dict_is_returned(self):
        """The function returns (response_text,
        new_msg_history, info)."""
        src = inspect.getsource(get_response_from_llm)
        assert (
            "return response_text, "
            "new_msg_history, info"
        ) in src

    def test_info_structure_via_mock(self):
        """Mock litellm.completion and verify the
        returned info dict shape."""
        mock_response = MagicMock()
        # Attribute access path used for the info dict:
        # response.choices[0].finish_reason, response.usage,
        # response.model.
        mock_response.choices = [MagicMock()]
        mock_response.choices[0].message.content = "hello"
        mock_response.choices[0].finish_reason = "stop"
        mock_response.usage = MagicMock()
        mock_response.model = "test-model"

        # The source also reads the response via
        # response['choices'][0]['message']['content'], so the
        # mock must be subscriptable. Configure the INSTANCE
        # magic method's side_effect -- assigning a function to
        # type(mock_response).__getitem__ would patch the
        # shared MagicMock class itself, leaking into every
        # other MagicMock in the test session.
        choice = {"message": {"content": "hello"}}
        mock_response.__getitem__.side_effect = (
            lambda key: [choice] if key == "choices" else None
        )

        with patch(
            "agent.llm.litellm.completion",
            return_value=mock_response,
        ):
            text, history, info = get_response_from_llm(
                msg="test", model="test-model"
            )

        assert isinstance(info, dict)
        assert "finish_reason" in info
        assert "usage" in info
        assert "model" in info
        assert info["finish_reason"] == "stop"
        assert info["model"] == "test-model"
        assert text == "hello"