From eff407af530393c5bf321e6684dc4553df66a58e Mon Sep 17 00:00:00 2001
From: Teingi
Date: Wed, 8 Apr 2026 11:52:31 +0800
Subject: [PATCH] feat(memory): integrate optional PowerMem retrieval into system prompt

---
Review revision of the original patch:
- the hybrid backend now actually queries PowerMem (HTTP when
  powermem_base_url is set, SDK otherwise) instead of always returning no hits
- non-JSON HTTP responses and SDK client construction failures are logged and
  treated as "no hits" instead of propagating into prompt construction
- OPENHARNESS_MEMORY_BACKEND is validated explicitly, because
  model_copy(update=...) does not validate the values it is given
The "index" blob ids below were not regenerated for the revised content.

 pyproject.toml                            |   3 +
 src/openharness/config/settings.py        |  37 ++++-
 src/openharness/memory/__init__.py        |   2 +
 src/openharness/memory/powermem_client.py | 160 ++++++++++++++++++++++
 src/openharness/prompts/context.py        |  49 ++++---
 tests/test_memory/test_powermem_client.py |  89 ++++++++++++
 6 files changed, 320 insertions(+), 20 deletions(-)
 create mode 100644 src/openharness/memory/powermem_client.py
 create mode 100644 tests/test_memory/test_powermem_client.py

diff --git a/pyproject.toml b/pyproject.toml
index b54e0904..5197a50b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,9 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+powermem = [
+    "powermem>=1.1.0",
+]
 dev = [
     "pexpect>=4.9.0",
     "pytest>=8.0.0",
diff --git a/src/openharness/config/settings.py b/src/openharness/config/settings.py
index f3ecc2b1..e596b3f2 100644
--- a/src/openharness/config/settings.py
+++ b/src/openharness/config/settings.py
@@ -13,7 +13,7 @@ import os
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 from pydantic import BaseModel, Field
 
@@ -45,6 +45,14 @@ class MemorySettings(BaseModel):
     enabled: bool = True
     max_files: int = 5
     max_entrypoint_lines: int = 200
+    # PowerMem: local = Markdown files only; powermem_* = semantic retrieval;
+    # hybrid = Markdown relevance + PowerMem.
+    backend: Literal["local", "powermem_http", "powermem_sdk", "hybrid"] = "local"
+    powermem_base_url: str = ""
+    powermem_api_key: str = ""
+    powermem_user_id: str | None = None
+    powermem_agent_id: str | None = None
+    powermem_run_id: str | None = None
 
 
 class SandboxNetworkSettings(BaseModel):
@@ -705,6 +713,33 @@ def _apply_env_overrides(settings: Settings) -> Settings:
     if sandbox_updates:
         updates["sandbox"] = settings.sandbox.model_copy(update=sandbox_updates)
 
+    mem_backend = os.environ.get("OPENHARNESS_MEMORY_BACKEND")
+    mem_url = os.environ.get("OPENHARNESS_POWERMEM_BASE_URL")
+    mem_key = os.environ.get("OPENHARNESS_POWERMEM_API_KEY")
+    mem_user = os.environ.get("OPENHARNESS_POWERMEM_USER_ID")
+    mem_agent = os.environ.get("OPENHARNESS_POWERMEM_AGENT_ID")
+    mem_run = os.environ.get("OPENHARNESS_POWERMEM_RUN_ID")
+    memory_updates: dict[str, Any] = {}
+    if mem_backend:
+        backend = mem_backend.strip().lower()
+        # model_copy(update=...) skips validation, so reject unknown values here
+        # rather than storing a backend that silently disables all retrieval.
+        if backend not in ("local", "powermem_http", "powermem_sdk", "hybrid"):
+            raise ValueError(f"Invalid OPENHARNESS_MEMORY_BACKEND: {mem_backend!r}")
+        memory_updates["backend"] = backend
+    if mem_url:
+        memory_updates["powermem_base_url"] = mem_url.strip().rstrip("/")
+    if mem_key is not None:
+        memory_updates["powermem_api_key"] = mem_key
+    if mem_user is not None:
+        memory_updates["powermem_user_id"] = mem_user or None
+    if mem_agent is not None:
+        memory_updates["powermem_agent_id"] = mem_agent or None
+    if mem_run is not None:
+        memory_updates["powermem_run_id"] = mem_run or None
+    if memory_updates:
+        updates["memory"] = settings.memory.model_copy(update=memory_updates)
+
     if not updates:
         return settings
     return settings.model_copy(update=updates)
diff --git a/src/openharness/memory/__init__.py b/src/openharness/memory/__init__.py
index 35b91bc8..53c42d96 100644
--- a/src/openharness/memory/__init__.py
+++ b/src/openharness/memory/__init__.py
@@ -4,6 +4,7 @@
 from openharness.memory.manager import add_memory_entry, list_memory_files, remove_memory_entry
 from openharness.memory.paths import get_memory_entrypoint, get_project_memory_dir
 from openharness.memory.scan import scan_memory_files
+from openharness.memory.powermem_client import search_powermem_for_prompt
 from openharness.memory.search import find_relevant_memories
 
 __all__ = [
@@ -15,4 +16,5 @@
     "load_memory_prompt",
     "remove_memory_entry",
     "scan_memory_files",
+    "search_powermem_for_prompt",
 ]
diff --git a/src/openharness/memory/powermem_client.py b/src/openharness/memory/powermem_client.py
new file mode 100644
index 00000000..584cf215
--- /dev/null
+++ b/src/openharness/memory/powermem_client.py
@@ -0,0 +1,160 @@
+"""PowerMem integration: retrieve memories for system prompt injection."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+import httpx
+
+if TYPE_CHECKING:
+    from openharness.config.settings import MemorySettings
+
+logger = logging.getLogger(__name__)
+
+_sdk_memory: object | None = None
+_sdk_cache_key: str | None = None
+
+
+def reset_powermem_sdk_cache() -> None:
+    """Clear cached SDK client (for tests)."""
+    global _sdk_memory, _sdk_cache_key
+    _sdk_memory = None
+    _sdk_cache_key = None
+
+
+def _normalize_http_results(payload: object) -> list[tuple[str, str]]:
+    """Turn a PowerMem HTTP search payload into ``(label, text)`` pairs.
+
+    Payloads that are not a successful ``{"data": {"results": [...]}}`` dict
+    yield an empty list instead of raising.
+    """
+    if not isinstance(payload, dict):
+        return []
+    if not payload.get("success"):
+        return []
+    data = payload.get("data")
+    if not isinstance(data, dict):
+        return []
+    raw_results = data.get("results")
+    if not isinstance(raw_results, list):
+        return []
+    out: list[tuple[str, str]] = []
+    for item in raw_results:
+        if not isinstance(item, dict):
+            continue
+        content = item.get("content") or item.get("memory") or ""
+        if not isinstance(content, str):
+            content = str(content)
+        mid = item.get("memory_id", item.get("id", ""))
+        label = f"powermem-{mid}" if mid != "" else "powermem"
+        out.append((str(label), content.strip()))
+    return out
+
+
+def search_powermem_http(query: str, memory: MemorySettings) -> list[tuple[str, str]]:
+    """Search PowerMem over HTTP; return ``[]`` on any transport or decode error."""
+    base = (memory.powermem_base_url or "").strip().rstrip("/")
+    if not base:
+        logger.warning("memory.backend is powermem_http but powermem_base_url is empty")
+        return []
+
+    url = f"{base}/api/v1/memories/search"
+    body: dict[str, object] = {
+        "query": query,
+        "limit": max(1, min(100, memory.max_files)),
+    }
+    if memory.powermem_user_id:
+        body["user_id"] = memory.powermem_user_id
+    if memory.powermem_agent_id:
+        body["agent_id"] = memory.powermem_agent_id
+    if memory.powermem_run_id:
+        body["run_id"] = memory.powermem_run_id
+
+    headers: dict[str, str] = {}
+    if (memory.powermem_api_key or "").strip():
+        headers["X-API-Key"] = memory.powermem_api_key.strip()
+
+    try:
+        resp = httpx.post(url, json=body, headers=headers, timeout=30.0)
+        resp.raise_for_status()
+        return _normalize_http_results(resp.json())
+    except (httpx.HTTPError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError from a non-JSON response body.
+        logger.warning("PowerMem HTTP search failed: %s", e)
+        return []
+
+
+def _get_sdk_memory(memory: MemorySettings) -> object | None:
+    """Return a cached PowerMem SDK client, or ``None`` if the SDK is missing."""
+    global _sdk_memory, _sdk_cache_key
+    try:
+        from powermem import create_memory
+    except ImportError:
+        logger.warning("memory.backend is powermem_sdk but powermem is not installed")
+        return None
+
+    key = f"{memory.powermem_agent_id or ''}|{memory.powermem_user_id or ''}"
+    if _sdk_memory is not None and _sdk_cache_key == key:
+        return _sdk_memory
+
+    kwargs: dict[str, str] = {}
+    if memory.powermem_agent_id:
+        kwargs["agent_id"] = memory.powermem_agent_id
+    _sdk_memory = create_memory(**kwargs)
+    _sdk_cache_key = key
+    return _sdk_memory
+
+
+def search_powermem_sdk(query: str, memory: MemorySettings) -> list[tuple[str, str]]:
+    """Search PowerMem via the Python SDK; return ``[]`` on any failure."""
+    try:
+        # Client construction is inside the try: create_memory() can raise too.
+        client = _get_sdk_memory(memory)
+        if client is None:
+            return []
+        raw = client.search(
+            query,
+            user_id=memory.powermem_user_id,
+            agent_id=memory.powermem_agent_id,
+            run_id=memory.powermem_run_id,
+            limit=max(1, min(100, memory.max_files)),
+        )
+    except Exception as e:
+        logger.warning("PowerMem SDK search failed: %s", e)
+        return []
+
+    if not isinstance(raw, dict):
+        return []
+    results = raw.get("results")
+    if not isinstance(results, list):
+        return []
+
+    out: list[tuple[str, str]] = []
+    for item in results:
+        if not isinstance(item, dict):
+            continue
+        text = item.get("memory") or item.get("content") or ""
+        if not isinstance(text, str):
+            text = str(text)
+        mid = item.get("id", item.get("memory_id", ""))
+        label = f"powermem-{mid}" if mid != "" else "powermem"
+        out.append((str(label), text.strip()))
+    return out
+
+
+def search_powermem_for_prompt(query: str, memory: MemorySettings) -> list[tuple[str, str]]:
+    """Dispatch a PowerMem search according to ``memory.backend``.
+
+    ``hybrid`` uses HTTP when ``powermem_base_url`` is set and the SDK otherwise;
+    ``local`` (or any other value) returns ``[]``.
+    """
+    b = memory.backend
+    if b == "hybrid":
+        # Hybrid must still reach PowerMem; choose the transport that is configured.
+        b = "powermem_http" if (memory.powermem_base_url or "").strip() else "powermem_sdk"
+    if b == "powermem_http":
+        return search_powermem_http(query, memory)
+    if b == "powermem_sdk":
+        return search_powermem_sdk(query, memory)
+    return []
diff --git a/src/openharness/prompts/context.py b/src/openharness/prompts/context.py
index 865f9455..35509b22 100644
--- a/src/openharness/prompts/context.py
+++ b/src/openharness/prompts/context.py
@@ -7,6 +7,7 @@
 from openharness.config.paths import get_project_issue_file, get_project_pr_comments_file
 from openharness.config.settings import Settings
 from openharness.memory import find_relevant_memories, load_memory_prompt
+from openharness.memory.powermem_client import search_powermem_for_prompt
 from openharness.prompts.claudemd import load_claude_md_prompt
 from openharness.prompts.system_prompt import build_system_prompt
 from openharness.skills.loader import load_skill_registry
@@ -78,24 +79,34 @@ def build_runtime_system_prompt(
             sections.append(memory_section)
 
         if latest_user_prompt:
-            relevant = find_relevant_memories(
-                latest_user_prompt,
-                cwd,
-                max_results=settings.memory.max_files,
-            )
-            if relevant:
-                lines = ["# Relevant Memories"]
-                for header in relevant:
-                    content = header.path.read_text(encoding="utf-8", errors="replace").strip()
-                    lines.extend(
-                        [
-                            "",
-                            f"## {header.path.name}",
-                            "```md",
-                            content[:8000],
-                            "```",
-                        ]
-                    )
-                sections.append("\n".join(lines))
+            mem = settings.memory
+            if mem.backend in ("local", "hybrid"):
+                relevant = find_relevant_memories(
+                    latest_user_prompt,
+                    cwd,
+                    max_results=mem.max_files,
+                )
+                if relevant:
+                    lines = ["# Relevant Memories"]
+                    for header in relevant:
+                        content = header.path.read_text(encoding="utf-8", errors="replace").strip()
+                        lines.extend(
+                            [
+                                "",
+                                f"## {header.path.name}",
+                                "```md",
+                                content[:8000],
+                                "```",
+                            ]
+                        )
+                    sections.append("\n".join(lines))
+
+            if mem.backend in ("powermem_http", "powermem_sdk", "hybrid"):
+                pm_hits = search_powermem_for_prompt(latest_user_prompt, mem)
+                if pm_hits:
+                    lines = ["# PowerMem Retrieval"]
+                    for label, text in pm_hits:
+                        lines.extend(["", f"## {label}", "```md", text[:8000], "```"])
+                    sections.append("\n".join(lines))
 
     return "\n\n".join(section for section in sections if section.strip())
diff --git a/tests/test_memory/test_powermem_client.py b/tests/test_memory/test_powermem_client.py
new file mode 100644
index 00000000..ef4aa8c4
--- /dev/null
+++ b/tests/test_memory/test_powermem_client.py
@@ -0,0 +1,89 @@
+"""Tests for PowerMem HTTP client helpers."""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from openharness.config.settings import MemorySettings
+from openharness.memory.powermem_client import (
+    _normalize_http_results,
+    search_powermem_http,
+    search_powermem_for_prompt,
+)
+
+
+def test_normalize_http_results_success():
+    payload = {
+        "success": True,
+        "data": {
+            "results": [
+                {"memory_id": 42, "content": "User likes tea"},
+            ]
+        },
+    }
+    pairs = _normalize_http_results(payload)
+    assert pairs == [("powermem-42", "User likes tea")]
+
+
+def test_normalize_http_results_empty():
+    assert _normalize_http_results({}) == []
+    assert _normalize_http_results({"success": False}) == []
+
+
+def test_search_powermem_http_calls_api():
+    memory = MemorySettings(
+        backend="powermem_http",
+        powermem_base_url="http://127.0.0.1:8000/",
+        powermem_api_key="secret",
+        powermem_user_id="u1",
+        max_files=3,
+    )
+
+    class FakeResp:
+        def raise_for_status(self) -> None:
+            return None
+
+        def json(self) -> dict:
+            return {
+                "success": True,
+                "data": {"results": [{"memory_id": 1, "content": "fact"}]},
+            }
+
+    with patch("openharness.memory.powermem_client.httpx.post", return_value=FakeResp()) as m:
+        out = search_powermem_http("hello", memory)
+
+    assert out == [("powermem-1", "fact")]
+    m.assert_called_once()
+    _args, kwargs = m.call_args
+    assert kwargs["json"]["query"] == "hello"
+    assert kwargs["json"]["limit"] == 3
+    assert kwargs["json"]["user_id"] == "u1"
+    assert kwargs["headers"]["X-API-Key"] == "secret"
+    assert _args[0] == "http://127.0.0.1:8000/api/v1/memories/search"
+
+
+def test_search_powermem_for_prompt_dispatches():
+    memory = MemorySettings(backend="local")
+    assert search_powermem_for_prompt("q", memory) == []
+
+
+def test_search_powermem_for_prompt_hybrid_uses_http_when_url_set():
+    memory = MemorySettings(backend="hybrid", powermem_base_url="http://127.0.0.1:8000")
+    target = "openharness.memory.powermem_client.search_powermem_http"
+    with patch(target, return_value=[("powermem-1", "fact")]) as m:
+        assert search_powermem_for_prompt("q", memory) == [("powermem-1", "fact")]
+    m.assert_called_once_with("q", memory)
+
+
+def test_search_powermem_http_tolerates_non_json_body():
+    memory = MemorySettings(backend="powermem_http", powermem_base_url="http://127.0.0.1:8000")
+
+    class BadResp:
+        def raise_for_status(self) -> None:
+            return None
+
+        def json(self) -> dict:
+            raise ValueError("not json")
+
+    with patch("openharness.memory.powermem_client.httpx.post", return_value=BadResp()):
+        assert search_powermem_http("hello", memory) == []