diff --git a/.amplifier/digital-twin-universe/profiles/memory-bundle-e2e.yaml b/.amplifier/digital-twin-universe/profiles/memory-bundle-e2e.yaml new file mode 100644 index 0000000..9c3175e --- /dev/null +++ b/.amplifier/digital-twin-universe/profiles/memory-bundle-e2e.yaml @@ -0,0 +1,137 @@ +# memory-bundle-e2e DTU Profile +# +# End-to-end test environment for amplifier-bundle-memory. +# Provisions an Ubuntu 24.04 Incus container with mempalace, the memory +# bundle, and Amplifier installed from a local Gitea mirror. +# +# Variables (provided at launch via --var): +# GITEA_URL -- Gitea base URL reachable from inside the environment +# (e.g. http://10.0.0.1:10110) +# GITEA_TOKEN -- API token for Gitea (admin user) +# +# Required host environment variables (forwarded via passthrough): +# ANTHROPIC_API_KEY -- Anthropic API key for LLM calls +# OPENAI_API_KEY -- OpenAI API key for LLM calls +# +# Launch example: +# amplifier-digital-twin launch memory-bundle-e2e \ +# --var GITEA_URL=http://10.0.0.1:10110 \ +# --var GITEA_TOKEN=$(amplifier-gitea token | jq -r .token) + +name: memory-bundle-e2e +description: | + End-to-end test environment for amplifier-bundle-memory. Provisions an + Ubuntu 24.04 container with mempalace and Amplifier installed from a local + Gitea mirror. A seed palace is pre-populated from fixture content and frozen + as a reset snapshot so each test run starts from a known-good state. + The reset-palace script restores the palace between test runs without + re-provisioning the entire environment. + +base: + image: ubuntu:24.04 + +# URL rewriting rules. +# +# A mitmproxy-based HTTPS proxy intercepts git fetch calls and redirects +# requests for the amplifier-bundle-memory GitHub repo to the local Gitea +# mirror, so uv installs the locally mirrored version. +url_rewrites: + auth: + username: admin + token_var: GITEA_TOKEN + # Keep uv's GitHub fast path disabled so `uv tool install` routes through + # git fetch and these rules actually apply. 
See profiles.md for the + # reasoning. When true, uv's native GitHub shortcut bypasses the proxy and + # url_rewrites do not apply — the upstream GitHub commit is installed instead + # of the local Gitea mirror. + allow_uv_github_fast_path: false + rules: + - match: github.com/michaeljabbour/amplifier-bundle-memory + target: ${GITEA_URL}/admin/amplifier-bundle-memory + +passthrough: + allow_external: true + services: + - name: anthropic + key_env: ANTHROPIC_API_KEY + - name: openai + key_env: OPENAI_API_KEY + +provision: + setup_cmds: + # 1. Core system packages + - apt-get update && apt-get install -y git curl python3 python3-pip jq + + # 2. Install uv (Python package and project manager) + - curl -LsSf https://astral.sh/uv/install.sh | sh + + # 3. Add /root/.local/bin to PATH in .bashrc and export for this session + - | + echo 'export PATH="/root/.local/bin:$PATH"' >> /root/.bashrc + export PATH="/root/.local/bin:$PATH" + + # 4. Install mempalace + pytest (system-wide via pip with break-system-packages) + # --ignore-installed makes pip install over packages that are already present + # instead of uninstalling them first (e.g. Debian's python3-rich has no RECORD file). + # pytest is required to run tests/integration/ inside the DTU. + - pip install --break-system-packages --ignore-installed mempalace pytest + + # 5. Create the palace directory — mempalace mine will populate it. + # mempalace init is a project-level entity-detection setup command; + # for a fresh DTU palace we only need the directory to exist. + - mkdir -p /root/.mempalace + + # 6. Clone amplifier-bundle-memory from Gitea mirror into /workspace + - git clone ${GITEA_URL}/admin/amplifier-bundle-memory /workspace/amplifier-bundle-memory + + # 7. Mine seed fixture content into the palace + # mempalace mine --mode projects is for code/docs (the default); 'files' is + # not a valid mode. Use --mode projects for the markdown fixture content. 
+ - cd /workspace/amplifier-bundle-memory && mempalace mine tests/fixtures/seed-palace/content/ --mode projects + + # 8. Copy project-context fixture to workspace for test use + - cp -r /workspace/amplifier-bundle-memory/tests/fixtures/seed-palace/project-context /workspace/project-context + + # 9. Freeze the seeded palace as an immutable snapshot + - cp -r /root/.mempalace /root/.mempalace-seed + + # 10. Ensure the spool directory exists inside the live palace + - mkdir -p /root/.mempalace/spool + + # 11. Install the reset-palace utility script + - | + printf '#!/bin/bash\nrm -rf /root/.mempalace\ncp -r /root/.mempalace-seed /root/.mempalace\nmkdir -p /root/.mempalace/spool\n' > /usr/local/bin/reset-palace + chmod +x /usr/local/bin/reset-palace + + # 12. Install Amplifier via uv — git+https URL is rewritten to Gitea mirror + - | + export PATH="/root/.local/bin:$PATH" + uv tool install -vv git+https://github.com/microsoft/amplifier + + # 13. Write API keys to keys.env (passthrough env vars; chmod 600 for security) + - | + mkdir -p /root/.amplifier + printf 'ANTHROPIC_API_KEY=%s\nOPENAI_API_KEY=%s\n' "${ANTHROPIC_API_KEY}" "${OPENAI_API_KEY}" > /root/.amplifier/keys.env + chmod 600 /root/.amplifier/keys.env + + # 14. Add the memory bundle to Amplifier (URL rewritten to Gitea mirror) + - | + export PATH="/root/.local/bin:$PATH" + amplifier bundle add --app "git+https://github.com/michaeljabbour/amplifier-bundle-memory@main#subdirectory=behaviors/mempalace.yaml" + + # 15. Validate that both tools are installed and reachable + - | + export PATH="/root/.local/bin:$PATH" + amplifier --version + mempalace --version || true + +update: + refresh_pypi: true + cmds: + # Clear Amplifier module cache so re-add fetches the latest bundle from Gitea. + # The palace is intentionally NOT reset here — accumulated memories are preserved + # across updates. Use reset-palace manually to restore the seed state. 
+ - rm -rf /root/.amplifier/cache/ + - | + export PATH="/root/.local/bin:$PATH" + amplifier bundle add --app "git+https://github.com/michaeljabbour/amplifier-bundle-memory@main#subdirectory=behaviors/mempalace.yaml" diff --git a/README.md b/README.md index be3f138..f54466f 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,19 @@ Rebuild from source: `cd docs/research && make all` (requires LaTeX + graphviz). --- +## Development + +For end-to-end testing and bundle development, a [Digital Twin Universe (DTU) profile](docs/development/dtu.md) is provided. + +See [docs/development/dtu.md](docs/development/dtu.md) for: +- Prerequisites and setup +- Launching the test environment +- Running integration tests +- Interactive session testing +- The update loop for iterating on changes + +--- + ## License MIT \ No newline at end of file diff --git a/docs/development/dtu.md b/docs/development/dtu.md new file mode 100644 index 0000000..0d8470f --- /dev/null +++ b/docs/development/dtu.md @@ -0,0 +1,422 @@ +# Digital Twin Universe (DTU) — End-to-End Test Environment + +This guide explains how to provision, use, and maintain the DTU environment for +`amplifier-bundle-memory`. The DTU profile is at: + +``` +.amplifier/digital-twin-universe/profiles/memory-bundle-e2e.yaml +``` + +--- + +## Why the DTU? + +Unit tests for this bundle mock their dependencies to run fast and in isolation: + +- `subprocess.run` is patched so the mempalace CLI is never invoked. +- `emit_event` is replaced by an in-process spy. +- MemPalace storage is shadowed by a temp directory or a stub object. + +These stubs are valuable for regression safety, but they do not prove the bundle +works in a real Amplifier session. The DTU closes that gap. 
+ +Inside a DTU environment the following all run against real infrastructure: + +- **Real bundle-install path.** `amplifier bundle add` fetches the bundle from a + local Gitea mirror using the subdirectory syntax + (`git+https://...@main#subdirectory=behaviors/mempalace.yaml`). Any packaging + or manifest error that the mocks hide will surface here. +- **Live MemPalace with semantic search.** The palace is seeded from fixture + content, and actual OpenAI embedding calls are made during mine and recall + operations. +- **Real Anthropic / OpenAI calls.** The LLM provider is not stubbed. Prompt + regressions that do not break unit tests become visible. +- **Full event flow.** Every hook—briefing, post-tool, post-assistant—fires in + sequence inside a genuine Amplifier session. Ordering bugs and missing awaits + show up here. + +Run unit tests first (they are fast), then run the DTU suite before opening a +pull request or shipping a release. + +--- + +## Prerequisites + +You need five things before you can launch the DTU: + +1. **Incus** — the container runtime used by `amplifier-digital-twin`. +2. **`amplifier-digital-twin` CLI** — install with `uv`: + ```bash + uv tool install amplifier-digital-twin + ``` +3. **A running Gitea instance with the bundle mirrored.** See the one-time setup + section below. +4. **`ANTHROPIC_API_KEY`** — an Anthropic API key starting with `sk-ant`. +5. **`OPENAI_API_KEY`** — an OpenAI API key starting with `sk-`. + +### Verify your environment + +Run the following commands and confirm the expected output: + +```bash +# 1. Incus is installed and reachable +incus --version +# expected: a version string, e.g. 6.x.x + +# 2. amplifier-digital-twin CLI is available +amplifier-digital-twin --version +# expected: a version string + +# 3. Anthropic key is set (first 6 chars should be sk-ant) +echo $ANTHROPIC_API_KEY | head -c 6 +# expected: sk-ant + +# 4. 
OpenAI key is set (first 3 chars should be sk-) +echo $OPENAI_API_KEY | head -c 3 +# expected: sk- +``` + +If any check fails, resolve it before proceeding. The DTU passthrough will +forward both keys into the container at launch time; they must be exported in +the host shell. + +--- + +## One-Time Gitea Setup + +The DTU profile rewrites GitHub URLs to a local Gitea mirror so that +`amplifier bundle add` installs your local version of the bundle, not the +upstream one on GitHub. You need to create this mirror once. + +### 1. Get your Gitea base URL and token + +```bash +GITEA_URL=$(amplifier-gitea url YOUR_GITEA_ID) +GITEA_TOKEN=$(amplifier-gitea token | jq -r .token) + +echo "URL: $GITEA_URL" +echo "Token: ${GITEA_TOKEN:0:8}..." +``` + +Replace `YOUR_GITEA_ID` with the identifier printed when you provisioned your +Gitea instance. + +### 2. Create the mirror repository + +```bash +curl -s -X POST "${GITEA_URL}/api/v1/repos/migrate" \ + -H "Content-Type: application/json" \ + -H "Authorization: token ${GITEA_TOKEN}" \ + -d '{ + "clone_addr": "https://github.com/michaeljabbour/amplifier-bundle-memory", + "repo_name": "amplifier-bundle-memory", + "uid": 1, + "mirror": true, + "private": false, + "description": "Mirror of amplifier-bundle-memory for DTU use" + }' | jq .full_name +# expected output: "admin/amplifier-bundle-memory" +``` + +The `uid: 1` is the admin user. Adjust if your admin has a different UID +(`GET /api/v1/users/admin` to check). + +### Working from a fork + +If you are developing on a personal fork rather than the upstream repo, change +`clone_addr` to your fork URL. The DTU url-rewrite rule matches +`github.com/michaeljabbour/amplifier-bundle-memory`; update it in the profile +YAML if your fork is at a different path. 
+ +--- + +## Launch the DTU + +Run the following from the **bundle root** (the directory that contains +`amplifier-bundle-memory/`): + +```bash +DTU_ID=$(amplifier-digital-twin launch memory-bundle-e2e \ + --var GITEA_URL="${GITEA_URL}" \ + --var GITEA_TOKEN="${GITEA_TOKEN}" \ + | tail -n1) + +echo "DTU environment ID: ${DTU_ID}" +``` + +The `tail -n1` captures the environment ID printed as the last line of the +launch output. Save it; every subsequent command uses it. + +> **First launch takes 5–10 minutes.** The profile has 15 `setup_cmds` that +> install system packages, compile Python wheels, initialise MemPalace, mine +> fixture content, freeze a reset snapshot, install Amplifier, and add the +> bundle. Subsequent launches reuse the cached base image and are faster. + +--- + +## Three Usage Modes + +### Mode 1 — Pytest Integration Tests + +Run the full integration suite inside the DTU: + +```bash +amplifier-digital-twin exec ${DTU_ID} -- \ + pytest tests/integration/ -v +``` + +The test suite uses an `autouse` fixture named `reset_palace` that runs +`reset-palace` before each test. This restores the palace to its seeded state +so tests are independent and repeatable. + +#### Inspecting failures + +If a test fails, connect to the container to investigate: + +```bash +# Tail the palace event log +amplifier-digital-twin exec ${DTU_ID} -- \ + cat /root/.mempalace/events/*.jsonl | jq . + +# Check palace status +amplifier-digital-twin exec ${DTU_ID} -- \ + mempalace status + +# Run a single failing test with verbose output and log capture +amplifier-digital-twin exec ${DTU_ID} -- \ + pytest tests/integration/test_recall.py::test_semantic_search -v -s --tb=long +``` + +### Mode 2 — Interactive Amplifier Session + +Open an interactive Amplifier session inside the DTU to manually exercise the +bundle: + +```bash +amplifier-digital-twin exec ${DTU_ID} -- amplifier run +``` + +Once inside, the memory bundle is active. 
Example queries to try: + +- `Search my palace for architecture decisions about the dual-palace pattern.` +- `What notes do I have about semantic search configuration?` +- `Store a new memory: the DTU reset-palace script restores the seed snapshot.` + +This mode is useful for exploratory testing, prompt tuning, and verifying that +the briefing hook surfaces the correct project context in the system prompt. + +### Mode 3 — Palace Inspection + +Inspect the live palace contents without running tests or a full session. + +**From an Amplifier session inside the DTU:** + +```python +# Tail the last 20 events +palace(operation="events", limit=20, tail=True) + +# Check palace metadata and drawer counts +palace(operation="status") +``` + +**From a shell inside the DTU:** + +```bash +# Inspect all events as JSON +amplifier-digital-twin exec ${DTU_ID} -- \ + bash -c 'cat /root/.mempalace/events/*.jsonl | jq .' + +# Reset the palace to its seed state for a clean slate +amplifier-digital-twin exec ${DTU_ID} -- reset-palace +``` + +Use `reset-palace` whenever you want to start from a known-good state without +re-provisioning the entire environment. + +--- + +## The Update Loop + +When you change bundle code and want to test in the DTU, follow this five-step +cycle: + +1. **Edit** — make your changes in + `amplifier-bundle-memory/behaviors/` or `amplifier-bundle-memory/modules/`. + +2. **Commit** — commit the changes locally so they are on a Git ref: + ```bash + git -C amplifier-bundle-memory commit -am "wip: " + ``` + +3. **Push to Gitea** — push the branch to your Gitea mirror: + ```bash + git -C amplifier-bundle-memory push gitea HEAD:main --force + ``` + If you track a different remote name, substitute it for `gitea`. + +4. **Update the DTU** — trigger the in-container update sequence: + ```bash + amplifier-digital-twin update ${DTU_ID} + ``` + This clears the Amplifier module cache and re-runs `amplifier bundle add`, + fetching the latest commit from Gitea. 
The palace is **not** reset during + an update — accumulated memories survive. + +5. **Test** — run the integration suite or an interactive session: + ```bash + amplifier-digital-twin exec ${DTU_ID} -- pytest tests/integration/ -v + ``` + +Repeat from step 1 as needed. Only steps 2–4 are required for subsequent +iterations if the container is still running. + +--- + +## Troubleshooting + +### `uv` bypasses Gitea (URL rewrites not applied) + +**Symptom:** `amplifier bundle add` installs from GitHub rather than from your +Gitea mirror. The bundle version inside the DTU does not reflect your local +changes. + +**Cause:** The DTU profile sets `allow_uv_github_fast_path: false`. Without +this flag, `uv` uses a native GitHub shortcut that bypasses the mitmproxy HTTPS +proxy. When the fast path is active, URL rewrite rules are never consulted, and +`uv` fetches directly from upstream GitHub. + +**Resolution:** The flag is already set correctly in the profile. If you copied +the profile and removed it by accident, add it back: + +```yaml +url_rewrites: + allow_uv_github_fast_path: false +``` + +Do not remove this flag even if `uv` installation feels slow — without it the +entire point of the mirror is defeated. + +--- + +### `amplifier bundle add` fails with 401 from Gitea + +**Symptom:** Setup step 14 (`amplifier bundle add ...`) exits with a 401 +Unauthorized error during provisioning. + +**Cause:** The Gitea token passed via `--var GITEA_TOKEN=` is expired, revoked, +or was generated for a user that does not have read access to the +`admin/amplifier-bundle-memory` repository. 
+ +**Resolution:** Regenerate a fresh token and relaunch: + +```bash +NEW_TOKEN=$(amplifier-gitea token | jq -r .token) +amplifier-digital-twin launch memory-bundle-e2e \ + --var GITEA_URL="${GITEA_URL}" \ + --var GITEA_TOKEN="${NEW_TOKEN}" \ + | tail -n1 +``` + +--- + +### Palace has zero drawers after launch + +**Symptom:** `palace(operation="status")` reports 0 drawers, or integration +tests fail because no seed content is found. + +**Cause:** Setup step 6 clones the bundle into `/workspace/amplifier-bundle-memory` +and step 7 mines content from +`tests/fixtures/seed-palace/content/`. If the clone path is wrong — for +example because the repo was mirrored under a different name in Gitea — step 7 +runs but mines from an empty or non-existent directory. + +**Resolution:** Re-launch with `--verbose` to capture the full setup output: + +```bash +amplifier-digital-twin launch memory-bundle-e2e \ + --var GITEA_URL="${GITEA_URL}" \ + --var GITEA_TOKEN="${GITEA_TOKEN}" \ + --verbose \ + | tail -n50 +``` + +Confirm that step 6 clones to `/workspace/amplifier-bundle-memory` and step 7 +prints a non-zero mine count. If the Gitea repo name is different from +`amplifier-bundle-memory`, update the `git clone` source URL in setup +step 6 of the profile YAML. + +--- + +### Briefing hook does not surface `project-context` + +**Symptom:** The system prompt in an interactive session does not include +project-context notes. Tests that assert briefing content fail. + +**Cause:** The briefing hook's helper function `_find_project_context_dir` +walks upward from the current working directory looking for a `project-context` +subdirectory. If the CWD inside the session is not under `/workspace`, the walk +will not reach `/workspace/project-context` and the hook returns no content. + +**Resolution:** + +1. 
Verify the fixture was copied during provisioning: + ```bash + amplifier-digital-twin exec ${DTU_ID} -- ls /workspace/project-context/ + ``` + You should see at least one `.md` file. + +2. If missing, copy it manually: + ```bash + amplifier-digital-twin exec ${DTU_ID} -- \ + cp -r /workspace/amplifier-bundle-memory/tests/fixtures/seed-palace/project-context \ + /workspace/project-context + ``` + +3. Ensure integration tests `cd` to `/workspace` or a subdirectory of it before + starting an Amplifier session, so `_find_project_context_dir` can locate the + directory. + +--- + +### API calls fail with "permission denied to anthropic.com" + +**Symptom:** LLM calls inside the DTU fail with a network error such as +`ConnectionRefusedError`, `permission denied`, or `ECONNREFUSED` when +connecting to `api.anthropic.com` or `api.openai.com`. + +**Cause:** Either: +- The DTU profile's `passthrough.allow_external: true` setting was removed or + overridden, blocking outbound traffic to external hosts. +- `ANTHROPIC_API_KEY` or `OPENAI_API_KEY` were not exported in the host shell + before running `amplifier-digital-twin launch`, so the keys were not + forwarded into the container. + +**Resolution:** + +1. Confirm the profile includes: + ```yaml + passthrough: + allow_external: true + services: + - name: anthropic + key_env: ANTHROPIC_API_KEY + - name: openai + key_env: OPENAI_API_KEY + ``` + +2. Verify the keys are set in the host shell **before** calling launch: + ```bash + echo $ANTHROPIC_API_KEY | head -c 6 # should print sk-ant + echo $OPENAI_API_KEY | head -c 3 # should print sk- + ``` + +3. 
If the keys were missing at launch time, destroy the environment and + relaunch after exporting them: + ```bash + export ANTHROPIC_API_KEY=YOUR_ANTHROPIC_API_KEY + export OPENAI_API_KEY=YOUR_OPENAI_API_KEY + amplifier-digital-twin launch memory-bundle-e2e \ + --var GITEA_URL="${GITEA_URL}" \ + --var GITEA_TOKEN="${GITEA_TOKEN}" \ + | tail -n1 + ``` diff --git a/tests/fixtures/seed-palace/README.md b/tests/fixtures/seed-palace/README.md new file mode 100644 index 0000000..8f7dc78 --- /dev/null +++ b/tests/fixtures/seed-palace/README.md @@ -0,0 +1,79 @@ +# Seed Palace Fixtures + +This directory contains fixture files used to populate a MemPalace instance inside the +Digital Twin Universe (DTU) profile for end-to-end testing of the memory-bundle. + +--- + +## What the files do + +### `content/` — mined into the palace + +All Markdown files under `content/` are ingested into the MemPalace palace via: + +```bash +cd /workspace/amplifier-bundle-memory && mempalace mine tests/fixtures/seed-palace/content/ --mode projects +``` + +The `--mode projects` flag (the default, intended for code and docs; `files` is not a valid mode) +ingests each `.md` file and lets the capture hook classify each fragment by category (decisions, +learnings, patterns, etc.) based on keyword detection in the body text. + +### `project-context/` — copied for the briefing hook + +Files under `project-context/` are copied verbatim to `/workspace/project-context/` inside +the DTU container so that the briefing hook's `_find_project_context_dir()` function can +discover them. That function walks upward from the current working directory (and also +checks `$PROJECT_CONTEXT_DIR` if set) looking for a directory named `project-context/`. + +--- + +## Seeding flow + +The DTU `memory-bundle-e2e.yaml` profile runs this sequence on startup: + +1. **`mkdir -p ~/.mempalace`** — creates the empty palace directory (`mempalace init` is not needed). +2. **`mempalace mine tests/fixtures/seed-palace/content/ --mode projects`** — populates the palace + with the session-notes and architecture-decisions fragments. +3. 
**`cp -r /workspace/amplifier-bundle-memory/tests/fixtures/seed-palace/project-context /workspace/project-context`** — places the briefing + files where `_find_project_context_dir()` expects them. +4. **`cp -r ~/.mempalace ~/.mempalace-seed`** — freezes a clean snapshot so tests can + call `reset-palace` to restore a known-good state between runs. +5. **`mkdir -p ~/.mempalace/spool`** — creates the spool directory inside the live palace that the capture hook + writes event fragments to before the drain thread flushes them. + +--- + +## The `reset-palace` script + +A small helper script is installed at `/usr/local/bin/reset-palace` by the DTU profile so tests can restore the +seeded state without re-running the full mine step: + +```bash +#!/bin/bash +rm -rf /root/.mempalace +cp -r /root/.mempalace-seed /root/.mempalace +mkdir -p /root/.mempalace/spool +``` + +--- + +## How to extend + +- **Add new memory fragments** — drop a `.md` file into `content/`. It will be picked up + by the next `mine` run. Include trigger keywords (`decided`, `learned`, `pattern`, etc.) + if you want the capture hook to classify the fragment into a specific category. +- **Add project-context documents** — drop a `.md` file into `project-context/`. The + briefing hook will include it in the next session briefing. 
+ +--- + +## Files + +| Path | Purpose | +|------|---------| +| `content/session-notes.md` | Synthetic session notes mined as memory fragments | +| `content/architecture-decisions.md` | ADR log mined as memory fragments | +| `project-context/HANDOFF.md` | Current-work snapshot for the briefing hook | +| `project-context/PROJECT_CONTEXT.md` | Project overview for the briefing hook | +| `project-context/GLOSSARY.md` | Domain term definitions for the briefing hook | diff --git a/tests/fixtures/seed-palace/content/architecture-decisions.md b/tests/fixtures/seed-palace/content/architecture-decisions.md new file mode 100644 index 0000000..3cdd424 --- /dev/null +++ b/tests/fixtures/seed-palace/content/architecture-decisions.md @@ -0,0 +1,144 @@ +# Architecture Decisions — memory-bundle + +This document records the key architecture decisions made during the design and +implementation of the memory-bundle. Each ADR captures the context, the decision, +and the rationale so that future contributors understand why the system is built +the way it is. + +--- + +## ADR-001 — Two-layer memory architecture (palace + project-context) + +**Status:** Accepted + +**Context:** +The memory-bundle must serve two distinct retrieval patterns: (1) fuzzy semantic recall +of fragments from past sessions (palace layer), and (2) structured, deterministic +retrieval of current-project state (project-context layer). These patterns have +different latency budgets, different staleness tolerances, and different access +mechanisms. + +**Decision:** +We decided to implement a two-layer memory architecture. The palace layer (MemPalace) +handles fuzzy semantic search via embedding-based recall and is the module's primary +design concern. The project-context layer is a simple file tree that the briefing +hook reads verbatim; no embedding is computed. + +**Rationale:** +Mixing structured context documents into the embedding space would degrade recall +precision for semantic queries. 
Keeping the two layers separate preserves the +design intent of each and makes the module easier to reason about. + +--- + +## ADR-002 — Behaviour-based bundle install (`--app` with `#subdirectory=`) + +**Status:** Accepted + +**Context:** +The DTU profile must install the memory-bundle inside the container. Three approaches +were evaluated: (a) pip install from the repo root, (b) manual YAML copy, (c) Amplifier +behaviour flag with `--app` and `#subdirectory=`. + +**Decision:** +We decided on behaviour-based bundle install using: + +```bash +amplifier bundle add --app git+https://gitea.local/memory-bundle.git#subdirectory=behaviors/mempalace.yaml +``` + +**Rationale:** +The `#subdirectory=` fragment is an established pattern in the Amplifier ecosystem for +referencing a specific behaviour definition from a repository root. It keeps the +container image lean (no transitive test dependencies are pulled in) and stays +in sync with the source repository without manual YAML maintenance. + +--- + +## ADR-003 — Hot-path / drain-thread split for capture + +**Status:** Accepted + +**Context:** +The capture hook fires on every `tool:post` event. Embedding calls to the external +API take 100–400 ms, which is unacceptable latency on the hot path. + +**Decision:** +We decided to split capture into a synchronous hot-path component and an asynchronous +drain-thread component. The hot-path emits `capture_queued` or `capture_skipped` +synchronously and writes the raw fragment to the spool directory. The drain thread +reads from the spool, calls the embedding API, and writes to the palace. + +**Rationale:** +The pattern decouples API latency from tool response latency. The spool directory +acts as a durable queue: if the process crashes after spool-write but before +palace-write, the fragment can be recovered on next startup. The design also makes +the module easier to test: hot-path tests require no API keys; drain-thread tests +require real keys but run separately. 
+ +--- + +## ADR-004 — Dual-palace seeding for the DTU + +**Status:** Accepted + +**Context:** +Integration tests require a palace that is pre-seeded with known fragments so that +recall precision can be asserted deterministically. Re-running `mempalace mine` +inside each test is too slow (4–8 seconds per run) when real API keys are required. + +**Decision:** +We decided to implement a dual-palace seeding pattern for the DTU profile. A seed +palace is built once at container-init time and frozen to `~/.mempalace-seed`. A +3-line `reset-palace` script restores the working palace from the frozen snapshot +between test runs. + +**Rationale:** +The dual-palace pattern reduces per-test overhead from ~6 seconds to ~50 ms for the +palace restore step. The frozen snapshot guarantees deterministic fragment content +across runs, which is critical for asserting recall precision thresholds. + +--- + +## ADR-005 — Real API keys for end-to-end tests (< $0.10 / run) + +**Status:** Accepted + +**Context:** +Early prototypes used a mock embedding client that returned random unit vectors. +Recall precision tests failed non-deterministically and the results were not meaningful +for validating the palace query logic. + +**Decision:** +We decided that real API keys for the embedding provider are required for all +integration and end-to-end tests. Mock clients are only permissible in unit tests +that explicitly test client-error handling paths. + +**Rationale:** +Real embeddings cluster in meaningful semantic space. Mock vectors do not. The cost +of running the full integration suite against the seed-palace fixture corpus is less +than $0.10 per run, which is acceptable given the confidence gain. The DTU profile +passes API keys in via environment variable so they are never committed to source. + +--- + +## ADR-006 — Gitea mirror for bundle install + +**Status:** Accepted + +**Context:** +The DTU container must install the memory-bundle from a Git URL. 
Using the public +GitHub URL requires internet access from inside the container, which conflicts with +the design goal of fully isolated test environments. + +**Decision:** +We decided to run a local Gitea mirror inside the DTU network and configure the +bundle install URL to point to it. The DTU profile YAML also sets +`allow_uv_github_fast_path: false` to prevent uv from bypassing the mirror by +resolving GitHub URLs directly. + +**Rationale:** +A Gitea mirror provides a stable, reproducible install source that does not depend +on public internet availability during CI runs. The `allow_uv_github_fast_path: false` +setting is a necessary companion because uv's fast path would silently bypass the +mirror if left enabled, undermining the isolation guarantee. diff --git a/tests/fixtures/seed-palace/content/session-notes.md b/tests/fixtures/seed-palace/content/session-notes.md new file mode 100644 index 0000000..c6a994d --- /dev/null +++ b/tests/fixtures/seed-palace/content/session-notes.md @@ -0,0 +1,96 @@ +# Session Notes — 2026-04-29 + +## Overview + +This session focused on wiring up the end-to-end test infrastructure for the memory-bundle. +The goal was to get a Digital Twin Universe (DTU) environment that boots with a pre-seeded +MemPalace palace so integration tests can exercise the full capture→recall→briefing pipeline +without standing up external services. + +--- + +## Decisions made this session + +### Dual-palace pattern adopted + +We decided to use a dual-palace seeding strategy for the DTU profile. A "seed palace" is +frozen at container-init time (`cp -r ~/.mempalace ~/.mempalace-seed`) so individual test +suites can call `reset-palace` to restore a known-good baseline without re-running the full +`mempalace mine` pipeline. This pattern was decided after observing that mine runs against +the fixture corpus take roughly 4–8 seconds due to real API embedding calls, which is +acceptable once at boot but not acceptable inside every test. 
+ +### Behaviour-based bundle install + +We decided to install the memory-bundle inside the DTU container using Amplifier's behaviour +flag rather than a conventional pip dependency: + +```bash +amplifier bundle add --app git+https://gitea.local/memory-bundle.git#subdirectory=behaviors/mempalace.yaml +``` + +The `#subdirectory=behaviors/mempalace.yaml` fragment tells the installer to pull only the +behaviour definition from the repository root, keeping the container image lean. This +approach was decided after evaluating direct pip installs (too many transitive deps) and +manual YAML copies (fragile, diverges from source). + +--- + +## Blockers resolved + +### uv GitHub fast path bypassing Gitea proxy + +The DTU profile uses a local Gitea mirror to serve bundle repositories without reaching the +public internet. During initial bring-up we observed that `uv` was resolving GitHub URLs +directly, bypassing the mirror. This was resolved by adding `allow_uv_github_fast_path: false` +to the DTU profile YAML, which forces uv to respect the `[[source]]` redirects configured +in `pyproject.toml`. + +### Project-context discovery via `_find_project_context_dir` + +The briefing hook locates its input documents by calling `_find_project_context_dir()`, +which walks upward from the current working directory checking each ancestor for a +subdirectory named `project-context/`. During testing we observed that the hook was unable +to find the directory when tests were launched from `/workspace/tests/`. This was resolved +by pre-copying `tests/fixtures/seed-palace/project-context/` to `/workspace/project-context/` +during the DTU init sequence so the walk terminates at `/workspace/`. + +--- + +## Patterns observed + +### Synchronous `*_queued` / `*_skipped` event emission + +The capture hook emits `capture_queued` or `capture_skipped` events synchronously on the +hot path, before handing work off to a drain thread. 
The actual embedding call and palace +write happen asynchronously in the drain thread (or subprocess for large payloads). This +pattern was observed consistently across all three capture entry points (tool:post, +tool:pre, session:start) and is now documented as a convention: slow work is always +deferred; fast bookkeeping events are always synchronous. + +--- + +## Lessons learned + +### Spool directory must exist before `tool:post` + +We learned the hard way that the spool directory (`/workspace/spool/`) must be created +before the first `tool:post` hook fires. If the directory is missing the drain thread +silently drops the fragment rather than raising an error. The DTU init sequence now +includes `mkdir -p /workspace/spool` as an explicit step. + +### Real API keys are non-negotiable for integration tests + +We learned that mock embedding clients produce vectors that cluster differently from real +embeddings, causing recall precision tests to fail with misleading results. Real API keys +for the embedding provider are non-negotiable for integration tests. The cost is less than +$0.10 per full test suite run against the seed corpus. + +--- + +## Next steps + +- Wire up the reset-palace helper script into the DTU profile +- Add `verify-seeding.sh` smoke test that asserts at least 3 fragments are recalled +- Extend the fixture corpus with a third content file covering hook contract edge-cases +- Confirm that `allow_uv_github_fast_path: false` propagates to nested sub-installs diff --git a/tests/fixtures/seed-palace/project-context/GLOSSARY.md b/tests/fixtures/seed-palace/project-context/GLOSSARY.md new file mode 100644 index 0000000..a5037d9 --- /dev/null +++ b/tests/fixtures/seed-palace/project-context/GLOSSARY.md @@ -0,0 +1,14 @@ +# Glossary — memory-bundle + +| Term | Definition | +|------|-----------| +| **Palace** | The MemPalace persistent store. Holds all captured memory fragments indexed by embedding vector. Lives at `~/.mempalace/` by default. 
| +| **Drawer** | A named collection inside a Palace. Fragments are organised into drawers by category (e.g. `decisions`, `learnings`, `patterns`). Each drawer has its own embedding index. | +| **Wing** | A top-level namespace inside a Palace, grouping multiple drawers. Typically corresponds to a project or domain. Wings allow multiple projects to share a single Palace without cross-contamination. | +| **Room** | A sub-division of a Drawer used for fine-grained access control and retrieval scoping. Rooms are optional; a Drawer with no rooms behaves as a single flat collection. | +| **Briefing** | The session-start context injection produced by the briefing hook. Combines semantic recall results from the Palace with verbatim project-context documents to give the agent a situational summary at the start of a session. | +| **Spool** | A directory (`/workspace/spool/` in the DTU) used as a durable intermediate queue by the capture hook. Raw fragments are written to the spool synchronously on the hot path; the drain thread reads from the spool and performs the slow embedding + palace-write asynchronously. | +| **Seed palace** | A pre-seeded Palace snapshot used in the DTU profile for end-to-end testing. Built once at container-init time by running `mempalace mine` against the fixture corpus and frozen to `~/.mempalace-seed`. Restored by the `reset-palace` script between test runs. | +| **Drain thread** | A background thread (or subprocess for large payloads) that reads fragments from the spool directory, calls the embedding API, and writes the resulting vectors to the Palace. Decouples API latency from tool response latency. | +| **Capture hook** | The Amplifier hook that fires on `tool:post` events and enqueues the tool result as a memory fragment. Emits `capture_queued` or `capture_skipped` synchronously, then hands off to the drain thread. 
| +| **Briefing hook** | The Amplifier hook that fires on `session:start` and injects a briefing (Palace recall + project-context documents) into the session context. | diff --git a/tests/fixtures/seed-palace/project-context/HANDOFF.md b/tests/fixtures/seed-palace/project-context/HANDOFF.md new file mode 100644 index 0000000..da13c09 --- /dev/null +++ b/tests/fixtures/seed-palace/project-context/HANDOFF.md @@ -0,0 +1,41 @@ +# Handoff — 2026-04-29 + +## Currently working on + +Setting up the end-to-end test infrastructure for the memory-bundle DTU profile. +The immediate focus is the seed-palace fixture corpus and the `memory-bundle-e2e.yaml` +DTU profile that consumes it. + +## Next steps + +1. Write `reset-palace` helper script and add it to the DTU profile `$PATH`. +2. Add `verify-seeding.sh` smoke test asserting ≥ 3 fragments recalled from seed corpus. +3. Extend fixture corpus with a third content file covering hook contract edge-cases. +4. Confirm `allow_uv_github_fast_path: false` propagates to nested sub-installs. +5. Implement deferred full hook tests (currently marked `xfail` pending DTU availability). +6. Verify that behaviour `#subdirectory=` relative source paths resolve correctly from + inside the DTU container. + +## Key decisions made + +- **Dual-palace pattern** — seed palace frozen at `~/.mempalace-seed`; reset script + restores working palace in ~50 ms without re-running `mempalace mine`. +- **`allow_uv_github_fast_path: false`** — required in the DTU profile YAML to prevent + uv from bypassing the local Gitea mirror when installing bundle dependencies. +- **Real API keys for integration tests** — mock embeddings are insufficient; cost is + < $0.10 per full run against the seed corpus. + +## Open items + +- Deferred: full hook tests in `test_hook_emissions.py` are `xfail` pending DTU + availability in CI. 
+- Unverified: whether behaviour `#subdirectory=` relative source paths resolve correctly + when the behaviour file references sibling modules via `../modules/` paths. + +--- + +## Session log + +| Date | Summary | +|------|---------| +| 2026-04-29 | Initial DTU profile design; seed-palace fixture corpus created; dual-palace pattern adopted; `allow_uv_github_fast_path: false` fix applied | diff --git a/tests/fixtures/seed-palace/project-context/PROJECT_CONTEXT.md b/tests/fixtures/seed-palace/project-context/PROJECT_CONTEXT.md new file mode 100644 index 0000000..7c79ab0 --- /dev/null +++ b/tests/fixtures/seed-palace/project-context/PROJECT_CONTEXT.md @@ -0,0 +1,61 @@ +# Project Context — memory-bundle + +## What this project is + +`amplifier-bundle-memory` is an Amplifier bundle that adds persistent, semantic memory to +an Amplifier agent session. It provides hooks and tools that capture tool call results, +session summaries, and arbitrary fragments to a MemPalace palace, and retrieves relevant +context at session start via a briefing hook. + +## Current phase + +**Infrastructure hardening** — the core capture/recall/briefing pipeline is implemented +and passing unit tests. The current focus is on DTU-based end-to-end tests that exercise +the full pipeline against real embedding APIs in an isolated container environment. 
+ +## Active milestone + +Milestone: `v0.2.0 — DTU E2E` + +- [x] Capture hook emitting `capture_queued` / `capture_skipped` synchronously +- [x] Drain thread deferring embedding + palace-write off the hot path +- [x] Briefing hook loading project-context documents from `_find_project_context_dir()` +- [x] Seed-palace fixture corpus for DTU profile +- [ ] `memory-bundle-e2e.yaml` DTU profile (in progress) +- [ ] `verify-seeding.sh` smoke test +- [ ] `reset-palace` helper script on `$PATH` + +## Team + +| Role | Notes | +|------|-------| +| Lead engineer | Owner of capture/drain architecture | +| Integration test | Owner of DTU profile and fixture corpus | + +## Architecture pointers + +Key files and directories: + +| Path | Description | +|------|-------------| +| `behaviors/mempalace.yaml` | Amplifier behaviour definition — the entry point for `amplifier bundle add --app` | +| `modules/hooks-mempalace-briefing/` | Briefing hook — fires on `session:start`, injects palace recall into context | +| `modules/hooks-mempalace-capture/` | Capture hook — fires on `tool:post`, queues fragments for drain thread | +| `modules/tool-mempalace/` | Palace query tool — direct semantic search exposed as an Amplifier tool | +| `modules/tool-memory/` | High-level memory tool — wraps palace query with summarisation | +| `tests/` | Unit and contract tests | +| `tests/integration/` | Integration tests (require real API keys + DTU) | +| `tests/fixtures/seed-palace/` | Seed corpus and project-context documents for DTU profile | +| `.amplifier/digital-twin-universe/profiles/memory-bundle-e2e.yaml` | DTU profile definition | + +## Conventions + +- **Synchronous `*_queued` / `*_skipped` events** — all `_queued` and `_skipped` event + types are emitted synchronously on the hot path before any slow work begins. Slow work + (embedding calls, palace writes) is always deferred to a drain thread or subprocess. 
@pytest.fixture(scope="module", autouse=True)
def reset_palace():
    """Reset the memory palace before each test module.

    Runs the ``reset-palace`` CLI tool which is installed in the DTU
    environment. Every failure mode is reported through ``pytest.fail()`` so
    the cause is immediately visible:

    - the command cannot be launched at all (for example it is not on PATH
      because the tests are being run on the host rather than inside the
      DTU), or
    - the command runs but exits with a non-zero return code.

    Yields:
        None. Control passes to the test module once the reset has succeeded.
    """
    try:
        result = subprocess.run(
            ["reset-palace"],
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as exc:
        # check=False only suppresses the error for a non-zero exit status.
        # A missing or non-executable binary still raises before a process
        # exists, so it has to be translated into a test failure explicitly.
        pytest.fail(
            f"reset-palace could not be executed ({exc}).\n"
            "Note: this fixture only runs correctly inside the DTU container."
        )
    if result.returncode != 0:
        pytest.fail(
            f"reset-palace failed (rc={result.returncode}).\n"
            f"stderr: {result.stderr}\n"
            "Note: this fixture only runs correctly inside the DTU container."
        )
    yield


@pytest.fixture
def workspace_dir() -> Path:
    """Return /workspace, the directory where Amplifier is launched in the DTU.

    Contains:
      - project-context/          read by hooks-project-context
      - amplifier-bundle-memory/  the bundle under test
    """
    return Path("/workspace")
# Matches the "N drawers" summary that `mempalace status` prints,
# e.g. "MemPalace Status — 19 drawers".
_DRAWER_COUNT_RE = re.compile(r"(\d+)\s+drawer")


def _run_cli(argv: list[str]) -> subprocess.CompletedProcess[str]:
    """Run a CLI command and always return a ``CompletedProcess``.

    ``check=False`` only covers non-zero exit codes; a command that is not
    installed makes ``subprocess.run`` raise ``OSError`` instead. That case is
    converted into a synthetic failed result so the calling test's own
    assertion (with its diagnostic message) reports the problem.

    Args:
        argv: Command and arguments, passed without a shell.

    Returns:
        The real result, or a synthetic one with return code 127 (the
        conventional shell "command not found" status) and the launch error
        in ``stderr`` when the command could not be started.
    """
    try:
        return subprocess.run(
            argv,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as exc:
        return subprocess.CompletedProcess(
            argv,
            127,
            stdout="",
            stderr=f"could not execute {argv[0]!r}: {exc}",
        )


def _drawer_count(status_output: str) -> "int | None":
    """Return the drawer count found in ``mempalace status`` output.

    Returns ``None`` when the output contains no "N drawer(s)" fragment.
    """
    found = _DRAWER_COUNT_RE.search(status_output)
    return int(found.group(1)) if found else None


def test_palace_directory_exists():
    """Provision check: both memory-palace directories must exist.

    /root/.mempalace       — the working palace
    /root/.mempalace-seed  — the frozen seed copy used by reset-palace
    """
    palace = Path("/root/.mempalace")
    seed = Path("/root/.mempalace-seed")
    assert palace.exists(), (
        "/root/.mempalace does not exist — the DTU provision step that "
        "initialises the memory palace failed or was not run."
    )
    assert seed.exists(), (
        "/root/.mempalace-seed does not exist — the DTU provision step that "
        "installs the seed content failed or was not run."
    )


def test_mempalace_installed():
    """Provision check: mempalace CLI must be installed and respond to --version."""
    result = _run_cli(["mempalace", "--version"])
    assert result.returncode == 0, (
        f"mempalace --version failed (rc={result.returncode}).\n"
        f"stdout: {result.stdout}\n"
        f"stderr: {result.stderr}"
    )


def test_palace_has_seeded_drawers():
    """After seeding, mempalace status must report at least one drawer.

    ``mempalace status`` prints human-readable text; the test looks for an
    'N drawers' fragment and requires N > 0.
    """
    result = _run_cli(["mempalace", "status"])
    assert result.returncode == 0, (
        f"mempalace status failed (rc={result.returncode}).\n"
        f"stdout: {result.stdout}\n"
        f"stderr: {result.stderr}"
    )
    drawer_count = _drawer_count(result.stdout)
    assert drawer_count is not None, (
        "mempalace status output does not contain a drawer count — "
        "seed content may not have been loaded.\n"
        f"output: {result.stdout}"
    )
    assert drawer_count > 0, (
        "mempalace status reports zero drawers — seed content was not loaded.\n"
        f"output: {result.stdout}"
    )


def test_seed_content_searchable():
    """Seed content must be searchable via mempalace search.

    Runs ``mempalace search <query> --results 3``. The human-readable output
    carries a "[1]" marker when at least one result was found.
    """
    query = "architecture decisions mempalace"
    result = _run_cli(["mempalace", "search", query, "--results", "3"])
    assert result.returncode == 0, (
        f"mempalace search failed (rc={result.returncode}).\n"
        f"stdout: {result.stdout}\n"
        f"stderr: {result.stderr}"
    )
    assert "[1]" in result.stdout, (
        "mempalace search returned no results for 'architecture decisions mempalace' "
        "— seed content is missing or search is broken.\n"
        f"output: {result.stdout}"
    )


def test_amplifier_installed():
    """Provision check: amplifier CLI must be installed and respond to --version."""
    result = _run_cli(["amplifier", "--version"])
    assert result.returncode == 0, (
        f"amplifier --version failed (rc={result.returncode}).\n"
        f"stdout: {result.stdout}\n"
        f"stderr: {result.stderr}"
    )


def test_reset_palace_restores_seed():
    """reset-palace must replace the palace directory and restore seed content.

    Writes a sentinel file into the palace, calls reset-palace, then asserts:
      1. The sentinel is gone (palace was actually replaced, not just patched).
      2. The drawer count is > 0 after reset (seed content was restored).
    """
    sentinel = Path("/root/.mempalace/sentinel_test.txt")
    try:
        sentinel.write_text("dirty")
    except OSError as exc:
        # write_text raises when the palace directory is missing, so the
        # diagnostic has to be attached here rather than in a later assert.
        raise AssertionError(
            "Failed to write sentinel file — /root/.mempalace may not exist.\n"
            f"error: {exc}"
        ) from exc

    try:
        reset_result = _run_cli(["reset-palace"])
        assert reset_result.returncode == 0, (
            f"reset-palace failed (rc={reset_result.returncode}).\n"
            f"stdout: {reset_result.stdout}\n"
            f"stderr: {reset_result.stderr}"
        )
        assert not sentinel.exists(), (
            "Sentinel survived reset — palace was not actually replaced."
        )
    finally:
        # Never leave the sentinel behind in the palace when the reset failed.
        sentinel.unlink(missing_ok=True)

    status_result = _run_cli(["mempalace", "status"])
    assert status_result.returncode == 0, (
        f"mempalace status after reset failed (rc={status_result.returncode}).\n"
        f"stdout: {status_result.stdout}\n"
        f"stderr: {status_result.stderr}"
    )
    drawer_count = _drawer_count(status_result.stdout)
    assert drawer_count is not None and drawer_count > 0, (
        "After reset, palace has zero drawers — seed restore is broken.\n"
        f"output: {status_result.stdout}"
    )


def test_project_context_files_present(workspace_dir):
    """Provision check: project-context files must be present in /workspace.

    Expects provision step 8 to have populated:
      /workspace/project-context/HANDOFF.md
      /workspace/project-context/PROJECT_CONTEXT.md
      /workspace/project-context/GLOSSARY.md
    """
    pc = workspace_dir / "project-context"
    handoff = pc / "HANDOFF.md"
    project_context = pc / "PROJECT_CONTEXT.md"
    glossary = pc / "GLOSSARY.md"
    assert handoff.exists(), (
        f"{handoff} not found — "
        "the DTU provision step 8 that writes project-context files failed or was not run."
    )
    assert project_context.exists(), (
        f"{project_context} not found — "
        "project-context was not fully provisioned."
    )
    assert glossary.exists(), (
        f"{glossary} not found — project-context was not fully provisioned."
    )