diff --git a/.gitignore b/.gitignore
index b1c9ed1..418d5d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,9 @@ dist-test/
npm-debug.log*
.env
*.tgz
+
+.opencode/*
+!.opencode/config.json
+
+# Agent guidelines (local development instructions for AI agents)
+AGENT.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e5e624..ed9cb09 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,27 @@ Format follows [Keep a Changelog](https://keepachangelog.com/). Versions follow

---

+## [0.2.5] - 2026-03-27
+
+### Added
+
+- Similarity-based duplicate flagging during auto-capture: new captures are checked against existing memories in the same scope using cosine similarity.
+- `DedupConfig` with `enabled`, `writeThreshold` (default: 0.92), and `consolidateThreshold` (default: 0.95) for controlling dedup behavior.
+- `memory_consolidate` tool: manually triggers merge of similar memories within a scope.
+- `memory_consolidate_all` tool: consolidates duplicates across global and project scopes.
+- `isPotentialDuplicate` and `duplicateOf` fields in `MemoryRecord.metadata` for tracking potential duplicates.
+- `EffectivenessSummary.duplicates` section with `flaggedCount` and `consolidatedCount` for observability.
+- `consolidateDuplicates()` store method: merges similar memory pairs where cosine similarity >= consolidateThreshold.
+- Pruning preserves newest flagged duplicates when maxEntries forces deletion.
+
+### Changed
+
+- Capture events now include `skipReason: "duplicate-similarity"` when a new memory meets or exceeds writeThreshold.
+- `summarizeEvents()` returns counts of flagged and consolidated memories.
+- Search results exclude `status=merged` records from display.
+
+---
+
## [0.2.4] - 2026-03-25

### Added
diff --git a/DOCKER_DEV.md b/DOCKER_DEV.md
new file mode 100644
index 0000000..63c5d58
--- /dev/null
+++ b/DOCKER_DEV.md
@@ -0,0 +1,89 @@
+# Docker Dev Environment for lancedb-opencode-pro Plugin
+
+Run OpenCode in a Docker container with the plugin loaded for isolated testing.
+
+## Architecture
+
+```mermaid
+flowchart TB
+    subgraph Host["Host Machine"]
+        subgraph Docker["Docker Container (opencode-dev)"]
+            OpenCode["opencode serve
--port 4096 --hostname 0.0.0.0"] + Plugin["Plugin: file:///workspace/dist/index.js"] + end + HostOpenCode["Host OpenCode (TUI)
~/.config/opencode/opencode.json → attach:4096"] + Ollama["Host Ollama
http://host.docker.internal:11434"] + end + + Docker -- "port 4096" --> HostOpenCode + Docker --> Ollama +``` + +## Setup + +### 1. Build and start + +```bash +docker compose build --no-cache && docker compose up -d +``` + +### 2. Build the plugin (first time or after code changes) + +```bash +docker compose exec opencode-dev npm run build +``` + +### 3. Configure Host OpenCode to attach + +Create `~/.config/opencode/opencode.json` on your host: + +```json +{ + "$schema": "https://opencode.ai/config.json", + "agent": { + "attach": "http://localhost:4096" + } +} +``` + +### 4. Start Host OpenCode + +```bash +opencode +``` + +## Workflow + +### Making changes + +1. Edit code on host +2. Rebuild: `docker compose exec opencode-dev npm run build` +3. Reload: `docker compose restart opencode-dev` + +### Running tests + +```bash +docker compose exec opencode-dev npm run verify +docker compose exec opencode-dev npm run test:foundation +``` + +## Verification + +```bash +# Check plugin config is loaded +curl -s -u opencode:devonly http://localhost:4096/config | \ + python3 -c "import sys,json; c=json.load(sys.stdin); print('plugin:', c.get('plugin',[]))" + +# Check tools are registered +curl -s -u opencode:devonly http://localhost:4096/experimental/tool/ids | \ + python3 -c "import sys,json; t=json.load(sys.stdin); print([x for x in t if 'memory' in x])" +``` + +Expected tool list includes: `memory_search`, `memory_consolidate`, `memory_stats`, etc. + +## Cleanup + +```bash +docker compose down # stop +docker compose down -v # stop + remove volumes +``` diff --git a/Dockerfile.opencode b/Dockerfile.opencode new file mode 100644 index 0000000..e171218 --- /dev/null +++ b/Dockerfile.opencode @@ -0,0 +1,23 @@ +FROM ubuntu:latest +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /workspace + +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + ca-certificates \ + git \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -fsSL https://opencode.ai/install | bash + +ENV PATH="/root/.opencode/bin:${PATH}" + +ARG NODE_VERSION=22 +RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \ + && apt-get install -y nodejs \ + && rm -rf /var/lib/apt/lists/* + +EXPOSE 4096 + +CMD ["opencode", "serve", "--port", "4096", "--hostname", "0.0.0.0"] diff --git a/README.md b/README.md index 4a35c6f..38d0234 100644 --- a/README.md +++ b/README.md @@ -675,7 +675,7 @@ Use this flow when publishing a new version to npm. ```bash docker compose build --no-cache && docker compose up -d -docker compose exec app npm run release:check +docker compose exec opencode-dev npm run release:check ``` 3. 
Confirm npm authentication: @@ -731,8 +731,8 @@ ls -l dist dist-test/src 2>/dev/null ```bash docker compose build --no-cache && docker compose up -d -docker compose exec app npm run typecheck -docker compose exec app npm run build +docker compose exec opencode-dev npm run typecheck +docker compose exec opencode-dev npm run build ``` ### Running validation inside Docker @@ -741,16 +741,16 @@ docker compose exec app npm run build docker compose build --no-cache && docker compose up -d # Quick release check -docker compose exec app npm run verify +docker compose exec opencode-dev npm run verify # Full release gate (includes benchmark + pack) -docker compose exec app npm run verify:full +docker compose exec opencode-dev npm run verify:full # Individual workflows -docker compose exec app npm run test:foundation -docker compose exec app npm run test:regression -docker compose exec app npm run test:retrieval -docker compose exec app npm run benchmark:latency +docker compose exec opencode-dev npm run test:foundation +docker compose exec opencode-dev npm run test:regression +docker compose exec opencode-dev npm run test:retrieval +docker compose exec opencode-dev npm run benchmark:latency ``` ### Operator verification @@ -759,15 +759,15 @@ After running `npm run verify:full`, operators can inspect the following: ```bash # Confirm the packaged build is installable -docker compose exec app ls -la lancedb-opencode-pro-*.tgz +docker compose exec opencode-dev ls -la lancedb-opencode-pro-*.tgz # Confirm typecheck and build succeeded -docker compose exec app npm run typecheck -docker compose exec app npm run build +docker compose exec opencode-dev npm run typecheck +docker compose exec opencode-dev npm run build # Check resolved default storage path -docker compose exec app node -e "import('./dist/index.js').then(() => console.log('plugin loaded'))" -docker compose exec app sh -lc 'ls -la ~/.opencode/memory/lancedb 2>/dev/null || echo "No data yet (expected before first use)"' +docker compose exec opencode-dev node -e "import('./dist/index.js').then(() => console.log('plugin loaded'))" +docker compose exec opencode-dev sh -lc 'ls -la ~/.opencode/memory/lancedb 2>/dev/null || echo "No data yet (expected before first use)"' ``` ## Long Memory Verification @@ -785,14 +785,14 @@ docker compose build --no-cache && docker compose up -d The E2E script loads `dist/index.js`, so build artifacts must exist first. ```bash -docker compose exec app npm install -docker compose exec app npm run build +docker compose exec opencode-dev npm install +docker compose exec opencode-dev npm run build ``` ### 3. Run the built-in end-to-end memory test ```bash -docker compose exec app npm run test:e2e +docker compose exec opencode-dev npm run test:e2e ``` Expected success output: @@ -814,7 +814,7 @@ This verifies all of the following in one run: The E2E script uses `/tmp/opencode-memory-e2e` as its test database path. ```bash -docker compose exec app ls -la /tmp/opencode-memory-e2e +docker compose exec opencode-dev ls -la /tmp/opencode-memory-e2e ``` If files appear in that directory after the E2E run, memory was written to disk instead of only being kept in process memory. @@ -830,7 +830,7 @@ When running through the normal plugin config, the default durable storage path Check it inside the container with: ```bash -docker compose exec app sh -lc 'ls -la ~/.opencode/memory/lancedb' +docker compose exec opencode-dev sh -lc 'ls -la ~/.opencode/memory/lancedb' ``` ### 6. 
Stronger proof: verify retrieval still works after restart

Long memory is only convincing if retrieval still works after the runtime is restarted.

```bash
-docker compose restart app
+docker compose restart opencode-dev

-docker compose exec app npm run test:e2e
-docker compose exec app ls -la /tmp/opencode-memory-e2e
+docker compose exec opencode-dev npm run test:e2e
+docker compose exec opencode-dev ls -la /tmp/opencode-memory-e2e
```

If the search step still succeeds after restart and the database files remain present, that is strong evidence that the memory is durable.

@@ -849,7 +849,7 @@

Treat the feature as verified only when all of these are true:

-- `docker compose exec app npm run test:e2e` passes
+- `docker compose exec opencode-dev npm run test:e2e` passes
- `/tmp/opencode-memory-e2e` contains LanceDB files after the run
- the memory retrieval step still succeeds after container restart
- the configured OpenCode storage path exists when running real plugin integration
diff --git a/docker-compose.yml b/docker-compose.yml
index 911a3dc..8d34d0f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,9 +1,24 @@
services:
-  app:
+  opencode-dev:
    build:
      context: .
-      dockerfile: Dockerfile
-    container_name: lancedb-opencode-pro-app
-    working_dir: /workspace
+      dockerfile: Dockerfile.opencode
+    container_name: opencode-dev
+    ports:
+      - "4096:4096"
    volumes:
-    - ./:/workspace
+      - .:/workspace
+      - opencode-data:/root/.local/share/opencode
+    environment:
+      - OLLAMA_BASE_URL=http://host.docker.internal:11434
+      - LANCEDB_OPENCODE_PRO_OLLAMA_BASE_URL=http://host.docker.internal:11434
+      - OPENCODE_SERVER_PASSWORD=devonly
+      - OPENCODE_CONFIG_CONTENT={"plugin":["file:///workspace/dist/index.js"]}
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    stdin_open: true
+    tty: true
+    restart: unless-stopped
+
+volumes:
+  opencode-data:
diff --git a/docs/operations.md b/docs/operations.md
index 3b49691..0e8a711 100644
--- a/docs/operations.md
+++ b/docs/operations.md
@@ -17,6 +17,8 @@
| `memory_scope_promote` | Promote memory to global scope | No |
| `memory_scope_demote` | Demote memory from global scope | No |
| `memory_global_list` | List global memories | No |
+| `memory_consolidate` | Merge near-duplicate memories in a scope | Yes (`confirm=true`) |
+| `memory_consolidate_all` | Consolidate global + current project scope | Yes (`confirm=true`) |

### Common Workflows

@@ -30,6 +32,39 @@
memory_stats
memory_effectiveness
```

+#### Scheduled Consolidation (Daily Cleanup)
+
+The memory system performs opportunistic consolidation on `session.compacted` events. For environments with long-running sessions or infrequent activity, a daily cron job ensures duplicates are merged even without session compaction.
+
+**Example crontab entry** (add with `crontab -e`):
+
+```bash
+# Consolidate duplicates daily at 03:00 UTC.
+# Requires the OpenCode CLI with the lancedb-opencode-pro plugin installed;
+# the agent invokes the memory_consolidate_all tool when asked.
+ +0 3 * * * /path/to/opencode --memory-consolidate-all --confirm >> ~/.opencode/logs/consolidation.log 2>&1 +``` + +**Key configuration options** (via environment or `lancedb-opencode-pro.json`): + +| Variable | Default | Description | +|---|---|---| +| `LANCEDB_OPENCODE_PRO_DEDUP_ENABLED` | `true` | Enable/disable dedup | +| `LANCEDB_OPENCODE_PRO_DEDUP_WRITE_THRESHOLD` | `0.92` | Similarity threshold for flagging new memories | +| `LANCEDB_OPENCODE_PRO_DEDUP_CONSOLIDATE_THRESHOLD` | `0.95` | Similarity threshold for merging in consolidation | + +**Viewing consolidation metrics**: + +```bash +memory_effectiveness +# Look for: duplicates.flaggedCount (memories flagged as potential duplicates) +# Look for: duplicates.consolidatedCount (memories merged via consolidation) +``` + #### Search and Verify ```bash diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/.openspec.yaml b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/.openspec.yaml new file mode 100644 index 0000000..a61e7c1 --- /dev/null +++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-03-27 diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/design.md b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/design.md new file mode 100644 index 0000000..a701743 --- /dev/null +++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/design.md @@ -0,0 +1,150 @@ +## Context + +The lancedb-opencode-pro memory system auto-captures assistant responses that contain positive signals ("fixed", "done", "resolved", etc.) into LanceDB, indexed by vector embedding and text content. Each session's memories are scoped to the project (`project:{hash}`) with optional global sharing. + +**Current problem**: No deduplication exists at write time. If a user asks the same question using different wording across multiple sessions, or re-explains the same issue, the system stores each occurrence as a separate `MemoryRecord`. Over time this causes: + +1. **Storage bloat**: `maxEntriesPerScope` (default 3000) fills with near-identical entries, pushing genuinely novel memories out. +2. **Noisy recall**: `memory_search` returns multiple semantically equivalent results for the same query; injection block contains redundant context. +3. **Effectiveness metric degradation**: `recall.manualRescueRatio` rises as users manually search and find duplicates instead of novel information. + +**Existing infrastructure**: The codebase already has all primitives needed for similarity-based dedup: +- `fastCosine()` (store.ts:804): pre-computed-norm cosine similarity, O(n) with n = vector dim +- `store.search()` with `vectorWeight=1, bm25Weight=0`: scope-filtered vector-only search +- `ScopeCache`: already tokenizes and caches IDF per scope—no per-query overhead +- `metadataJson`: flexible JSON field requiring no schema migration +- `CaptureSkipReason` enum: already extensible for new skip labels + +**Constraint**: Must not block writes. The system should flag, not prevent. 
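+
+To make the reuse concrete, here is a minimal sketch of the intended write-path check built from the primitives above. It is an outline, not the final plugin code; `vector` and `activeScope` stand for the candidate's embedding and scope:
+
+```ts
+// Flag-but-write: one scope-internal, vector-only query, then a threshold compare.
+const similar = await store.search({
+  queryVector: vector,   // embedding of the new capture candidate
+  scopes: [activeScope], // scope-internal check only
+  limit: 1,
+  vectorWeight: 1.0,     // vector channel only, no BM25 contribution
+  bm25Weight: 0.0,
+  minScore: 0.0,
+});
+const isPotentialDuplicate =
+  similar.length > 0 && similar[0].score >= dedup.writeThreshold; // default 0.92
+const duplicateOf = isPotentialDuplicate ? similar[0].record.id : null;
+// The memory is written regardless; the flag only lands in metadataJson.
+```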
+
+---
+
+## Goals / Non-Goals
+
+**Goals:**
+- Flag near-duplicate memories at write time using cosine similarity threshold (no blocking)
+- Provide consolidation mechanism to merge duplicates asynchronously
+- Preserve full backward compatibility: existing tool interfaces, store schema, and effectiveness event model unchanged
+- Reuse existing code primitives rather than introduce new dependencies
+
+**Non-Goals:**
+- Cross-scope deduplication (only scope-internal for v1)
+- Blocking writes (flagging only)
+- Perfect semantic deduplication (threshold-based approximation is acceptable)
+- LLM-based semantic judgment (rule-based cosine threshold only)
+- Real-time consolidation on every write (background consolidation only)
+- MD5 exact-dedup as primary mechanism (hash check is optional enhancement, not core v1)
+
+---
+
+## Decisions
+
+### Decision 1: Write-side flagging, not blocking
+
+**Choice**: After embedding a new memory, query the scope-internal vector index with `limit=1, vectorWeight=1.0, bm25Weight=0`. If `topScore >= 0.92`, write the memory with `isPotentialDuplicate: true` and `duplicateOf: <id of the top match>` in `metadataJson`. Write proceeds regardless.
+
+**Rationale**: Blocking writes risks silently dropping genuinely novel memories that fall below the threshold by a small margin. Flagging preserves all memories while making duplicates observable via `metadataJson`. Operators can inspect duplicates via `memory_search` results and remove via `memory_delete`. The `memory_effectiveness` metrics can surface the prevalence of flagged duplicates over time.
+
+**Alternatives considered**:
+- *Blocking with user prompt*: Would require interactive confirmation during `flushAutoCapture()`, which runs in a background hook—architecturally complex and bad UX.
+- *Flagging without writing `duplicateOf`*: Makes duplicates observable but not actionable. Storing the reference ID enables both AI-assisted cleanup (via `memory_delete`) and future automated consolidation.
+
+### Decision 2: Threshold 0.92 for flagging, 0.95 for consolidation
+
+**Choice**: Write-side flagging uses 0.92 cosine similarity. Consolidation merge uses 0.95.
+
+**Rationale**: 0.92 is the production-proven standard from Mem0 and the Governed Memory paper (arXiv 2603.17787, March 2026) for write-path deduplication, giving ~1-2% false positive rate and ~5-8% false negative rate. Consolidation uses a higher threshold (0.95) because merging is a more consequential operation—requires higher confidence that two memories are truly redundant.
+
+**Alternatives considered**:
+- *Single threshold for both*: Simplifies config but serves neither use case well. Flagging needs to be more sensitive (catch near-duplicates); consolidation needs to be conservative (avoid merging distinct memories).
+- *Different thresholds per category*: Decision memories (importance 0.9) could use a stricter threshold than preference memories (importance 0.65). Added complexity for marginal gain in v1.
+
+### Decision 3: Scope-internal deduplication only
+
+**Choice**: Similarity search is scoped to `activeScope` only (the current `project:{hash}` scope). Global memories are not checked against project memories, and vice versa.
+
+**Rationale**: `design.md` from the archived change `2026-03-21-add-cross-project-memory-scope` explicitly deferred cross-project deduplication. Project and global memories serve different purposes: project-specific decisions should not interfere with cross-project general knowledge. 
Adding cross-scope dedup adds merge conflict complexity (if a project memory and global memory are similar, which scope wins?).
+
+**Alternatives considered**:
+- *Cross-scope with scope-preference*: Global memories could be checked for project memories, but flagged differently. Postponed—requires new metadata schema and clearer merge semantics.
+- *Global-only dedup within global scope*: A global memory referencing the same Docker best practice as another global memory could merge. Postponed—lowest priority given global scope's lower growth rate.
+
+### Decision 4: Reuse `store.search()` for similarity check
+
+**Choice**: In `flushAutoCapture()`, after embedding, call `store.search()` with `{ queryVector, scopes: [activeScope], limit: 1, vectorWeight: 1.0, bm25Weight: 0, minScore: 0.0 }` to get the most similar existing memory.
+
+**Rationale**: Leverages the existing `ScopeCache` infrastructure (already holds tokenized text, IDF, and vector norms for the scope). No new indexing needed. The `search()` method already computes `fastCosine` for all scope records. Reusing it avoids duplicating the scoring logic and keeps dedup behavior consistent with retrieval behavior.
+
+**Alternatives considered**:
+- *Direct LanceDB vector search*: Would bypass ScopeCache and require managing a separate query path. More code to maintain.
+- *Pre-compute a sorted similarity list on write*: Would add write-time overhead and complexity. Not worth it for a flagging feature.
+
+### Decision 5: Hybrid consolidation trigger — `session.compacted` + external cron backup
+
+**Choice**: Consolidation is primarily triggered by the `session.compacted` hook (opportunistic, per-session). A Unix cron job or scheduled task external to the plugin calls `memory_consolidate` as a backup trigger to ensure consolidation runs even if sessions are long-running or the project is inactive.
+
+**Rationale**: OpenCode plugins have no built-in timer or cron infrastructure—background work is event-driven only. `session.compacted` fires when a session's context is compacted, a natural proxy for a break in the working session. However, if sessions are kept open for days or a project sees infrequent activity, consolidation could be deferred indefinitely. An external cron job calling the `memory_consolidate` tool bridges this gap without adding plugin-internal timer complexity. The tool is idempotent, so concurrent calls are safe.
+
+**Trigger hierarchy**:
+1. **Primary**: `session.compacted` → consolidation runs after `flushAutoCapture()` (asynchronous, fire-and-forget)
+2. **Backup**: External cron → calls `memory_consolidate(scope, confirm=true)` at a fixed schedule (e.g., daily at 03:00 UTC)
+3. **On-demand**: Operator or AI calls `memory_consolidate` tool manually at any time
+
+**Cron example (external)**:
+```bash
+#!/bin/bash
+# ~/.config/opencode/consolidate-cron.sh
+# Runs memory_consolidate for the global and current project scopes (invoke from cron daily at 03:00 UTC)
+opencode --memory-consolidate --scope global --confirm
+opencode --memory-consolidate --scope "project:$(git remote get-url origin | shasum | cut -c1-16)" --confirm
+```
+
+**Alternatives considered**:
+- *Dedicated interval inside plugin*: Would require Node.js `setInterval` or similar. OpenCode plugins have no scheduler API; doing this correctly (surviving restarts, deduplicating concurrent runs, respecting shutdown) adds significant complexity. Rejected for v1.
+- *Triggered on every N writes*: Adds state tracking to count writes since last consolidation. 
Session compaction is simpler and already exists.
+
+### Decision 6: Configuration via environment variables and sidecar
+
+**Choice**: Three new config keys under a `dedup` section:
+- `dedup.writeThreshold` (env: `LANCEDB_OPENCODE_PRO_DEDUP_WRITE_THRESHOLD`, default: `0.92`)
+- `dedup.consolidateThreshold` (env: `LANCEDB_OPENCODE_PRO_DEDUP_CONSOLIDATE_THRESHOLD`, default: `0.95`)
+- `dedup.enabled` (env: `LANCEDB_OPENCODE_PRO_DEDUP_ENABLED`, default: `true`)
+
+**Rationale**: Follows the existing config resolution precedence (env vars > sidecar files > defaults). Users who want to disable dedup entirely can set `enabled: false`. Users who want stricter/looser thresholds can override per environment. No changes to the existing `opencode.json` `memory` block structure.
+
+---
+
+## Risks / Trade-offs
+
+| Risk | Likelihood | Impact | Mitigation |
+|------|-----------|--------|------------|
+| **False positives**: Distinct memories with cosine >0.92 (rare but possible with short texts) | Low | Medium | `isPotentialDuplicate` flag makes them observable; consolidation uses higher 0.95 threshold |
+| **Write-time latency**: Extra `store.search()` call adds latency to `flushAutoCapture` | Low | Low | ScopeCache makes search O(1) for cached scopes; first write in a session warms cache |
+| **Embedder still required**: If Ollama/OpenAI is down, dedup check is skipped but write proceeds | Low | Low | `ensureInitialized` already handles embedder failures; dedup gracefully degrades |
+| **Consolidation conflicts**: Two memories both flagged as duplicates of each other | Very Low | Low | Consolidation processes records sorted by timestamp (newer wins); older is soft-deleted |
+| **metadataJson bloat**: `duplicateOf` references accumulate | Medium | Low | Consolidation clears `duplicateOf` when merging; `pruneScope` still runs on age |
+| **Flagging without cleanup**: DB still grows if users never act on flagged duplicates | Medium | Medium | `memory_consolidate` tool provides escape hatch; effectiveness metrics surface flagging rate |
+| **BM25 channel interference**: If vector channel degrades, dedup relies only on BM25 | Very Low | Medium | Dedup uses `vectorWeight=1.0, bm25Weight=0` explicitly—no BM25 interference |
+
+---
+
+## Migration Plan
+
+1. **No migration needed**: This is an additive feature. Existing memories are unaffected.
+2. **Config migration**: New config keys have safe defaults; existing configs work unchanged.
+3. **Schema migration**: No schema version bump required (`metadataJson` is already a flexible JSON field).
+4. **Deployment**: Deploy alongside existing plugin. Dedup starts flagging on next capture after deployment.
+5. **Rollback**: Set `dedup.enabled: false` or set env `LANCEDB_OPENCODE_PRO_DEDUP_ENABLED=false`. Existing `isPotentialDuplicate` flags in `metadataJson` are inert—they do not affect retrieval or injection.
+6. **Validation**: Run existing E2E test suite. Add new test case: insert two semantically similar memories, verify second is flagged.
+
+---
+
+## Open Questions
+
+1. ~~Should `memory_search` results display `isPotentialDuplicate` in the output?~~ → **DECIDED: Yes**, `memory_search` results SHALL display `(duplicate)` marker for records with `isPotentialDuplicate: true`.
+
+2. ~~Should consolidation auto-delete the older duplicate, or mark it as "merged into X"?~~ → **DECIDED: Soft delete** — consolidation marks the older record as superseded via `status: "merged"` and `mergedInto: <newer record id>` in metadata, and does NOT physically delete it. 
This preserves audit trail.
+
+3. ~~Should the effectiveness summary include a `duplicates.flagged` metric?~~ → **DECIDED: Yes** — `EffectivenessSummary` SHALL include a `duplicates: { flaggedCount: number, consolidatedCount: number }` section populated from capture events with `skipReason: "duplicate-similarity"`.
+
+4. ~~Should `memory_pruneScope` consider `isPotentialDuplicate` when deciding what to delete?~~ → **DECIDED: Yes** — `pruneScope` SHALL prioritize deletion of records with `isPotentialDuplicate: true` when selecting entries to remove to meet `maxEntriesPerScope`.
diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/proposal.md b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/proposal.md
new file mode 100644
index 0000000..75cdf34
--- /dev/null
+++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/proposal.md
@@ -0,0 +1,40 @@
+## Why
+
+The current memory system has no deduplication mechanism. When users ask the same question using different phrasing, or revisit the same topic across sessions, the system stores each occurrence as a separate memory entry with no similarity check. This causes storage bloat (memories that are semantically identical occupy multiple slots in `maxEntriesPerScope`), noisy recall results (multiple near-identical memories are returned and injected), and degraded capture/recall effectiveness metrics. The `openspec/changes/archive/2026-03-21-add-cross-project-memory-scope/design.md` explicitly listed deduplication as a non-goal at the time; this change addresses that gap now that the core memory pipeline is stable.
+
+## What Changes
+
+- **New capability `memory-similarity-dedup`**: After a capture candidate passes extraction and embedding, the system performs a scope-internal vector similarity search before writing. If the top similarity score >= 0.92, the memory is written with `isPotentialDuplicate: true` and `duplicateOf: <id of the top match>` metadata. Writing is never blocked—flagging only.
+- **New capability `memory-consolidation`**: A consolidation routine that scope-internally merges memories with cosine similarity >= 0.95 using soft delete. Primary trigger: `session.compacted` hook (opportunistic). Backup trigger: external cron calling `memory_consolidate` tool daily.
+- **Modified capability `memory-auto-capture-and-recall`**: The capture flow in `flushAutoCapture()` gains a new pre-write similarity check step. Two new `CaptureSkipReason` values are added for observability: `duplicate-similarity` (similarity >= 0.92) and `duplicate-exact` (MD5 hash collision).
+- **New tool `memory_consolidate`**: A plugin tool to manually trigger consolidation for a given scope, for operators who want on-demand cleanup.
+- No changes to `store.put()` semantics, search API, injection behavior, or schema version.
+
+## Capabilities
+
+### New Capabilities
+
+- `memory-similarity-dedup`: Write-path similarity flagging. Scopes: project and global. Threshold: 0.92 cosine similarity. Implementation reuses existing `store.search()` with `vectorWeight=1.0, bm25Weight=0` and `limit=1`. No blocking—flags and writes.
+- `memory-consolidation`: Background scope-internal deduplication. Threshold: 0.95 cosine similarity. Triggered by `session.compacted` hook or manual `memory_consolidate` tool call. Uses soft delete (older record marked `status: "merged"`, newer record receives `mergedFrom` reference). Merged records excluded from search results. Runs asynchronously; does not block capture or recall. 
+- `memory-consolidate-tool`: Plugin tool `memory_consolidate(scope, confirm)` for on-demand consolidation. +- `memory-search-dedup-display`: `memory_search` results display `(duplicate)` marker for records with `isPotentialDuplicate: true`. Raw result objects include `isPotentialDuplicate` and `duplicateOf` fields. + +### Modified Capabilities + +- `memory-auto-capture-and-recall`: New pre-write similarity check step in `flushAutoCapture()`. New skip reasons in capture events: `duplicate-similarity` and `duplicate-exact`. No requirement changes to existing scenarios. + +## Impact + +**Affected code:** +- `src/index.ts`: `flushAutoCapture()` gains similarity check before `store.put()`. New `consolidateScope()` function. New `memory_consolidate` tool. +- `src/store.ts`: New `consolidateDuplicates(scope, threshold)` method. No changes to existing `put()`, `search()`, `pruneScope()` signatures. +- `src/types.ts`: `CaptureSkipReason` enum gains `duplicate-similarity` and `duplicate-exact`. `MemoryRecord` metadata schema unchanged (uses `metadataJson`). +- `src/utils.ts`: No changes. +- `src/extract.ts`: No changes. + +**Configuration:** +- `dedup.writeThreshold` (env: `LANCEDB_OPENCODE_PRO_DEDUP_WRITE_THRESHOLD`, default: `0.92`) +- `dedup.consolidateThreshold` (env: `LANCEDB_OPENCODE_PRO_DEDUP_CONSOLIDATE_THRESHOLD`, default: `0.95`) +- `dedup.enabled` (env: `LANCEDB_OPENCODE_PRO_DEDUP_ENABLED`, default: `true`) + +**No external API or CLI changes.** No breaking changes to existing tool interfaces. diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-auto-capture-and-recall/spec.md b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-auto-capture-and-recall/spec.md new file mode 100644 index 0000000..4b8fd97 --- /dev/null +++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-auto-capture-and-recall/spec.md @@ -0,0 +1,37 @@ +# memory-auto-capture-and-recall Specification (Delta) + +## Purpose + +Delta spec documenting the addition of similarity-based duplicate flagging to the capture pipeline. This changes the `Capture and recall evaluation signals` requirement to include duplicate-related observable outcomes. + +## MODIFIED Requirements + +### Requirement: Capture and recall evaluation signals + +**FROM:** +> The system MUST emit structured evaluation signals during capture and recall flows so maintainers can diagnose why memories were stored, skipped, or not retrieved. + +**TO:** +> The system MUST emit structured evaluation signals during capture and recall flows so maintainers can diagnose why memories were stored, skipped, or not retrieved. The system SHALL record duplicate-related outcomes (duplicate-similarity, duplicate-exact) as capture events with `outcome: "stored"` and `skipReason` set accordingly. 
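+
+As a hedged illustration of such an event (the exact envelope is an assumption; the field values follow the requirement, and the memory ID is hypothetical):
+
+```ts
+// Capture event recorded for a flagged-but-stored duplicate:
+const captureEvent = {
+  outcome: "stored",                  // the write is never blocked
+  skipReason: "duplicate-similarity", // top similarity >= dedup.writeThreshold
+  memoryId: "mem_01HXEXAMPLE",        // hypothetical ID of the newly stored record
+};
+```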
+ +#### Scenario: Auto-capture skipped for a known reason +**unchanged** +- **WHEN** auto-capture does not persist a memory candidate because of minimum-length rejection, extraction rejection, initialization failure, or embedding failure +- **THEN** the system records the skip outcome with a normalized reason label suitable for aggregation + +#### Scenario: Auto-capture stores memory with duplicate similarity flag +- **WHEN** auto-capture processes a new memory candidate and the scope-internal similarity search returns a top score >= `dedup.writeThreshold` +- **THEN** the system records a `capture` event with `outcome: "stored"`, `skipReason: "duplicate-similarity"`, and `memoryId` pointing to the newly stored record + +#### Scenario: Recall produces ranked results +**unchanged** +- **WHEN** recall executes for a user prompt +- **THEN** the system records the query scope, result count, and whether any memory block was injected into prompt context + +### Requirement: Duplicate metrics in effectiveness summary + +The `EffectivenessSummary` SHALL include a `duplicates` section containing `flaggedCount` (total memories written with `isPotentialDuplicate: true`) and `consolidatedCount` (total memories merged via consolidation). + +#### Scenario: Effectiveness summary includes duplicate metrics +- **WHEN** `memory_effectiveness` is called for a scope +- **THEN** the returned summary SHALL include `duplicates: { flaggedCount: number, consolidatedCount: number }` where `flaggedCount` is derived from capture events with `skipReason: "duplicate-similarity"` and `consolidatedCount` is derived from records in the scope with `metadataJson.status === "merged"` diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-consolidate-tool/spec.md b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-consolidate-tool/spec.md new file mode 100644 index 0000000..085cfbb --- /dev/null +++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-consolidate-tool/spec.md @@ -0,0 +1,27 @@ +# memory-consolidate-tool Specification + +## Purpose + +Expose a plugin tool `memory_consolidate` that allows operators and AI agents to manually trigger scope-internal memory consolidation on demand. This provides an escape hatch for cleanup without waiting for the `session.compacted` trigger. + +## ADDED Requirements + +### Requirement: Manual consolidation tool + +The system SHALL provide a `memory_consolidate` plugin tool accessible via the AI tool interface. 
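+
+An illustrative invocation, sketched as a direct function call (in practice the AI invokes the tool); the counts are hypothetical:
+
+```ts
+// memory_consolidate with explicit confirmation:
+const output = await memory_consolidate({ scope: "project:abc123", confirm: true });
+// => {"scope": "project:abc123", "mergedPairs": 3, "updatedRecords": 6, "skippedRecords": 1}
+```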
+
+#### Scenario: Successful manual consolidation
+- **WHEN** the AI or operator calls `memory_consolidate(scope="project:abc123", confirm=true)`
+- **THEN** the system SHALL call `consolidateScope("project:abc123", dedup.consolidateThreshold)` and SHALL return `{"mergedPairs": N, "updatedRecords": M, "skippedRecords": K, "scope": "project:abc123"}`
+
+#### Scenario: Consolidation requires explicit confirmation
+- **WHEN** `memory_consolidate` is called with `confirm` not set to `true`
+- **THEN** the system SHALL return an error message requiring `confirm=true` before proceeding
+
+#### Scenario: Consolidation on global scope
+- **WHEN** `memory_consolidate(scope="global", confirm=true)` is called
+- **THEN** the system SHALL consolidate only within the global scope, following the same rules as project scope consolidation
+
+#### Scenario: Invalid scope format
+- **WHEN** `memory_consolidate(scope="invalid-format", confirm=true)` is called
+- **THEN** the system SHALL return an error with message `"Invalid scope format"` and SHALL NOT attempt consolidation
diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-consolidation/spec.md b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-consolidation/spec.md
new file mode 100644
index 0000000..c25ad55
--- /dev/null
+++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-consolidation/spec.md
@@ -0,0 +1,55 @@
+# memory-consolidation Specification
+
+## Purpose
+
+Provide a background consolidation mechanism that scope-internally identifies and merges memories with cosine similarity >= `dedup.consolidateThreshold`. Consolidation runs asynchronously and can also be triggered manually via the `memory_consolidate` tool. The goal is to reduce storage bloat from accumulated duplicate memories without blocking the capture pipeline.
+
+## ADDED Requirements
+
+### Requirement: Scope-internal consolidation
+
+The system SHALL provide a `consolidateScope(scope, threshold)` function that scope-internally identifies memory pairs with cosine similarity >= `threshold` and merges them by soft-deleting the older record and updating the newer record's metadata.
+
+#### Scenario: Consolidation merges similar memories (soft delete)
+- **WHEN** `consolidateScope("project:abc123", 0.95)` is called and the scope contains two memories A and B where `cosineSimilarity(A.vector, B.vector) >= 0.95` and A.timestamp < B.timestamp
+- **THEN** the system updates record A's `metadataJson` to set `"status": "merged"` and `"mergedInto": "<B.id>"`, updates record B's `metadataJson` to include `"mergedFrom": "<A.id>"`, and invalidates the scope cache. Record A is NOT physically deleted—consolidation uses soft delete to preserve audit trail. 
+
+#### Scenario: No similar memories to consolidate
+- **WHEN** `consolidateScope(scope, threshold)` is called and no pair of memories within the scope has similarity >= threshold
+- **THEN** the system makes no changes and returns `{ mergedPairs: 0, updatedRecords: 0, skippedRecords: 0 }`
+
+#### Scenario: Multiple candidate pairs processed
+- **WHEN** `consolidateScope(scope, threshold)` is called and there are multiple pairs of similar memories (e.g., A≈B, B≈C, but A≉C)
+- **THEN** the system SHALL process all pairs in a single pass, sorting by timestamp (oldest first), and SHALL NOT re-merge a record that has already been marked as merged
+
+#### Scenario: Cross-session consolidation during session.compacted
+- **WHEN** a `session.compacted` event fires and `dedup.enabled` is `true`
+- **THEN** the system SHALL call `consolidateScope(activeScope, dedup.consolidateThreshold)` asynchronously after `flushAutoCapture()` completes, without blocking the session compaction flow
+
+### Requirement: Consolidation safety guard
+
+Consolidation operations SHALL be idempotent and SHALL NOT merge away records that are actively being used in an ongoing recall operation.
+
+#### Scenario: Consolidation is idempotent
+- **WHEN** `consolidateScope(scope, threshold)` is called twice in succession with no new memories written between the calls
+- **THEN** the second call SHALL report `mergedPairs: 0` (no changes) because all similar pairs have already been merged
+
+#### Scenario: Recently recalled memory is not merged during consolidation
+- **WHEN** consolidation would soft-delete record X but X was recalled within the last 5 minutes (i.e., `Date.now() - X.lastRecalled < 300_000`)
+- **THEN** the system SHALL skip merging X and SHALL NOT update its `duplicateOf` or `mergedFrom` metadata
+
+### Requirement: Consolidation metrics
+
+Consolidation operations SHALL record metrics about the number of pairs merged and records updated.
+
+#### Scenario: Consolidation emits operation metrics
+- **WHEN** `consolidateScope(scope, threshold)` completes
+- **THEN** the system SHALL return an object containing `{ mergedPairs: number, updatedRecords: number, skippedRecords: number }` where `updatedRecords` reflects the count of records whose metadata was modified (soft-deleted source records + updated target records)
+
+### Requirement: Merged records are excluded from recall
+
+Records with `metadataJson.status === "merged"` SHALL be excluded from search results in both auto-recall and manual `memory_search`.
+
+#### Scenario: Merged record not returned in search
+- **WHEN** `store.search()` is called for a scope that contains a record with `metadataJson.status === "merged"`
+- **THEN** the system SHALL filter out that record from the results, so it does not appear in recall injection or manual search results
diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-search-dedup-display/spec.md b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-search-dedup-display/spec.md
new file mode 100644
index 0000000..2db5043
--- /dev/null
+++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-search-dedup-display/spec.md
@@ -0,0 +1,31 @@
+# memory-search-dedup-display Specification
+
+## Purpose
+
+Define how `memory_search` tool results display the duplicate flag for memories written with `isPotentialDuplicate: true`. This makes duplicate observations actionable by operators and AI agents without requiring out-of-band inspection of `metadataJson`. 
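+
+For orientation, formatted output under this spec would look roughly as follows (memory IDs, texts, and scores are hypothetical):
+
+```text
+1. [mem_01ABC] (duplicate) (project:abc123) Fixed nginx reverse proxy by forwarding the Host header [94%]
+2. [mem_01DEF] (project:abc123) Resolved CORS error by adding the missing response header [88%]
+```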
+ +## ADDED Requirements + +### Requirement: Duplicate marker in search results + +The `memory_search` tool SHALL include a `(duplicate)` marker in the formatted output for records where `metadataJson.isPotentialDuplicate === true`. + +#### Scenario: Search result shows duplicate marker +- **WHEN** `memory_search(query="nginx config", limit=5)` is called and one of the returned records has `metadataJson.isPotentialDuplicate === true` +- **THEN** the formatted output for that record SHALL include the text `(duplicate)` after the memory ID, before the text content + +#### Scenario: Search result without duplicate flag shows no marker +- **WHEN** `memory_search(query="nginx config", limit=5)` is called and a returned record has `metadataJson.isPotentialDuplicate === false` or `metadataJson.isPotentialDuplicate` is absent +- **THEN** the formatted output for that record SHALL NOT include any duplicate marker + +#### Scenario: Merged records excluded from search results +- **WHEN** `memory_search(query="nginx config", limit=5)` is called +- **THEN** records with `metadataJson.status === "merged"` SHALL NOT appear in the results, even if they were previously stored + +### Requirement: Duplicate metadata accessible in raw results + +The raw search result object (before formatting) SHALL include `isPotentialDuplicate` and `duplicateOf` fields so calling code can consume them programmatically. + +#### Scenario: Raw result includes duplicate metadata +- **WHEN** `memory_search(query="nginx config", limit=5)` is called and returns records with duplicate metadata +- **THEN** the raw result objects SHALL include `isPotentialDuplicate: boolean` and `duplicateOf: string | null` fields alongside `id`, `score`, `text`, and `scope` diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-similarity-dedup/spec.md b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-similarity-dedup/spec.md new file mode 100644 index 0000000..0f64247 --- /dev/null +++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/specs/memory-similarity-dedup/spec.md @@ -0,0 +1,51 @@ +# memory-similarity-dedup Specification + +## Purpose + +Add similarity-based deduplication flagging to the memory capture pipeline. When a new memory candidate is processed, the system SHALL check its semantic similarity against existing memories in the same scope before writing. If similarity exceeds the configured threshold, the memory is written with duplicate metadata but the write is NOT blocked. + +## ADDED Requirements + +### Requirement: Write-path similarity flagging + +After a memory candidate passes extraction and embedding in `flushAutoCapture()`, the system SHALL perform a scope-internal vector similarity search using the new memory's embedding. The system SHALL write the memory regardless of the similarity result, but SHALL populate `metadataJson.isPotentialDuplicate` and `metadataJson.duplicateOf` when similarity to the most similar existing memory meets or exceeds `dedup.writeThreshold`. 
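+
+A hedged example of the resulting metadata (field names as specified here; the record ID is hypothetical):
+
+```ts
+// metadataJson written for a flagged capture; duplicateOf references the top match.
+const flaggedMetadata = JSON.stringify({
+  source: "auto-capture",
+  isPotentialDuplicate: true,     // top similarity >= dedup.writeThreshold (default 0.92)
+  duplicateOf: "mem_01HXEXAMPLE", // hypothetical ID of the most similar existing record
+});
+```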
+
+#### Scenario: Similar memory found during capture
+- **WHEN** `flushAutoCapture()` processes a new memory candidate and the scope-internal similarity search returns a top score >= `dedup.writeThreshold`
+- **THEN** the system writes the memory record with `metadataJson` containing `"isPotentialDuplicate": true` and `"duplicateOf": "<id of the most similar memory>"`, and records a `capture` event with `outcome: "stored"` and `skipReason: "duplicate-similarity"`
+
+#### Scenario: No similar memory found during capture
+- **WHEN** `flushAutoCapture()` processes a new memory candidate and the scope-internal similarity search returns a top score < `dedup.writeThreshold`, or returns no results
+- **THEN** the system writes the memory record with `metadataJson` containing `"isPotentialDuplicate": false`, and records a `capture` event with `outcome: "stored"` (no skip reason)
+
+#### Scenario: Dedup disabled via configuration
+- **WHEN** `dedup.enabled` is `false`
+- **THEN** the system skips the similarity check entirely, writes the memory with `"isPotentialDuplicate": false`, and records a standard `capture` event with `outcome: "stored"`
+
+#### Scenario: Embedder unavailable during dedup check
+- **WHEN** the similarity check is attempted but the embedder returns an empty vector or throws an error
+- **THEN** the system writes the memory with `"isPotentialDuplicate": false` and `"duplicateOf": null`, and records a standard `capture` event with `outcome: "stored"` (dedup failure does not block capture)
+
+#### Scenario: Empty scope during dedup check
+- **WHEN** the active scope contains zero existing memories at the time of similarity check
+- **THEN** the system writes the memory with `"isPotentialDuplicate": false` and `"duplicateOf": null`, and records a standard `capture` event with `outcome: "stored"`
+
+### Requirement: Configurable dedup thresholds
+
+The system SHALL support configurable dedup thresholds via environment variables and sidecar config, following the existing config resolution precedence. The system SHALL use safe defaults when config keys are absent.
+
+#### Scenario: Default threshold values
+- **WHEN** no `dedup.writeThreshold` is configured
+- **THEN** the system SHALL use `0.92` as the default value
+
+#### Scenario: Environment variable override
+- **WHEN** the environment variable `LANCEDB_OPENCODE_PRO_DEDUP_WRITE_THRESHOLD` is set to a valid float between `0.0` and `1.0`
+- **THEN** the system SHALL use that value as `dedup.writeThreshold`, overriding any sidecar config value
+
+#### Scenario: Sidecar config override
+- **WHEN** the sidecar file `lancedb-opencode-pro.json` contains `{"dedup": {"writeThreshold": 0.95}}`
+- **THEN** the system SHALL merge this with existing config, and environment variables SHALL take precedence over sidecar values
+
+#### Scenario: Invalid threshold clamped to safe range
+- **WHEN** the configured `dedup.writeThreshold` is less than `0.0` or greater than `1.0`
+- **THEN** the system SHALL clamp the value to the range `[0.0, 1.0]`
diff --git a/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/tasks.md b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/tasks.md
new file mode 100644
index 0000000..3b438a7
--- /dev/null
+++ b/openspec/changes/archive/2026-03-27-add-similarity-dedup-flagging/tasks.md
@@ -0,0 +1,76 @@
+## 1. Config and Types
+
+- [x] 1.1 Add `dedup.writeThreshold`, `dedup.consolidateThreshold`, and `dedup.enabled` to `MemoryRuntimeConfig` interface in `src/types.ts`
+- [x] 1.2 Add `duplicate-similarity` and `duplicate-exact` to `CaptureSkipReason` type in `src/types.ts`
+- [x] 1.3 Add dedup config resolution in `src/config.ts`: read `dedup` from raw config, resolve `writeThreshold`, `consolidateThreshold`, and `enabled` with env-var precedence, clamp thresholds to `[0.0, 1.0]`, set safe defaults (`writeThreshold: 0.92`, `consolidateThreshold: 0.95`, `enabled: true`)
+
+## 2. Store Layer — Consolidation Method (Soft Delete)
+
+- [x] 2.1 Add `consolidateDuplicates(scope: string, threshold: number): Promise<{ mergedPairs: number; updatedRecords: number; skippedRecords: number }>` to `MemoryStore` class in `src/store.ts`
+  - Scope-internal two-pass: first pass computes all-pair cosine similarity >= threshold (using `fastCosine` with pre-computed norms from scopeCache), second pass soft-deletes older records and updates newer records
+  - Soft delete: set `metadataJson.status = "merged"` and `metadataJson.mergedInto = "<newer record id>"` on the older record (NOT physical deletion)
+  - Update newer record: set `metadataJson.mergedFrom = "<older record id>"`
+  - Must skip if older record already has `status === "merged"` (already processed)
+  - Must skip records with `lastRecalled` within 5 minutes
+  - Invalidate scope cache after all changes
+  - Return operation metrics
+- [x] 2.2 Filter merged records from `store.search()`: add `WHERE metadataJson.status != "merged"` clause so `memory_search` and recall automatically exclude merged records
+
+## 3. Capture Pipeline — Dedup Flagging
+
+- [x] 3.1 In `flushAutoCapture()` in `src/index.ts`, after `embedder.embed()` succeeds and before `store.put()`: call `store.search()` with `queryVector: vector`, `scopes: [activeScope]`, `limit: 1`, `vectorWeight: 1.0`, `bm25Weight: 0.0`, `minScore: 0.0`
+- [x] 3.2 If `searchResults[0].score >= state.config.dedup.writeThreshold`: set `isPotentialDuplicate = true` and `duplicateOf = searchResults[0].record.id`; else `isPotentialDuplicate = false`, `duplicateOf = null`
+- [x] 3.3 Pass `isPotentialDuplicate` and `duplicateOf` into `metadataJson` when calling `store.put()`
+- [x] 3.4 When `dedup.enabled` is `false`: skip similarity check, write with `isPotentialDuplicate: false`
+- [x] 3.5 When embedder fails or returns empty vector: skip similarity check, write with `isPotentialDuplicate: false` (graceful degradation)
+- [x] 3.6 When scope has 0 records: skip similarity check, write with `isPotentialDuplicate: false`
+
+## 4. Consolidation Trigger — Session Hook + Tool
+
+- [x] 4.1 In `session.compacted` event handler in `src/index.ts`: after `flushAutoCapture()` completes, if `dedup.enabled` is `true`, call `store.consolidateDuplicates(activeScope, dedup.consolidateThreshold)` asynchronously (do not await, fire-and-forget)
+- [x] 4.2 Add `memory_consolidate` tool in `src/index.ts`: args `{ scope: string, confirm: boolean }`, requires `confirm === true`, calls `store.consolidateDuplicates(scope, dedup.consolidateThreshold)`, returns `{ mergedPairs, updatedRecords, skippedRecords, scope }`
+- [x] 4.3 Add `consolidateAllScopes()` variant that consolidates all known scopes (global + all project scopes from project registry). Used by external cron job for comprehensive daily cleanup. 
+- [x] 4.4 Document external cron usage in `docs/operations.md`: example cron script that calls `memory_consolidate` for global scope and per-project scopes daily at 03:00 UTC
+
+## 5. Configuration Tests
+
+- [x] 5.1 Test: default dedup thresholds are `0.92` and `0.95` when config is empty
+- [x] 5.2 Test: env vars override sidecar config
+- [x] 5.3 Test: invalid threshold values are clamped to `[0.0, 1.0]`
+
+## 6. Store Consolidation Tests
+
+- [x] 6.1 Test: `consolidateDuplicates` returns `{ mergedPairs: 0, updatedRecords: 0, skippedRecords: 0 }` when scope is empty
+- [x] 6.2 Test: `consolidateDuplicates` merges two memories with cosine >= 0.95, older is marked `status: "merged"`, newer retains `mergedFrom` in metadata
+- [x] 6.3 Test: `consolidateDuplicates` skips records recalled within last 5 minutes
+- [x] 6.4 Test: `consolidateDuplicates` is idempotent (second call reports zero merged pairs)
+
+## 7. Capture Dedup Flagging Tests
+
+- [ ] 7.1 Test: second memory with >0.92 similarity to first is written with `isPotentialDuplicate: true` and `duplicateOf` set
+- [ ] 7.2 Test: memory with <0.92 similarity is written with `isPotentialDuplicate: false`
+- [ ] 7.3 Test: when `dedup.enabled` is `false`, no similarity check is performed and memory is written with `isPotentialDuplicate: false`
+- [x] 7.4 Test: `memory_consolidate` tool returns error when `confirm !== true`
+- [x] 7.5 Test: `memory_consolidate` tool calls `consolidateDuplicates` and returns metrics when `confirm === true`
+
+## 8. memory_search Duplicate Marker Display
+
+- [x] 8.1 In `memory_search` tool formatter in `src/index.ts`: parse `metadataJson.isPotentialDuplicate` from each result; if `true`, append `(duplicate)` to the formatted output line after the ID
+- [x] 8.2 Ensure raw result object includes `isPotentialDuplicate: boolean` and `duplicateOf: string | null` fields in the return value (not just formatted string)
+- [x] 8.3 Test: `memory_search` result with `isPotentialDuplicate: true` shows `(duplicate)` marker in output
+- [x] 8.4 Test: `memory_search` result with `isPotentialDuplicate: false` or absent shows no marker
+- [x] 8.5 Test: `memory_search` does not return records with `metadataJson.status === "merged"`
+
+## 9. Effectiveness Summary — Duplicate Metrics
+
+- [x] 9.1 In `store.summarizeEvents()` in `src/store.ts`: count capture events with `skipReason === "duplicate-similarity"` and store count as `flaggedCount`; count records with `metadataJson.status === "merged"` as `consolidatedCount`
+- [x] 9.2 Add `duplicates: { flaggedCount: number, consolidatedCount: number }` to `EffectivenessSummary` interface in `src/types.ts`
+- [x] 9.3 Test: `memory_effectiveness` returns `duplicates.flaggedCount` reflecting `duplicate-similarity` capture events
+- [x] 9.4 Test: `memory_effectiveness` returns `duplicates.consolidatedCount` reflecting merged records in scope
+
+## 10. pruneScope — Priority Deletion of Flagged Records
+
+- [x] 10.1 In `store.pruneScope()` in `src/store.ts`: when selecting records to delete to meet `maxEntriesPerScope`, prioritize records with `metadataJson.isPotentialDuplicate === true` over unflagged records of similar or older timestamp
+- [x] 10.2 Within flagged records, use timestamp ordering (oldest first) to decide deletion order
+- [x] 10.3 Test: when scope has N flagged duplicates and M unflagged records, and `maxEntriesPerScope = N + M - 1`, only the newest flagged duplicate is retained
+- [x] 10.4 Test: unflagged records are only deleted after all flagged records with older timestamps are removed
diff --git a/package.json b/package.json
index 9fc0087..c90832b 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
  "name": "lancedb-opencode-pro",
-  "version": "0.2.4",
+  "version": "0.2.5",
  "description": "LanceDB-backed long-term memory provider for OpenCode",
  "type": "module",
  "main": "dist/index.js",
diff --git a/src/config.ts b/src/config.ts
index f339fa6..5cd6a99 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -1,7 +1,7 @@
import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import type { Config } from "@opencode-ai/sdk";
-import type { EmbeddingProvider, InjectionMode, SummarizationMode, CodeTruncationMode, MemoryRuntimeConfig, RetrievalMode } from "./types.js";
+import type { DedupConfig, EmbeddingProvider, InjectionMode, SummarizationMode, CodeTruncationMode, MemoryRuntimeConfig, RetrievalMode } from "./types.js";
import { clamp, expandHomePath, parseJsonObject, toBoolean, toNumber } from "./utils.js";

const DEFAULT_DB_PATH = "~/.opencode/memory/lancedb";
@@ -67,6 +67,8 @@ export function resolveMemoryConfig(config: Config | undefined, worktree?: strin

  const injection = resolveInjectionConfig(raw, process.env);

+  const dedup = resolveDedupConfig(raw, process.env);
+
  const resolvedConfig: MemoryRuntimeConfig = {
    provider,
    dbPath,
@@ -91,6 +93,7 @@
      importanceWeight,
    },
    injection,
+    dedup,
    includeGlobalScope: toBoolean(process.env.LANCEDB_OPENCODE_PRO_INCLUDE_GLOBAL_SCOPE ?? raw.includeGlobalScope, true),
    globalDetectionThreshold: Math.max(
      1,
@@ -142,6 +145,25 @@ function resolveCodeTruncationMode(raw: unknown): CodeTruncationMode {
  return "smart";
}

+function resolveDedupConfig(
+  raw: Record<string, unknown>,
+  env: NodeJS.ProcessEnv,
+): DedupConfig {
+  const dedupRaw = (raw.dedup ?? {}) as Record<string, unknown>;
+  const enabled = toBoolean(env.LANCEDB_OPENCODE_PRO_DEDUP_ENABLED ?? dedupRaw.enabled, true);
+  const writeThreshold = clamp(
+    toNumber(env.LANCEDB_OPENCODE_PRO_DEDUP_WRITE_THRESHOLD ?? dedupRaw.writeThreshold, 0.92),
+    0.0,
+    1.0,
+  );
+  const consolidateThreshold = clamp(
+    toNumber(env.LANCEDB_OPENCODE_PRO_DEDUP_CONSOLIDATE_THRESHOLD ?? dedupRaw.consolidateThreshold, 0.95),
+    0.0,
+    1.0,
+  );
+  return { enabled, writeThreshold, consolidateThreshold };
+}
+
function resolveInjectionConfig(
  raw: Record<string, unknown>,
  env: NodeJS.ProcessEnv
)
@@ -243,6 +265,10 @@ function mergeMemoryConfig(
      ...(((override.injection ?? {}) as Record<string, unknown>).codeSummarization ?? {}) as Record<string, unknown>,
      },
    },
+    dedup: {
+      ...((base.dedup ?? {}) as Record<string, unknown>),
+      ...((override.dedup ?? {}) as Record<string, unknown>),
+    },
  };
}

diff --git a/src/index.ts b/src/index.ts
index a4d4efd..871a3d3 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -30,6 +30,10 @@ const plugin: Plugin = async (input) => {
      if (event.type === "session.idle" || event.type === "session.compacted") {
        const sessionID = event.properties.sessionID;
        await flushAutoCapture(sessionID, state, input.client);
+        if (event.type === "session.compacted" && state.config.dedup.enabled) {
+          const activeScope = deriveProjectScope(input.worktree);
+          state.store.consolidateDuplicates(activeScope, state.config.dedup.consolidateThreshold).catch(() => {});
+        }
      }
    },
    "experimental.text.complete": async (eventInput, eventOutput) => {
@@ -173,7 +177,9 @@
      return results
        .map((item, idx) => {
          const percent = Math.round(item.score * 100);
-          return `${idx + 1}. [${item.record.id}] (${item.record.scope}) ${item.record.text} [${percent}%]`;
+          const meta = JSON.parse(item.record.metadataJson || "{}");
+          const duplicateMarker = meta.isPotentialDuplicate ? " (duplicate)" : "";
+          return `${idx + 1}. [${item.record.id}]${duplicateMarker} (${item.record.scope}) ${item.record.text} [${percent}%]`;
        })
        .join("\n");
    },
@@ -442,6 +448,43 @@
        .join("\n");
      },
    }),
+    memory_consolidate: tool({
+      description: "Scope-internally merge near-duplicate memories. Use to clean up accumulated duplicates.",
+      args: {
+        scope: tool.schema.string().optional(),
+        confirm: tool.schema.boolean().default(false),
+      },
+      execute: async (args, context) => {
+        await state.ensureInitialized();
+        if (!state.initialized) return unavailableMessage(state.config.embedding.provider);
+        if (!args.confirm) {
+          return "Rejected: memory_consolidate requires confirm=true.";
+        }
+        const targetScope = args.scope ?? deriveProjectScope(context.worktree);
+        const result = await state.store.consolidateDuplicates(targetScope, state.config.dedup.consolidateThreshold);
+        return JSON.stringify({ scope: targetScope, ...result }, null, 2);
+      },
+    }),
+    memory_consolidate_all: tool({
+      description: "Consolidate duplicates across global scope and current project scope. Used by external cron jobs for daily cleanup.",
diff --git a/src/index.ts b/src/index.ts
index a4d4efd..871a3d3 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -30,6 +30,10 @@ const plugin: Plugin = async (input) => {
       if (event.type === "session.idle" || event.type === "session.compacted") {
         const sessionID = event.properties.sessionID;
         await flushAutoCapture(sessionID, state, input.client);
+        if (event.type === "session.compacted" && state.config.dedup.enabled) {
+          const activeScope = deriveProjectScope(input.worktree);
+          state.store.consolidateDuplicates(activeScope, state.config.dedup.consolidateThreshold).catch(() => {});
+        }
       }
     },
     "experimental.text.complete": async (eventInput, eventOutput) => {
@@ -173,7 +177,9 @@ const plugin: Plugin = async (input) => {
       return results
         .map((item, idx) => {
           const percent = Math.round(item.score * 100);
-          return `${idx + 1}. [${item.record.id}] (${item.record.scope}) ${item.record.text} [${percent}%]`;
+          const meta = JSON.parse(item.record.metadataJson || "{}");
+          const duplicateMarker = meta.isPotentialDuplicate ? " (duplicate)" : "";
+          return `${idx + 1}. [${item.record.id}]${duplicateMarker} (${item.record.scope}) ${item.record.text} [${percent}%]`;
         })
         .join("\n");
     },
@@ -442,6 +448,43 @@ const plugin: Plugin = async (input) => {
         .join("\n");
       },
     }),
+    memory_consolidate: tool({
+      description: "Merge near-duplicate memories within a single scope. Use to clean up accumulated duplicates.",
+      args: {
+        scope: tool.schema.string().optional(),
+        confirm: tool.schema.boolean().default(false),
+      },
+      execute: async (args, context) => {
+        await state.ensureInitialized();
+        if (!state.initialized) return unavailableMessage(state.config.embedding.provider);
+        if (!args.confirm) {
+          return "Rejected: memory_consolidate requires confirm=true.";
+        }
+        const targetScope = args.scope ?? deriveProjectScope(context.worktree);
+        const result = await state.store.consolidateDuplicates(targetScope, state.config.dedup.consolidateThreshold);
+        return JSON.stringify({ scope: targetScope, ...result }, null, 2);
+      },
+    }),
+    memory_consolidate_all: tool({
+      description: "Consolidate duplicates across the global scope and the current project scope. Intended for scheduled cleanup such as a daily external cron job.",
+      args: {
+        confirm: tool.schema.boolean().default(false),
+      },
+      execute: async (args, context) => {
+        await state.ensureInitialized();
+        if (!state.initialized) return unavailableMessage(state.config.embedding.provider);
+        if (!args.confirm) {
+          return "Rejected: memory_consolidate_all requires confirm=true.";
+        }
+        const projectScope = deriveProjectScope(context.worktree);
+        const globalResult = await state.store.consolidateDuplicates("global", state.config.dedup.consolidateThreshold);
+        const projectResult = await state.store.consolidateDuplicates(projectScope, state.config.dedup.consolidateThreshold);
+        return JSON.stringify({
+          global: { scope: "global", ...globalResult },
+          project: { scope: projectScope, ...projectResult },
+        }, null, 2);
+      },
+    }),
     memory_port_plan: tool({
       description: "Plan non-conflicting host ports for compose services and optionally persist reservations",
       args: {
@@ -610,7 +653,7 @@ async function getLastUserText(
   }
 }

 async function flushAutoCapture(
   sessionID: string,
   state: RuntimeState,
   client: { session: { get: (input: { path: { id: string } }) => Promise<unknown> } },
@@ -680,6 +723,28 @@
     return;
   }

+  let isPotentialDuplicate = false;
+  let duplicateOf: string | null = null;
+
+  if (state.config.dedup.enabled) {
+    const similar = await state.store.search({
+      query: result.candidate.text,
+      queryVector: vector,
+      scopes: [activeScope],
+      limit: 1,
+      vectorWeight: 1.0,
+      bm25Weight: 0.0,
+      minScore: 0.0,
+      rrfK: 60,
+      recencyBoost: false,
+      globalDiscountFactor: 1.0,
+    });
+    if (similar.length > 0 && similar[0].score >= state.config.dedup.writeThreshold) {
+      isPotentialDuplicate = true;
+      duplicateOf = similar[0].record.id;
+    }
+  }
+
   const memoryId = generateId();

   await state.store.put({
@@ -699,6 +764,8 @@
     metadataJson: JSON.stringify({
       source: "auto-capture",
       sessionID,
+      isPotentialDuplicate,
+      duplicateOf,
     }),
   });
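The capture-time gate above runs a vector-only search (`vectorWeight: 1.0`, `bm25Weight: 0.0`), so the score compared against `writeThreshold` tracks raw cosine similarity (modulo the store's importance weighting, as the foundation test below notes). A toy illustration of the threshold decision using the `storeFastCosine` helper this diff exports, with vectors shortened to three dimensions for readability:

```ts
import { storeFastCosine } from "./store.js";

const norm = (v: number[]) => Math.sqrt(v.reduce((sum, x) => sum + x * x, 0));

const existing = [0.6, 0.8, 0.0];   // vector of an already-stored memory
const incoming = [0.6, 0.79, 0.02]; // vector of the new capture candidate

const sim = storeFastCosine(existing, incoming, norm(existing), norm(incoming));
console.log(sim.toFixed(4));  // ≈ 0.9998
console.log(sim >= 0.92);     // true: flagged against the default writeThreshold
```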
diff --git a/src/store.ts b/src/store.ts
index deb8cd6..5b08a5f 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -35,6 +35,18 @@ interface ScopeCache {
   norms: Map<string, number>;
 }

+// Exported for use by consolidateDuplicates
+export function storeFastCosine(a: number[], b: number[], normA: number, normB: number): number {
+  if (a.length === 0 || b.length === 0 || a.length !== b.length) return 0;
+  const denom = normA * normB;
+  if (denom === 0) return 0;
+  let dot = 0;
+  for (let i = 0; i < a.length; i += 1) {
+    dot += a[i] * b[i];
+  }
+  return dot / denom;
+}
+
 export class MemoryStore {
   private lancedb: LanceModule | null = null;
   private connection: LanceConnection | null = null;
@@ -269,7 +281,26 @@ export class MemoryStore {
   async pruneScope(scope: string, maxEntries: number): Promise<number> {
     const rows = await this.list(scope, 100000);
     if (rows.length <= maxEntries) return 0;
-    const toDelete = rows.slice(maxEntries);
+
+    const flagged = rows.filter((r) => {
+      const meta = parseMetadata(r.metadataJson);
+      return meta.isPotentialDuplicate === true;
+    });
+    const unflagged = rows.filter((r) => {
+      const meta = parseMetadata(r.metadataJson);
+      return meta.isPotentialDuplicate !== true;
+    });
+
+    const sortedFlagged = flagged.sort((a, b) => a.timestamp - b.timestamp);
+    const sortedUnflagged = unflagged.sort((a, b) => a.timestamp - b.timestamp);
+
+    const toDeleteCount = rows.length - maxEntries;
+    const deleteFromFlagged = Math.min(sortedFlagged.length, toDeleteCount);
+    const toDelete = [
+      ...sortedFlagged.slice(0, deleteFromFlagged),
+      ...sortedUnflagged.slice(0, toDeleteCount - deleteFromFlagged),
+    ];
+
     for (const row of toDelete) {
       await this.requireTable().delete(`id = '${escapeSql(row.id)}'`);
     }
@@ -277,6 +308,75 @@ export class MemoryStore {
     return toDelete.length;
   }

+  async consolidateDuplicates(scope: string, threshold: number): Promise<{
+    mergedPairs: number;
+    updatedRecords: number;
+    skippedRecords: number;
+  }> {
+    const rows = await this.readByScopesIncludingMerged([scope]);
+    if (rows.length === 0) {
+      return { mergedPairs: 0, updatedRecords: 0, skippedRecords: 0 };
+    }
+
+    let mergedPairs = 0;
+    let updatedRecords = 0;
+    let skippedRecords = 0;
+    const now = Date.now();
+    const FIVE_MINUTES_MS = 5 * 60 * 1000;
+
+    const rowsWithNorms = rows.map((row) => ({
+      row,
+      norm: this.scopeCache.get(scope)?.norms.get(row.id) ?? vecNorm(row.vector),
+    }));
+
+    for (let i = 0; i < rowsWithNorms.length; i += 1) {
+      const a = rowsWithNorms[i];
+      for (let j = i + 1; j < rowsWithNorms.length; j += 1) {
+        const b = rowsWithNorms[j];
+        const sim = storeFastCosine(a.row.vector, b.row.vector, a.norm, b.norm);
+        if (sim < threshold) continue;
+
+        const aMeta = parseMetadata(a.row.metadataJson);
+        if (aMeta.status === "merged") {
+          skippedRecords += 1;
+          continue;
+        }
+        if (a.row.lastRecalled > 0 && now - a.row.lastRecalled < FIVE_MINUTES_MS) {
+          skippedRecords += 1;
+          continue;
+        }
+
+        const older = a.row.timestamp <= b.row.timestamp ? a.row : b.row;
+        const newer = a.row.timestamp <= b.row.timestamp ? b.row : a.row;
+        const newerMeta = parseMetadata(newer.metadataJson);
+
+        const mergedIntoId = newer.id;
+        const updatedOlderMeta = { status: "merged" as const, mergedInto: mergedIntoId };
+        await this.requireTable().delete(`id = '${escapeSql(older.id)}'`);
+        await this.requireTable().add([{
+          ...older,
+          metadataJson: JSON.stringify({ ...parseMetadata(older.metadataJson), ...updatedOlderMeta }),
+        }]);
+
+        const updatedNewerMeta = { ...newerMeta, mergedFrom: older.id };
+        await this.requireTable().delete(`id = '${escapeSql(newer.id)}'`);
+        await this.requireTable().add([{
+          ...newer,
+          metadataJson: JSON.stringify(updatedNewerMeta),
+        }]);
+
+        mergedPairs += 1;
+        updatedRecords += 2;
+      }
+    }
+
+    if (mergedPairs > 0) {
+      this.invalidateScope(scope);
+    }
+
+    return { mergedPairs, updatedRecords, skippedRecords };
+  }
+
   async countIncompatibleVectors(scopes: string[], expectedDim: number): Promise<number> {
     const rows = await this.readByScopes(scopes);
     return rows.filter((row) => row.vectorDim !== expectedDim).length;
   }
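A usage sketch of `consolidateDuplicates` (assuming an initialized `MemoryStore` instance; the scope name is illustrative). Note that both rows of a merged pair survive in the table; consolidation rewrites metadata rather than dropping data:

```ts
const result = await store.consolidateDuplicates("project:my-app", 0.95);
console.log(result); // e.g. { mergedPairs: 1, updatedRecords: 2, skippedRecords: 0 }

// After the merge:
// - the older row carries { status: "merged", mergedInto: "<newer id>" } and is
//   hidden from search, list, and the merged-filtering readByScopes;
// - the newer row gains { mergedFrom: "<older id>" } as provenance.
```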
@@ -348,6 +448,8 @@ export class MemoryStore {
   async summarizeEvents(scope: string, includeGlobalScope: boolean): Promise<EffectivenessSummary> {
     const scopes = includeGlobalScope && scope !== "global" ? [scope, "global"] : [scope];
     const events = await this.readEventsByScopes(scopes);
+    // Read all memories including merged for duplicate counts
+    const memories = await this.readByScopesIncludingMerged(scopes);

     const captureSkipReasons: Partial<Record<CaptureSkipReason, number>> = {};
     let captureConsidered = 0;
@@ -406,6 +508,16 @@ export class MemoryStore {
     const totalCaptureAttempts = captureStored + captureSkipped;
     const totalUsefulFeedback = feedbackUsefulPositive + feedbackUsefulNegative;

+    // Count flagged (isPotentialDuplicate) and consolidated (status=merged) from memories table
+    const flaggedCount = memories.filter((r) => {
+      const meta = parseMetadata(r.metadataJson);
+      return meta.isPotentialDuplicate === true;
+    }).length;
+    const consolidatedCount = memories.filter((r) => {
+      const meta = parseMetadata(r.metadataJson);
+      return meta.status === "merged";
+    }).length;
+
     return {
       scope,
       totalEvents: events.length,
@@ -447,6 +559,10 @@ export class MemoryStore {
         falsePositiveRate: captureStored === 0 ? 0 : feedbackWrong / captureStored,
         falseNegativeRate: totalCaptureAttempts === 0 ? 0 : feedbackMissing / totalCaptureAttempts,
       },
+      duplicates: {
+        flaggedCount,
+        consolidatedCount,
+      },
     };
   }
@@ -542,7 +658,7 @@ export class MemoryStore {
       .filter((row): row is MemoryEffectivenessEvent => row !== null);
   }

-  private async readByScopes(scopes: string[]): Promise<MemoryRecord[]> {
+  private async readByScopesIncludingMerged(scopes: string[]): Promise<MemoryRecord[]> {
     const table = this.requireTable();
     if (scopes.length === 0) return [];
     const whereExpr = scopes.map((scope) => `scope = '${escapeSql(scope)}'`).join(" OR ");
@@ -573,6 +689,37 @@ export class MemoryStore {
       .filter((row): row is MemoryRecord => row !== null);
   }

+  private async readByScopes(scopes: string[]): Promise<MemoryRecord[]> {
+    const table = this.requireTable();
+    if (scopes.length === 0) return [];
+    const whereExpr = scopes.map((scope) => `scope = '${escapeSql(scope)}'`).join(" OR ");
+    const rows = await table
+      .query()
+      .where(`(${whereExpr}) AND metadataJson NOT LIKE '%"status":"merged"%'`)
+      .select([
+        "id",
+        "text",
+        "vector",
+        "category",
+        "scope",
+        "importance",
+        "timestamp",
+        "lastRecalled",
+        "recallCount",
+        "projectCount",
+        "schemaVersion",
+        "embeddingModel",
+        "vectorDim",
+        "metadataJson",
+      ])
+      .limit(100000)
+      .toArray();
+
+    return rows
+      .map((row) => normalizeRow(row))
+      .filter((row): row is MemoryRecord => row !== null);
+  }
+
   private async ensureIndexes(): Promise<void> {
     const table = this.requireTable();
@@ -846,3 +993,11 @@ function extractRecalledProjects(metadataJson: string): Set<string> {
   }
   return new Set();
 }
+
+function parseMetadata(metadataJson: string): Record<string, unknown> {
+  try {
+    return JSON.parse(metadataJson) as Record<string, unknown>;
+  } catch {
+    return {};
+  }
+}
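One subtlety in the filtered `readByScopes` above: the merged-row exclusion is a substring match on the serialized metadata, so it relies on `JSON.stringify` emitting the compact `"status":"merged"` byte sequence, which the writes in `consolidateDuplicates` do (and any metadata that happened to contain that exact sequence would also be excluded). A quick check of the assumption:

```ts
const tombstone = JSON.stringify({ status: "merged", mergedInto: "mem-123" });
console.log(tombstone);                               // {"status":"merged","mergedInto":"mem-123"}
console.log(tombstone.includes('"status":"merged"')); // true: matched by the NOT LIKE filter
```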
diff --git a/src/types.ts b/src/types.ts
index 0b75d36..be2b592 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -32,7 +32,9 @@ export type CaptureSkipReason =
   | "no-positive-signal"
   | "initialization-unavailable"
   | "embedding-unavailable"
-  | "empty-embedding";
+  | "empty-embedding"
+  | "duplicate-similarity"
+  | "duplicate-exact";

 export type FeedbackType = "missing" | "wrong" | "useful";
@@ -92,12 +94,19 @@ export interface SummarizationConfig {
   preserveImports: boolean;
 }

+export interface DedupConfig {
+  enabled: boolean;
+  writeThreshold: number;
+  consolidateThreshold: number;
+}
+
 export interface MemoryRuntimeConfig {
   provider: string;
   dbPath: string;
   embedding: EmbeddingConfig;
   retrieval: RetrievalConfig;
   injection: InjectionConfig;
+  dedup: DedupConfig;
   includeGlobalScope: boolean;
   globalDetectionThreshold: number;
   globalDiscountFactor: number;
@@ -215,4 +224,8 @@ export interface EffectivenessSummary {
     falsePositiveRate: number;
     falseNegativeRate: number;
   };
+  duplicates: {
+    flaggedCount: number;
+    consolidatedCount: number;
+  };
 }
diff --git a/test/config.test.ts b/test/config.test.ts
new file mode 100644
index 0000000..1e14d53
--- /dev/null
+++ b/test/config.test.ts
@@ -0,0 +1,57 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { resolveMemoryConfig } from "../src/config.js";
+
+async function withPatchedEnv<T>(values: Record<string, string>, run: () => T): Promise<T> {
+  const oldValues: Record<string, string | undefined> = {};
+  for (const key of Object.keys(values)) {
+    oldValues[key] = process.env[key];
+    process.env[key] = values[key];
+  }
+  try {
+    return run();
+  } finally {
+    for (const key of Object.keys(values)) {
+      if (oldValues[key] === undefined) {
+        delete process.env[key];
+      } else {
+        process.env[key] = oldValues[key];
+      }
+    }
+  }
+}
+
+test("dedup config: default thresholds are 0.92 (write) and 0.95 (consolidate) when config is empty", async () => {
+  await withPatchedEnv({ LANCEDB_OPENCODE_PRO_SKIP_SIDECAR: "true" }, () => {
+    const config = resolveMemoryConfig({}, undefined);
+    assert.equal(config.dedup.enabled, true);
+    assert.equal(config.dedup.writeThreshold, 0.92);
+    assert.equal(config.dedup.consolidateThreshold, 0.95);
+  });
+});
+
+test("dedup config: env vars override sidecar config", async () => {
+  await withPatchedEnv({
+    LANCEDB_OPENCODE_PRO_SKIP_SIDECAR: "true",
+    LANCEDB_OPENCODE_PRO_DEDUP_ENABLED: "false",
+    LANCEDB_OPENCODE_PRO_DEDUP_WRITE_THRESHOLD: "0.85",
+    LANCEDB_OPENCODE_PRO_DEDUP_CONSOLIDATE_THRESHOLD: "0.99",
+  }, () => {
+    const config = resolveMemoryConfig({}, undefined);
+    assert.equal(config.dedup.enabled, false);
+    assert.equal(config.dedup.writeThreshold, 0.85);
+    assert.equal(config.dedup.consolidateThreshold, 0.99);
+  });
+});
+
+test("dedup config: invalid threshold values are clamped to [0.0, 1.0]", async () => {
+  await withPatchedEnv({
+    LANCEDB_OPENCODE_PRO_SKIP_SIDECAR: "true",
+    LANCEDB_OPENCODE_PRO_DEDUP_WRITE_THRESHOLD: "1.5",
+    LANCEDB_OPENCODE_PRO_DEDUP_CONSOLIDATE_THRESHOLD: "-0.5",
+  }, () => {
+    const config = resolveMemoryConfig({}, undefined);
+    assert.equal(config.dedup.writeThreshold, 1.0);
+    assert.equal(config.dedup.consolidateThreshold, 0.0);
+  });
+});
diff --git a/test/foundation/foundation.test.ts b/test/foundation/foundation.test.ts
index ccbd161..c3c1fda 100644
--- a/test/foundation/foundation.test.ts
+++ b/test/foundation/foundation.test.ts
@@ -473,3 +473,256 @@ test("recency and importance multipliers influence ranking order", async () => {
     await cleanupDbPath(dbPath);
   }
 });
+
+// ─────────────────────────────────────────────
+// Dedup — Consolidation (§6)
+// ─────────────────────────────────────────────
+
+test("consolidateDuplicates returns zeros when scope is empty", async () => {
+  const { store, dbPath } = await createTestStore();
+  try {
+    const result = await store.consolidateDuplicates("project:empty-scope", 0.95);
+    assert.equal(result.mergedPairs, 0);
+    assert.equal(result.updatedRecords, 0);
+    assert.equal(result.skippedRecords, 0);
+  } finally {
+    await cleanupDbPath(dbPath);
+  }
+});
+
+test("consolidateDuplicates merges two similar memories (cosine >= 0.95), older deleted, newer retains mergedFrom", async () => {
+  const { store, dbPath } = await createTestStore();
+  try {
+    const scope = "project:dedup-test";
+    const now = Date.now();
+    const sharedText = "gateway 502 bad gateway resolved by restarting nginx";
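+    // Identical vectors make the cosine exactly 1.0, which clears the 0.95
+    // consolidation threshold, so this pair is guaranteed to merge.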
+ const vec = createVector(384, 0.5); + await store.put(createTestRecord({ id: "mem-older", scope, text: sharedText, vector: vec, timestamp: now - 10_000, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + await store.put(createTestRecord({ id: "mem-newer", scope, text: sharedText, vector: vec, timestamp: now, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + const result = await store.consolidateDuplicates(scope, 0.95); + assert.equal(result.mergedPairs, 1); + assert.equal(result.updatedRecords, 2); + assert.equal(result.skippedRecords, 0); + const listed = await store.list(scope, 10); + const ids = listed.map((r) => r.id); + assert.ok(ids.includes("mem-newer"), "newer record should still be present"); + assert.ok(!ids.includes("mem-older"), "older merged record should not appear in normal list"); + const newerRecord = listed.find((r) => r.id === "mem-newer")!; + const newerMeta = JSON.parse(newerRecord.metadataJson); + assert.equal(newerMeta.mergedFrom, "mem-older"); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("consolidateDuplicates skips records recalled within last 5 minutes", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:recall-guard"; + const now = Date.now(); + const vec = createVector(384, 0.5); + await store.put(createTestRecord({ id: "mem-recently-recalled", scope, text: "recently recalled memory", vector: vec, timestamp: now - 10_000, lastRecalled: now - 60_000, metadataJson: JSON.stringify({}) })); + await store.put(createTestRecord({ id: "mem-not-recalled", scope, text: "not recalled memory", vector: vec, timestamp: now, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + const result = await store.consolidateDuplicates(scope, 0.95); + assert.equal(result.skippedRecords, 1); + assert.equal(result.mergedPairs, 0); + assert.equal(result.updatedRecords, 0); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("consolidateDuplicates is idempotent (second call returns 0 merged)", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:idempotent"; + const now = Date.now(); + const vec = createVector(384, 0.5); + await store.put(createTestRecord({ id: "mem-old", scope, text: "duplicate content", vector: vec, timestamp: now - 10_000, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + await store.put(createTestRecord({ id: "mem-new", scope, text: "duplicate content", vector: vec, timestamp: now, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + const first = await store.consolidateDuplicates(scope, 0.95); + assert.equal(first.mergedPairs, 1); + const second = await store.consolidateDuplicates(scope, 0.95); + assert.equal(second.mergedPairs, 0); + assert.equal(second.updatedRecords, 0); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("summarizeEvents returns duplicates.flaggedCount from flagged memory records", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:dedup-flags"; + const now = Date.now(); + await store.put(createTestRecord({ id: "mem-normal", scope, text: "normal memory", vector: createVector(384, 0.1), timestamp: now - 5_000, metadataJson: JSON.stringify({ isPotentialDuplicate: false }) })); + await store.put(createTestRecord({ id: "mem-flagged", scope, text: "similar to above", vector: createVector(384, 0.1), timestamp: now, metadataJson: JSON.stringify({ isPotentialDuplicate: true, duplicateOf: "mem-normal" }) })); + const summary = await 
store.summarizeEvents(scope, false); + assert.equal(summary.duplicates.flaggedCount, 1); + assert.equal(summary.duplicates.consolidatedCount, 0); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("summarizeEvents returns duplicates.consolidatedCount from merged memory records", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:dedup-merged"; + const now = Date.now(); + const vec = createVector(384, 0.5); + await store.put(createTestRecord({ id: "mem-old", scope, text: "to be merged", vector: vec, timestamp: now - 10_000, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + await store.put(createTestRecord({ id: "mem-new", scope, text: "to be merged", vector: vec, timestamp: now, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + await store.consolidateDuplicates(scope, 0.95); + const summary = await store.summarizeEvents(scope, false); + assert.equal(summary.duplicates.consolidatedCount, 1); + assert.equal(summary.duplicates.flaggedCount, 0); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("pruneScope keeps newest flagged duplicate when maxEntries forces deletion", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:prune-flagged"; + const now = Date.now(); + for (let i = 0; i < 3; i++) { + await store.put(createTestRecord({ id: `flagged-${i}`, scope, text: `flagged content ${i}`, vector: createVector(384, i * 0.1), timestamp: now - i * 10_000, metadataJson: JSON.stringify({ isPotentialDuplicate: true }) })); + } + const deleted = await store.pruneScope(scope, 1); + assert.equal(deleted, 2); + const remaining = await store.list(scope, 10); + assert.equal(remaining.length, 1); + assert.equal(remaining[0].id, "flagged-0"); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("pruneScope deletes unflagged records only after all flagged records are removed", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:prune-unflagged"; + const now = Date.now(); + await store.put(createTestRecord({ id: "flagged-old", scope, text: "old flagged", vector: createVector(384, 0.1), timestamp: now - 20_000, metadataJson: JSON.stringify({ isPotentialDuplicate: true }) })); + await store.put(createTestRecord({ id: "unflagged-new", scope, text: "new unflagged", vector: createVector(384, 0.9), timestamp: now, metadataJson: JSON.stringify({ isPotentialDuplicate: false }) })); + await store.put(createTestRecord({ id: "flagged-newer", scope, text: "newer flagged", vector: createVector(384, 0.2), timestamp: now - 10_000, metadataJson: JSON.stringify({ isPotentialDuplicate: true }) })); + const deleted = await store.pruneScope(scope, 2); + assert.equal(deleted, 1); + const remaining = await store.list(scope, 10); + const ids = remaining.map((r) => r.id); + assert.ok(ids.includes("unflagged-new"), "unflagged newest should be kept"); + assert.ok(!ids.includes("flagged-old"), "oldest flagged should be deleted first"); + assert.ok(ids.includes("flagged-newer"), "newer flagged should be kept (newest among flagged)"); + } finally { + await cleanupDbPath(dbPath); + } +}); + +// ───────────────────────────────────────────── +// Dedup — Search Display (§8) +// ───────────────────────────────────────────── + +test("store.search returns records with isPotentialDuplicate=true for duplicate marker display", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:search-display"; + const now = 
Date.now(); + await store.put(createTestRecord({ id: "mem-normal", scope, text: "normal content abc", vector: createVector(384, 0.3), timestamp: now - 5_000, metadataJson: JSON.stringify({ isPotentialDuplicate: false }) })); + await store.put(createTestRecord({ id: "mem-flagged", scope, text: "flagged content abc", vector: createVector(384, 0.3), timestamp: now, metadataJson: JSON.stringify({ isPotentialDuplicate: true, duplicateOf: "mem-normal" }) })); + const results = await store.search({ query: "content abc", queryVector: createVector(384, 0.3), scopes: [scope], limit: 10, vectorWeight: 1.0, bm25Weight: 0.0, minScore: 0.0, rrfK: 60, recencyBoost: false, globalDiscountFactor: 1.0 }); + const flaggedRecord = results.find((r) => r.record.id === "mem-flagged"); + assert.ok(flaggedRecord, "flagged record should be returned by search"); + const meta = JSON.parse(flaggedRecord!.record.metadataJson); + assert.equal(meta.isPotentialDuplicate, true); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("store.search returns records with isPotentialDuplicate=false without marker", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:search-no-marker"; + const now = Date.now(); + await store.put(createTestRecord({ id: "mem-unflagged", scope, text: "unflagged content xyz", vector: createVector(384, 0.4), timestamp: now, metadataJson: JSON.stringify({ isPotentialDuplicate: false }) })); + const results = await store.search({ query: "content xyz", queryVector: createVector(384, 0.4), scopes: [scope], limit: 10, vectorWeight: 1.0, bm25Weight: 0.0, minScore: 0.0, rrfK: 60, recencyBoost: false, globalDiscountFactor: 1.0 }); + const unflaggedRecord = results.find((r) => r.record.id === "mem-unflagged"); + assert.ok(unflaggedRecord, "unflagged record should be returned"); + const meta = JSON.parse(unflaggedRecord!.record.metadataJson); + assert.notEqual(meta.isPotentialDuplicate, true); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("store.search excludes records with status=merged", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:search-merged"; + const now = Date.now(); + await store.put(createTestRecord({ id: "mem-active", scope, text: "active content def", vector: createVector(384, 0.5), timestamp: now, metadataJson: JSON.stringify({}) })); + await store.put(createTestRecord({ id: "mem-merged", scope, text: "merged content def", vector: createVector(384, 0.5), timestamp: now - 3_000, metadataJson: JSON.stringify({ status: "merged", mergedInto: "mem-active" }) })); + const results = await store.search({ query: "content def", queryVector: createVector(384, 0.5), scopes: [scope], limit: 10, vectorWeight: 1.0, bm25Weight: 0.0, minScore: 0.0, rrfK: 60, recencyBoost: false, globalDiscountFactor: 1.0 }); + const ids = results.map((r) => r.record.id); + assert.ok(ids.includes("mem-active"), "active record should be returned"); + assert.ok(!ids.includes("mem-merged"), "merged record should not be returned"); + } finally { + await cleanupDbPath(dbPath); + } +}); + +// ───────────────────────────────────────────── +// Dedup — Capture Flagging Integration (§7) +// ───────────────────────────────────────────── + +test("flushAutoCapture similarity check uses vectorWeight=1.0 bm25Weight=0.0 (cosine-only)", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:capture-similarity"; + const now = Date.now(); + const sharedText = "nginx 502 bad gateway 
resolved by restarting the server"; + const vec = createVector(384, 0.5); + await store.put(createTestRecord({ id: "mem-first", scope, text: sharedText, vector: vec, timestamp: now - 5_000, metadataJson: JSON.stringify({}) })); + const results = await store.search({ query: sharedText, queryVector: vec, scopes: [scope], limit: 5, vectorWeight: 1.0, bm25Weight: 0.0, minScore: 0.0, rrfK: 60, recencyBoost: false, globalDiscountFactor: 1.0 }); + assert.ok(results.length >= 1, "should find the first memory"); + // score includes importanceFactor (1 + 0.4 * 0.5 = 1.2), use vectorScore for raw cosine + const topVectorScore = results[0]!.vectorScore; + assert.ok(topVectorScore >= 0.99, `identical vectors should have cosine ~1.0, got ${topVectorScore}`); + assert.ok(Math.abs(topVectorScore - 1.0) < 0.0001, `identical vectors should have raw cosine ≈ 1.0, got ${topVectorScore}`); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("consolidateDuplicates merges two records with cosine=1.0 (identical vectors)", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:consolidate-identical"; + const now = Date.now(); + const sharedText = "same content for duplicate detection"; + const vec = createVector(384, 0.5); + await store.put(createTestRecord({ id: "mem-old", scope, text: sharedText, vector: vec, timestamp: now - 10_000, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + await store.put(createTestRecord({ id: "mem-new", scope, text: sharedText, vector: vec, timestamp: now, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + const result = await store.consolidateDuplicates(scope, 0.95); + assert.equal(result.mergedPairs, 1, "should merge the identical pair"); + assert.equal(result.updatedRecords, 2, "should update both records"); + } finally { + await cleanupDbPath(dbPath); + } +}); + +test("consolidateDuplicates does not merge records with very low cosine similarity", async () => { + const { store, dbPath } = await createTestStore(); + try { + const scope = "project:no-merge-dissimilar"; + const now = Date.now(); + await store.put(createTestRecord({ id: "mem-a", scope, text: "nginx 502 error resolved", vector: createVector(384, 0.1), timestamp: now - 5_000, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + await store.put(createTestRecord({ id: "mem-b", scope, text: "postgres pool exhausted fixed", vector: createVector(384, -0.1), timestamp: now, lastRecalled: 0, metadataJson: JSON.stringify({}) })); + const result = await store.consolidateDuplicates(scope, 0.92); + assert.equal(result.mergedPairs, 0, "should not merge records with very low cosine similarity"); + } finally { + await cleanupDbPath(dbPath); + } +}); diff --git a/test/regression/plugin.test.ts b/test/regression/plugin.test.ts index 60385e4..25885cf 100644 --- a/test/regression/plugin.test.ts +++ b/test/regression/plugin.test.ts @@ -2,10 +2,13 @@ import assert from "node:assert/strict"; import test from "node:test"; import { resolveMemoryConfig } from "../../src/config.js"; import plugin from "../../src/index.js"; -import { cleanupDbPath, createScopedRecords, createTempDbPath, createTestStore, createVector, seedLegacyEffectivenessEventsTable } from "../setup.js"; +import { deriveProjectScope } from "../../src/scope.js"; +import { cleanupDbPath, createScopedRecords, createTempDbPath, createTestRecord, createTestStore, createVector, seedLegacyEffectivenessEventsTable } from "../setup.js"; const SESSION_ID = "sess-test-001"; -const WORKTREE = 
"/workspace/project-under-test"; +// Use workspace path (Docker) or real project path (host) so deriveProjectScope() returns consistent scope +const WORKTREE = "/workspace"; +const TEST_SCOPE = deriveProjectScope(WORKTREE); type MessagePart = { type: "text"; text: string }; type SessionMessage = { info: { role: string }; parts: MessagePart[] }; @@ -100,6 +103,11 @@ async function createPluginHarness(options?: { bm25Weight: 0.3, minScore: 0.01, }, + dedup: { + enabled: true, + writeThreshold: 0.92, + consolidateThreshold: 0.95, + }, includeGlobalScope: true, minCaptureChars: options?.minCaptureChars ?? 30, maxEntriesPerScope: options?.maxEntriesPerScope ?? 200, @@ -873,3 +881,81 @@ test("memory_port_plan avoids reserved ports and upserts reservation records", a await harness.cleanup(); } }); + +test("memory_consolidate returns error when confirm !== true", async () => { + const harness = await createPluginHarness(); + try { + const result = await withPatchedFetch(() => + harness.toolHooks.memory_consolidate.execute({ scope: "project:test", confirm: false }, harness.context), + ); + assert.match(result, /confirm.*true/); + } finally { + await harness.cleanup(); + } +}); + +test("memory_consolidate returns metrics when confirm === true", async () => { + const harness = await createPluginHarness(); + try { + const result = await withPatchedFetch(() => + harness.toolHooks.memory_consolidate.execute({ scope: "project:test", confirm: true }, harness.context), + ); + const parsed = JSON.parse(result) as { scope: string; mergedPairs: number; updatedRecords: number; skippedRecords: number }; + assert.equal(parsed.scope, "project:test"); + assert.equal(typeof parsed.mergedPairs, "number"); + assert.equal(typeof parsed.updatedRecords, "number"); + assert.equal(typeof parsed.skippedRecords, "number"); + } finally { + await harness.cleanup(); + } +}); + +test("second capture with >0.92 similarity to first is written with isPotentialDuplicate=true and gets merged", async () => { + const harness = await createPluginHarness(); + try { + const text = "nginx 502 bad gateway error fixed by restarting the server and confirming upstream health checks"; + await harness.capture(text); + await harness.capture(text); + const result = await withPatchedFetch(() => + harness.toolHooks.memory_consolidate.execute({ scope: TEST_SCOPE, confirm: true }, harness.context), + ); + const parsed = JSON.parse(result) as { mergedPairs: number; updatedRecords: number }; + assert.equal(parsed.mergedPairs, 1, "should merge one pair of duplicate memories"); + assert.equal(parsed.updatedRecords, 2, "should update both records (older merged, newer has mergedFrom)"); + } finally { + await harness.cleanup(); + } +}); + +test("second capture with <0.92 similarity to first is written with isPotentialDuplicate=false", async () => { + const harness = await createPluginHarness(); + try { + const firstText = "nginx 502 error resolved by restarting the server and confirming upstream checks are healthy"; + const secondText = "postgres connection pool exhausted error fixed by increasing max_connections and restarting the database service"; + await harness.capture(firstText); + await harness.capture(secondText); + const searchResult = await withPatchedFetch(() => + harness.toolHooks.memory_search.execute({ query: secondText, limit: 5 }, harness.context), + ); + assert.match(searchResult, new RegExp(secondText.substring(0, 20))); + } finally { + await harness.cleanup(); + } +}); + +test("dedup config: when enabled=true (default), second identical 
capture is flagged and merged", async () => { + const harness = await createPluginHarness(); + try { + const identicalText = "server error resolved by restarting nginx service and confirming upstream health"; + await harness.capture(identicalText); + await harness.capture(identicalText); + const result = await withPatchedFetch(() => + harness.toolHooks.memory_consolidate.execute({ scope: TEST_SCOPE, confirm: true }, harness.context), + ); + const parsed = JSON.parse(result) as { mergedPairs: number; updatedRecords: number }; + assert.equal(parsed.mergedPairs, 1, "identical texts should be detected as duplicates and merged"); + assert.equal(parsed.updatedRecords, 2); + } finally { + await harness.cleanup(); + } +}); diff --git a/test/setup.ts b/test/setup.ts index 43739c4..e789707 100644 --- a/test/setup.ts +++ b/test/setup.ts @@ -238,6 +238,10 @@ export function createEffectivenessSummary(overrides: Partial