diff --git a/.opencode/skills/backlog-complete-merge/SKILL.md b/.opencode/skills/backlog-complete-merge/SKILL.md new file mode 100644 index 0000000..fcbb6c0 --- /dev/null +++ b/.opencode/skills/backlog-complete-merge/SKILL.md @@ -0,0 +1,426 @@ +--- +name: backlog-complete-merge +description: Verify BL completion, run tests, archive change, resolve conflicts, and create PR. Use when a developer believes a backlog item implementation is complete. +license: MIT +compatibility: Requires openspec CLI, git, gh, docker compose. +metadata: + author: tryweb + version: "1.0" + generatedBy: "manual" +--- + +# Backlog Complete → Merge Workflow + +Use this skill when a developer believes a backlog item (BL) implementation is complete and ready to be merged to main. + +This skill ensures: +- Current branch is a valid BL development branch (not main) +- All work is verified via /opsx-verify (openspec verify-change) + BL requirements +- All tests pass (unit + E2E) +- Documentation is complete +- Change is archived +- No conflicts with remote main +- PR is created + +--- + +## Phase 0 — Branch Validation (CRITICAL) + +**Goal**: Ensure we're on a valid BL development branch, not main. 
+ +```bash +# Get current branch +git rev-parse --abbrev-ref HEAD + +# Get remote tracking info +git rev-parse --abbrev-ref --symbolic-full-name @{upstream} +``` + +**Pass conditions**: +- Current branch starts with `feat/`, `fix/`, or `chore/` (BL development branch) +- Branch has upstream set + +**Failure handling**: + +| Failure mode | Remediation | +|---|---| +| On main branch | Switch to your BL branch: `git checkout <branch-name>` | +| No upstream | Push branch: `git push -u origin <branch-name>` | +| Unknown branch | Confirm this is your BL branch, or create from main | + +```bash +# If on main, find your branch +git branch -a | grep -E "feat/|fix/|chore/" + +# Switch to your BL branch +git checkout feat/<your-branch> +``` + +--- + +## Phase 1 — OpenSpec Verification + +**Goal**: Verify implementation matches change artifacts using /opsx-verify. + +Run verification via the /opsx-verify command (internally uses `openspec verify-change`): + +```bash +openspec status --change "<change-id>" --json +openspec verify-change "<change-id>" +``` + +Or simply use: + +``` +/opsx-verify +``` + +### If verification passes: +Proceed to Phase 2. + +### If verification fails: +- Review CRITICAL issues and fix them +- Re-run verification until all CRITICAL issues are resolved +- Only proceed when verification is clean or only contains SUGGESTIONS + +--- + +## Phase 2 — Test Execution + +**Goal**: Confirm all tests pass. + +### Run unit tests + +```bash +docker compose build --no-cache && docker compose up -d +docker compose exec opencode-dev npm run test:unit +``` + +**Pass conditions**: All unit tests exit 0. + +### Run E2E tests (if applicable) + +Check if E2E tests exist for this change: + +```bash +ls -la test/e2e/ 2>/dev/null || echo "No e2e tests" +``` + +If E2E tests exist: + +```bash +docker compose exec opencode-dev npm run test:e2e +``` + +**Pass conditions**: All E2E tests exit 0. 
+ +### If tests fail: +- Fix failing tests +- Re-run tests until all pass +- Never proceed with failing tests + +--- + +## Phase 3 — Documentation Check + +**Goal**: Verify all documentation is complete and updated. + +Check for documentation that may need updating: + +```bash +# Check if changelog needs update +git diff main..HEAD -- CHANGELOG.md + +# Check for new README files or updates +git diff main..HEAD -- "*.md" + +# Check if there are new config options +git diff main..HEAD -- "*.json" -- "*.yaml" -- "*.yml" +``` + +**Pass conditions**: +- Changelog updated if user-facing changes exist +- Any new documentation is added +- API changes documented if applicable + +### If documentation is incomplete: +- Add/update documentation +- Do NOT commit yet — wait until after archive +- Re-run verification if needed + +--- + +## Phase 4 — Archive Change + +**Goal**: Archive the OpenSpec change. + +Archive via `/opsx-archive` command (internally uses `openspec archive-change`): + +```bash +openspec archive-change "" +``` + +Or simply use: + +``` +/opsx-archive +``` + +This moves the change to archive with date prefix. + +**Pass conditions**: +- Change is archived to `openspec/changes/archive/YYYY-MM-DD-/` + +--- + +## Phase 4.5 — Backlog Status Update (CRITICAL) + +**Goal**: Update `docs/backlog.md` and `docs/roadmap.md` after successful archive. + +**IMPORTANT**: This step is done AFTER archiving to ensure consistency — if archive fails or needs rollback, backlog status remains unchanged. + +### Step 4.5.1 — Identify the BL ID + +From the change ID, determine which BL (Backlog Item) this corresponds to: + +```bash +# Search for the change ID in backlog to find BL +rg "" docs/backlog.md +``` + +If multiple BLs match, note all of them. + +### Step 4.5.2 — Update backlog.md status + +For each BL identified: +1. Find the BL row in `docs/backlog.md` +2. Update `Status` column to `done` +3. 
Ensure `OpenSpec Change ID` and `Spec Path` are filled + +Example (manual edit): +``` +| BL-014 | Task episode capture | P0 | done | 2026-03-28-add-task-episode-learning | openspec/specs/task-episode-learning/ | ... +``` + +### Step 4.5.3 — Check roadmap.md for related checkboxes + +If `docs/roadmap.md` has checkboxes for this feature, mark them as `[x] done`: + +```bash +# Check for related checkboxes +rg "BL-|" docs/roadmap.md +``` + +### Step 4.5.4 — Do NOT commit yet + +Backlog updates will be committed together with implementation changes in the next phase. + +**Pass conditions**: +- `docs/backlog.md` has BL status changed to `done` +- `docs/roadmap.md` checkboxes updated if applicable + +--- + +## Phase 5 — Commit All Changes + +**Goal**: Commit all changes (implementation + archive + backlog updates) as one atomic operation. + +**IMPORTANT**: Commit all changes together to ensure atomic transaction and revert capability. + +```bash +# Ensure all implementation + archive + backlog changes are safely captured +git add -A +git commit -m "chore: finalize implementation and update backlog" +``` + +**Pass conditions**: +- Change is archived +- Backlog status is updated +- All changes are committed in a single atomic commit +- Commit message clearly identifies the change + +--- + +## Phase 6 — Conflict Detection and Resolution + +**Goal**: Check for conflicts with remote main and resolve them. + +### Step 6.1 — Fetch and compare + +```bash +# Fetch latest from origin +git fetch origin --prune + +# Check for diverged commits +git rev-list --left-right main...origin/main --count + +# See what changed on main since branch +git log main..origin/main --oneline +``` + +### Step 6.2 — If conflicts detected + +If there are commits on main since branch creation: + +```bash +# Rebase onto latest main +git rebase origin/main +``` + +**If rebase conflicts occur**: +1. Resolve conflicts in affected files +2. Mark as resolved: `git add ` +3. 
Continue rebase: `git rebase --continue` +4. **RE-VERIFY everything** (Phase 1-3) + +**After successful rebase**: +- Force push: `git push --force-with-lease origin <branch-name>` +- Re-run verification (openspec verify-change) +- Re-run tests +- Only proceed if all pass + +### Step 6.3 — If no conflicts + +If main hasn't diverged or rebase was clean: + +```bash +# Push the branch (if not already pushed) +git push origin <branch-name> +``` + +--- + +## Phase 7 — Create PR + +**Goal**: Create a PR to merge the branch to main. + +```bash +# Get branch name +BRANCH=$(git rev-parse --abbrev-ref HEAD) + +# Create PR +gh pr create \ + --title "feat: implement <change-id>" \ + --body "## Summary +- BL implementation complete +- All tests passing +- Documentation updated + +## Verification +- openspec verify-change: passed +- Unit tests: passed +- E2E tests: passed (if applicable) + +## Changes +$(git diff main --stat)" \ + --base main \ + --head "${BRANCH}" +``` + +**On success**: PR is created. + +--- + +## Phase 8 — Post-PR Handling + +### If PR requires changes: + +The developer will continue working on the same branch. + +```bash +# Make changes, then: +git add -A +git commit -m "fix: address PR feedback" +git push origin <branch-name> +``` + +The existing PR will update automatically. 
+ +### If PR is merged: + +```bash +# Verify PR is merged +gh pr view --state merged + +# Delete local branch +git checkout main +git pull origin main +git branch -d <branch-name> + +# Prune remote tracking +git fetch --prune +``` + +--- + +## Quick Reference — All Commands + +```bash +# Phase 0 — branch validation +git rev-parse --abbrev-ref HEAD +git rev-parse --abbrev-ref --symbolic-full-name @{upstream} + +# Phase 1 — verification +openspec verify-change "<change-id>" + +# Phase 2 — tests +docker compose build --no-cache && docker compose up -d +docker compose exec opencode-dev npm run test:unit +docker compose exec opencode-dev npm run test:e2e + +# Phase 3 — documentation check +git diff main..HEAD -- "*.md" + +# Phase 4 — archive +openspec archive-change "<change-id>" + +# Phase 4.5 — backlog status update (after archive) +rg "<change-id>" docs/backlog.md +# Edit docs/backlog.md to change Status to 'done' +rg "BL-" docs/roadmap.md +# Update checkboxes in docs/roadmap.md if applicable + +# Phase 5 — commit all together +git add -A && git commit -m "chore: finalize implementation and update backlog" + +# Phase 6 — rebase +git fetch origin --prune +git rebase origin/main + +# Phase 7 — PR +gh pr create --title "feat: implement <change-id>" --base main --head <branch-name> + +# Phase 8 — cleanup (after merge) +git checkout main && git pull origin main +git branch -d <branch-name> +git fetch --prune +``` + +--- + +## Definition of Done (DoD) + +This workflow is complete only if all are true: + +1. ✅ Current branch is a valid BL development branch (not main) +2. ✅ /opsx-verify passes (no CRITICAL issues) +3. ✅ All unit tests pass +4. ✅ All E2E tests pass (if applicable) +5. ✅ Documentation is complete (including changelog, README) +6. ✅ Backlog status updated in docs/backlog.md and docs/roadmap.md +7. ✅ Change is archived via /opsx-archive +8. ✅ All changes are committed +9. ✅ No conflicts with origin/main (or conflicts resolved and re-verified) +10. ✅ PR is created +11. 
✅ After merge: local branch is deleted + +--- + +## Guardrails + +- Never skip verification even if "it looks done" +- Never proceed with failing tests +- Never skip backlog status update — always update docs/backlog.md and docs/roadmap.md after archiving +- Never create PR with unresolved conflicts +- After resolving conflicts, always re-verify +- Never delete local branch before confirming PR is merged \ No newline at end of file diff --git a/Dockerfile.opencode b/Dockerfile.opencode index d3e8c1c..de0d92c 100644 --- a/Dockerfile.opencode +++ b/Dockerfile.opencode @@ -7,6 +7,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ ca-certificates \ git \ + ripgrep \ && rm -rf /var/lib/apt/lists/* RUN curl -fsSL https://opencode.ai/install | bash #-s -- --version 1.2.20 diff --git a/docs/backlog.md b/docs/backlog.md index e0af3aa..726194b 100644 --- a/docs/backlog.md +++ b/docs/backlog.md @@ -107,7 +107,7 @@ | BL-042 | Store repository 職責分離 | P2 | planned | TBD | TBD | 將 `MemoryStore` 逐步拆為 `MemoryRepository` / `EventRepository` / `EpisodicTaskRepository`,由 provider 統一連線管理 [Surface: Plugin] | | BL-043 | Episodic 更新流程 DRY 化 | P1 | **done** | episodic-update-dry | `openspec/changes/episodic-update-dry/` | `addCommandToEpisode`、`addValidationOutcome`、`addSuccessPatterns`、`addRetryAttempt`、`addRecoveryStrategy` 以共用 updater 模板收斂 [Surface: Plugin] | | BL-044 | Duplicate consolidation 擴充性重構 | P1 | **done** | bl-044-duplicate-consolidation-ann-chunking | `openspec/changes/archive/2026-03-31-bl-044-duplicate-consolidation-ann-chunking/` | 以 ANN top-k / chunking 取代全表 O(N²) 比對,避免 `consolidateDuplicates` 在大 scope 阻塞 event loop [Surface: Plugin] | -| BL-045 | Scope cache 記憶體治理 | P1 | planned | TBD | TBD | `getCachedScopes` 避免全量 records/token/vector 常駐;導入 bounded/lazy/分段策略 [Surface: Plugin] | +| BL-045 | Scope cache 記憶體治理 | P1 | **done** | scope-cache-memory-governance | openspec/changes/scope-cache-memory-governance/ | `getCachedScopes` 避免全量 
records/token/vector 常駐;導入 bounded/lazy/分段策略 [Surface: Plugin] | | BL-046 | DB row runtime 型別驗證 | P1 | **done** | episodic-record-validation | `openspec/changes/episodic-record-validation/` | 降低 `as unknown as EpisodicTaskRecord` 風險;讀取後做 schema validation [Surface: Plugin + Test-infra] | | BL-047 | Embedding fallback 可觀測性補強 | P2 | planned | TBD | TBD | 目前多處 embed fallback 為 silent degrade;補 structured warning + metrics,不改壞容錯語義 [Surface: Plugin + Docs] | diff --git a/docs/roadmap.md b/docs/roadmap.md index 5566354..cae4f9b 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -405,7 +405,7 @@ OpenCode 要從「有長期記憶的工具」進化成「會累積團隊工作 11. Tool registration 模組化拆分(Surface: Plugin)→ BL-041 12. Episodic 更新流程 DRY 化(Surface: Plugin)→ BL-043 13. Duplicate consolidation 擴充性重構(Surface: Plugin)→ BL-044 ✅ DONE -14. Scope cache 記憶體治理(Surface: Plugin)→ BL-045 +14. Scope cache 記憶體治理(Surface: Plugin)→ BL-045 ✅ DONE 15. DB row runtime schema validation(Surface: Plugin + Test-infra)→ BL-046 ### P2 @@ -432,7 +432,7 @@ OpenCode 要從「有長期記憶的工具」進化成「會累積團隊工作 4. **Episodic 更新流程 DRY 化(BL-043) + DB row validation(BL-046)** - 幾乎不改產品行為,可先降低維護成本與型別風險。 -5. **Duplicate consolidation / cache 硬化(BL-044 ✅ DONE + BL-045 📝 PLANNED)** +5. 
**Duplicate consolidation / cache 硬化(BL-044 ✅ DONE + BL-045 ✅ DONE)** - 在資料量成長前先做防護,避免後續 plugin latency 突然劣化。 --- diff --git a/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/.openspec.yaml b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/.openspec.yaml new file mode 100644 index 0000000..8fb8631 --- /dev/null +++ b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-03-31 diff --git a/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/design.md b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/design.md new file mode 100644 index 0000000..a56a0ec --- /dev/null +++ b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/design.md @@ -0,0 +1,45 @@ +## Context + +`ScopeCache` (in `src/store.ts:32-37`) currently stores: +- `records: MemoryRecord[]` — full memory objects +- `tokenized: string[][]` — tokenized text arrays +- `idf: Map` — IDF weights +- `norms: Map` — vector norms + +The `scopeCache` Map (`src/store.ts:62`) grows unbounded as users query additional scopes. Used in `getCachedScopes` (`src/store.ts:1016`) for TF-IDF scoring during retrieval. 
+ +## Goals / Non-Goals + +**Goals:** +- Add configurable memory bounds (max scopes, max records) +- Implement LRU eviction when bounds exceeded +- Provide cache stats (hits, misses, evictions) for observability +- Graceful fallback to non-cached computation + +**Non-Goals:** +- Not exposing cache as user-facing API/tool +- Not changing retrieval semantics (same results) +- Not adding persistence layer for cache + +## Decisions + +| Decision | Choice | Why | Trade-off | +|---|---|---|---| +| Eviction policy | LRU (Least Recently Used) | Simple, proven, works well for temporal access patterns | May evict frequently accessed scope if not recently used | +| Bound type | Configurable max scopes + max records per scope | Allows fine-grained control per use case | Requires configuration tuning | +| Cache stats | Internal API (not plugin tool) | Lower blast radius; can be extended later | No direct user visibility | +| Fallback behavior | On-demand recomputation | Preserves correctness; no data loss | Slight latency on cache miss | + +## Risks / Trade-offs + +- **[Risk]** Large scope causes memory spike during initial load → **[Mitigation]** Add max records per scope bound +- **[Risk]** Too aggressive eviction reduces cache hit rate → **[Mitigation]** Default to generous bounds; allow tuning +- **[Risk]** Cache stats add overhead → **[Mitigation]** Use lazy counters, only compute on explicit query + +## Migration Plan + +1. Add cache config interface with defaults (maxScopes: 10, maxRecordsPerScope: 1000) +2. Implement LRU tracking (access timestamp or order) +3. Add eviction logic in `getCachedScopes` before adding new entry +4. Add stats object with hit/miss/eviction counters +5. 
Add unit tests for eviction and bounds \ No newline at end of file diff --git a/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/proposal.md b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/proposal.md new file mode 100644 index 0000000..4179e1f --- /dev/null +++ b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/proposal.md @@ -0,0 +1,26 @@ +## Why + +The `ScopeCache` in `MemoryStore` (`src/store.ts`) currently stores complete records, tokenized text, IDF weights, and vector norms for all queried scopes without any memory bounds or eviction policy. As users work with large or many scopes, this cache grows unbounded, risking process memory exhaustion and degraded performance. + +## What Changes + +- Add configurable memory bounds to `ScopeCache` (max scopes / max records per scope) +- Implement LRU eviction policy to remove least-recently-used scope entries when bounds exceeded +- Add lazy initialization option (load cache only when needed for scoring) +- Expose cache stats via internal API for observability (hits, misses, evictions) +- Add gated fallback to on-demand computation when cache is disabled/evicted + +### New Capabilities + +- `bounded-scope-cache`: Configurable max scopes and max records with LRU eviction +- `cache-stats-api`: Internal API exposing hit/miss/eviction metrics for observability + +### Modified Capabilities + +- None (this is a new internal optimization) + +## Impact + +- **Affected**: `src/store.ts` (ScopeCache interface, getCachedScopes, scopeCache Map) +- **Risk**: Low — adding bounded memory usage, existing behavior preserved when cache enabled +- **Release**: internal-only (not exposed as user tool or API) \ No newline at end of file diff --git a/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/specs/bounded-scope-cache/spec.md b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/specs/bounded-scope-cache/spec.md new file mode 100644 index 
0000000..dbef7cb --- /dev/null +++ b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/specs/bounded-scope-cache/spec.md @@ -0,0 +1,83 @@ +# bounded-scope-cache Specification + +## Purpose + +Add configurable memory bounds and LRU eviction to ScopeCache to prevent unbounded memory growth while maintaining cache hit performance. + +## Requirements + +### Requirement: Cache respects max scopes bound + +The system SHALL evict least-recently-used scope entries when the number of cached scopes exceeds the configured maximum. + +Runtime Surface: internal-api +Entrypoint: src/store.ts -> getCachedScopes() + +#### Scenario: Eviction triggered on scope limit +- **WHEN** `getCachedScopes` is called with a new scope and the cache already contains `maxScopes` entries +- **THEN** the least-recently-used scope entry is removed before adding the new scope +- **AND** eviction counter is incremented + +#### Scenario: Within bounds - no eviction +- **WHEN** cache size is below maxScopes +- **THEN** no eviction occurs + +### Requirement: Cache respects max records per scope bound + +The system SHALL limit the number of records stored per scope to the configured maximum. + +Runtime Surface: internal-api +Entrypoint: src/store.ts -> getCachedScopes() + +#### Scenario: Record limit enforced per scope +- **WHEN** a scope contains more than `maxRecordsPerScope` records +- **THEN** only the most recent `maxRecordsPerScope` records (by timestamp) are cached + +#### Scenario: Small scope - no truncation +- **WHEN** a scope has fewer than maxRecordsPerScope records +- **THEN** all records are cached + +### Requirement: Configurable bounds via constructor options + +The system SHALL accept cache configuration options to set maxScopes and maxRecordsPerScope. 
+ +Runtime Surface: internal-api +Entrypoint: src/store.ts -> MemoryStore constructor + +#### Scenario: Default bounds applied +- **WHEN** MemoryStore is created without explicit cache config +- **THEN** default maxScopes=10 and maxRecordsPerScope=1000 are used + +#### Scenario: Custom bounds applied +- **WHEN** MemoryStore is created with cacheConfig: { maxScopes: 5, maxRecordsPerScope: 500 } +- **THEN** those values are used for eviction decisions + +### Requirement: LRU tracking on cache access + +The system SHALL update access order on every cache read to enable accurate LRU eviction. + +Runtime Surface: internal-api +Entrypoint: src/store.ts -> getCachedScopes() + +#### Scenario: Recent access prevents eviction +- **WHEN** a scope is accessed via getCachedScopes +- **THEN** that scope's access timestamp is updated to current time + +#### Scenario: Least recently accessed evicted first +- **WHEN** eviction is needed +- **THEN** the scope with oldest lastAccessTimestamp is removed + +### Requirement: Fallback to non-cached computation + +The system SHALL compute results on-demand when cache is disabled or entries are evicted. 
+ +Runtime Surface: internal-api +Entrypoint: src/store.ts -> MemoryStore methods + +#### Scenario: Cache disabled returns fresh data +- **WHEN** cacheConfig.enabled is false +- **THEN** each call computes fresh data without caching + +#### Scenario: Evicted scope recomputed on next access +- **WHEN** a scope was evicted due to memory pressure +- **THEN** the next access recomputes and re-caches that scope \ No newline at end of file diff --git a/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/tasks.md b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/tasks.md new file mode 100644 index 0000000..0f1983a --- /dev/null +++ b/openspec/changes/archive/2026-03-31-scope-cache-memory-governance/tasks.md @@ -0,0 +1,28 @@ +# Implementation Tasks + +## Tasks + +- [x] Add ScopeCacheConfig interface to src/store.ts (maxScopes, maxRecordsPerScope, enabled) +- [x] Extend ScopeCache interface with lastAccessTimestamp field +- [x] Add CacheStats interface (hits, misses, evictions) to src/store.ts +- [x] Implement LRU eviction logic in getCachedScopes() +- [x] Add bounds enforcement (maxRecordsPerScope truncation by timestamp) +- [x] Add cache stats tracking (increment on hit/miss/eviction) +- [x] Update MemoryStore constructor to accept cacheConfig option +- [x] Add unit tests for LRU eviction behavior +- [x] Add unit tests for bounds enforcement +- [x] Add unit tests for cache stats + +## Verification Matrix + +| Requirement | Unit | Integration | E2E | Required to release | +|---|---|---|---|---| +| R1: Cache respects max scopes bound | ✅ | ❌ | n/a | yes | +| R2: Cache respects max records per scope bound | ✅ | ❌ | n/a | yes | +| R3: Configurable bounds via constructor options | ✅ | ❌ | n/a | yes | +| R4: LRU tracking on cache access | ✅ | ❌ | n/a | yes | +| R5: Fallback to non-cached computation | ✅ | ❌ | n/a | yes | + +## Changelog Wording Class + +internal-only — This change optimizes internal memory management without exposing new 
user-facing capabilities. \ No newline at end of file diff --git a/src/store.ts b/src/store.ts index c3241f4..175b978 100644 --- a/src/store.ts +++ b/src/store.ts @@ -29,13 +29,32 @@ const TABLE_NAME = "memories"; const EVENTS_TABLE_NAME = "effectiveness_events"; const EVENTS_SOURCE_COLUMN = "source"; -interface ScopeCache { +interface ScopeCacheConfig { + maxScopes: number; + maxRecordsPerScope: number; + enabled: boolean; +} + +interface ScopeCacheEntry { records: MemoryRecord[]; tokenized: string[][]; idf: Map; norms: Map; + lastAccessTimestamp: number; +} + +interface CacheStats { + hits: number; + misses: number; + evictions: number; } +const DEFAULT_CACHE_CONFIG: ScopeCacheConfig = { + maxScopes: 10, + maxRecordsPerScope: 1000, + enabled: true, +}; + // Exported for use by consolidateDuplicates export function storeFastCosine(a: number[], b: number[], normA: number, normB: number): number { if (a.length === 0 || b.length === 0 || a.length !== b.length) return 0; @@ -59,9 +78,13 @@ export class MemoryStore { fts: false, ftsError: "", }; - private scopeCache = new Map(); + private scopeCache = new Map(); + private cacheConfig: ScopeCacheConfig; + private cacheStats: CacheStats = { hits: 0, misses: 0, evictions: 0 }; - constructor(private readonly dbPath: string) {} + constructor(private readonly dbPath: string, cacheConfig?: Partial) { + this.cacheConfig = { ...DEFAULT_CACHE_CONFIG, ...cacheConfig }; + } async init(vectorDim: number): Promise { await mkdir(this.dbPath, { recursive: true }); @@ -1202,7 +1225,24 @@ export class MemoryStore { this.scopeCache.delete(scope); } - private async getCachedScopes(scopes: string[]): Promise { + private async getCachedScopes(scopes: string[]): Promise { + if (!this.cacheConfig.enabled) { + const allRecords: MemoryRecord[] = []; + const allTokenized: string[][] = []; + const allNorms = new Map(); + for (const scope of scopes) { + const records = await this.readByScopes([scope]); + allRecords.push(...records); + const 
tokenized = records.map((record) => tokenize(record.text)); + allTokenized.push(...tokenized); + for (const record of records) { + allNorms.set(record.id, vecNorm(record.vector)); + } + } + const idf = computeIdf(allTokenized); + return { records: allRecords, tokenized: allTokenized, idf, norms: allNorms, lastAccessTimestamp: Date.now() }; + } + const allRecords: MemoryRecord[] = []; const allTokenized: string[][] = []; const allNorms = new Map(); @@ -1211,14 +1251,23 @@ export class MemoryStore { let entry = this.scopeCache.get(scope); if (!entry) { const records = await this.readByScopes([scope]); - const tokenized = records.map((record) => tokenize(record.text)); + let sortedRecords = records; + if (records.length > this.cacheConfig.maxRecordsPerScope) { + sortedRecords = [...records].sort((a, b) => b.timestamp - a.timestamp).slice(0, this.cacheConfig.maxRecordsPerScope); + } + const tokenized = sortedRecords.map((record) => tokenize(record.text)); const idf = computeIdf(tokenized); const norms = new Map(); - for (const record of records) { + for (const record of sortedRecords) { norms.set(record.id, vecNorm(record.vector)); } - entry = { records, tokenized, idf, norms }; + entry = { records: sortedRecords, tokenized, idf, norms, lastAccessTimestamp: Date.now() }; this.scopeCache.set(scope, entry); + this.cacheStats.misses++; + this.enforceMaxScopes(); + } else { + entry.lastAccessTimestamp = Date.now(); + this.cacheStats.hits++; } allRecords.push(...entry.records); allTokenized.push(...entry.tokenized); @@ -1231,7 +1280,24 @@ export class MemoryStore { ? 
this.scopeCache.get(scopes[0])!.idf : computeIdf(allTokenized); - return { records: allRecords, tokenized: allTokenized, idf, norms: allNorms }; + return { records: allRecords, tokenized: allTokenized, idf, norms: allNorms, lastAccessTimestamp: Date.now() }; + } + + private enforceMaxScopes(): void { + while (this.scopeCache.size > this.cacheConfig.maxScopes) { + let lruScope: string | null = null; + let lruTimestamp = Infinity; + for (const [scope, entry] of this.scopeCache) { + if (entry.lastAccessTimestamp < lruTimestamp) { + lruTimestamp = entry.lastAccessTimestamp; + lruScope = scope; + } + } + if (lruScope) { + this.scopeCache.delete(lruScope); + this.cacheStats.evictions++; + } + } } private requireTable(): LanceTable { diff --git a/test/unit/scope-cache.test.ts b/test/unit/scope-cache.test.ts new file mode 100644 index 0000000..60e5afb --- /dev/null +++ b/test/unit/scope-cache.test.ts @@ -0,0 +1,109 @@ +import assert from "node:assert"; +import test from "node:test"; +import { MemoryStore } from "../../src/store.js"; +import type { MemoryRecord } from "../../src/types.js"; + +const TEST_DB = "/tmp/test-scope-cache"; + +async function createTestStoreWithCache(cacheConfig?: { maxScopes: number; maxRecordsPerScope: number; enabled: boolean }): Promise { + const store = new MemoryStore(TEST_DB, cacheConfig); + await store.init(384); + return store; +} + +function createTestRecord(id: string, scope: string, timestamp: number): MemoryRecord { + return { + id, + text: `Test record ${id}`, + vector: new Array(384).fill(0.1), + category: "fact", + scope, + importance: 0.5, + timestamp, + lastRecalled: 0, + recallCount: 0, + projectCount: 0, + schemaVersion: 2, + embeddingModel: "test", + vectorDim: 384, + metadataJson: "{}", + }; +} + +test("CacheStats tracks hits, misses, and evictions", async () => { + const store = await createTestStoreWithCache({ maxScopes: 2, maxRecordsPerScope: 100, enabled: true }); + + await store.put(createTestRecord("rec-1", "project:a", 
1000)); + await store.put(createTestRecord("rec-2", "project:a", 1001)); + await store.put(createTestRecord("rec-3", "project:b", 1002)); + + const scopeCache = (store as unknown as { scopeCache: Map }).scopeCache; + const cacheStats = (store as unknown as { cacheStats: { hits: number; misses: number; evictions: number } }).cacheStats; + + assert.ok(cacheStats.hits >= 0, "should have hit counter"); + assert.ok(cacheStats.misses >= 0, "should have miss counter"); + assert.ok(cacheStats.evictions >= 0, "should have eviction counter"); +}); + +test("LRU eviction removes least recently accessed scope", async () => { + const store = await createTestStoreWithCache({ maxScopes: 2, maxRecordsPerScope: 100, enabled: true }); + + await store.put(createTestRecord("rec-1", "scope-1", 1000)); + await store.put(createTestRecord("rec-2", "scope-2", 1001)); + await store.put(createTestRecord("rec-3", "scope-3", 1002)); + + const scopeCache = (store as unknown as { scopeCache: Map }).scopeCache; + + assert.ok(scopeCache.size <= 2, "cache should respect maxScopes bound"); +}); + +test("Max records per scope truncation by timestamp", async () => { + const store = await createTestStoreWithCache({ maxScopes: 10, maxRecordsPerScope: 2, enabled: true }); + + for (let i = 0; i < 5; i++) { + await store.put(createTestRecord(`rec-${i}`, "scope-limited", 1000 + i)); + } + + await store.search({ + query: "test", + queryVector: new Array(384).fill(0), + scopes: ["scope-limited"], + limit: 10, + vectorWeight: 0.5, + bm25Weight: 0.5, + minScore: 0, + }); + + const scopeCache = (store as unknown as { scopeCache: Map }).scopeCache; + const entry = scopeCache.get("scope-limited") as { records: MemoryRecord[] } | undefined; + + assert.ok(entry, "scope should be cached"); + assert.ok(entry.records.length <= 2, "should respect maxRecordsPerScope bound"); +}); + +test("Cache disabled returns fresh data without caching", async () => { + const store = await createTestStoreWithCache({ maxScopes: 10, 
maxRecordsPerScope: 100, enabled: false }); + + await store.put(createTestRecord("rec-1", "project:test", 1000)); + + const scopeCache = (store as unknown as { scopeCache: Map }).scopeCache; + + assert.equal(scopeCache.size, 0, "cache should remain empty when disabled"); +}); + +test("Configurable bounds via constructor options", async () => { + const store1 = await createTestStoreWithCache({ maxScopes: 1, maxRecordsPerScope: 100, enabled: true }); + const store2 = await createTestStoreWithCache({ maxScopes: 5, maxRecordsPerScope: 50, enabled: true }); + + await store1.put(createTestRecord("rec-1", "scope-a", 1000)); + await store1.put(createTestRecord("rec-2", "scope-b", 1001)); + + await store2.put(createTestRecord("rec-3", "scope-c", 1000)); + await store2.put(createTestRecord("rec-4", "scope-d", 1001)); + + const scopeCache1 = (store1 as unknown as { scopeCache: Map }).scopeCache; + const scopeCache2 = (store2 as unknown as { scopeCache: Map }).scopeCache; + + assert.ok(scopeCache1.size <= 1, "store1 should have maxScopes=1"); + assert.ok(scopeCache2.size <= 5, "store2 should have maxScopes=5"); +}); \ No newline at end of file