From 64cbbe1ea856212e649b1ab45d4a8d4a3d419ef3 Mon Sep 17 00:00:00 2001 From: Test Date: Tue, 7 Apr 2026 08:52:44 -0400 Subject: [PATCH] =?UTF-8?q?refactor:=20reduce=20structural=20hotspots=20?= =?UTF-8?q?=E2=80=94=20split=20barrels,=20extract=20modules,=20auto-genera?= =?UTF-8?q?te=20registry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address all findings from hotspot analysis (co-change, churn, hidden coupling): Types package (450→95 lines + 5 domain files): - Split index.ts into result.ts, workflow.ts, skill.ts, ci.ts, roadmap.ts - Fix circular imports: tracker-sync.ts and orchestrator.ts now import from domain files directly instead of through index.ts Learnings module (813→230 lines + 3 extracted modules): - Extract learnings-content.ts (parsing, hashing, dedup — leaf, no siblings) - Extract learnings-loader.ts (file loading + mtime cache — leaf, no siblings) - Extract learnings-lifecycle.ts (prune, archive, promote — imports from leaves) - Strictly acyclic dependency graph: {content, loader} ← {learnings, lifecycle} (the two leaves do not import each other) CLI command registration (108→14 lines in index.ts): - Auto-generate _registry.ts via scripts/generate-barrel-exports.mjs - createProgram() loops over discovered commands instead of 48 manual imports - Supports both `export function` and `export const` patterns - `pnpm run generate-barrel-exports --check` for CI freshness validation Hidden dependency (sync-engine ↔ github-issues): - Document adapter coupling via comment in sync-engine.ts Update arch baselines for new module count (+10 files). 
--- .harness/arch/baselines.json | 4 +- package.json | 1 + packages/cli/src/commands/_registry.ts | 109 ++++ packages/cli/src/index.ts | 106 +--- packages/core/src/roadmap/sync-engine.ts | 3 + packages/core/src/state/events.ts | 2 +- packages/core/src/state/index.ts | 46 +- packages/core/src/state/learnings-content.ts | 225 +++++++ .../core/src/state/learnings-lifecycle.ts | 252 ++++++++ packages/core/src/state/learnings-loader.ts | 106 ++++ packages/core/src/state/learnings.ts | 580 ++---------------- packages/core/src/state/state-manager.ts | 19 +- .../tests/state/learnings-promotion.test.ts | 2 +- .../tests/state/learnings-pruning.test.ts | 8 +- packages/types/src/ci.ts | 112 ++++ packages/types/src/index.ts | 466 ++------------ packages/types/src/orchestrator.ts | 2 +- packages/types/src/result.ts | 32 + packages/types/src/roadmap.ts | 92 +++ packages/types/src/skill.ts | 92 +++ packages/types/src/tracker-sync.ts | 2 +- packages/types/src/workflow.ts | 58 ++ scripts/generate-barrel-exports.mjs | 147 +++++ 23 files changed, 1383 insertions(+), 1083 deletions(-) create mode 100644 packages/cli/src/commands/_registry.ts create mode 100644 packages/core/src/state/learnings-content.ts create mode 100644 packages/core/src/state/learnings-lifecycle.ts create mode 100644 packages/core/src/state/learnings-loader.ts create mode 100644 packages/types/src/ci.ts create mode 100644 packages/types/src/result.ts create mode 100644 packages/types/src/roadmap.ts create mode 100644 packages/types/src/skill.ts create mode 100644 packages/types/src/workflow.ts create mode 100644 scripts/generate-barrel-exports.mjs diff --git a/.harness/arch/baselines.json b/.harness/arch/baselines.json index eaae1c90..a6226fa6 100644 --- a/.harness/arch/baselines.json +++ b/.harness/arch/baselines.json @@ -313,7 +313,7 @@ "violationIds": [] }, "module-size": { - "value": 57900, + "value": 58027, "violationIds": [ "e5162f4bcf3fa5b14ca7535ace40b58e6a1b319b38dd7e794d64ea1b577fae67", 
"c5b4c5a3ec42dfff0c1b6ecb8a0e2dc391925c3cef0645f6235b7b2ac2c03626", @@ -322,7 +322,7 @@ ] }, "dependency-depth": { - "value": 269, + "value": 271, "violationIds": [] } } diff --git a/package.json b/package.json index 82bc37c6..e4b2249c 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "docs:build": "pnpm --filter docs build", "docs:preview": "pnpm --filter docs preview", "generate-docs": "node scripts/generate-docs.mjs", + "generate-barrel-exports": "node scripts/generate-barrel-exports.mjs", "changeset": "changeset", "version": "changeset version", "release": "pnpm build && changeset publish", diff --git a/packages/cli/src/commands/_registry.ts b/packages/cli/src/commands/_registry.ts new file mode 100644 index 00000000..2286ae88 --- /dev/null +++ b/packages/cli/src/commands/_registry.ts @@ -0,0 +1,109 @@ +// AUTO-GENERATED — do not edit. Run `pnpm run generate-barrel-exports` to regenerate. + +import type { Command } from 'commander'; + +import { createAddCommand } from './add'; +import { createAgentCommand } from './agent'; +import { createBlueprintCommand } from './blueprint'; +import { createCheckArchCommand } from './check-arch'; +import { createCheckDepsCommand } from './check-deps'; +import { createCheckDocsCommand } from './check-docs'; +import { createCheckPerfCommand } from './check-perf'; +import { createCheckPhaseGateCommand } from './check-phase-gate'; +import { createCheckSecurityCommand } from './check-security'; +import { createCICommand } from './ci'; +import { createCleanupCommand } from './cleanup'; +import { createCreateSkillCommand } from './create-skill'; +import { createDoctorCommand } from './doctor'; +import { createFixDriftCommand } from './fix-drift'; +import { createGenerateAgentDefinitionsCommand } from './generate-agent-definitions'; +import { createGenerateCommand } from './generate'; +import { createGenerateSlashCommandsCommand } from './generate-slash-commands'; +import { createGraphCommand } from './graph'; +import { 
createHooksCommand } from './hooks'; +import { createImpactPreviewCommand } from './impact-preview'; +import { createIngestCommand } from './graph/ingest'; +import { createInitCommand } from './init'; +import { createInstallCommand } from './install'; +import { createInstallConstraintsCommand } from './install-constraints'; +import { createIntegrationsCommand } from './integrations'; +import { createLearningsCommand } from './learnings'; +import { createLinterCommand } from './linter'; +import { createMcpCommand } from './mcp'; +import { createOrchestratorCommand } from './orchestrator'; +import { createPerfCommand } from './perf'; +import { createPersonaCommand } from './persona'; +import { createPredictCommand } from './predict'; +import { createQueryCommand } from './graph/query'; +import { createRecommendCommand } from './recommend'; +import { createScanCommand } from './graph/scan'; +import { createScanConfigCommand } from './scan-config'; +import { createSetupCommand } from './setup'; +import { createSetupMcpCommand } from './setup-mcp'; +import { createShareCommand } from './share'; +import { createSkillCommand } from './skill'; +import { createSnapshotCommand } from './snapshot'; +import { createStateCommand } from './state'; +import { createTaintCommand } from './taint'; +import { createTraceabilityCommand } from './traceability'; +import { createUninstallCommand } from './uninstall'; +import { createUninstallConstraintsCommand } from './uninstall-constraints'; +import { createUpdateCommand } from './update'; +import { createUsageCommand } from './usage'; +import { createValidateCommand } from './validate'; + +/** + * All discovered command creators, sorted alphabetically. + * Used by createProgram() to register commands without manual imports. 
+ */ +export const commandCreators: Array<() => Command> = [ + createAddCommand, + createAgentCommand, + createBlueprintCommand, + createCheckArchCommand, + createCheckDepsCommand, + createCheckDocsCommand, + createCheckPerfCommand, + createCheckPhaseGateCommand, + createCheckSecurityCommand, + createCICommand, + createCleanupCommand, + createCreateSkillCommand, + createDoctorCommand, + createFixDriftCommand, + createGenerateAgentDefinitionsCommand, + createGenerateCommand, + createGenerateSlashCommandsCommand, + createGraphCommand, + createHooksCommand, + createImpactPreviewCommand, + createIngestCommand, + createInitCommand, + createInstallCommand, + createInstallConstraintsCommand, + createIntegrationsCommand, + createLearningsCommand, + createLinterCommand, + createMcpCommand, + createOrchestratorCommand, + createPerfCommand, + createPersonaCommand, + createPredictCommand, + createQueryCommand, + createRecommendCommand, + createScanCommand, + createScanConfigCommand, + createSetupCommand, + createSetupMcpCommand, + createShareCommand, + createSkillCommand, + createSnapshotCommand, + createStateCommand, + createTaintCommand, + createTraceabilityCommand, + createUninstallCommand, + createUninstallConstraintsCommand, + createUpdateCommand, + createUsageCommand, + createValidateCommand, +]; diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 1ec833b8..7bc036ad 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -10,58 +10,15 @@ import { Command } from 'commander'; import { CLI_VERSION } from './version'; -import { createValidateCommand } from './commands/validate'; -import { createCheckDepsCommand } from './commands/check-deps'; -import { createCheckPerfCommand } from './commands/check-perf'; -import { createCheckSecurityCommand } from './commands/check-security'; -import { createPerfCommand } from './commands/perf'; -import { createCheckDocsCommand } from './commands/check-docs'; -import { createInitCommand } from 
'./commands/init'; -import { createCleanupCommand } from './commands/cleanup'; -import { createFixDriftCommand } from './commands/fix-drift'; -import { createAgentCommand } from './commands/agent'; -import { createAddCommand } from './commands/add'; -import { createLinterCommand } from './commands/linter'; -import { createPersonaCommand } from './commands/persona'; -import { createSkillCommand } from './commands/skill'; -import { createStateCommand } from './commands/state'; -import { createCheckPhaseGateCommand } from './commands/check-phase-gate'; -import { createCreateSkillCommand } from './commands/create-skill'; -import { createSetupMcpCommand } from './commands/setup-mcp'; -import { createSetupCommand } from './commands/setup'; -import { createDoctorCommand } from './commands/doctor'; -import { createGenerateSlashCommandsCommand } from './commands/generate-slash-commands'; -import { createCICommand } from './commands/ci'; -import { createHooksCommand } from './commands/hooks'; -import { createUpdateCommand } from './commands/update'; -import { createGenerateAgentDefinitionsCommand } from './commands/generate-agent-definitions'; -import { createGenerateCommand } from './commands/generate'; -import { createScanCommand } from './commands/graph/scan'; -import { createIngestCommand } from './commands/graph/ingest'; -import { createQueryCommand } from './commands/graph/query'; -import { createGraphCommand } from './commands/graph/index'; -import { createMcpCommand } from './commands/mcp'; -import { createImpactPreviewCommand } from './commands/impact-preview'; -import { createCheckArchCommand } from './commands/check-arch'; -import { createBlueprintCommand } from './commands/blueprint'; -import { createShareCommand } from './commands/share'; -import { createInstallCommand } from './commands/install'; -import { createInstallConstraintsCommand } from './commands/install-constraints'; -import { createUninstallConstraintsCommand } from 
'./commands/uninstall-constraints'; -import { createUninstallCommand } from './commands/uninstall'; -import { createOrchestratorCommand } from './commands/orchestrator'; -import { createLearningsCommand } from './commands/learnings'; -import { createIntegrationsCommand } from './commands/integrations/index'; -import { createUsageCommand } from './commands/usage'; -import { createTaintCommand } from './commands/taint'; -import { createScanConfigCommand } from './commands/scan-config'; -import { createSnapshotCommand } from './commands/snapshot'; -import { createPredictCommand } from './commands/predict'; -import { createRecommendCommand } from './commands/recommend'; +import { commandCreators } from './commands/_registry'; /** * Creates and configures the main Harness CLI program. * + * Commands are auto-discovered from the commands/ directory via _registry.ts. + * To add a new command: create it in commands/, export a createXXXCommand() + * function, then run `pnpm run generate-barrel-exports` to regenerate the registry. + * * @returns A Commander instance with all subcommands registered. 
*/ export function createProgram(): Command { @@ -76,55 +33,10 @@ export function createProgram(): Command { .option('--verbose', 'Verbose output') .option('--quiet', 'Minimal output'); - // Register commands - program.addCommand(createValidateCommand()); - program.addCommand(createCheckDepsCommand()); - program.addCommand(createCheckDocsCommand()); - program.addCommand(createCheckPerfCommand()); - program.addCommand(createCheckSecurityCommand()); - program.addCommand(createPerfCommand()); - program.addCommand(createInitCommand()); - program.addCommand(createCleanupCommand()); - program.addCommand(createFixDriftCommand()); - program.addCommand(createAgentCommand()); - program.addCommand(createAddCommand()); - program.addCommand(createLinterCommand()); - program.addCommand(createPersonaCommand()); - program.addCommand(createSkillCommand()); - program.addCommand(createStateCommand()); - program.addCommand(createLearningsCommand()); - program.addCommand(createCheckPhaseGateCommand()); - program.addCommand(createCreateSkillCommand()); - program.addCommand(createSetupMcpCommand()); - program.addCommand(createSetupCommand()); - program.addCommand(createDoctorCommand()); - program.addCommand(createGenerateSlashCommandsCommand()); - program.addCommand(createGenerateAgentDefinitionsCommand()); - program.addCommand(createGenerateCommand()); - program.addCommand(createCICommand()); - program.addCommand(createHooksCommand()); - program.addCommand(createUpdateCommand()); - program.addCommand(createScanCommand()); - program.addCommand(createIngestCommand()); - program.addCommand(createQueryCommand()); - program.addCommand(createGraphCommand()); - program.addCommand(createMcpCommand()); - program.addCommand(createImpactPreviewCommand()); - program.addCommand(createCheckArchCommand()); - program.addCommand(createBlueprintCommand()); - program.addCommand(createShareCommand()); - program.addCommand(createInstallCommand()); - program.addCommand(createInstallConstraintsCommand()); - 
program.addCommand(createUninstallConstraintsCommand()); - program.addCommand(createUninstallCommand()); - program.addCommand(createOrchestratorCommand()); - program.addCommand(createIntegrationsCommand()); - program.addCommand(createUsageCommand()); - program.addCommand(createTaintCommand()); - program.addCommand(createScanConfigCommand()); - program.addCommand(createSnapshotCommand()); - program.addCommand(createPredictCommand()); - program.addCommand(createRecommendCommand()); + // Register all discovered commands + for (const creator of commandCreators) { + program.addCommand(creator()); + } return program; } diff --git a/packages/core/src/roadmap/sync-engine.ts b/packages/core/src/roadmap/sync-engine.ts index 7a815b10..c7e3a782 100644 --- a/packages/core/src/roadmap/sync-engine.ts +++ b/packages/core/src/roadmap/sync-engine.ts @@ -12,6 +12,9 @@ import { serializeRoadmap } from './serialize'; import type { TrackerSyncAdapter, ExternalSyncOptions } from './tracker-sync'; import { resolveReverseStatus } from './tracker-sync'; import { isRegression } from './status-rank'; +// Known adapters: adapters/github-issues.ts (GitHubIssuesSyncAdapter). +// This module consumes adapters via the TrackerSyncAdapter interface. +// Changes to the interface contract require updating both this file and all adapters. function emptySyncResult(): SyncResult { return { created: [], updated: [], assignmentChanges: [], errors: [] }; diff --git a/packages/core/src/state/events.ts b/packages/core/src/state/events.ts index d5f50531..e55594ee 100644 --- a/packages/core/src/state/events.ts +++ b/packages/core/src/state/events.ts @@ -6,7 +6,7 @@ import type { Result } from '../shared/result'; import { Ok, Err } from '../shared/result'; import { getStateDir } from './state-shared'; import { EVENTS_FILE } from './constants'; -import { computeContentHash } from './learnings'; +import { computeContentHash } from './learnings-content'; /** Event types emitted at skill lifecycle points. 
*/ export type EventType = diff --git a/packages/core/src/state/index.ts b/packages/core/src/state/index.ts index 284b27ac..24498d72 100644 --- a/packages/core/src/state/index.ts +++ b/packages/core/src/state/index.ts @@ -21,31 +21,43 @@ export type { HarnessState, FailureEntry, Handoff, GateResult, GateConfig } from export { loadState, saveState } from './state-persistence'; /** - * Learning accumulation and retrieval. + * Learning content parsing and deduplication. */ export { - clearLearningsCache, - appendLearning, - loadRelevantLearnings, - loadBudgetedLearnings, + parseFrontmatter, + extractIndexEntry, parseDateFromEntry, + normalizeLearningContent, + computeContentHash, analyzeLearningPatterns, +} from './learnings-content'; +export type { + LearningsFrontmatter, + LearningsIndexEntry, + LearningPattern, +} from './learnings-content'; + +/** + * Learning file loader with mtime-based cache. + */ +export { clearLearningsCache, loadRelevantLearnings } from './learnings-loader'; + +/** + * Learning CRUD operations: append, load index, budgeted retrieval. + */ +export { appendLearning, loadBudgetedLearnings, loadIndexEntries } from './learnings'; +export type { BudgetedLearningsOptions } from './learnings'; + +/** + * Learning lifecycle: archival, pruning, session promotion. + */ +export { archiveLearnings, pruneLearnings, promoteSessionLearnings, countLearningEntries, - parseFrontmatter, - extractIndexEntry, - loadIndexEntries, -} from './learnings'; -export type { - BudgetedLearningsOptions, - LearningPattern, - PruneResult, - PromoteResult, - LearningsFrontmatter, - LearningsIndexEntry, -} from './learnings'; +} from './learnings-lifecycle'; +export type { PruneResult, PromoteResult } from './learnings-lifecycle'; /** * Failure tracking, loading, and archival. 
diff --git a/packages/core/src/state/learnings-content.ts b/packages/core/src/state/learnings-content.ts new file mode 100644 index 00000000..3d8d5e80 --- /dev/null +++ b/packages/core/src/state/learnings-content.ts @@ -0,0 +1,225 @@ +// packages/core/src/state/learnings-content.ts +// +// Content deduplication: normalization, hashing, and content hash index management. +// Extracted from learnings.ts to reduce blast radius. + +import * as fs from 'fs'; +import * as path from 'path'; +import * as crypto from 'crypto'; +import { CONTENT_HASHES_FILE } from './constants'; + +// --- Types --- + +export interface LearningsFrontmatter { + hash: string; + tags: string[]; +} + +export interface LearningsIndexEntry { + hash: string; + tags: string[]; + summary: string; + fullText: string; +} + +/** Content hash index: maps content hash -> metadata */ +export interface ContentHashEntry { + date: string; + line: number; +} + +export type ContentHashIndex = Record; + +// --- Parsing --- + +/** Parse a frontmatter comment line: */ +export function parseFrontmatter(line: string): LearningsFrontmatter | null { + const match = line.match(/^/); + if (!match) return null; + const hash = match[1]!; + const tags = match[2] ? match[2].split(',').filter(Boolean) : []; + return { hash, tags }; +} + +/** + * Parse date from a learning entry. Returns the date string or null. + * Entries look like: "- **2026-03-25 [skill:X]:** content" + * or heading format: "## 2026-03-25 — Task 3: ..." + */ +export function parseDateFromEntry(entry: string): string | null { + const match = entry.match(/(\d{4}-\d{2}-\d{2})/); + return match ? (match[1] ?? null) : null; +} + +/** + * Extract a lightweight index entry from a full learning entry. + * Summary = first line only. Tags extracted from [skill:X] and [outcome:Y] markers. + * Hash computed from full entry text. 
+ */ +export function extractIndexEntry(entry: string): LearningsIndexEntry { + const lines = entry.split('\n'); + const summary = lines[0] ?? entry; + const tags: string[] = []; + const skillMatch = entry.match(/\[skill:([^\]]+)\]/); + if (skillMatch?.[1]) tags.push(skillMatch[1]); + const outcomeMatch = entry.match(/\[outcome:([^\]]+)\]/); + if (outcomeMatch?.[1]) tags.push(outcomeMatch[1]); + return { + hash: computeEntryHash(entry), + tags, + summary, + fullText: entry, + }; +} + +// --- Hashing --- + +/** Compute an 8-char hex hash of the entry text. */ +export function computeEntryHash(text: string): string { + return crypto.createHash('sha256').update(text).digest('hex').slice(0, 8); +} + +/** + * Normalize learning content for deduplication. + * Strips date prefixes, skill/outcome tags, list markers, bold markers; + * lowercases; collapses whitespace; trims. + */ +export function normalizeLearningContent(text: string): string { + let normalized = text; + // Strip date prefix (YYYY-MM-DD) + normalized = normalized.replace(/\d{4}-\d{2}-\d{2}/g, ''); + // Strip skill/outcome tags + normalized = normalized.replace(/\[skill:[^\]]*\]/g, ''); + normalized = normalized.replace(/\[outcome:[^\]]*\]/g, ''); + // Strip list markers (- or *) + normalized = normalized.replace(/^[\s]*[-*]\s+/gm, ''); + // Strip bold markers + normalized = normalized.replace(/\*\*/g, ''); + // Strip colons left after tag removal (e.g., ":]" -> "") + normalized = normalized.replace(/:\s*/g, ' '); + // Lowercase + normalized = normalized.toLowerCase(); + // Collapse whitespace + normalized = normalized.replace(/\s+/g, ' ').trim(); + return normalized; +} + +/** + * Compute a 16-char hex SHA-256 hash of normalized content. + */ +export function computeContentHash(text: string): string { + return crypto.createHash('sha256').update(text).digest('hex').slice(0, 16); +} + +// --- Content Hash Index I/O --- + +/** Load content hash index from sidecar file. Returns empty object on missing/corrupt. 
*/ +export function loadContentHashes(stateDir: string): ContentHashIndex { + const hashesPath = path.join(stateDir, CONTENT_HASHES_FILE); + if (!fs.existsSync(hashesPath)) return {}; + try { + const raw = fs.readFileSync(hashesPath, 'utf-8'); + const parsed = JSON.parse(raw); + if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return {}; + return parsed as ContentHashIndex; + } catch { + return {}; + } +} + +/** Save content hash index to sidecar file. */ +export function saveContentHashes(stateDir: string, index: ContentHashIndex): void { + const hashesPath = path.join(stateDir, CONTENT_HASHES_FILE); + fs.writeFileSync(hashesPath, JSON.stringify(index, null, 2) + '\n'); +} + +/** + * Rebuild content hash index from learnings.md. + * Used for self-healing when sidecar is missing or corrupted. + */ +export function rebuildContentHashes(stateDir: string, learningsFile: string): ContentHashIndex { + const learningsPath = path.join(stateDir, learningsFile); + if (!fs.existsSync(learningsPath)) return {}; + + const content = fs.readFileSync(learningsPath, 'utf-8'); + const lines = content.split('\n'); + const index: ContentHashIndex = {}; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]!; + const isDatedBullet = /^- \*\*\d{4}-\d{2}-\d{2}/.test(line); + if (isDatedBullet) { + // Extract the raw learning text from the bullet line + const learningMatch = line.match(/:\*\*\s*(.+)$/); + if (learningMatch?.[1]) { + const normalized = normalizeLearningContent(learningMatch[1]); + const hash = computeContentHash(normalized); + const dateMatch = line.match(/(\d{4}-\d{2}-\d{2})/); + index[hash] = { date: dateMatch?.[1] ?? '', line: i + 1 }; + } + } + } + + saveContentHashes(stateDir, index); + return index; +} + +// --- Pattern Analysis --- + +export interface LearningPattern { + tag: string; + count: number; + entries: string[]; +} + +/** + * Analyze learning entries for recurring patterns. 
+ * Groups entries by [skill:X] and [outcome:Y] tags. + * Returns patterns where 3+ entries share the same tag. + */ +export function analyzeLearningPatterns(entries: string[]): LearningPattern[] { + const tagGroups = new Map(); + + for (const entry of entries) { + const tagMatches = entry.matchAll(/\[(skill:[^\]]+)\]|\[(outcome:[^\]]+)\]/g); + for (const match of tagMatches) { + const tag = match[1] ?? match[2]; + if (tag) { + const group = tagGroups.get(tag) ?? []; + group.push(entry); + tagGroups.set(tag, group); + } + } + } + + const patterns: LearningPattern[] = []; + for (const [tag, groupEntries] of tagGroups) { + if (groupEntries.length >= 3) { + patterns.push({ tag, count: groupEntries.length, entries: groupEntries }); + } + } + + return patterns.sort((a, b) => b.count - a.count); +} + +/** Estimate token count from a string (chars / 4, ceiling). */ +export function estimateTokens(text: string): number { + return Math.ceil(text.length / 4); +} + +/** + * Score how relevant a learning entry is to a given intent. + * Returns a number 0-1. Higher = more relevant. + * Uses keyword overlap between intent words and entry text. + */ +export function scoreRelevance(entry: string, intent: string): number { + if (!intent || intent.trim() === '') return 0; + const intentWords = intent + .toLowerCase() + .split(/\s+/) + .filter((w) => w.length > 2); // skip short words like "a", "to", "in" + if (intentWords.length === 0) return 0; + const entryLower = entry.toLowerCase(); + const matches = intentWords.filter((word) => entryLower.includes(word)); + return matches.length / intentWords.length; +} diff --git a/packages/core/src/state/learnings-lifecycle.ts b/packages/core/src/state/learnings-lifecycle.ts new file mode 100644 index 00000000..49d62cdc --- /dev/null +++ b/packages/core/src/state/learnings-lifecycle.ts @@ -0,0 +1,252 @@ +// packages/core/src/state/learnings-lifecycle.ts +// +// Lifecycle operations: archival, pruning, session promotion, counting. 
+// Extracted from learnings.ts to reduce blast radius. + +import * as fs from 'fs'; +import * as path from 'path'; +import type { Result } from '../shared/result'; +import { Ok, Err } from '../shared/result'; +import { getStateDir } from './state-shared'; +import { LEARNINGS_FILE } from './constants'; +import { parseDateFromEntry, analyzeLearningPatterns } from './learnings-content'; +import type { LearningPattern } from './learnings-content'; +import { loadRelevantLearnings, invalidateLearningsCacheEntry } from './learnings-loader'; + +export interface PruneResult { + kept: number; + archived: number; + patterns: LearningPattern[]; +} + +/** + * Archive learning entries to .harness/learnings-archive/{YYYY-MM}.md. + * Appends to existing archive file if one exists for the current month. + */ +export async function archiveLearnings( + projectPath: string, + entries: string[], + stream?: string +): Promise> { + try { + const dirResult = await getStateDir(projectPath, stream); + if (!dirResult.ok) return dirResult; + const stateDir = dirResult.value; + + const archiveDir = path.join(stateDir, 'learnings-archive'); + fs.mkdirSync(archiveDir, { recursive: true }); + + const now = new Date(); + const yearMonth = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}`; + const archivePath = path.join(archiveDir, `${yearMonth}.md`); + + const archiveContent = entries.join('\n\n') + '\n'; + + if (fs.existsSync(archivePath)) { + fs.appendFileSync(archivePath, '\n' + archiveContent); + } else { + fs.writeFileSync(archivePath, `# Learnings Archive\n\n${archiveContent}`); + } + + return Ok(undefined); + } catch (error) { + return Err( + new Error( + `Failed to archive learnings: ${error instanceof Error ? error.message : String(error)}` + ) + ); + } +} + +/** + * Prune global learnings: analyze patterns, archive old entries, keep 20 most recent. 
+ * + * Pruning triggers when: + * - Entry count exceeds 30, OR + * - Entries older than 14 days exist AND total count exceeds 20 + * + * Returns the prune result with pattern analysis and counts. + */ +export async function pruneLearnings( + projectPath: string, + stream?: string +): Promise> { + try { + const dirResult = await getStateDir(projectPath, stream); + if (!dirResult.ok) return dirResult; + const stateDir = dirResult.value; + const learningsPath = path.join(stateDir, LEARNINGS_FILE); + + if (!fs.existsSync(learningsPath)) { + return Ok({ kept: 0, archived: 0, patterns: [] }); + } + + const loadResult = await loadRelevantLearnings(projectPath, undefined, stream); + if (!loadResult.ok) return loadResult; + const allEntries = loadResult.value; + + if (allEntries.length <= 20) { + const cutoffDate = new Date(); + cutoffDate.setDate(cutoffDate.getDate() - 14); + const cutoffStr = cutoffDate.toISOString().split('T')[0]; + + const hasOld = allEntries.some((entry) => { + const date = parseDateFromEntry(entry); + return date !== null && date < cutoffStr!; + }); + + if (!hasOld) { + return Ok({ kept: allEntries.length, archived: 0, patterns: [] }); + } + } + + // Sort by date descending (newest first) + const sorted = [...allEntries].sort((a, b) => { + const dateA = parseDateFromEntry(a) ?? '0000-00-00'; + const dateB = parseDateFromEntry(b) ?? 
'0000-00-00'; + return dateB.localeCompare(dateA); + }); + + const toKeep = sorted.slice(0, 20); + const toArchive = sorted.slice(20); + + // Analyze patterns in ALL entries before pruning + const patterns = analyzeLearningPatterns(allEntries); + + if (toArchive.length > 0) { + const archiveResult = await archiveLearnings(projectPath, toArchive, stream); + if (!archiveResult.ok) return archiveResult; + } + + // Rewrite learnings.md with only kept entries + const newContent = '# Learnings\n\n' + toKeep.join('\n\n') + '\n'; + fs.writeFileSync(learningsPath, newContent); + + // Invalidate cache (targeted — only this file, not all cached entries) + invalidateLearningsCacheEntry(learningsPath); + + return Ok({ + kept: toKeep.length, + archived: toArchive.length, + patterns, + }); + } catch (error) { + return Err( + new Error( + `Failed to prune learnings: ${error instanceof Error ? error.message : String(error)}` + ) + ); + } +} + +export interface PromoteResult { + promoted: number; + skipped: number; +} + +/** + * Outcomes considered generalizable (applicable beyond the current session). + * Entries with these tags get promoted to global learnings. + * Task-completion entries ([outcome:success] with no broader insight, + * or entries with no outcome tag) stay in the session. + */ +const PROMOTABLE_OUTCOMES = ['gotcha', 'decision', 'observation']; + +/** + * Check if a learning entry is generalizable (should be promoted to global). + * Generalizable = has an outcome tag that indicates a reusable insight. + */ +function isGeneralizable(entry: string): boolean { + for (const outcome of PROMOTABLE_OUTCOMES) { + if (entry.includes(`[outcome:${outcome}]`)) return true; + } + return false; +} + +/** + * Promote generalizable session learnings to global learnings.md. + * + * Generalizable entries are those tagged with [outcome:gotcha], + * [outcome:decision], or [outcome:observation]. These represent + * reusable insights that apply beyond the current session. 
+ * + * Task-specific entries (e.g., [outcome:success] completion summaries, + * or entries without outcome tags) stay in the session directory. + */ +export async function promoteSessionLearnings( + projectPath: string, + sessionSlug: string, + stream?: string +): Promise> { + try { + // Load session learnings + const sessionResult = await loadRelevantLearnings(projectPath, undefined, stream, sessionSlug); + if (!sessionResult.ok) return sessionResult; + const sessionEntries = sessionResult.value; + + if (sessionEntries.length === 0) { + return Ok({ promoted: 0, skipped: 0 }); + } + + const toPromote: string[] = []; + let skipped = 0; + + for (const entry of sessionEntries) { + if (isGeneralizable(entry)) { + toPromote.push(entry); + } else { + skipped++; + } + } + + if (toPromote.length === 0) { + return Ok({ promoted: 0, skipped }); + } + + // Append promoted entries to global learnings (with dedup for idempotency) + const dirResult = await getStateDir(projectPath, stream); + if (!dirResult.ok) return dirResult; + const stateDir = dirResult.value; + const globalPath = path.join(stateDir, LEARNINGS_FILE); + + // Load existing global entries for duplicate detection + const existingGlobal = fs.existsSync(globalPath) ? 
fs.readFileSync(globalPath, 'utf-8') : ''; + const newEntries = toPromote.filter((entry) => !existingGlobal.includes(entry.trim())); + + if (newEntries.length === 0) { + return Ok({ promoted: 0, skipped: skipped + toPromote.length }); + } + + const promotedContent = newEntries.join('\n\n') + '\n'; + + if (!existingGlobal) { + fs.writeFileSync(globalPath, `# Learnings\n\n${promotedContent}`); + } else { + fs.appendFileSync(globalPath, '\n\n' + promotedContent); + } + + // Invalidate cache (targeted — only the global file, not all cached entries) + invalidateLearningsCacheEntry(globalPath); + + return Ok({ + promoted: newEntries.length, + skipped: skipped + (toPromote.length - newEntries.length), + }); + } catch (error) { + return Err( + new Error( + `Failed to promote session learnings: ${error instanceof Error ? error.message : String(error)}` + ) + ); + } +} + +/** + * Count the number of learning entries in the global learnings.md file. + * Useful for checking if pruning should be suggested (threshold: 30). + */ +export async function countLearningEntries(projectPath: string, stream?: string): Promise { + const loadResult = await loadRelevantLearnings(projectPath, undefined, stream); + if (!loadResult.ok) return 0; + return loadResult.value.length; +} diff --git a/packages/core/src/state/learnings-loader.ts b/packages/core/src/state/learnings-loader.ts new file mode 100644 index 00000000..176a2bbb --- /dev/null +++ b/packages/core/src/state/learnings-loader.ts @@ -0,0 +1,106 @@ +// packages/core/src/state/learnings-loader.ts +// +// Learnings file loader with mtime-based cache. +// Leaf module — no imports from sibling learnings-* files. +// Both learnings.ts (CRUD) and learnings-lifecycle.ts (prune/archive/promote) +// import from here, keeping the dependency graph strictly acyclic. 
+ +import * as fs from 'fs'; +import * as path from 'path'; +import type { Result } from '../shared/result'; +import { Ok, Err } from '../shared/result'; +import { getStateDir, LEARNINGS_FILE, evictIfNeeded } from './state-shared'; + +// --- Cache --- + +interface LearningsCache { + mtimeMs: number; + entries: string[]; +} + +const learningsCacheMap = new Map(); + +export function clearLearningsCache(): void { + learningsCacheMap.clear(); +} + +/** Remove a single path from the learnings cache. Used by lifecycle operations for targeted invalidation. */ +export function invalidateLearningsCacheEntry(key: string): void { + learningsCacheMap.delete(key); +} + +// --- Loader --- + +export async function loadRelevantLearnings( + projectPath: string, + skillName?: string, + stream?: string, + session?: string +): Promise> { + try { + const dirResult = await getStateDir(projectPath, stream, session); + if (!dirResult.ok) return dirResult; + const stateDir = dirResult.value; + const learningsPath = path.join(stateDir, LEARNINGS_FILE); + + if (!fs.existsSync(learningsPath)) { + return Ok([]); + } + + // Cache check: use mtime to determine if re-parse is needed + const stats = fs.statSync(learningsPath); + const cacheKey = learningsPath; + const cached = learningsCacheMap.get(cacheKey); + + let entries: string[]; + + if (cached && cached.mtimeMs === stats.mtimeMs) { + entries = cached.entries; + } else { + // Parse file and populate cache + const content = fs.readFileSync(learningsPath, 'utf-8'); + const lines = content.split('\n'); + entries = []; + let currentBlock: string[] = []; + + for (const line of lines) { + if (line.startsWith('# ')) continue; + + // Skip frontmatter comment lines — they are metadata, not entry content + if (/^ */ -export function parseFrontmatter(line: string): LearningsFrontmatter | null { - const match = line.match(/^/); - if (!match) return null; - const hash = match[1]!; - const tags = match[2] ? 
match[2].split(',').filter(Boolean) : []; - return { hash, tags }; -} - -/** Compute an 8-char hex hash of the entry text. */ -function computeEntryHash(text: string): string { - return crypto.createHash('sha256').update(text).digest('hex').slice(0, 8); -} - -// --- Content Deduplication --- - -/** - * Normalize learning content for deduplication. - * Strips date prefixes, skill/outcome tags, list markers, bold markers; - * lowercases; collapses whitespace; trims. - */ -export function normalizeLearningContent(text: string): string { - let normalized = text; - // Strip date prefix (YYYY-MM-DD) - normalized = normalized.replace(/\d{4}-\d{2}-\d{2}/g, ''); - // Strip skill/outcome tags - normalized = normalized.replace(/\[skill:[^\]]*\]/g, ''); - normalized = normalized.replace(/\[outcome:[^\]]*\]/g, ''); - // Strip list markers (- or *) - normalized = normalized.replace(/^[\s]*[-*]\s+/gm, ''); - // Strip bold markers - normalized = normalized.replace(/\*\*/g, ''); - // Strip colons left after tag removal (e.g., ":]" -> "") - normalized = normalized.replace(/:\s*/g, ' '); - // Lowercase - normalized = normalized.toLowerCase(); - // Collapse whitespace - normalized = normalized.replace(/\s+/g, ' ').trim(); - return normalized; -} - -/** - * Compute a 16-char hex SHA-256 hash of normalized content. - */ -export function computeContentHash(text: string): string { - return crypto.createHash('sha256').update(text).digest('hex').slice(0, 16); -} - -/** Content hash index: maps content hash -> metadata */ -export interface ContentHashEntry { - date: string; - line: number; -} - -export type ContentHashIndex = Record; - -/** Load content hash index from sidecar file. Returns empty object on missing/corrupt. 
*/ -function loadContentHashes(stateDir: string): ContentHashIndex { - const hashesPath = path.join(stateDir, CONTENT_HASHES_FILE); - if (!fs.existsSync(hashesPath)) return {}; - try { - const raw = fs.readFileSync(hashesPath, 'utf-8'); - const parsed = JSON.parse(raw); - if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return {}; - return parsed as ContentHashIndex; - } catch { - return {}; - } -} - -/** Save content hash index to sidecar file. */ -function saveContentHashes(stateDir: string, index: ContentHashIndex): void { - const hashesPath = path.join(stateDir, CONTENT_HASHES_FILE); - fs.writeFileSync(hashesPath, JSON.stringify(index, null, 2) + '\n'); -} - -/** - * Rebuild content hash index from learnings.md. - * Used for self-healing when sidecar is missing or corrupted. - */ -function rebuildContentHashes(stateDir: string): ContentHashIndex { - const learningsPath = path.join(stateDir, LEARNINGS_FILE); - if (!fs.existsSync(learningsPath)) return {}; - - const content = fs.readFileSync(learningsPath, 'utf-8'); - const lines = content.split('\n'); - const index: ContentHashIndex = {}; - - for (let i = 0; i < lines.length; i++) { - const line = lines[i]!; - const isDatedBullet = /^- \*\*\d{4}-\d{2}-\d{2}/.test(line); - if (isDatedBullet) { - // Extract the raw learning text from the bullet line - const learningMatch = line.match(/:\*\*\s*(.+)$/); - if (learningMatch?.[1]) { - const normalized = normalizeLearningContent(learningMatch[1]); - const hash = computeContentHash(normalized); - const dateMatch = line.match(/(\d{4}-\d{2}-\d{2})/); - index[hash] = { date: dateMatch?.[1] ?? '', line: i + 1 }; - } - } - } - - saveContentHashes(stateDir, index); - return index; -} - -/** - * Extract a lightweight index entry from a full learning entry. - * Summary = first line only. Tags extracted from [skill:X] and [outcome:Y] markers. - * Hash computed from full entry text. 
- */ -export function extractIndexEntry(entry: string): LearningsIndexEntry { - const lines = entry.split('\n'); - const summary = lines[0] ?? entry; - const tags: string[] = []; - const skillMatch = entry.match(/\[skill:([^\]]+)\]/); - if (skillMatch?.[1]) tags.push(skillMatch[1]); - const outcomeMatch = entry.match(/\[outcome:([^\]]+)\]/); - if (outcomeMatch?.[1]) tags.push(outcomeMatch[1]); - return { - hash: computeEntryHash(entry), - tags, - summary, - fullText: entry, - }; -} - -interface LearningsCache { - mtimeMs: number; - entries: string[]; -} - -const learningsCacheMap = new Map(); - -export function clearLearningsCache(): void { - learningsCacheMap.clear(); +export interface BudgetedLearningsOptions { + intent: string; + tokenBudget?: number; + skill?: string; + session?: string; + stream?: string; + depth?: 'index' | 'summary' | 'full'; } export async function appendLearning( @@ -185,11 +70,11 @@ export async function appendLearning( contentHashes = loadContentHashes(stateDir); // If loaded index is empty but learnings exist, sidecar may be corrupted — rebuild if (Object.keys(contentHashes).length === 0 && fs.existsSync(learningsPath)) { - contentHashes = rebuildContentHashes(stateDir); + contentHashes = rebuildContentHashes(stateDir, LEARNINGS_FILE); } } else if (fs.existsSync(learningsPath)) { // Sidecar missing but learnings exist — rebuild (self-healing) - contentHashes = rebuildContentHashes(stateDir); + contentHashes = rebuildContentHashes(stateDir, LEARNINGS_FILE); } else { contentHashes = {}; } @@ -238,7 +123,7 @@ export async function appendLearning( saveContentHashes(stateDir, contentHashes); // Invalidate cache on write - learningsCacheMap.delete(learningsPath); + invalidateLearningsCacheEntry(learningsPath); return Ok(undefined); } catch (error) { @@ -250,83 +135,6 @@ export async function appendLearning( } } -/** Estimate token count from a string (chars / 4, ceiling). 
*/ -function estimateTokens(text: string): number { - return Math.ceil(text.length / 4); -} - -/** - * Score how relevant a learning entry is to a given intent. - * Returns a number 0-1. Higher = more relevant. - * Uses keyword overlap between intent words and entry text. - */ -function scoreRelevance(entry: string, intent: string): number { - if (!intent || intent.trim() === '') return 0; - const intentWords = intent - .toLowerCase() - .split(/\s+/) - .filter((w) => w.length > 2); // skip short words like "a", "to", "in" - if (intentWords.length === 0) return 0; - const entryLower = entry.toLowerCase(); - const matches = intentWords.filter((word) => entryLower.includes(word)); - return matches.length / intentWords.length; -} - -/** - * Parse date from a learning entry. Returns the date string or null. - * Entries look like: "- **2026-03-25 [skill:X]:** content" - * or heading format: "## 2026-03-25 — Task 3: ..." - */ -export function parseDateFromEntry(entry: string): string | null { - const match = entry.match(/(\d{4}-\d{2}-\d{2})/); - return match ? (match[1] ?? null) : null; -} - -export interface LearningPattern { - tag: string; - count: number; - entries: string[]; -} - -/** - * Analyze learning entries for recurring patterns. - * Groups entries by [skill:X] and [outcome:Y] tags. - * Returns patterns where 3+ entries share the same tag. - */ -export function analyzeLearningPatterns(entries: string[]): LearningPattern[] { - const tagGroups = new Map(); - - for (const entry of entries) { - const tagMatches = entry.matchAll(/\[(skill:[^\]]+)\]|\[(outcome:[^\]]+)\]/g); - for (const match of tagMatches) { - const tag = match[1] ?? match[2]; - if (tag) { - const group = tagGroups.get(tag) ?? 
[]; - group.push(entry); - tagGroups.set(tag, group); - } - } - } - - const patterns: LearningPattern[] = []; - for (const [tag, groupEntries] of tagGroups) { - if (groupEntries.length >= 3) { - patterns.push({ tag, count: groupEntries.length, entries: groupEntries }); - } - } - - return patterns.sort((a, b) => b.count - a.count); -} - -export interface BudgetedLearningsOptions { - intent: string; - tokenBudget?: number; - skill?: string; - session?: string; - stream?: string; - depth?: 'index' | 'summary' | 'full'; -} - /** * Load learnings with token budget, two-tier loading, recency sorting, and relevance filtering. * @@ -499,315 +307,3 @@ export async function loadIndexEntries( ); } } - -export async function loadRelevantLearnings( - projectPath: string, - skillName?: string, - stream?: string, - session?: string -): Promise> { - try { - const dirResult = await getStateDir(projectPath, stream, session); - if (!dirResult.ok) return dirResult; - const stateDir = dirResult.value; - const learningsPath = path.join(stateDir, LEARNINGS_FILE); - - if (!fs.existsSync(learningsPath)) { - return Ok([]); - } - - // Cache check: use mtime to determine if re-parse is needed - const stats = fs.statSync(learningsPath); - const cacheKey = learningsPath; - const cached = learningsCacheMap.get(cacheKey); - - let entries: string[]; - - if (cached && cached.mtimeMs === stats.mtimeMs) { - entries = cached.entries; - } else { - // Parse file and populate cache - const content = fs.readFileSync(learningsPath, 'utf-8'); - const lines = content.split('\n'); - entries = []; - let currentBlock: string[] = []; - - for (const line of lines) { - if (line.startsWith('# ')) continue; - - // Skip frontmatter comment lines — they are metadata, not entry content - if (/^