diff --git a/src/application/services/ExtractService.ts b/src/application/services/ExtractService.ts
index 5998a342..f14d883d 100644
--- a/src/application/services/ExtractService.ts
+++ b/src/application/services/ExtractService.ts
@@ -16,12 +16,14 @@ import type {
 import type { IGitTriageService } from '../../domain/interfaces/IGitTriageService';
 import type { IMemoryRepository } from '../../domain/interfaces/IMemoryRepository';
 import type { IGitClient } from '../../domain/interfaces/IGitClient';
+import type { ITrailerService } from '../../domain/interfaces/ITrailerService';
 import type { ILLMClient, ILLMExtractedFact } from '../../domain/interfaces/ILLMClient';
 import type { MemoryType } from '../../domain/entities/IMemoryEntity';
 import type { ConfidenceLevel } from '../../domain/types/IMemoryQuality';
 import type { IPatternMatch } from '../../infrastructure/services/patterns/HeuristicPatterns';
 import { extractPatternMatches } from '../../infrastructure/services/patterns/HeuristicPatterns';
 import { extractWords, jaccardSimilarity } from '../../domain/utils/deduplication';
+import { AI_TRAILER_KEYS } from '../../domain/entities/ITrailer';
 import type { ILogger } from '../../domain/interfaces/ILogger';
 
 /** Maximum diff length sent to LLM (chars). Truncated at line boundary. */
@@ -30,13 +32,21 @@ const MAX_DIFF_LENGTH = 15_000;
 /** Jaccard similarity threshold for deduplication between heuristic and LLM facts. */
 const DEDUP_THRESHOLD = 0.7;
 
-/** Uniform fact shape for merging heuristic and LLM results. */
+/** Trailer key → MemoryType mapping. */
+const TRAILER_KEY_TO_MEMORY_TYPE: Record<string, MemoryType> = {
+  [AI_TRAILER_KEYS.DECISION]: 'decision',
+  [AI_TRAILER_KEYS.GOTCHA]: 'gotcha',
+  [AI_TRAILER_KEYS.CONVENTION]: 'convention',
+  [AI_TRAILER_KEYS.FACT]: 'fact',
+};
+
+/** Uniform fact shape for merging heuristic, LLM, and trailer results. */
 interface IUnifiedFact {
   readonly content: string;
   readonly type: MemoryType;
   readonly confidence: ConfidenceLevel;
   readonly tags: readonly string[];
-  readonly source: 'heuristic-extraction' | 'llm-enrichment';
+  readonly source: 'heuristic-extraction' | 'llm-enrichment' | 'commit-trailer';
 }
 
 export class ExtractService implements IExtractService {
@@ -46,6 +56,7 @@ export class ExtractService implements IExtractService {
     private readonly gitClient?: IGitClient,
     private readonly llmClient?: ILLMClient,
     private readonly logger?: ILogger,
+    private readonly trailerService?: ITrailerService,
   ) {}
 
   async extract(options?: IExtractOptions): Promise {
@@ -85,8 +96,17 @@ export class ExtractService implements IExtractService {
     for (const scored of triageResult.highInterest) {
       commitIndex++;
       options?.onProgress?.({ phase: 'processing', current: commitIndex, total: highInterestTotal, sha: scored.commit.sha, subject: scored.commit.subject, factsExtracted: totalFactsExtracted });
+
+      // Read existing AI-* trailers from this commit (authoritative, high-confidence)
+      const trailerFacts = this.extractTrailerFacts(scored.commit.sha, options?.cwd);
+      const trailerTypes = new Set(trailerFacts.map(f => f.type));
+
+      // Heuristic extraction — skip types already covered by trailers
       const text = `${scored.commit.subject}\n${scored.commit.body}`.trim();
-      const heuristicMatches = extractPatternMatches(text);
+      const allHeuristicMatches = extractPatternMatches(text);
+      const heuristicMatches = trailerTypes.size > 0
+        ? allHeuristicMatches.filter(m => !trailerTypes.has(m.factType))
+        : allHeuristicMatches;
 
       // LLM enrichment (if enabled)
       let llmFacts: ILLMExtractedFact[] = [];
@@ -121,8 +141,8 @@ export class ExtractService implements IExtractService {
         }
       }
 
-      // Merge heuristic + LLM facts with deduplication
-      const mergedFacts = mergeFacts(heuristicMatches, llmFacts);
+      // Trailer facts go first, un-deduplicated; only heuristic + LLM facts are merged with deduplication
+      const mergedFacts = [...trailerFacts, ...mergeFacts(heuristicMatches, llmFacts)];
 
       if (mergedFacts.length === 0) continue;
 
@@ -175,6 +195,49 @@ export class ExtractService implements IExtractService {
 
     return result;
   }
+
+  /**
+   * Builds facts from the AI-* trailers already recorded on a commit.
+   *
+   * The commit's AI-Confidence and AI-Tags trailers apply to every fact built
+   * from it; confidence defaults to 'high' when that trailer is absent or empty.
+   *
+   * @param sha - Commit whose trailers are read.
+   * @param cwd - Repository directory passed through to the trailer service.
+   * @returns One fact per non-empty trailer whose key maps to a memory type; []
+   *   when no trailer service is configured or the trailers cannot be read.
+   */
+  private extractTrailerFacts(sha: string, cwd?: string): IUnifiedFact[] {
+    if (!this.trailerService) return [];
+
+    try {
+      const trailers = this.trailerService.readTrailers(sha, cwd);
+      if (trailers.length === 0) return [];
+
+      // NOTE(review): the trailer value is cast unchecked — confirm it is validated as a ConfidenceLevel upstream.
+      const confidence = (trailers.find(t => t.key === AI_TRAILER_KEYS.CONFIDENCE)?.value || 'high') as ConfidenceLevel;
+      const tagsStr = trailers.find(t => t.key === AI_TRAILER_KEYS.TAGS)?.value;
+      // Drop empty pieces so "a,,b" or a trailing comma never produces a '' tag.
+      const tags = (tagsStr ?? '').split(',').map(t => t.trim()).filter(t => t.length > 0);
+
+      const facts: IUnifiedFact[] = [];
+      for (const trailer of trailers) {
+        const type = TRAILER_KEY_TO_MEMORY_TYPE[trailer.key];
+        if (!type) continue;
+
+        // A trailer without any text carries nothing worth storing.
+        const content = trailer.value.trim();
+        if (content.length === 0) continue;
+
+        facts.push({ content, type, confidence, tags, source: 'commit-trailer' });
+      }
+
+      return facts;
+    } catch {
+      // Best-effort: a failed trailer read yields no trailer facts instead of aborting extraction.
+      return [];
+    }
+  }
 }
 
 /**
diff --git a/tests/unit/application/services/ExtractService.test.ts b/tests/unit/application/services/ExtractService.test.ts
index 3742da3b..ca276256 100644
--- a/tests/unit/application/services/ExtractService.test.ts
+++ b/tests/unit/application/services/ExtractService.test.ts
@@ -10,6 +10,7 @@ import { GitTriageService } from '../../../../src/application/services/GitTriage
 import { MemoryRepository } from '../../../../src/infrastructure/repositories/MemoryRepository';
 import { NotesService } from '../../../../src/infrastructure/services/NotesService';
 import { GitClient } from '../../../../src/infrastructure/git/GitClient';
+import { TrailerService } from '../../../../src/infrastructure/services/TrailerService';
 
 function git(args: string[], cwd: string): string {
   return execFileSync('git', args, { encoding: 'utf8', cwd }).trim();
@@ -17,14 +18,18 @@ function git(args: string[], cwd: string): string {
 
 describe('ExtractService', () => {
   let service: ExtractService;
+  let serviceWithTrailers: ExtractService;
+  let memoryRepo: MemoryRepository;
   let repoDir: string;
 
   before(() => {
     const gitClient = new GitClient();
     const triageService = new GitTriageService(gitClient);
     const notesService = new NotesService();
-    const memoryRepo = new MemoryRepository(notesService);
+    memoryRepo = new MemoryRepository(notesService);
+    const trailerService = new TrailerService();
     service = new ExtractService(triageService, memoryRepo);
+    serviceWithTrailers = new ExtractService(triageService, memoryRepo, gitClient, undefined, undefined, trailerService);
 
     repoDir = mkdtempSync(join(tmpdir(), 'git-mem-extract-test-'));
     git(['init'], repoDir);
@@ -142,4 +147,128 @@ describe('ExtractService', () => {
       assert.equal(events[1].factsExtracted, 0);
     });
   });
+
+  describe('extract with trailers', () => {
+    let trailerRepoDir: string;
+
+    before(() => {
+      trailerRepoDir = mkdtempSync(join(tmpdir(), 'git-mem-extract-trailer-'));
+      git(['init'], trailerRepoDir);
+      git(['config', 'user.email', 'test@test.com'], trailerRepoDir);
+      git(['config', 'user.name', 'Test User'], trailerRepoDir);
+
+      writeFileSync(join(trailerRepoDir, 'file1.txt'), 'hello');
+      git(['add', '.'], trailerRepoDir);
+      git(['commit', '-m', 'initial commit'], trailerRepoDir);
+    });
+
+    after(() => {
+      rmSync(trailerRepoDir, { recursive: true, force: true });
+    });
+
+    it('should import AI-* trailers as high-confidence memories', async () => {
+      // Create a commit with AI-Decision trailer
+      writeFileSync(join(trailerRepoDir, 'trailer1.txt'), 'trailer1');
+      git(['add', '.'], trailerRepoDir);
+      const msg = 'feat: add caching layer\n\nAI-Decision: Use Redis for caching\nAI-Confidence: high';
+      git(['commit', '-m', msg], trailerRepoDir);
+
+      const result = await serviceWithTrailers.extract({
+        cwd: trailerRepoDir,
+        dryRun: false,
+        threshold: 1,
+      });
+
+      // Should extract the trailer as a fact
+      const annotation = result.annotations.find(a => a.subject === 'feat: add caching layer');
+      assert.ok(annotation, 'should find annotation for trailer commit');
+      assert.ok(annotation.factsExtracted >= 1);
+      assert.ok(annotation.factTypes.includes('decision'));
+
+      // Verify the memory was stored with correct source
+      const memories = memoryRepo.query({ cwd: trailerRepoDir });
+      const trailerMemory = memories.memories.find(m => m.content === 'Use Redis for caching');
+      assert.ok(trailerMemory);
+      assert.equal(trailerMemory.source, 'commit-trailer');
+      assert.equal(trailerMemory.type, 'decision');
+    });
+
+    it('should not duplicate when trailer and heuristic extract same type', async () => {
+      // Create a commit with both a decision trailer AND decision keywords in the message
+      writeFileSync(join(trailerRepoDir, 'dedup1.txt'), 'dedup1');
+      git(['add', '.'], trailerRepoDir);
+      const msg = [
+        'feat: migrate to PostgreSQL',
+        '',
+        'Decided to use PostgreSQL instead of MySQL because it has better JSON support.',
+        '',
+        'AI-Decision: Use PostgreSQL for persistence',
+        'AI-Confidence: high',
+      ].join('\n');
+      git(['commit', '-m', msg], trailerRepoDir);
+
+      const result = await serviceWithTrailers.extract({
+        cwd: trailerRepoDir,
+        dryRun: false,
+        threshold: 1,
+      });
+
+      const annotation = result.annotations.find(a => a.subject === 'feat: migrate to PostgreSQL');
+      assert.ok(annotation);
+
+      // The trailer decision should be included but heuristic decision should be skipped
+      const memories = memoryRepo.query({ cwd: trailerRepoDir });
+      const pgMemories = memories.memories.filter(
+        m => m.sha === annotation.sha && m.type === 'decision'
+      );
+
+      // Should have exactly 1 decision (from trailer), not 2 (trailer + heuristic)
+      assert.equal(pgMemories.length, 1, 'should not duplicate decision from trailer + heuristic');
+      assert.equal(pgMemories[0].source, 'commit-trailer');
+    });
+
+    it('should still extract heuristic facts for types not covered by trailers', async () => {
+      // Commit with AI-Decision trailer but gotcha keywords in message
+      writeFileSync(join(trailerRepoDir, 'mixed1.txt'), 'mixed1');
+      git(['add', '.'], trailerRepoDir);
+      const msg = [
+        'feat: add auth middleware',
+        '',
+        'Watch out: tokens expire after 24h, must handle refresh.',
+        '',
+        'AI-Decision: Use middleware pattern for auth',
+        'AI-Confidence: high',
+      ].join('\n');
+      git(['commit', '-m', msg], trailerRepoDir);
+
+      const result = await serviceWithTrailers.extract({
+        cwd: trailerRepoDir,
+        dryRun: false,
+        threshold: 1,
+      });
+
+      const annotation = result.annotations.find(a => a.subject === 'feat: add auth middleware');
+      assert.ok(annotation);
+      // Should have both decision (from trailer) and gotcha (from heuristic)
+      assert.ok(annotation.factTypes.includes('decision'));
+      assert.ok(annotation.factTypes.includes('gotcha'));
+    });
+
+    it('should work without trailerService (existing behavior preserved)', async () => {
+      writeFileSync(join(trailerRepoDir, 'no-svc.txt'), 'no-svc');
+      git(['add', '.'], trailerRepoDir);
+      const msg = 'feat: add validation\n\nAI-Decision: Use Zod for validation';
+      git(['commit', '-m', msg], trailerRepoDir);
+
+      // Service without trailerService should still work (ignores trailers)
+      const result = await service.extract({
+        cwd: trailerRepoDir,
+        dryRun: true,
+        threshold: 1,
+      });
+
+      assert.ok(result.commitsScanned >= 1);
+      assert.ok(result.durationMs >= 0);
+    });
+  });
 });