-
Notifications
You must be signed in to change notification settings - Fork 0
feat: liberate reads AI-* trailers from commit history (GIT-69) #40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,12 +16,14 @@ import type { | |
| import type { IGitTriageService } from '../../domain/interfaces/IGitTriageService'; | ||
| import type { IMemoryRepository } from '../../domain/interfaces/IMemoryRepository'; | ||
| import type { IGitClient } from '../../domain/interfaces/IGitClient'; | ||
| import type { ITrailerService } from '../../domain/interfaces/ITrailerService'; | ||
| import type { ILLMClient, ILLMExtractedFact } from '../../domain/interfaces/ILLMClient'; | ||
| import type { MemoryType } from '../../domain/entities/IMemoryEntity'; | ||
| import type { ConfidenceLevel } from '../../domain/types/IMemoryQuality'; | ||
| import type { IPatternMatch } from '../../infrastructure/services/patterns/HeuristicPatterns'; | ||
| import { extractPatternMatches } from '../../infrastructure/services/patterns/HeuristicPatterns'; | ||
| import { extractWords, jaccardSimilarity } from '../../domain/utils/deduplication'; | ||
| import { AI_TRAILER_KEYS } from '../../domain/entities/ITrailer'; | ||
| import type { ILogger } from '../../domain/interfaces/ILogger'; | ||
|
|
||
| /** Maximum diff length sent to LLM (chars). Truncated at line boundary. */ | ||
|
|
@@ -30,13 +32,21 @@ const MAX_DIFF_LENGTH = 15_000; | |
| /** Jaccard similarity threshold for deduplication between heuristic and LLM facts. */ | ||
| const DEDUP_THRESHOLD = 0.7; | ||
|
|
||
| /** Uniform fact shape for merging heuristic and LLM results. */ | ||
| /** Trailer key → MemoryType mapping. */ | ||
| const TRAILER_KEY_TO_MEMORY_TYPE: Record<string, MemoryType> = { | ||
| [AI_TRAILER_KEYS.DECISION]: 'decision', | ||
| [AI_TRAILER_KEYS.GOTCHA]: 'gotcha', | ||
| [AI_TRAILER_KEYS.CONVENTION]: 'convention', | ||
| [AI_TRAILER_KEYS.FACT]: 'fact', | ||
| }; | ||
|
|
||
| /** Uniform fact shape for merging heuristic, LLM, and trailer results. */ | ||
| interface IUnifiedFact { | ||
| readonly content: string; | ||
| readonly type: MemoryType; | ||
| readonly confidence: ConfidenceLevel; | ||
| readonly tags: readonly string[]; | ||
| readonly source: 'heuristic-extraction' | 'llm-enrichment'; | ||
| readonly source: 'heuristic-extraction' | 'llm-enrichment' | 'commit-trailer'; | ||
| } | ||
|
|
||
| export class ExtractService implements IExtractService { | ||
|
|
@@ -46,6 +56,7 @@ export class ExtractService implements IExtractService { | |
| private readonly gitClient?: IGitClient, | ||
| private readonly llmClient?: ILLMClient, | ||
| private readonly logger?: ILogger, | ||
| private readonly trailerService?: ITrailerService, | ||
| ) {} | ||
|
|
||
| async extract(options?: IExtractOptions): Promise<IExtractResult> { | ||
|
|
@@ -85,8 +96,17 @@ export class ExtractService implements IExtractService { | |
| for (const scored of triageResult.highInterest) { | ||
| commitIndex++; | ||
| options?.onProgress?.({ phase: 'processing', current: commitIndex, total: highInterestTotal, sha: scored.commit.sha, subject: scored.commit.subject, factsExtracted: totalFactsExtracted }); | ||
|
|
||
| // Read existing AI-* trailers from this commit (authoritative, high-confidence) | ||
| const trailerFacts = this.extractTrailerFacts(scored.commit.sha, options?.cwd); | ||
| const trailerTypes = new Set(trailerFacts.map(f => f.type)); | ||
|
|
||
| // Heuristic extraction — skip types already covered by trailers | ||
| const text = `${scored.commit.subject}\n${scored.commit.body}`.trim(); | ||
| const heuristicMatches = extractPatternMatches(text); | ||
| const allHeuristicMatches = extractPatternMatches(text); | ||
| const heuristicMatches = trailerTypes.size > 0 | ||
| ? allHeuristicMatches.filter(m => !trailerTypes.has(m.factType)) | ||
| : allHeuristicMatches; | ||
|
|
||
| // LLM enrichment (if enabled) | ||
| let llmFacts: ILLMExtractedFact[] = []; | ||
|
|
@@ -121,8 +141,8 @@ export class ExtractService implements IExtractService { | |
| } | ||
| } | ||
|
|
||
| // Merge heuristic + LLM facts with deduplication | ||
| const mergedFacts = mergeFacts(heuristicMatches, llmFacts); | ||
| // Merge trailer + heuristic + LLM facts with deduplication | ||
| const mergedFacts = [...trailerFacts, ...mergeFacts(heuristicMatches, llmFacts)]; | ||
|
|
||
| if (mergedFacts.length === 0) continue; | ||
|
|
||
|
|
@@ -175,6 +195,37 @@ export class ExtractService implements IExtractService { | |
|
|
||
| return result; | ||
| } | ||
|
|
||
| private extractTrailerFacts(sha: string, cwd?: string): IUnifiedFact[] { | ||
| if (!this.trailerService) return []; | ||
|
|
||
| try { | ||
| const trailers = this.trailerService.readTrailers(sha, cwd); | ||
| if (trailers.length === 0) return []; | ||
|
|
||
| const facts: IUnifiedFact[] = []; | ||
| const confidence = (trailers.find(t => t.key === AI_TRAILER_KEYS.CONFIDENCE)?.value || 'high') as ConfidenceLevel; | ||
|
||
| const tagsStr = trailers.find(t => t.key === AI_TRAILER_KEYS.TAGS)?.value; | ||
| const tags: string[] = tagsStr ? tagsStr.split(',').map(t => t.trim()) : []; | ||
|
|
||
| for (const trailer of trailers) { | ||
| const type = TRAILER_KEY_TO_MEMORY_TYPE[trailer.key]; | ||
| if (!type) continue; | ||
|
|
||
| facts.push({ | ||
| content: trailer.value, | ||
| type, | ||
| confidence, | ||
| tags, | ||
| source: 'commit-trailer', | ||
| }); | ||
| } | ||
|
|
||
| return facts; | ||
| } catch { | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| return []; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The extractTrailerFacts method spawns a git process for each commit via readTrailers(sha). For large liberations, this could be inefficient. Consider using queryTrailers (which can batch-query multiple commits at once) to fetch all trailer data upfront, then look up by SHA during processing. This is a performance optimization opportunity, not a blocking issue, but could significantly speed up large-scale liberations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good callout. For now, `readTrailers()` per commit is acceptable since liberate processes high-interest commits only (typically a small subset). If this becomes a bottleneck, we can switch to `queryTrailers()` for batch retrieval. Deferring as a future optimization.