diff --git a/packages/adf/src/__tests__/evidence.test.ts b/packages/adf/src/__tests__/evidence.test.ts
new file mode 100644
index 0000000..a314bf4
--- /dev/null
+++ b/packages/adf/src/__tests__/evidence.test.ts
@@ -0,0 +1,162 @@
+import { describe, it, expect } from 'vitest';
+import { evaluateEvidence } from '../evidence';
+import type { BundleResult } from '../types';
+
+/**
+ * Build a minimal BundleResult fixture: a single `core.adf` module whose merged
+ * document holds one load-bearing CONSTRAINTS section. Fields passed in
+ * `overrides` replace the defaults wholesale (shallow spread, not a deep merge).
+ */
+function makeBundleResult(overrides: Partial<BundleResult> = {}): BundleResult {
+  return {
+    manifest: {
+      version: '0.1',
+      defaultLoad: ['core.adf'],
+      onDemand: [],
+      rules: [],
+      sync: [],
+      cadence: [],
+      metrics: [],
+    },
+    resolvedModules: ['core.adf'],
+    mergedDocument: {
+      version: '0.1',
+      sections: [
+        {
+          key: 'CONSTRAINTS',
+          decoration: null,
+          content: { type: 'list', items: ['No secrets'] },
+          weight: 'load-bearing',
+        },
+      ],
+    },
+    tokenEstimate: 50,
+    tokenBudget: 4000,
+    tokenUtilization: 0.0125,
+    perModuleTokens: { 'core.adf': 50 },
+    moduleBudgetOverruns: [],
+    triggerMatches: [],
+    unmatchedModules: [],
+    advisoryOnlyModules: [],
+    ...overrides,
+  };
+}
+
+/**
+ * Bundle whose merged document is a single METRICS section carrying one
+ * `loc` entry (baseline value 100, ceiling 200, unit lines).
+ */
+function makeLocMetricsBundle(): BundleResult {
+  return makeBundleResult({
+    mergedDocument: {
+      version: '0.1',
+      sections: [
+        {
+          key: 'METRICS',
+          decoration: null,
+          content: {
+            type: 'metric',
+            entries: [{ key: 'loc', value: 100, ceiling: 200, unit: 'lines' }],
+          },
+        },
+      ],
+    },
+  });
+}
+
+describe('evaluateEvidence', () => {
+  it('forwards constraint results from validateConstraints', () => {
+    const bundle = makeBundleResult({
+      mergedDocument: {
+        version: '0.1',
+        sections: [
+          {
+            key: 'METRICS',
+            decoration: null,
+            content: {
+              type: 'metric',
+              entries: [{ key: 'loc', value: 100, ceiling: 200, unit: 'lines' }],
+            },
+            weight: 'load-bearing',
+          },
+        ],
+      },
+    });
+    const report = evaluateEvidence(bundle, { loc: 150 });
+    expect(report.constraints).toHaveLength(1);
+    expect(report.constraints[0].status).toBe('pass');
+    expect(report.constraints[0].value).toBe(150);
+    expect(report.allPassing).toBe(true);
+  });
+
+  it('reports failing constraints', () => {
+    const bundle = makeLocMetricsBundle();
+    const report = evaluateEvidence(bundle, { loc: 250 });
+    expect(report.constraints[0].status).toBe('fail');
+    expect(report.allPassing).toBe(false);
+    expect(report.failCount).toBe(1);
+  });
+
+  it('reports warn status at ceiling boundary', () => {
+    const bundle = makeLocMetricsBundle();
+    const report = evaluateEvidence(bundle, { loc: 200 });
+    expect(report.constraints[0].status).toBe('warn');
+    expect(report.warnCount).toBe(1);
+  });
+
+  it('forwards token data from bundle', () => {
+    const bundle = makeBundleResult({
+      tokenEstimate: 500,
+      tokenBudget: 4000,
+      tokenUtilization: 0.125,
+      perModuleTokens: { 'core.adf': 300, 'frontend.adf': 200 },
+    });
+    const report = evaluateEvidence(bundle);
+    expect(report.tokenEstimate).toBe(500);
+    expect(report.tokenBudget).toBe(4000);
+    expect(report.tokenUtilization).toBe(0.125);
+    expect(report.perModuleTokens).toEqual({ 'core.adf': 300, 'frontend.adf': 200 });
+  });
+
+  it('forwards module budget overruns', () => {
+    const overruns = [{ module: 'frontend.adf', tokens: 150, budget: 100 }];
+    const bundle = makeBundleResult({ moduleBudgetOverruns: overruns });
+    const report = evaluateEvidence(bundle);
+    expect(report.moduleBudgetOverruns).toEqual(overruns);
+  });
+
+  it('forwards advisory-only modules', () => {
+    const bundle = makeBundleResult({ advisoryOnlyModules: ['frontend.adf'] });
+    const report = evaluateEvidence(bundle);
+    expect(report.advisoryOnlyModules).toEqual(['frontend.adf']);
+  });
+
+  it('includes weight summary', () => {
+    const report = evaluateEvidence(makeBundleResult());
+    expect(report.weightSummary).toBeDefined();
+    expect(report.weightSummary.total).toBeGreaterThan(0);
+  });
+
+  it('detects stale baselines when context drifts beyond threshold', () => {
+    const bundle = makeLocMetricsBundle();
+    // current 150, baseline 100 → ratio 1.5 > default threshold 1.2
+    const report = evaluateEvidence(bundle, { loc: 150 });
+    expect(report.staleBaselines).toHaveLength(1);
+    expect(report.staleBaselines[0].metric).toBe('loc');
+    expect(report.staleBaselines[0].baseline).toBe(100);
+    expect(report.staleBaselines[0].current).toBe(150);
+    expect(report.staleBaselines[0].rationaleRequired).toBe(true);
+  });
+
+  it('respects custom stale threshold', () => {
+    const bundle = makeLocMetricsBundle();
+    // ratio 1.5, threshold 2.0 → not stale
+    const report = evaluateEvidence(bundle, { loc: 150 }, 2.0);
+    expect(report.staleBaselines).toHaveLength(0);
+  });
+
+  it('returns empty stale baselines when no context provided', () => {
+    const report = evaluateEvidence(makeBundleResult());
+    expect(report.staleBaselines).toEqual([]);
+  });
+});
diff --git a/packages/adf/src/evidence.ts b/packages/adf/src/evidence.ts
new file mode 100644
index 0000000..0966bbc
--- /dev/null
+++ b/packages/adf/src/evidence.ts
@@ -0,0 +1,116 @@
+/**
+ * ADF Evidence — unified evaluation combining constraint validation,
+ * token analysis, and baseline staleness.
+ *
+ * Produces a single EvidenceReport from a BundleResult and optional
+ * external metric context, replacing the need for consumers to manually
+ * assemble evidence from multiple sources.
+ */
+
+import type { AdfDocument, BundleResult, EvidenceResult } from './types';
+import { validateConstraints } from './validator';
+
+/** Default stale ratio (current / baseline) applied when the caller supplies no threshold. */
+const DEFAULT_STALE_THRESHOLD = 1.2;
+/** Multiplier applied to the current value when suggesting a recalibrated ceiling. */
+const RECOMMENDED_CEILING_HEADROOM = 1.15;
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export interface StaleBaselineWarning {
+  metric: string; // metric key, lower-cased
+  baseline: number; // value recorded in the ADF METRICS entry
+  current: number; // value supplied through the metric context
+  delta: number; // current - baseline
+  ratio: number; // current / baseline, rounded to 2 decimals
+  recommendedCeiling: number; // ceil(current * 1.15)
+  rationaleRequired: boolean; // always true on emitted warnings
+}
+
+export interface EvidenceReport extends EvidenceResult {
+  tokenEstimate: number;
+  tokenBudget: number | null;
+  tokenUtilization: number | null;
+  perModuleTokens: Record<string, number>;
+  moduleBudgetOverruns: BundleResult['moduleBudgetOverruns'];
+  advisoryOnlyModules: string[];
+  staleBaselines: StaleBaselineWarning[];
+}
+
+// ============================================================================
+// Evaluation
+// ============================================================================
+
+/**
+ * Evaluate evidence from a bundle result and optional metric context.
+ *
+ * Runs validateConstraints() on the merged document, detects stale baselines,
+ * and forwards the bundle's token and module data into one unified report.
+ *
+ * @param bundle - Result from bundleModules()
+ * @param context - Optional external metric overrides (e.g., actual LOC counts)
+ * @param staleThreshold - Ratio threshold for stale baseline detection (default 1.2)
+ * @returns Constraint results plus token data, budget overruns, and stale baselines
+ */
+export function evaluateEvidence(
+  bundle: BundleResult,
+  context?: Record<string, number>,
+  staleThreshold?: number,
+): EvidenceReport {
+  const evidence = validateConstraints(bundle.mergedDocument, context);
+  const threshold = staleThreshold ?? DEFAULT_STALE_THRESHOLD; // not validated here
+  const staleBaselines = detectStaleBaselines(bundle.mergedDocument, context, threshold);
+
+  return {
+    ...evidence,
+    tokenEstimate: bundle.tokenEstimate,
+    tokenBudget: bundle.tokenBudget,
+    tokenUtilization: bundle.tokenUtilization,
+    perModuleTokens: bundle.perModuleTokens,
+    moduleBudgetOverruns: bundle.moduleBudgetOverruns,
+    advisoryOnlyModules: bundle.advisoryOnlyModules,
+    staleBaselines,
+  };
+}
+
+// ============================================================================
+// Stale Baseline Detection
+// ============================================================================
+
+/**
+ * Detect metrics whose current values have drifted significantly from
+ * their ADF baselines, indicating the ceilings may need recalibration.
+ * A metric is flagged when current / baseline >= staleThreshold; entries with
+ * a non-positive baseline or no finite current value in `context` are skipped.
+ */
+function detectStaleBaselines(
+  doc: AdfDocument,
+  context: Record<string, number> | undefined,
+  staleThreshold: number,
+): StaleBaselineWarning[] {
+  if (!context) return [];
+  const warnings: StaleBaselineWarning[] = [];
+  for (const section of doc.sections) {
+    if (section.key !== 'METRICS' || section.content.type !== 'metric') continue;
+    for (const entry of section.content.entries) {
+      if (entry.value <= 0) continue; // a ratio against a non-positive baseline is meaningless
+      const key = entry.key.toLowerCase(); // context is looked up by lower-cased key
+      const current = context[key];
+      if (typeof current !== 'number' || !Number.isFinite(current)) continue;
+      const ratio = current / entry.value;
+      if (ratio < staleThreshold) continue;
+      warnings.push({
+        metric: key,
+        baseline: entry.value,
+        current,
+        delta: current - entry.value,
+        ratio: Number(ratio.toFixed(2)),
+        recommendedCeiling: Math.ceil(current * RECOMMENDED_CEILING_HEADROOM),
+        rationaleRequired: true,
+      });
+    }
+  }
+  return warnings;
+}
diff --git a/packages/adf/src/index.ts b/packages/adf/src/index.ts
index 6baea86..9824ef4 100644
--- a/packages/adf/src/index.ts
+++ b/packages/adf/src/index.ts
@@ -3,6 +3,8 @@ export { formatAdf } from './formatter';
 export { applyPatches } from './patcher';
 export { parseManifest, resolveModules, bundleModules } from './bundler';
 export { validateConstraints, computeWeightSummary } from './validator';
+export { evaluateEvidence } from './evidence';
+export
type { EvidenceReport, StaleBaselineWarning } from './evidence'; export { parseMarkdownSections } from './markdown-parser'; export type { MarkdownSection, MarkdownElement, RuleStrength } from './markdown-parser'; export { classifyElement, isDuplicateItem, buildMigrationPlan } from './content-classifier'; diff --git a/packages/cli/src/commands/adf-evidence.ts b/packages/cli/src/commands/adf-evidence.ts index c51c199..cbe202c 100644 --- a/packages/cli/src/commands/adf-evidence.ts +++ b/packages/cli/src/commands/adf-evidence.ts @@ -11,9 +11,9 @@ import { parseManifest, resolveModules, bundleModules, - validateConstraints, + evaluateEvidence, } from '@stackbilt/adf'; -import type { AdfDocument, EvidenceResult } from '@stackbilt/adf'; +import type { EvidenceReport } from '@stackbilt/adf'; import type { CLIOptions } from '../index'; import { CLIError, EXIT_CODE } from '../index'; import { getFlag, readFlagFile, tokenizeTask } from '../flags'; @@ -26,16 +26,6 @@ interface AutoMeasurement { error?: string; } -interface StaleBaselineWarning { - metric: string; - baseline: number; - current: number; - delta: number; - ratio: number; - recommendedCeiling: number; - rationaleRequired: boolean; -} - export function adfEvidence(options: CLIOptions, args: string[]): number { const task = getFlag(args, '--task'); const aiDir = getFlag(args, '--ai-dir') || '.ai'; @@ -103,8 +93,7 @@ export function adfEvidence(options: CLIOptions, args: string[]): number { try { const bundle = bundleModules(aiDir, modulePaths, readFile, keywords, manifest); - const evidence: EvidenceResult = validateConstraints(bundle.mergedDocument, context); - const staleBaselines = detectStaleBaselines(bundle.mergedDocument, context, staleThreshold); + const report: EvidenceReport = evaluateEvidence(bundle, context, staleThreshold); // Check sync status const lockFile = path.join(aiDir, '.adf.lock'); @@ -126,42 +115,42 @@ export function adfEvidence(options: CLIOptions, args: string[]): number { const jsonOut: 
Record = { aiDir, resolvedModules: bundle.resolvedModules, - tokenEstimate: bundle.tokenEstimate, - tokenBudget: bundle.tokenBudget, - tokenUtilization: bundle.tokenUtilization, - constraints: evidence.constraints, - weightSummary: evidence.weightSummary, - allPassing: evidence.allPassing, - failCount: evidence.failCount, - warnCount: evidence.warnCount, - staleBaselineCount: staleBaselines.length, + tokenEstimate: report.tokenEstimate, + tokenBudget: report.tokenBudget, + tokenUtilization: report.tokenUtilization, + constraints: report.constraints, + weightSummary: report.weightSummary, + allPassing: report.allPassing, + failCount: report.failCount, + warnCount: report.warnCount, + staleBaselineCount: report.staleBaselines.length, syncStatus: { allInSync, staleCount }, }; if (task) { jsonOut.task = task; jsonOut.keywords = keywords; } - if (bundle.advisoryOnlyModules.length > 0) { - jsonOut.advisoryOnlyModules = bundle.advisoryOnlyModules; + if (report.advisoryOnlyModules.length > 0) { + jsonOut.advisoryOnlyModules = report.advisoryOnlyModules; } if (autoMeasured.length > 0) { jsonOut.autoMeasured = autoMeasured; } - if (staleBaselines.length > 0) { - jsonOut.staleBaselines = staleBaselines; + if (report.staleBaselines.length > 0) { + jsonOut.staleBaselines = report.staleBaselines; } // Suggest logical next steps based on results const nextActions: string[] = []; - if (!evidence.allPassing) { + if (!report.allPassing) { nextActions.push('Fix failing constraints before merging'); } if (!allInSync) { nextActions.push('charter adf sync --write'); } - if (evidence.warnCount > 0) { + if (report.warnCount > 0) { nextActions.push('Review metrics at ceiling boundary'); } - if (staleBaselines.length > 0) { + if (report.staleBaselines.length > 0) { nextActions.push('charter adf metrics recalibrate --headroom 15 --reason "" --dry-run'); } if (nextActions.length > 0) { @@ -173,12 +162,12 @@ export function adfEvidence(options: CLIOptions, args: string[]): number { 
console.log(' ADF Evidence Report'); console.log(' ==================='); console.log(` Modules loaded: ${bundle.resolvedModules.join(', ')}`); - console.log(` Token estimate: ~${bundle.tokenEstimate}`); - if (bundle.tokenBudget !== null) { - const pct = bundle.tokenUtilization !== null - ? ` (${(bundle.tokenUtilization * 100).toFixed(0)}%)` + console.log(` Token estimate: ~${report.tokenEstimate}`); + if (report.tokenBudget !== null) { + const pct = report.tokenUtilization !== null + ? ` (${(report.tokenUtilization * 100).toFixed(0)}%)` : ''; - console.log(` Token budget: ${bundle.tokenBudget}${pct}`); + console.log(` Token budget: ${report.tokenBudget}${pct}`); } console.log(''); @@ -195,9 +184,9 @@ export function adfEvidence(options: CLIOptions, args: string[]): number { console.log(''); } - if (staleBaselines.length > 0) { + if (report.staleBaselines.length > 0) { console.log(' Stale baseline warnings:'); - for (const s of staleBaselines) { + for (const s of report.staleBaselines) { console.log(` [warn] ${s.metric}: baseline ${s.baseline}, current ${s.current}, delta ${s.delta}, recommended ceiling ${s.recommendedCeiling} (rationale required)`); } console.log(''); @@ -205,24 +194,24 @@ export function adfEvidence(options: CLIOptions, args: string[]): number { // Weight summary console.log(' Section weights:'); - console.log(` Load-bearing: ${evidence.weightSummary.loadBearing}`); - console.log(` Advisory: ${evidence.weightSummary.advisory}`); - console.log(` Unweighted: ${evidence.weightSummary.unweighted}`); + console.log(` Load-bearing: ${report.weightSummary.loadBearing}`); + console.log(` Advisory: ${report.weightSummary.advisory}`); + console.log(` Unweighted: ${report.weightSummary.unweighted}`); console.log(''); // Advisory-only module warnings - if (bundle.advisoryOnlyModules.length > 0) { + if (report.advisoryOnlyModules.length > 0) { console.log(' Advisory-only modules:'); - for (const m of bundle.advisoryOnlyModules) { + for (const m of 
report.advisoryOnlyModules) { console.log(` [!] ${m}: no load-bearing sections`); } console.log(''); } // Constraints - if (evidence.constraints.length > 0) { + if (report.constraints.length > 0) { console.log(' Constraints:'); - for (const c of evidence.constraints) { + for (const c of report.constraints) { const icon = c.status === 'pass' ? 'ok' : c.status === 'warn' ? 'WARN' : 'FAIL'; console.log(` [${icon}] ${c.message}`); } @@ -244,16 +233,16 @@ export function adfEvidence(options: CLIOptions, args: string[]): number { console.log(''); // Verdict - const verdict = evidence.allPassing ? 'PASS' : 'FAIL'; + const verdict = report.allPassing ? 'PASS' : 'FAIL'; console.log(` Verdict: ${verdict}`); - if (evidence.warnCount > 0) { - console.log(` (${evidence.warnCount} warning${evidence.warnCount === 1 ? '' : 's'} — at ceiling boundary)`); + if (report.warnCount > 0) { + console.log(` (${report.warnCount} warning${report.warnCount === 1 ? '' : 's'} — at ceiling boundary)`); } console.log(''); } // CI mode: exit 1 on constraint failures - if (options.ciMode && !evidence.allPassing) { + if (options.ciMode && !report.allPassing) { return EXIT_CODE.POLICY_VIOLATION; } @@ -280,32 +269,3 @@ function parseStaleThreshold(raw: string): number { return parsed; } -function detectStaleBaselines( - doc: AdfDocument, - context: Record | undefined, - staleThreshold: number -): StaleBaselineWarning[] { - if (!context) return []; - const warnings: StaleBaselineWarning[] = []; - for (const section of doc.sections) { - if (section.key !== 'METRICS' || section.content.type !== 'metric') continue; - for (const entry of section.content.entries) { - if (entry.value <= 0) continue; - const key = entry.key.toLowerCase(); - const current = context[key]; - if (!Number.isFinite(current)) continue; - const ratio = current / entry.value; - if (ratio < staleThreshold) continue; - warnings.push({ - metric: key, - baseline: entry.value, - current, - delta: current - entry.value, - ratio: 
Number(ratio.toFixed(2)), - recommendedCeiling: Math.ceil(current * 1.15), - rationaleRequired: true, - }); - } - } - return warnings; -}