From bb65d91d67d11372043e7da1c6badbea3ac9b4e4 Mon Sep 17 00:00:00 2001 From: Srikanth Rao M Date: Sat, 28 Mar 2026 17:10:37 +0530 Subject: [PATCH 1/2] feat(cli): add analysis/ module with prompt builders, parsers, and normalizers Move 9 pure-function modules from server/src/llm/ to cli/src/analysis/ as the foundation for native analysis (--native mode, Issue #238, Phase 12 v4.8.0). New modules in cli/src/analysis/: - prompt-types.ts: AnalysisResponse, PromptQualityResponse, SQLiteMessageRow, ContentBlock - prompt-constants.ts: Canonical categories and classification guidance strings - prompts.ts: buildSessionAnalysisInstructions, buildPromptQualityInstructions, buildFacetOnlyInstructions - message-format.ts: formatMessagesForAnalysis, classifyStoredUserMessage, formatSessionMetaLine - response-parsers.ts: parseAnalysisResponse, parsePromptQualityResponse, extractJsonPayload - normalize-utils.ts: levenshtein, normalizeCategory, kebabToTitleCase - friction-normalize.ts: normalizeFrictionCategory - pattern-normalize.ts: normalizePatternCategory, getPatternCategoryLabel - prompt-quality-normalize.ts: normalizePromptQualityCategory, getPQCategoryLabel, getPQCategoryType Add 9 ./analysis/* exports to cli/package.json. Move test files to cli/src/analysis/__tests__/ (5 test files, 131 tests). 
Co-Authored-By: Claude Sonnet 4.6 --- cli/package.json | 11 +- .../__tests__/friction-normalize.test.ts | 153 ++++ .../__tests__/normalize-utils.test.ts | 69 ++ .../__tests__/pattern-normalize.test.ts | 189 +++++ .../prompt-quality-normalize.test.ts | 99 +++ cli/src/analysis/__tests__/prompts.test.ts | 702 ++++++++++++++++++ cli/src/analysis/friction-normalize.ts | 56 ++ cli/src/analysis/message-format.ts | 142 ++++ cli/src/analysis/normalize-utils.ts | 87 +++ cli/src/analysis/pattern-normalize.ts | 101 +++ cli/src/analysis/prompt-constants.ts | 189 +++++ cli/src/analysis/prompt-quality-normalize.ts | 131 ++++ cli/src/analysis/prompt-types.ts | 143 ++++ cli/src/analysis/prompts.ts | 423 +++++++++++ cli/src/analysis/response-parsers.ts | 200 +++++ 15 files changed, 2694 insertions(+), 1 deletion(-) create mode 100644 cli/src/analysis/__tests__/friction-normalize.test.ts create mode 100644 cli/src/analysis/__tests__/normalize-utils.test.ts create mode 100644 cli/src/analysis/__tests__/pattern-normalize.test.ts create mode 100644 cli/src/analysis/__tests__/prompt-quality-normalize.test.ts create mode 100644 cli/src/analysis/__tests__/prompts.test.ts create mode 100644 cli/src/analysis/friction-normalize.ts create mode 100644 cli/src/analysis/message-format.ts create mode 100644 cli/src/analysis/normalize-utils.ts create mode 100644 cli/src/analysis/pattern-normalize.ts create mode 100644 cli/src/analysis/prompt-constants.ts create mode 100644 cli/src/analysis/prompt-quality-normalize.ts create mode 100644 cli/src/analysis/prompt-types.ts create mode 100644 cli/src/analysis/prompts.ts create mode 100644 cli/src/analysis/response-parsers.ts diff --git a/cli/package.json b/cli/package.json index d6b01f5..142c594 100644 --- a/cli/package.json +++ b/cli/package.json @@ -15,7 +15,16 @@ "./utils/browser": "./dist/utils/browser.js", "./constants/llm-providers": "./dist/constants/llm-providers.js", "./utils/pricing": "./dist/utils/pricing.js", - "./utils/telemetry": 
"./dist/utils/telemetry.js" + "./utils/telemetry": "./dist/utils/telemetry.js", + "./analysis/prompts": "./dist/analysis/prompts.js", + "./analysis/prompt-types": "./dist/analysis/prompt-types.js", + "./analysis/prompt-constants": "./dist/analysis/prompt-constants.js", + "./analysis/message-format": "./dist/analysis/message-format.js", + "./analysis/response-parsers": "./dist/analysis/response-parsers.js", + "./analysis/normalize-utils": "./dist/analysis/normalize-utils.js", + "./analysis/friction-normalize": "./dist/analysis/friction-normalize.js", + "./analysis/pattern-normalize": "./dist/analysis/pattern-normalize.js", + "./analysis/prompt-quality-normalize": "./dist/analysis/prompt-quality-normalize.js" }, "bin": { "code-insights": "./dist/index.js" diff --git a/cli/src/analysis/__tests__/friction-normalize.test.ts b/cli/src/analysis/__tests__/friction-normalize.test.ts new file mode 100644 index 0000000..25c7392 --- /dev/null +++ b/cli/src/analysis/__tests__/friction-normalize.test.ts @@ -0,0 +1,153 @@ +import { describe, it, expect } from 'vitest'; +import { normalizeFrictionCategory } from '../friction-normalize.js'; + +// ────────────────────────────────────────────────────── +// normalizeFrictionCategory +// ────────────────────────────────────────────────────── + +describe('normalizeFrictionCategory', () => { + // ──────────────────────────────────────────────────── + // Rule 1: Exact match (case-insensitive) + // ──────────────────────────────────────────────────── + + it('returns canonical for exact match', () => { + expect(normalizeFrictionCategory('knowledge-gap')).toBe('knowledge-gap'); + expect(normalizeFrictionCategory('wrong-approach')).toBe('wrong-approach'); + expect(normalizeFrictionCategory('stale-assumptions')).toBe('stale-assumptions'); + expect(normalizeFrictionCategory('context-loss')).toBe('context-loss'); + expect(normalizeFrictionCategory('scope-creep')).toBe('scope-creep'); + 
expect(normalizeFrictionCategory('repeated-mistakes')).toBe('repeated-mistakes'); + }); + + it('matches case-insensitively', () => { + expect(normalizeFrictionCategory('Knowledge-Gap')).toBe('knowledge-gap'); + expect(normalizeFrictionCategory('WRONG-APPROACH')).toBe('wrong-approach'); + expect(normalizeFrictionCategory('Stale-Assumptions')).toBe('stale-assumptions'); + }); + + // ──────────────────────────────────────────────────── + // Rule 2: Levenshtein distance <= 2 + // ──────────────────────────────────────────────────── + + it('normalizes typos within Levenshtein distance 2', () => { + expect(normalizeFrictionCategory('knowlede-gap')).toBe('knowledge-gap'); // distance 1 + expect(normalizeFrictionCategory('wrong-aproach')).toBe('wrong-approach'); // distance 1 + expect(normalizeFrictionCategory('scope-crepp')).toBe('scope-creep'); // distance 1 + }); + + it('does not match when Levenshtein distance > 2', () => { + // "typo-error" is distance 3 from "type-error" — too far + const result = normalizeFrictionCategory('completely-different-thing'); + expect(result).toBe('completely-different-thing'); + }); + + // ──────────────────────────────────────────────────── + // Rule 3: Substring match (significant portion) + // ──────────────────────────────────────────────────── + + it('matches when canonical is a significant substring', () => { + // "scope-creep-issue" contains "scope-creep" (11 chars, 11/17 = 0.65 > 0.5) + expect(normalizeFrictionCategory('scope-creep-issue')).toBe('scope-creep'); + }); + + it('does not match short substrings (< 5 chars)', () => { + // Very short overlaps should not trigger substring match + const result = normalizeFrictionCategory('abc'); + expect(result).toBe('abc'); + }); + + // ──────────────────────────────────────────────────── + // Rule 1.5: Explicit alias match + // ──────────────────────────────────────────────────── + + it('remaps legacy canonical categories to new taxonomy', () => { + // These were canonical in the old 
15-category taxonomy; they now map to new categories + expect(normalizeFrictionCategory('missing-dependency')).toBe('stale-assumptions'); + expect(normalizeFrictionCategory('config-drift')).toBe('stale-assumptions'); + expect(normalizeFrictionCategory('stale-cache')).toBe('stale-assumptions'); + expect(normalizeFrictionCategory('version-mismatch')).toBe('stale-assumptions'); + expect(normalizeFrictionCategory('permission-issue')).toBe('stale-assumptions'); + expect(normalizeFrictionCategory('environment-mismatch')).toBe('stale-assumptions'); + expect(normalizeFrictionCategory('race-condition')).toBe('wrong-approach'); + expect(normalizeFrictionCategory('circular-dependency')).toBe('wrong-approach'); + expect(normalizeFrictionCategory('test-failure')).toBe('wrong-approach'); + expect(normalizeFrictionCategory('type-error')).toBe('knowledge-gap'); + expect(normalizeFrictionCategory('api-misunderstanding')).toBe('knowledge-gap'); + }); + + it('remaps legacy aliases case-insensitively', () => { + expect(normalizeFrictionCategory('Missing-Dependency')).toBe('stale-assumptions'); + expect(normalizeFrictionCategory('TYPE-ERROR')).toBe('knowledge-gap'); + }); + + it('resolves all agent-orchestration alias variants to the cluster target', () => { + expect(normalizeFrictionCategory('agent-lifecycle-issue')).toBe('agent-orchestration-failure'); + expect(normalizeFrictionCategory('agent-communication-failure')).toBe('agent-orchestration-failure'); + expect(normalizeFrictionCategory('agent-communication-breakdown')).toBe('agent-orchestration-failure'); + expect(normalizeFrictionCategory('agent-lifecycle-management')).toBe('agent-orchestration-failure'); + expect(normalizeFrictionCategory('agent-shutdown-failure')).toBe('agent-orchestration-failure'); + }); + + it('resolves all rate-limit alias variants to the cluster target', () => { + expect(normalizeFrictionCategory('api-rate-limit')).toBe('rate-limit-hit'); + 
expect(normalizeFrictionCategory('rate-limiting')).toBe('rate-limit-hit'); + expect(normalizeFrictionCategory('rate-limited')).toBe('rate-limit-hit'); + }); + + it('resolves aliases case-insensitively', () => { + expect(normalizeFrictionCategory('Agent-Lifecycle-Issue')).toBe('agent-orchestration-failure'); + expect(normalizeFrictionCategory('API-RATE-LIMIT')).toBe('rate-limit-hit'); + }); + + it('does not further normalize non-canonical alias targets via Levenshtein', () => { + // "agent-orchestration-failure" is NOT in CANONICAL_FRICTION_CATEGORIES, + // but when returned as an alias target it should be returned as-is (not mangled by Levenshtein). + // Here we test the target itself — it should pass through as a novel category since it + // doesn't match any canonical via Levenshtein and isn't in the alias map as a key. + const result = normalizeFrictionCategory('agent-orchestration-failure'); + // Not canonical, not an alias key → returned as novel category (original casing) + expect(result).toBe('agent-orchestration-failure'); + }); + + it('does not further normalize "rate-limit-hit" target when passed directly', () => { + // Same as above — "rate-limit-hit" is not canonical, so if someone passes it directly + // it comes back as-is (novel category). 
+ const result = normalizeFrictionCategory('rate-limit-hit'); + expect(result).toBe('rate-limit-hit'); + }); + + // ──────────────────────────────────────────────────── + // Rule 4: Novel category (no match) + // ──────────────────────────────────────────────────── + + it('returns original for novel categories', () => { + expect(normalizeFrictionCategory('database-deadlock')).toBe('database-deadlock'); + expect(normalizeFrictionCategory('memory-leak')).toBe('memory-leak'); + expect(normalizeFrictionCategory('flaky-ci')).toBe('flaky-ci'); + }); + + it('preserves original casing for novel categories', () => { + expect(normalizeFrictionCategory('Custom-Category')).toBe('Custom-Category'); + }); + + // ──────────────────────────────────────────────────── + // All canonical categories are recognized + // ──────────────────────────────────────────────────── + + it('recognizes all 9 canonical categories', () => { + const canonicals = [ + 'wrong-approach', + 'knowledge-gap', + 'stale-assumptions', + 'incomplete-requirements', + 'context-loss', + 'scope-creep', + 'repeated-mistakes', + 'documentation-gap', + 'tooling-limitation', + ]; + for (const cat of canonicals) { + expect(normalizeFrictionCategory(cat)).toBe(cat); + } + }); +}); diff --git a/cli/src/analysis/__tests__/normalize-utils.test.ts b/cli/src/analysis/__tests__/normalize-utils.test.ts new file mode 100644 index 0000000..f242925 --- /dev/null +++ b/cli/src/analysis/__tests__/normalize-utils.test.ts @@ -0,0 +1,69 @@ +import { describe, it, expect } from 'vitest'; +import { levenshtein, normalizeCategory, kebabToTitleCase } from '../normalize-utils.js'; + +describe('levenshtein', () => { + it('returns 0 for identical strings', () => { + expect(levenshtein('abc', 'abc')).toBe(0); + }); + + it('returns correct distance for single edit', () => { + expect(levenshtein('kitten', 'sitten')).toBe(1); + }); + + it('returns correct distance for multiple edits', () => { + expect(levenshtein('kitten', 'sitting')).toBe(3); + 
}); + + it('handles empty strings', () => { + expect(levenshtein('', 'abc')).toBe(3); + expect(levenshtein('abc', '')).toBe(3); + expect(levenshtein('', '')).toBe(0); + }); +}); + +describe('normalizeCategory', () => { + const config: Parameters[1] = { + canonicalCategories: ['wrong-approach', 'knowledge-gap', 'stale-assumptions'], + aliases: { 'type-error': 'knowledge-gap', 'agent-issue': 'agent-failure' }, + }; + + it('returns canonical for exact match (case-insensitive)', () => { + expect(normalizeCategory('knowledge-gap', config)).toBe('knowledge-gap'); + expect(normalizeCategory('Knowledge-Gap', config)).toBe('knowledge-gap'); + }); + + it('resolves aliases to their target', () => { + expect(normalizeCategory('type-error', config)).toBe('knowledge-gap'); + }); + + it('resolves aliases to non-canonical cluster targets', () => { + expect(normalizeCategory('agent-issue', config)).toBe('agent-failure'); + }); + + it('normalizes via Levenshtein distance <= 2', () => { + expect(normalizeCategory('knowlede-gap', config)).toBe('knowledge-gap'); // dist 1 + }); + + it('normalizes via substring match', () => { + expect(normalizeCategory('stale-assumptions-here', config)).toBe('stale-assumptions'); + }); + + it('returns original for no match', () => { + expect(normalizeCategory('completely-unrelated', config)).toBe('completely-unrelated'); + }); +}); + +describe('kebabToTitleCase', () => { + it('converts kebab-case to Title Case', () => { + expect(kebabToTitleCase('structured-planning')).toBe('Structured Planning'); + expect(kebabToTitleCase('self-correction')).toBe('Self Correction'); + }); + + it('handles single word', () => { + expect(kebabToTitleCase('planning')).toBe('Planning'); + }); + + it('handles empty string', () => { + expect(kebabToTitleCase('')).toBe(''); + }); +}); diff --git a/cli/src/analysis/__tests__/pattern-normalize.test.ts b/cli/src/analysis/__tests__/pattern-normalize.test.ts new file mode 100644 index 0000000..9692124 --- /dev/null +++ 
b/cli/src/analysis/__tests__/pattern-normalize.test.ts @@ -0,0 +1,189 @@ +import { describe, it, expect } from 'vitest'; +import { normalizePatternCategory, getPatternCategoryLabel } from '../pattern-normalize.js'; + +// ────────────────────────────────────────────────────── +// normalizePatternCategory +// ────────────────────────────────────────────────────── + +describe('normalizePatternCategory', () => { + // ──────────────────────────────────────────────────── + // Rule 1: Exact match (case-insensitive) + // ──────────────────────────────────────────────────── + + it('returns canonical for exact match — all 8 categories', () => { + expect(normalizePatternCategory('structured-planning')).toBe('structured-planning'); + expect(normalizePatternCategory('incremental-implementation')).toBe('incremental-implementation'); + expect(normalizePatternCategory('verification-workflow')).toBe('verification-workflow'); + expect(normalizePatternCategory('systematic-debugging')).toBe('systematic-debugging'); + expect(normalizePatternCategory('self-correction')).toBe('self-correction'); + expect(normalizePatternCategory('context-gathering')).toBe('context-gathering'); + expect(normalizePatternCategory('domain-expertise')).toBe('domain-expertise'); + expect(normalizePatternCategory('effective-tooling')).toBe('effective-tooling'); + }); + + it('matches case-insensitively', () => { + expect(normalizePatternCategory('Structured-Planning')).toBe('structured-planning'); + expect(normalizePatternCategory('INCREMENTAL-IMPLEMENTATION')).toBe('incremental-implementation'); + expect(normalizePatternCategory('Self-Correction')).toBe('self-correction'); + expect(normalizePatternCategory('Domain-Expertise')).toBe('domain-expertise'); + }); + + // ──────────────────────────────────────────────────── + // Rule 1.5: Explicit alias match + // ──────────────────────────────────────────────────── + + it('resolves all structured-planning aliases', () => { + 
expect(normalizePatternCategory('task-decomposition')).toBe('structured-planning'); + expect(normalizePatternCategory('plan-first')).toBe('structured-planning'); + expect(normalizePatternCategory('upfront-planning')).toBe('structured-planning'); + expect(normalizePatternCategory('phased-approach')).toBe('structured-planning'); + expect(normalizePatternCategory('task-breakdown')).toBe('structured-planning'); + expect(normalizePatternCategory('planning-before-implementation')).toBe('structured-planning'); + }); + + it('resolves all effective-tooling aliases', () => { + expect(normalizePatternCategory('agent-delegation')).toBe('effective-tooling'); + expect(normalizePatternCategory('agent-orchestration')).toBe('effective-tooling'); + expect(normalizePatternCategory('specialized-agents')).toBe('effective-tooling'); + expect(normalizePatternCategory('multi-agent')).toBe('effective-tooling'); + expect(normalizePatternCategory('tool-leverage')).toBe('effective-tooling'); + }); + + it('resolves all verification-workflow aliases', () => { + expect(normalizePatternCategory('build-test-verify')).toBe('verification-workflow'); + expect(normalizePatternCategory('test-driven-development')).toBe('verification-workflow'); + expect(normalizePatternCategory('tdd')).toBe('verification-workflow'); + expect(normalizePatternCategory('test-first')).toBe('verification-workflow'); + expect(normalizePatternCategory('pre-commit-checks')).toBe('verification-workflow'); + }); + + it('resolves all systematic-debugging aliases', () => { + expect(normalizePatternCategory('binary-search-debugging')).toBe('systematic-debugging'); + expect(normalizePatternCategory('methodical-debugging')).toBe('systematic-debugging'); + expect(normalizePatternCategory('log-based-debugging')).toBe('systematic-debugging'); + expect(normalizePatternCategory('debugging-methodology')).toBe('systematic-debugging'); + }); + + it('resolves all self-correction aliases', () => { + 
expect(normalizePatternCategory('course-correction')).toBe('self-correction'); + expect(normalizePatternCategory('pivot-on-failure')).toBe('self-correction'); + expect(normalizePatternCategory('backtracking')).toBe('self-correction'); + }); + + it('resolves all context-gathering aliases', () => { + expect(normalizePatternCategory('code-reading-first')).toBe('context-gathering'); + expect(normalizePatternCategory('codebase-exploration')).toBe('context-gathering'); + expect(normalizePatternCategory('understanding-before-changing')).toBe('context-gathering'); + }); + + it('resolves all domain-expertise aliases', () => { + expect(normalizePatternCategory('framework-knowledge')).toBe('domain-expertise'); + expect(normalizePatternCategory('types-first')).toBe('domain-expertise'); + expect(normalizePatternCategory('type-driven-development')).toBe('domain-expertise'); + expect(normalizePatternCategory('schema-first')).toBe('domain-expertise'); + }); + + it('resolves all incremental-implementation aliases', () => { + expect(normalizePatternCategory('small-steps')).toBe('incremental-implementation'); + expect(normalizePatternCategory('iterative-building')).toBe('incremental-implementation'); + expect(normalizePatternCategory('iterative-development')).toBe('incremental-implementation'); + }); + + it('resolves aliases case-insensitively', () => { + expect(normalizePatternCategory('Task-Decomposition')).toBe('structured-planning'); + expect(normalizePatternCategory('AGENT-DELEGATION')).toBe('effective-tooling'); + expect(normalizePatternCategory('TDD')).toBe('verification-workflow'); + expect(normalizePatternCategory('Course-Correction')).toBe('self-correction'); + }); + + // ──────────────────────────────────────────────────── + // Rule 2: Levenshtein distance <= 2 + // ──────────────────────────────────────────────────── + + it('normalizes typos within Levenshtein distance 2', () => { + expect(normalizePatternCategory('self-corection')).toBe('self-correction'); // distance 1 
+ expect(normalizePatternCategory('domain-expertse')).toBe('domain-expertise'); // distance 1 + expect(normalizePatternCategory('context-gthering')).toBe('context-gathering'); // distance 1 + }); + + it('does not match when Levenshtein distance > 2', () => { + const result = normalizePatternCategory('completely-unrelated'); + expect(result).toBe('completely-unrelated'); + }); + + // ──────────────────────────────────────────────────── + // Rule 3: Substring match (significant portion) + // ──────────────────────────────────────────────────── + + it('matches when category is a significant extension of a canonical', () => { + // "self-correction-behavior" contains "self-correction" (15 chars, 15/24 = 0.625 > 0.5) + expect(normalizePatternCategory('self-correction-behavior')).toBe('self-correction'); + }); + + it('does not match short substrings (< 5 chars)', () => { + const result = normalizePatternCategory('abc'); + expect(result).toBe('abc'); + }); + + // ──────────────────────────────────────────────────── + // Rule 4: Novel category (no match) + // ──────────────────────────────────────────────────── + + it('returns original for novel categories', () => { + expect(normalizePatternCategory('pair-programming')).toBe('pair-programming'); + expect(normalizePatternCategory('mob-programming')).toBe('mob-programming'); + expect(normalizePatternCategory('rubber-duck-debugging')).toBe('rubber-duck-debugging'); + }); + + it('preserves original casing for novel categories', () => { + expect(normalizePatternCategory('Custom-Pattern')).toBe('Custom-Pattern'); + expect(normalizePatternCategory('My-Novel-Category')).toBe('My-Novel-Category'); + }); + + // ──────────────────────────────────────────────────── + // All canonical categories are recognized + // ──────────────────────────────────────────────────── + + it('recognizes all 8 canonical categories', () => { + const canonicals = [ + 'structured-planning', + 'incremental-implementation', + 'verification-workflow', + 
'systematic-debugging', + 'self-correction', + 'context-gathering', + 'domain-expertise', + 'effective-tooling', + ]; + for (const cat of canonicals) { + expect(normalizePatternCategory(cat)).toBe(cat); + } + }); +}); + +// ────────────────────────────────────────────────────── +// getPatternCategoryLabel +// ────────────────────────────────────────────────────── + +describe('getPatternCategoryLabel', () => { + it('returns human-readable labels for all canonical categories', () => { + expect(getPatternCategoryLabel('structured-planning')).toBe('Structured Planning'); + expect(getPatternCategoryLabel('incremental-implementation')).toBe('Incremental Implementation'); + expect(getPatternCategoryLabel('verification-workflow')).toBe('Verification Workflow'); + expect(getPatternCategoryLabel('systematic-debugging')).toBe('Systematic Debugging'); + expect(getPatternCategoryLabel('self-correction')).toBe('Self-Correction'); + expect(getPatternCategoryLabel('context-gathering')).toBe('Context Gathering'); + expect(getPatternCategoryLabel('domain-expertise')).toBe('Domain Expertise'); + expect(getPatternCategoryLabel('effective-tooling')).toBe('Effective Tooling'); + }); + + it('converts novel kebab-case categories to Title Case', () => { + expect(getPatternCategoryLabel('pair-programming')).toBe('Pair Programming'); + expect(getPatternCategoryLabel('mob-programming')).toBe('Mob Programming'); + expect(getPatternCategoryLabel('rubber-duck-debugging')).toBe('Rubber Duck Debugging'); + }); + + it('handles single-word novel categories', () => { + expect(getPatternCategoryLabel('refactoring')).toBe('Refactoring'); + }); +}); diff --git a/cli/src/analysis/__tests__/prompt-quality-normalize.test.ts b/cli/src/analysis/__tests__/prompt-quality-normalize.test.ts new file mode 100644 index 0000000..e4662d3 --- /dev/null +++ b/cli/src/analysis/__tests__/prompt-quality-normalize.test.ts @@ -0,0 +1,99 @@ +import { describe, it, expect } from 'vitest'; +import { 
normalizePromptQualityCategory, getPQCategoryLabel, getPQCategoryType } from '../prompt-quality-normalize.js'; + +describe('normalizePromptQualityCategory', () => { + // Rule 1: Exact match + it('returns canonical for exact match', () => { + expect(normalizePromptQualityCategory('vague-request')).toBe('vague-request'); + expect(normalizePromptQualityCategory('missing-context')).toBe('missing-context'); + expect(normalizePromptQualityCategory('late-constraint')).toBe('late-constraint'); + expect(normalizePromptQualityCategory('precise-request')).toBe('precise-request'); + expect(normalizePromptQualityCategory('effective-context')).toBe('effective-context'); + expect(normalizePromptQualityCategory('productive-correction')).toBe('productive-correction'); + }); + + it('matches case-insensitively', () => { + expect(normalizePromptQualityCategory('Vague-Request')).toBe('vague-request'); + expect(normalizePromptQualityCategory('MISSING-CONTEXT')).toBe('missing-context'); + }); + + // Rule 1.5: Aliases + it('remaps common LLM variants to canonical categories', () => { + expect(normalizePromptQualityCategory('vague-instructions')).toBe('vague-request'); + expect(normalizePromptQualityCategory('unclear-request')).toBe('vague-request'); + expect(normalizePromptQualityCategory('imprecise-prompting')).toBe('vague-request'); + expect(normalizePromptQualityCategory('missing-information')).toBe('missing-context'); + expect(normalizePromptQualityCategory('insufficient-context')).toBe('missing-context'); + expect(normalizePromptQualityCategory('late-context')).toBe('late-constraint'); + expect(normalizePromptQualityCategory('late-requirements')).toBe('late-constraint'); + expect(normalizePromptQualityCategory('piecemeal-requirements')).toBe('late-constraint'); + expect(normalizePromptQualityCategory('drip-fed-requirements')).toBe('late-constraint'); + expect(normalizePromptQualityCategory('unclear-feedback')).toBe('unclear-correction'); + 
expect(normalizePromptQualityCategory('vague-correction')).toBe('unclear-correction'); + expect(normalizePromptQualityCategory('context-drift')).toBe('scope-drift'); + expect(normalizePromptQualityCategory('objective-bloat')).toBe('scope-drift'); + expect(normalizePromptQualityCategory('session-bloat')).toBe('scope-drift'); + expect(normalizePromptQualityCategory('no-acceptance-criteria')).toBe('missing-acceptance-criteria'); + expect(normalizePromptQualityCategory('undefined-done')).toBe('missing-acceptance-criteria'); + expect(normalizePromptQualityCategory('hidden-assumption')).toBe('assumption-not-surfaced'); + expect(normalizePromptQualityCategory('unstated-assumption')).toBe('assumption-not-surfaced'); + expect(normalizePromptQualityCategory('clear-request')).toBe('precise-request'); + expect(normalizePromptQualityCategory('specific-request')).toBe('precise-request'); + expect(normalizePromptQualityCategory('good-context')).toBe('effective-context'); + expect(normalizePromptQualityCategory('upfront-context')).toBe('effective-context'); + expect(normalizePromptQualityCategory('clear-correction')).toBe('productive-correction'); + expect(normalizePromptQualityCategory('effective-feedback')).toBe('productive-correction'); + }); + + // Rule 2: Levenshtein + it('normalizes typos within Levenshtein distance 2', () => { + expect(normalizePromptQualityCategory('vague-requst')).toBe('vague-request'); + expect(normalizePromptQualityCategory('scope-drft')).toBe('scope-drift'); + }); + + // Rule 4: Novel category + it('returns original for novel categories', () => { + expect(normalizePromptQualityCategory('over-delegation')).toBe('over-delegation'); + expect(normalizePromptQualityCategory('micro-management')).toBe('micro-management'); + }); + + it('recognizes all 10 canonical categories', () => { + const all = [ + 'vague-request', 'missing-context', 'late-constraint', + 'unclear-correction', 'scope-drift', 'missing-acceptance-criteria', + 'assumption-not-surfaced', 
'precise-request', 'effective-context', + 'productive-correction', + ]; + for (const cat of all) { + expect(normalizePromptQualityCategory(cat)).toBe(cat); + } + }); +}); + +describe('getPQCategoryLabel', () => { + it('returns human label for canonical categories', () => { + expect(getPQCategoryLabel('vague-request')).toBe('Vague Request'); + expect(getPQCategoryLabel('late-constraint')).toBe('Late Constraint'); + expect(getPQCategoryLabel('precise-request')).toBe('Precise Request'); + }); + + it('converts novel categories to title case', () => { + expect(getPQCategoryLabel('over-delegation')).toBe('Over Delegation'); + }); +}); + +describe('getPQCategoryType', () => { + it('returns deficit for deficit categories', () => { + expect(getPQCategoryType('vague-request')).toBe('deficit'); + expect(getPQCategoryType('late-constraint')).toBe('deficit'); + }); + + it('returns strength for strength categories', () => { + expect(getPQCategoryType('precise-request')).toBe('strength'); + expect(getPQCategoryType('effective-context')).toBe('strength'); + }); + + it('returns deficit for unknown categories', () => { + expect(getPQCategoryType('over-delegation')).toBe('deficit'); + }); +}); diff --git a/cli/src/analysis/__tests__/prompts.test.ts b/cli/src/analysis/__tests__/prompts.test.ts new file mode 100644 index 0000000..7793df5 --- /dev/null +++ b/cli/src/analysis/__tests__/prompts.test.ts @@ -0,0 +1,702 @@ +import { describe, it, expect } from 'vitest'; +import { + classifyStoredUserMessage, + formatMessagesForAnalysis, + formatSessionMetaLine, +} from '../message-format.js'; +import { + parseAnalysisResponse, + parsePromptQualityResponse, +} from '../response-parsers.js'; +import { + SHARED_ANALYST_SYSTEM_PROMPT, + buildCacheableConversationBlock, + buildSessionAnalysisInstructions, + buildPromptQualityInstructions, + buildFacetOnlyInstructions, +} from '../prompts.js'; +import type { SQLiteMessageRow } from '../prompt-types.js'; + +// 
────────────────────────────────────────────────────── +// Helpers +// ────────────────────────────────────────────────────── + +function makeMessage(overrides: Partial = {}): SQLiteMessageRow { + return { + id: 'msg-1', + session_id: 'sess-1', + type: 'user', + content: 'Hello world', + thinking: null, + tool_calls: '', + tool_results: '', + usage: null, + timestamp: '2025-06-15T10:00:00Z', + parent_id: null, + ...overrides, + }; +} + +// ────────────────────────────────────────────────────── +// classifyStoredUserMessage +// ────────────────────────────────────────────────────── + +describe('classifyStoredUserMessage', () => { + it('classifies JSON array with tool_result as tool-result', () => { + const content = '[{"type":"tool_result","tool_use_id":"toolu_abc","content":"File written successfully"}]'; + expect(classifyStoredUserMessage(content)).toBe('tool-result'); + }); + + it('classifies JSON array with multiple items including tool_result as tool-result', () => { + const content = '[{"type":"tool_result","tool_use_id":"toolu_xyz","content":"ok"},{"type":"tool_result","tool_use_id":"toolu_123","content":"done"}]'; + expect(classifyStoredUserMessage(content)).toBe('tool-result'); + }); + + it('does NOT classify a JSON array without tool_result keyword as tool-result', () => { + // A human might paste a JSON array in a message + const content = '[{"name":"Alice"},{"name":"Bob"}]'; + expect(classifyStoredUserMessage(content)).toBe('human'); + }); + + it('classifies "Here is a summary of our conversation" prefix as system-artifact', () => { + const content = 'Here is a summary of our conversation so far:\n\nWe discussed auth middleware...'; + expect(classifyStoredUserMessage(content)).toBe('system-artifact'); + }); + + it('classifies "This session is being continued" prefix as system-artifact', () => { + const content = 'This session is being continued from a previous conversation that ran out of context...'; + 
expect(classifyStoredUserMessage(content)).toBe('system-artifact'); + }); + + it('classifies single-line slash command as system-artifact', () => { + expect(classifyStoredUserMessage('/compact')).toBe('system-artifact'); + expect(classifyStoredUserMessage('/review')).toBe('system-artifact'); + expect(classifyStoredUserMessage('/test --coverage')).toBe('system-artifact'); + }); + + it('classifies two-line slash command as system-artifact', () => { + const content = '/compact\nsome brief instruction'; + expect(classifyStoredUserMessage(content)).toBe('system-artifact'); + }); + + it('does NOT classify long slash content (>2 lines) as system-artifact — avoids false positives', () => { + // A human message starting with /usr/bin/... path in a longer paragraph + const content = '/usr/bin/node is the runtime I am using.\nPlease update the shebang in the file.\nAlso fix the permissions.'; + expect(classifyStoredUserMessage(content)).toBe('human'); + }); + + it('does NOT classify /UPPERCASE as system-artifact — only /[a-z] pattern', () => { + const content = '/NotACommand'; + expect(classifyStoredUserMessage(content)).toBe('human'); + }); + + it('classifies normal human text as human', () => { + expect(classifyStoredUserMessage('Fix the auth middleware to use Hono patterns')).toBe('human'); + expect(classifyStoredUserMessage('Can you help me debug this?')).toBe('human'); + expect(classifyStoredUserMessage('')).toBe('human'); + }); + + it('classifies human message starting with [ but no tool_result as human', () => { + const content = '[Step 1] First do X\n[Step 2] Then do Y'; + expect(classifyStoredUserMessage(content)).toBe('human'); + }); +}); + +// ────────────────────────────────────────────────────── +// formatSessionMetaLine +// ────────────────────────────────────────────────────── + +describe('formatSessionMetaLine', () => { + it('returns empty string when meta is undefined', () => { + expect(formatSessionMetaLine(undefined)).toBe(''); + }); + + it('returns empty 
string when all meta fields are zero/empty', () => { + expect(formatSessionMetaLine({ compactCount: 0, autoCompactCount: 0, slashCommands: [] })).toBe(''); + }); + + it('formats auto-compact only', () => { + const result = formatSessionMetaLine({ autoCompactCount: 2 }); + expect(result).toContain('2 context compaction'); + expect(result).toContain('2 auto'); + expect(result).toContain('session exceeded context window'); + expect(result.endsWith('\n')).toBe(true); + }); + + it('formats manual compact only', () => { + const result = formatSessionMetaLine({ compactCount: 1 }); + expect(result).toContain('1 context compaction'); + expect(result).toContain('1 manual'); + expect(result).not.toContain('auto'); + }); + + it('formats both auto and manual compacts', () => { + const result = formatSessionMetaLine({ compactCount: 1, autoCompactCount: 2 }); + expect(result).toContain('3 context compaction'); + expect(result).toContain('2 auto'); + expect(result).toContain('1 manual'); + }); + + it('uses singular "compaction" for count of 1', () => { + const result = formatSessionMetaLine({ autoCompactCount: 1 }); + expect(result).toContain('1 context compaction'); + expect(result).not.toContain('compactions'); + }); + + it('uses plural "compactions" for count > 1', () => { + const result = formatSessionMetaLine({ autoCompactCount: 3 }); + expect(result).toContain('3 context compactions'); + }); + + it('formats slash commands only', () => { + const result = formatSessionMetaLine({ slashCommands: ['/review', '/test'] }); + expect(result).toContain('slash commands used: /review, /test'); + expect(result).not.toContain('compaction'); + }); + + it('formats compacts and slash commands together', () => { + const result = formatSessionMetaLine({ + autoCompactCount: 1, + slashCommands: ['/compact', '/review'], + }); + expect(result).toContain('Context signals:'); + expect(result).toContain('context compaction'); + expect(result).toContain('slash commands used:'); + }); +}); + +// 
────────────────────────────────────────────────────── +// formatMessagesForAnalysis +// ────────────────────────────────────────────────────── + +describe('formatMessagesForAnalysis', () => { + it('produces readable text with role labels', () => { + const messages = [ + makeMessage({ type: 'user', content: 'Fix the bug' }), + makeMessage({ id: 'msg-2', type: 'assistant', content: 'Done!' }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('### User#0:'); + expect(result).toContain('Fix the bug'); + expect(result).toContain('### Assistant#0:'); + expect(result).toContain('Done!'); + }); + + it('increments user and assistant indices independently', () => { + const messages = [ + makeMessage({ type: 'user', content: 'msg 1' }), + makeMessage({ id: 'msg-2', type: 'assistant', content: 'msg 2' }), + makeMessage({ id: 'msg-3', type: 'user', content: 'msg 3' }), + makeMessage({ id: 'msg-4', type: 'assistant', content: 'msg 4' }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('User#0'); + expect(result).toContain('Assistant#0'); + expect(result).toContain('User#1'); + expect(result).toContain('Assistant#1'); + }); + + it('includes tool call names when present', () => { + const messages = [ + makeMessage({ + type: 'assistant', + content: 'Let me read the file', + tool_calls: JSON.stringify([{ name: 'Read' }, { name: 'Write' }]), + }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('[Tools used: Read, Write]'); + }); + + it('includes thinking content when present', () => { + const messages = [ + makeMessage({ + type: 'assistant', + content: 'The answer is 42', + thinking: 'I need to calculate this carefully', + }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('[Thinking: I need to calculate this carefully]'); + }); + + it('includes tool results when present', () => { + const messages = [ + makeMessage({ + type: 
'assistant', + content: 'Read the file', + tool_results: JSON.stringify([{ output: 'file contents here' }]), + }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('[Tool results: file contents here]'); + }); + + it('handles empty messages array', () => { + const result = formatMessagesForAnalysis([]); + expect(result).toBe(''); + }); + + it('handles malformed JSON in tool_calls gracefully', () => { + const messages = [ + makeMessage({ + type: 'assistant', + content: 'oops', + tool_calls: 'not valid json', + }), + ]; + // Should not throw + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('oops'); + // No [Tools used:] since parse failed + expect(result).not.toContain('[Tools used:'); + }); + + it('labels tool-result user messages as [tool-result] and does NOT increment User#N', () => { + const toolResultContent = '[{"type":"tool_result","tool_use_id":"toolu_abc","content":"ok"}]'; + const messages = [ + makeMessage({ id: 'msg-1', type: 'user', content: 'First human message' }), + makeMessage({ id: 'msg-2', type: 'user', content: toolResultContent }), + makeMessage({ id: 'msg-3', type: 'user', content: 'Second human message' }), + ]; + const result = formatMessagesForAnalysis(messages); + // First and second human messages get indices 0 and 1 (tool-result in between skipped) + expect(result).toContain('### User#0:'); + expect(result).toContain('### User#1:'); + // No User#2 should appear (only 2 human messages) + expect(result).not.toContain('User#2'); + // Tool-result gets [tool-result] label + expect(result).toContain('### [tool-result]:'); + }); + + it('labels auto-compact user messages as [auto-compact] and does NOT increment User#N', () => { + const autoCompactContent = 'Here is a summary of our conversation so far:\n\nWe implemented auth...'; + const messages = [ + makeMessage({ id: 'msg-1', type: 'user', content: 'Start work' }), + makeMessage({ id: 'msg-2', type: 'user', content: 
autoCompactContent }), + makeMessage({ id: 'msg-3', type: 'user', content: 'Continue work' }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('### User#0:'); + expect(result).toContain('### [auto-compact]:'); + expect(result).toContain('### User#1:'); + expect(result).not.toContain('User#2'); + }); + + it('labels slash command user messages as [system] (not [auto-compact]) and does NOT increment User#N', () => { + // Slash commands are system artifacts but NOT compaction events — they get [system] label. + const messages = [ + makeMessage({ id: 'msg-1', type: 'user', content: 'Start work' }), + makeMessage({ id: 'msg-2', type: 'user', content: '/compact' }), + makeMessage({ id: 'msg-3', type: 'user', content: 'Continue work' }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('### User#0:'); + expect(result).toContain('### [system]:'); + expect(result).not.toContain('[auto-compact]'); + expect(result).toContain('### User#1:'); + expect(result).not.toContain('User#2'); + }); + + it('distinguishes [auto-compact] from [system] when both appear in same session', () => { + const autoCompactContent = 'This session is being continued from a previous conversation...'; + const messages = [ + makeMessage({ id: '1', type: 'user', content: 'Do something' }), + makeMessage({ id: '2', type: 'user', content: '/review' }), + makeMessage({ id: '3', type: 'user', content: autoCompactContent }), + makeMessage({ id: '4', type: 'user', content: 'Continue' }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('### [system]:'); + expect(result).toContain('### [auto-compact]:'); + // User index should still count only genuine human messages (2 of them: 'Do something' + 'Continue') + expect(result).toContain('### User#0:'); + expect(result).toContain('### User#1:'); + expect(result).not.toContain('User#2'); + }); + + it('preserves User#N counter continuity across mixed message 
types', () => { + const toolResult = '[{"type":"tool_result","tool_use_id":"toolu_1","content":"done"}]'; + const messages = [ + makeMessage({ id: '1', type: 'user', content: 'Human 0' }), + makeMessage({ id: '2', type: 'user', content: toolResult }), + makeMessage({ id: '3', type: 'user', content: toolResult }), + makeMessage({ id: '4', type: 'user', content: 'Human 1' }), + makeMessage({ id: '5', type: 'assistant', content: 'Reply' }), + makeMessage({ id: '6', type: 'user', content: 'Human 2' }), + ]; + const result = formatMessagesForAnalysis(messages); + expect(result).toContain('User#0'); + expect(result).toContain('User#1'); + expect(result).toContain('User#2'); + expect(result).not.toContain('User#3'); + // Two [tool-result] blocks appear + const toolResultCount = (result.match(/\[tool-result\]/g) ?? []).length; + expect(toolResultCount).toBe(2); + }); +}); + +// ────────────────────────────────────────────────────── +// buildCacheableConversationBlock +// ────────────────────────────────────────────────────── + +describe('buildCacheableConversationBlock', () => { + it('wraps formatted messages in conversation markers', () => { + const block = buildCacheableConversationBlock('### User#0:\nHello'); + expect(block.text).toContain('--- CONVERSATION ---'); + expect(block.text).toContain('--- END CONVERSATION ---'); + expect(block.text).toContain('### User#0:\nHello'); + }); + + it('sets cache_control to ephemeral', () => { + const block = buildCacheableConversationBlock('messages'); + expect(block.cache_control).toEqual({ type: 'ephemeral' }); + }); + + it('returns type text block', () => { + const block = buildCacheableConversationBlock('messages'); + expect(block.type).toBe('text'); + }); + + it('ends with double newline to separate instruction block', () => { + const block = buildCacheableConversationBlock('messages'); + expect(block.text.endsWith('\n\n')).toBe(true); + }); +}); + +// ────────────────────────────────────────────────────── +// 
buildSessionAnalysisInstructions +// ────────────────────────────────────────────────────── + +describe('buildSessionAnalysisInstructions', () => { + it('includes project name in the instructions', () => { + const result = buildSessionAnalysisInstructions('my-app', null); + expect(result).toContain('Project: my-app'); + }); + + it('includes session summary when provided', () => { + const result = buildSessionAnalysisInstructions('my-app', 'Fixed a critical bug'); + expect(result).toContain('Session Summary: Fixed a critical bug'); + }); + + it('omits session summary line when null', () => { + const result = buildSessionAnalysisInstructions('my-app', null); + expect(result).not.toContain('Session Summary:'); + }); + + it('contains the PART 1 and PART 2 section headers', () => { + const result = buildSessionAnalysisInstructions('my-app', null); + expect(result).toContain('=== PART 1: SESSION FACETS ==='); + expect(result).toContain('=== PART 2: INSIGHTS ==='); + }); + + it('ends with json tags instruction', () => { + const result = buildSessionAnalysisInstructions('proj', null); + expect(result).toContain('...'); + }); +}); + +// ────────────────────────────────────────────────────── +// buildPromptQualityInstructions +// ────────────────────────────────────────────────────── + +describe('buildPromptQualityInstructions', () => { + const sessionMeta = { + humanMessageCount: 8, + assistantMessageCount: 12, + toolExchangeCount: 31, + }; + + it('includes project name in the instructions', () => { + const result = buildPromptQualityInstructions('my-app', sessionMeta); + expect(result).toContain('Project: my-app'); + }); + + it('formats session shape header with structured counts', () => { + const result = buildPromptQualityInstructions('my-app', sessionMeta); + expect(result).toContain('Session shape: 8 user messages, 12 assistant messages, 31 tool exchanges'); + }); + + it('handles zero tool exchanges', () => { + const result = buildPromptQualityInstructions('proj', { + 
humanMessageCount: 2, + assistantMessageCount: 2, + toolExchangeCount: 0, + }); + expect(result).toContain('2 user messages, 2 assistant messages, 0 tool exchanges'); + }); + + it('omits Context signals line when meta is not provided', () => { + const result = buildPromptQualityInstructions('proj', sessionMeta); + expect(result).not.toContain('Context signals:'); + }); + + it('includes Context signals line when meta with compactions is provided', () => { + const result = buildPromptQualityInstructions('proj', sessionMeta, { + compactCount: 1, + autoCompactCount: 2, + }); + expect(result).toContain('Context signals:'); + expect(result).toContain('context compaction'); + }); + + it('includes slash commands in Context signals when meta has slash commands', () => { + const result = buildPromptQualityInstructions('proj', sessionMeta, { + slashCommands: ['/review', '/test'], + }); + expect(result).toContain('slash commands used: /review, /test'); + }); + + it('ends with json tags instruction', () => { + const result = buildPromptQualityInstructions('proj', sessionMeta); + expect(result).toContain('...'); + }); +}); + +// ────────────────────────────────────────────────────── +// buildFacetOnlyInstructions +// ────────────────────────────────────────────────────── + +describe('buildFacetOnlyInstructions', () => { + it('includes project name', () => { + const result = buildFacetOnlyInstructions('my-app', null); + expect(result).toContain('Project: my-app'); + }); + + it('includes session summary when provided', () => { + const result = buildFacetOnlyInstructions('my-app', 'Fixed auth bug'); + expect(result).toContain('Session Summary: Fixed auth bug'); + }); + + it('omits session summary when null', () => { + const result = buildFacetOnlyInstructions('my-app', null); + expect(result).not.toContain('Session Summary:'); + }); + + it('ends with json tags instruction', () => { + const result = buildFacetOnlyInstructions('proj', null); + expect(result).toContain('...'); + }); 
+}); + +// ────────────────────────────────────────────────────── +// parseAnalysisResponse +// ────────────────────────────────────────────────────── + +describe('parseAnalysisResponse', () => { + it('parses valid JSON in tags', () => { + const response = ` +{ + "summary": { + "title": "Implemented auth", + "content": "Added login and logout", + "bullets": ["Login flow", "Logout flow"] + }, + "decisions": [], + "learnings": [] +} +`; + const result = parseAnalysisResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.summary.title).toBe('Implemented auth'); + expect(result.data.summary.bullets).toHaveLength(2); + expect(result.data.decisions).toEqual([]); + expect(result.data.learnings).toEqual([]); + }); + + it('parses raw JSON without tags', () => { + const response = `{ + "summary": { "title": "Test", "content": "Content", "bullets": [] }, + "decisions": [], + "learnings": [] +}`; + const result = parseAnalysisResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.summary.title).toBe('Test'); + }); + + it('returns error for completely malformed response', () => { + const result = parseAnalysisResponse('This is not JSON at all'); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.error_type).toBe('no_json_found'); + }); + + it('returns error for JSON missing required summary.title', () => { + const response = '{ "summary": { "content": "no title" }, "decisions": [], "learnings": [] }'; + const result = parseAnalysisResponse(response); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.error_type).toBe('invalid_structure'); + }); + + it('defaults decisions and learnings to empty arrays when missing', () => { + const response = '{ "summary": { "title": "Test", "content": "c", "bullets": [] } }'; + const result = parseAnalysisResponse(response); + expect(result.success).toBe(true); 
+ if (!result.success) return; + expect(result.data.decisions).toEqual([]); + expect(result.data.learnings).toEqual([]); + }); + + // Fix 2: LLM response structure validation — array guard tests + it('coerces decisions to [] when LLM returns a non-array string value', () => { + // LLM returned "decisions": "none" — string is truthy so || [] would NOT catch this + const response = '{ "summary": { "title": "Test", "content": "c", "bullets": [] }, "decisions": "none", "learnings": [] }'; + const result = parseAnalysisResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + // Must be an array — not the string "none" + expect(Array.isArray(result.data.decisions)).toBe(true); + expect(result.data.decisions).toEqual([]); + }); + + it('coerces learnings to [] when LLM returns a non-array value', () => { + const response = '{ "summary": { "title": "Test", "content": "c", "bullets": [] }, "decisions": [], "learnings": {} }'; + const result = parseAnalysisResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + expect(Array.isArray(result.data.learnings)).toBe(true); + expect(result.data.learnings).toEqual([]); + }); + + it('coerces facet arrays to [] when LLM returns non-array facets', () => { + // LLM returned friction_points as a string instead of an array + const response = '{ "summary": { "title": "Test", "content": "c", "bullets": [] }, "decisions": [], "learnings": [], "facets": { "friction_points": "none", "effective_patterns": null } }'; + const result = parseAnalysisResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + // Both must be arrays — .some() calls on monitors must not throw + expect(Array.isArray(result.data.facets?.friction_points)).toBe(true); + expect(Array.isArray(result.data.facets?.effective_patterns)).toBe(true); + }); +}); + +// ────────────────────────────────────────────────────── +// parsePromptQualityResponse +// 
────────────────────────────────────────────────────── + +describe('parsePromptQualityResponse', () => { + it('parses valid response with findings and takeaways', () => { + const response = `{ + "efficiency_score": 85, + "message_overhead": 2, + "assessment": "Good prompting style overall", + "takeaways": [ + { + "type": "improve", + "category": "vague-request", + "label": "Add file path to requests", + "message_ref": "User#3", + "original": "fix the bug", + "better_prompt": "Fix the null pointer in cli/src/commands/sync.ts line 42", + "why": "The original lacked enough detail to act on without guessing" + } + ], + "findings": [ + { + "category": "vague-request", + "type": "deficit", + "description": "User#3 asked to fix a bug without specifying file, function, or error message", + "message_ref": "User#3", + "impact": "medium", + "confidence": 80 + } + ], + "dimension_scores": { + "context_provision": 70, + "request_specificity": 65, + "scope_management": 90, + "information_timing": 80, + "correction_quality": 75 + } + }`; + const result = parsePromptQualityResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.efficiency_score).toBe(85); + expect(result.data.takeaways).toHaveLength(1); + expect(result.data.findings).toHaveLength(1); + expect(result.data.findings[0].category).toBe('vague-request'); + expect(result.data.dimension_scores.scope_management).toBe(90); + }); + + it('clamps efficiency_score to 0-100 range', () => { + const response = '{ "efficiency_score": 150, "message_overhead": 0, "assessment": "ok", "takeaways": [], "findings": [], "dimension_scores": { "context_provision": 50, "request_specificity": 50, "scope_management": 50, "information_timing": 50, "correction_quality": 50 } }'; + const result = parsePromptQualityResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.efficiency_score).toBe(100); + }); + + it('defaults missing 
dimension_scores to 50s', () => { + const response = '{ "efficiency_score": 75, "message_overhead": 0, "assessment": "ok", "takeaways": [], "findings": [] }'; + const result = parsePromptQualityResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.dimension_scores.context_provision).toBe(50); + expect(result.data.dimension_scores.correction_quality).toBe(50); + }); + + it('accepts empty arrays (well-prompted session)', () => { + const response = '{ "efficiency_score": 95, "message_overhead": 0, "assessment": "Excellent session", "takeaways": [], "findings": [], "dimension_scores": { "context_provision": 95, "request_specificity": 90, "scope_management": 95, "information_timing": 95, "correction_quality": 75 } }'; + const result = parsePromptQualityResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.takeaways).toHaveLength(0); + expect(result.data.findings).toHaveLength(0); + }); + + it('returns error for missing efficiency_score', () => { + const response = '{ "assessment": "no score" }'; + const result = parsePromptQualityResponse(response); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.error_type).toBe('invalid_structure'); + }); + + it('returns error for completely invalid response', () => { + const result = parsePromptQualityResponse('not json'); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.error_type).toBe('no_json_found'); + }); + + // Fix 2: array guard tests for parsePromptQualityResponse + it('coerces takeaways to [] when LLM returns a non-array string value', () => { + // LLM returned "takeaways": "none" — truthy string bypasses || [] coercion + const response = '{ "efficiency_score": 80, "takeaways": "none", "findings": [] }'; + const result = parsePromptQualityResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + 
expect(Array.isArray(result.data.takeaways)).toBe(true); + expect(result.data.takeaways).toEqual([]); + }); + + it('coerces findings to [] when LLM returns a non-array value (prevents .some() TypeError)', () => { + // LLM returned "findings": "none" — monitor on line 166 calls .some(), would throw without guard + const response = '{ "efficiency_score": 80, "takeaways": [], "findings": "none" }'; + const result = parsePromptQualityResponse(response); + expect(result.success).toBe(true); + if (!result.success) return; + expect(Array.isArray(result.data.findings)).toBe(true); + expect(result.data.findings).toEqual([]); + }); +}); + +// ────────────────────────────────────────────────────── +// SHARED_ANALYST_SYSTEM_PROMPT +// ────────────────────────────────────────────────────── + +describe('SHARED_ANALYST_SYSTEM_PROMPT', () => { + it('is a non-empty string', () => { + expect(typeof SHARED_ANALYST_SYSTEM_PROMPT).toBe('string'); + expect(SHARED_ANALYST_SYSTEM_PROMPT.length).toBeGreaterThan(0); + }); + + it('instructs JSON output wrapped in json tags', () => { + expect(SHARED_ANALYST_SYSTEM_PROMPT).toContain(''); + }); +}); diff --git a/cli/src/analysis/friction-normalize.ts b/cli/src/analysis/friction-normalize.ts new file mode 100644 index 0000000..2ade90a --- /dev/null +++ b/cli/src/analysis/friction-normalize.ts @@ -0,0 +1,56 @@ +// Friction category normalization. +// Clusters similar free-form friction categories to canonical ones during aggregation. + +import { CANONICAL_FRICTION_CATEGORIES } from './prompt-constants.js'; +import { normalizeCategory } from './normalize-utils.js'; + +// Explicit alias map for clustering emergent category variants. +// Targets don't need to be in CANONICAL_FRICTION_CATEGORIES — +// this clusters semantically-equivalent novel categories together. +// Insert alias lookup runs AFTER exact canonical match but BEFORE Levenshtein, +// so well-known emergent variants are clustered deterministically. 
+const FRICTION_ALIASES: Record = { + // legacy canonical → new canonical (15→9 taxonomy revision) + 'missing-dependency': 'stale-assumptions', + 'config-drift': 'stale-assumptions', + 'stale-cache': 'stale-assumptions', + 'version-mismatch': 'stale-assumptions', + 'permission-issue': 'stale-assumptions', + 'environment-mismatch': 'stale-assumptions', + 'race-condition': 'wrong-approach', + 'circular-dependency': 'wrong-approach', + 'test-failure': 'wrong-approach', + 'type-error': 'knowledge-gap', + 'api-misunderstanding': 'knowledge-gap', + // agent orchestration variants → cluster under one emergent name + 'agent-lifecycle-issue': 'agent-orchestration-failure', + 'agent-communication-failure': 'agent-orchestration-failure', + 'agent-communication-breakdown': 'agent-orchestration-failure', + 'agent-lifecycle-management': 'agent-orchestration-failure', + 'agent-shutdown-failure': 'agent-orchestration-failure', + // rate limit variants → cluster under one emergent name + 'api-rate-limit': 'rate-limit-hit', + 'rate-limiting': 'rate-limit-hit', + 'rate-limited': 'rate-limit-hit', +}; + +/** + * Normalize a friction category to the closest canonical category. + * Returns the original category if no close match is found. + * + * Matching rules (in order): + * 1. Exact match against canonical list → return as-is + * 1.5. Explicit alias match → return alias target (may be non-canonical) + * 2. Levenshtein distance <= 2 → return canonical match + * 3. Substring match (category contains canonical or vice versa) → return canonical + * 4. No match → return original (novel category) + * + * Note: alias targets in FRICTION_ALIASES bypass the canonical check intentionally. + * e.g., "agent-orchestration-failure" is not canonical but is a valid cluster target. 
+ */ +export function normalizeFrictionCategory(category: string): string { + return normalizeCategory(category, { + canonicalCategories: CANONICAL_FRICTION_CATEGORIES, + aliases: FRICTION_ALIASES, + }); +} diff --git a/cli/src/analysis/message-format.ts b/cli/src/analysis/message-format.ts new file mode 100644 index 0000000..c659f3c --- /dev/null +++ b/cli/src/analysis/message-format.ts @@ -0,0 +1,142 @@ +// SQLite message formatting utilities for LLM prompt construction. +// Extracted from prompts.ts — used by prompt generator functions in prompts.ts. + +import type { SQLiteMessageRow, SessionMetadata } from './prompt-types.js'; + +// Safely parse a JSON-encoded string field from SQLite. +// Returns defaultValue if the field is null, empty, or invalid JSON. +// Mirrors server/src/utils.ts safeParseJson — keep in sync. +function safeParseJson(value: string | null | undefined, defaultValue: T): T { + if (!value) return defaultValue; + try { + return JSON.parse(value) as T; + } catch { + return defaultValue; + } +} + +// Internal types — only used within formatMessagesForAnalysis +interface ParsedToolCall { + name?: string; +} + +interface ParsedToolResult { + output?: string; +} + +/** + * Detect the class of a stored user message from its content string. + * Operates on the DB content field (stringified), not raw JSONL. + * + * This mirrors classifyUserMessage() in cli/src/parser/jsonl.ts but works on + * stored content strings instead of parsed JSONL message objects. The DB stores + * message content as a plain string — tool-results are JSON arrays stringified, + * human text is stored as-is. + * + * Order matters — most specific checks first. + */ +export function classifyStoredUserMessage(content: string): 'human' | 'tool-result' | 'system-artifact' { + // Tool-result: content is a JSON array containing tool_result blocks. + // The DB stores these as stringified JSON arrays starting with '['. 
+ if (content.startsWith('[') && content.includes('"tool_result"')) return 'tool-result'; + + // Auto-compact summary: Claude Code uses two known prefixes for LLM-initiated + // context compaction summaries. Both must be checked. + if (content.startsWith('Here is a summary of our conversation')) return 'system-artifact'; + if (content.startsWith('This session is being continued')) return 'system-artifact'; + + // Slash command or skill load: single-line starting with / followed by a lowercase letter. + // Requires content.trim() to be short (≤2 lines) to avoid false-positives on messages + // containing file paths like "/usr/bin/..." as part of a longer instruction. + const trimmed = content.trim(); + if (/^\/[a-z]/.test(trimmed) && trimmed.split('\n').length <= 2) return 'system-artifact'; + + return 'human'; +} + +/** + * Format SQLite message rows for LLM consumption. + * Handles snake_case fields and JSON-encoded tool_calls/tool_results. + * + * User#N indices only increment for genuine human messages. Tool-results and + * system artifacts (auto-compacts, slash commands) receive bracketed labels + * instead. This ensures User#N references in PQ takeaways and evidence fields + * align with actual human turns, not inflated by tool-result rows. + */ +export function formatMessagesForAnalysis(messages: SQLiteMessageRow[]): string { + let userIndex = 0; + let assistantIndex = 0; + + return messages + .map((m) => { + let roleLabel: string; + + if (m.type === 'user') { + const msgClass = classifyStoredUserMessage(m.content); + if (msgClass === 'tool-result') { + roleLabel = '[tool-result]'; + } else if (msgClass === 'system-artifact') { + // Auto-compact summaries use two known prefixes — everything else (slash commands, + // skill loads) is a generic system artifact, not a compaction event. + const isAutoCompact = m.content.startsWith('Here is a summary of our conversation') + || m.content.startsWith('This session is being continued'); + roleLabel = isAutoCompact ? 
'[auto-compact]' : '[system]'; + } else { + // Genuine human message — increment counter + roleLabel = `User#${userIndex++}`; + } + } else if (m.type === 'assistant') { + roleLabel = `Assistant#${assistantIndex++}`; + } else { + roleLabel = 'System'; + } + + // Parse JSON-encoded tool_calls and tool_results via safeParseJson + const toolCalls = safeParseJson(m.tool_calls, []); + const toolResults = safeParseJson(m.tool_results, []); + + const toolInfo = toolCalls.length > 0 + ? `\n[Tools used: ${toolCalls.map(t => t.name || 'unknown').join(', ')}]` + : ''; + + // Include thinking content — capped at 1000 chars to stay within token budget + const thinkingInfo = m.thinking + ? `\n[Thinking: ${m.thinking.slice(0, 1000)}]` + : ''; + + // Include tool results for context — 500 chars per result (error messages need ~300-400 chars) + const resultInfo = toolResults.length > 0 + ? `\n[Tool results: ${toolResults.map(r => (r.output || '').slice(0, 500)).join(' | ')}]` + : ''; + + return `### ${roleLabel}:\n${m.content}${thinkingInfo}${toolInfo}${resultInfo}`; + }) + .join('\n\n'); +} + +/** + * Format a one-line context signals header from V6 session metadata. + * Returns empty string when no signals are present (pre-V6 sessions with NULL columns). + * + * Example output: + * "Context signals: 3 context compactions (2 auto, 1 manual) — session exceeded context window; slash commands used: /review, /test\n" + */ +export function formatSessionMetaLine(meta?: SessionMetadata): string { + if (!meta) return ''; + const parts: string[] = []; + + const totalCompacts = (meta.compactCount ?? 0) + (meta.autoCompactCount ?? 0); + if (totalCompacts > 0) { + const breakdown: string[] = []; + if (meta.autoCompactCount) breakdown.push(`${meta.autoCompactCount} auto`); + if (meta.compactCount) breakdown.push(`${meta.compactCount} manual`); + parts.push(`${totalCompacts} context compaction${totalCompacts > 1 ? 
's' : ''} (${breakdown.join(', ')}) — session exceeded context window`);
+  }
+
+  if (meta.slashCommands?.length) {
+    parts.push(`slash commands used: ${meta.slashCommands.join(', ')}`);
+  }
+
+  if (parts.length === 0) return '';
+  return `Context signals: ${parts.join('; ')}\n`;
+}
diff --git a/cli/src/analysis/normalize-utils.ts b/cli/src/analysis/normalize-utils.ts
new file mode 100644
index 0000000..d86e598
--- /dev/null
+++ b/cli/src/analysis/normalize-utils.ts
@@ -0,0 +1,87 @@
+// Shared normalization infrastructure for friction, pattern, and prompt-quality categories.
+// Each domain provides its own canonical list, alias map, and label map.
+
+/** Standard Levenshtein distance between two strings */
+export function levenshtein(a: string, b: string): number {
+  const m = a.length;
+  const n = b.length;
+  const dp: number[][] = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0) as number[]);
+
+  for (let i = 0; i <= m; i++) dp[i][0] = i;
+  for (let j = 0; j <= n; j++) dp[0][j] = j;
+
+  for (let i = 1; i <= m; i++) {
+    for (let j = 1; j <= n; j++) {
+      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
+      dp[i][j] = Math.min(
+        dp[i - 1][j] + 1,
+        dp[i][j - 1] + 1,
+        dp[i - 1][j - 1] + cost
+      );
+    }
+  }
+
+  return dp[m][n];
+}
+
+export interface NormalizerConfig {
+  /** Canonical category strings (lowercase kebab-case) */
+  canonicalCategories: readonly string[];
+  /** Maps known aliases to their target (may be non-canonical cluster targets) */
+  aliases: Record<string, string>;
+}
+
+/**
+ * Generic category normalizer. Matching rules (in order):
+ * 1. Exact match against canonical list → return as-is
+ * 1.5. Explicit alias match → return alias target (may be non-canonical)
+ * 2. Levenshtein distance <= 2 → return canonical match
+ * 3. Substring match (shorter >= 5 chars, >= 50% of longer) → return canonical
+ * 4. No match → return original (novel category)
+ */
+export function normalizeCategory(category: string, config: NormalizerConfig): string {
+  const lower = category.toLowerCase();
+
+  // 1. Exact match
+  for (const canonical of config.canonicalCategories) {
+    if (lower === canonical) return canonical;
+  }
+
+  // 1.5. Explicit alias match
+  if (config.aliases[lower]) return config.aliases[lower];
+
+  // 2. Levenshtein distance <= 2
+  let bestMatch: string | null = null;
+  let bestDistance = Infinity;
+  for (const canonical of config.canonicalCategories) {
+    const dist = levenshtein(lower, canonical);
+    if (dist <= 2 && dist < bestDistance) {
+      bestDistance = dist;
+      bestMatch = canonical;
+    }
+  }
+  if (bestMatch) return bestMatch;
+
+  // 3. Substring match — only if the shorter string is a significant portion of the longer
+  // to avoid false positives like "type" matching "type-error"
+  for (const canonical of config.canonicalCategories) {
+    const shorter = lower.length < canonical.length ? lower : canonical;
+    const longer = lower.length < canonical.length ? canonical : lower;
+    if (shorter.length >= 5 && shorter.length / longer.length >= 0.5 && longer.includes(shorter)) {
+      return canonical;
+    }
+  }
+
+  // 4. No match — novel category
+  return category;
+}
+
+/**
+ * Convert kebab-case to Title Case. Shared fallback for category label functions.
+ */
+export function kebabToTitleCase(kebab: string): string {
+  return kebab
+    .split('-')
+    .map(word => word.charAt(0).toUpperCase() + word.slice(1))
+    .join(' ');
+}
diff --git a/cli/src/analysis/pattern-normalize.ts b/cli/src/analysis/pattern-normalize.ts
new file mode 100644
index 0000000..ec32d81
--- /dev/null
+++ b/cli/src/analysis/pattern-normalize.ts
@@ -0,0 +1,101 @@
+// Effective pattern category normalization.
+// Clusters similar free-form pattern categories to canonical ones during aggregation.
+// Delegates to normalize-utils.ts for the shared levenshtein/normalizeCategory algorithm.
+
+import { CANONICAL_PATTERN_CATEGORIES } from './prompt-constants.js';
+import { normalizeCategory, kebabToTitleCase } from './normalize-utils.js';
+
+// Human-readable labels for each canonical category.
+// Used in dashboard display (e.g., "structured-planning" → "Structured Planning").
+export const PATTERN_CATEGORY_LABELS: Record<string, string> = {
+  'structured-planning': 'Structured Planning',
+  'incremental-implementation': 'Incremental Implementation',
+  'verification-workflow': 'Verification Workflow',
+  'systematic-debugging': 'Systematic Debugging',
+  'self-correction': 'Self-Correction',
+  'context-gathering': 'Context Gathering',
+  'domain-expertise': 'Domain Expertise',
+  'effective-tooling': 'Effective Tooling',
+};
+
+// Explicit alias map for clustering emergent category variants.
+// Targets don't need to be in CANONICAL_PATTERN_CATEGORIES —
+// this clusters semantically-equivalent novel categories together.
+// Alias lookup runs AFTER exact canonical match but BEFORE Levenshtein,
+// so well-known emergent variants are clustered deterministically.
+const PATTERN_ALIASES: Record<string, string> = {
+  // structured-planning variants
+  'task-decomposition': 'structured-planning',
+  'plan-first': 'structured-planning',
+  'upfront-planning': 'structured-planning',
+  'phased-approach': 'structured-planning',
+  'task-breakdown': 'structured-planning',
+  'planning-before-implementation': 'structured-planning',
+
+  // effective-tooling variants
+  'agent-delegation': 'effective-tooling',
+  'agent-orchestration': 'effective-tooling',
+  'specialized-agents': 'effective-tooling',
+  'multi-agent': 'effective-tooling',
+  'tool-leverage': 'effective-tooling',
+
+  // verification-workflow variants
+  'build-test-verify': 'verification-workflow',
+  'test-driven-development': 'verification-workflow',
+  'tdd': 'verification-workflow',
+  'test-first': 'verification-workflow',
+  'pre-commit-checks': 'verification-workflow',
+
+  // systematic-debugging variants
+  'binary-search-debugging': 'systematic-debugging',
+  'methodical-debugging': 'systematic-debugging',
+  'log-based-debugging': 'systematic-debugging',
+  'debugging-methodology': 'systematic-debugging',
+
+  // self-correction variants
+  'course-correction': 'self-correction',
+  'pivot-on-failure': 'self-correction',
+  'backtracking': 'self-correction',
+
+  // context-gathering variants
+  'code-reading-first': 'context-gathering',
+  'codebase-exploration': 'context-gathering',
+  'understanding-before-changing': 'context-gathering',
+
+  // domain-expertise variants
+  'framework-knowledge': 'domain-expertise',
+  'types-first': 'domain-expertise',
+  'type-driven-development': 'domain-expertise',
+  'schema-first': 'domain-expertise',
+
+  // incremental-implementation variants
+  'small-steps': 'incremental-implementation',
+  'iterative-building': 'incremental-implementation',
+  'iterative-development': 'incremental-implementation',
+};
+
+/**
+ * Normalize a pattern category to the closest canonical category.
+ * Returns the original category if no close match is found.
+ * + * Matching rules (in order): + * 1. Exact match against canonical list → return as-is + * 1.5. Explicit alias match → return alias target (may be non-canonical) + * 2. Levenshtein distance <= 2 → return canonical match + * 3. Substring match (category contains canonical or vice versa) → return canonical + * 4. No match → return original (novel category) + */ +export function normalizePatternCategory(category: string): string { + return normalizeCategory(category, { + canonicalCategories: CANONICAL_PATTERN_CATEGORIES, + aliases: PATTERN_ALIASES, + }); +} + +/** + * Get a human-readable label for a pattern category. + * Falls back to Title Case conversion for novel categories. + */ +export function getPatternCategoryLabel(category: string): string { + return PATTERN_CATEGORY_LABELS[category] ?? kebabToTitleCase(category); +} diff --git a/cli/src/analysis/prompt-constants.ts b/cli/src/analysis/prompt-constants.ts new file mode 100644 index 0000000..40d4f8c --- /dev/null +++ b/cli/src/analysis/prompt-constants.ts @@ -0,0 +1,189 @@ +// Canonical category arrays and classification guidance strings for LLM analysis. +// Extracted from prompts.ts — imported by normalizers and prompt generators. + +// Shared guidance for friction category and attribution classification. +// Actor-neutral category definitions describe the gap, not the actor. +// Attribution field captures who contributed to the friction for actionability. +export const FRICTION_CLASSIFICATION_GUIDANCE = ` +FRICTION CLASSIFICATION GUIDANCE: + +Each friction point captures WHAT went wrong (category + description), WHO contributed (attribution), and WHY you classified it that way (_reasoning). + +CATEGORIES — classify the TYPE of gap or obstacle: +- "wrong-approach": A strategy was pursued that didn't fit the task — wrong architecture, wrong tool, wrong pattern. Includes choosing a suboptimal tool when a better one was available. 
+- "knowledge-gap": Incorrect knowledge was applied about a library, API, framework, or language feature. The capability existed but was used wrong. +- "stale-assumptions": Work proceeded from assumptions about current state that were incorrect (stale files, changed config, different environment, tool behavior changed between versions). +- "incomplete-requirements": Instructions were missing critical context, constraints, or acceptance criteria needed to proceed correctly. +- "context-loss": Prior decisions or constraints established earlier in the session were lost or forgotten. +- "scope-creep": Work expanded beyond the boundaries of the stated task. +- "repeated-mistakes": The same or similar error occurred multiple times despite earlier correction. +- "documentation-gap": Relevant docs existed but were inaccessible or unfindable during the session. +- "tooling-limitation": The AI coding tool or its underlying model genuinely could not perform a needed action — missing file system access, unsupported language feature, context window overflow, inability to run a specific command type. Diagnostic: Could a reasonable user prompt or approach have achieved the same result? If the only workaround is unreasonably complex or loses significant fidelity, this IS a tooling-limitation. If a straightforward alternative existed → it is NOT tooling-limitation. + RECLASSIFY if any of these apply: + - Rate-limited or throttled → create "rate-limit-hit" instead + - Agent crashed or lost state → use "wrong-approach" or create "agent-orchestration-failure" + - Wrong tool chosen when a better one existed → "wrong-approach" + - User didn't know the tool could do something → "knowledge-gap" + - Tool worked differently than expected → "stale-assumptions" + +DISAMBIGUATION — use these to break ties when two categories seem to fit: +- tooling-limitation vs wrong-approach: Limitation = the tool CANNOT do it (no workaround exists). 
Wrong-approach = the tool CAN do it but a suboptimal method was chosen. +- tooling-limitation vs knowledge-gap: Limitation = the capability genuinely does not exist. Knowledge-gap = the capability exists but was applied incorrectly. +- tooling-limitation vs stale-assumptions: Limitation = permanent gap in the tool. Stale-assumptions = the tool USED TO work differently or the assumption about current behavior was wrong. +- wrong-approach vs knowledge-gap: Wrong-approach = strategic choice (chose library X over Y). Knowledge-gap = factual error (used library X's API incorrectly). +- incomplete-requirements vs context-loss: Incomplete = the information was NEVER provided. Context-loss = it WAS provided earlier but was forgotten or dropped. + +When no category fits, create a specific kebab-case category. A precise novel category is better than a vague canonical one. + +ATTRIBUTION — 3-step decision tree (follow IN ORDER): +Step 1: Is the cause external to the user-AI interaction? (missing docs, broken tooling, infra outage) → "environmental" +Step 2: Could the USER have prevented this with better input? Evidence: vague prompt, missing context, no constraints, late requirements, ambiguous correction → "user-actionable" +Step 3: User input was clear and the AI still failed → "ai-capability" +When genuinely mixed between user-actionable and ai-capability, lean "user-actionable" — this tool helps users improve. 
+ +DESCRIPTION RULES: +- One neutral sentence describing the GAP, not the actor +- Include specific details (file names, APIs, error messages) +- Frame as "Missing X caused Y" NOT "The AI failed to X" or "The user forgot to X" +- Let the attribution field carry the who`; + +export const CANONICAL_FRICTION_CATEGORIES = [ + 'wrong-approach', + 'knowledge-gap', + 'stale-assumptions', + 'incomplete-requirements', + 'context-loss', + 'scope-creep', + 'repeated-mistakes', + 'documentation-gap', + 'tooling-limitation', +] as const; + +export const CANONICAL_PATTERN_CATEGORIES = [ + 'structured-planning', + 'incremental-implementation', + 'verification-workflow', + 'systematic-debugging', + 'self-correction', + 'context-gathering', + 'domain-expertise', + 'effective-tooling', +] as const; + +export const CANONICAL_PQ_DEFICIT_CATEGORIES = [ + 'vague-request', + 'missing-context', + 'late-constraint', + 'unclear-correction', + 'scope-drift', + 'missing-acceptance-criteria', + 'assumption-not-surfaced', +] as const; + +export const CANONICAL_PQ_STRENGTH_CATEGORIES = [ + 'precise-request', + 'effective-context', + 'productive-correction', +] as const; + +export const CANONICAL_PQ_CATEGORIES = [ + ...CANONICAL_PQ_DEFICIT_CATEGORIES, + ...CANONICAL_PQ_STRENGTH_CATEGORIES, +] as const; + +export const PROMPT_QUALITY_CLASSIFICATION_GUIDANCE = ` +PROMPT QUALITY CLASSIFICATION GUIDANCE: + +Each finding captures a specific moment where the user's prompting either caused friction (deficit) or enabled productivity (strength). + +DEFICIT CATEGORIES — classify prompting problems: +- "vague-request": Request lacked specificity needed for the AI to act without guessing. Missing file paths, function names, expected behavior, or concrete details. + NOT this category if the AI had enough context to succeed but failed anyway — that is an AI capability issue, not a prompting issue. 
+ +- "missing-context": Critical background knowledge about architecture, conventions, dependencies, or current state was not provided. + NOT this category if the information was available in the codebase and the AI could have found it by reading files — that is an AI context-gathering failure. + +- "late-constraint": A requirement or constraint was provided AFTER the AI had already started implementing a different approach, causing rework. + NOT this category if the constraint was genuinely discovered during implementation (requirements changed). Only classify if the user KNEW the constraint before the session started. + +- "unclear-correction": The user told the AI its output was wrong without explaining what was wrong or why. "That's not right", "try again", "no" without context. + NOT this category if the user gave a brief but sufficient correction ("use map instead of forEach" is clear enough). + +- "scope-drift": The session objective shifted mid-conversation, or multiple unrelated objectives were addressed in one session. + NOT this category if the user is working through logically connected subtasks of one objective. + +- "missing-acceptance-criteria": The user did not define what successful completion looks like, leading to back-and-forth about whether the output meets expectations. + NOT this category for exploratory sessions where the user is discovering what they want. + +- "assumption-not-surfaced": The user held an unstated assumption that the AI could not reasonably infer from code or conversation. + NOT this category if the assumption was reasonable for the AI to make (e.g., standard coding conventions). + +STRENGTH CATEGORIES — classify prompting successes (only when notably above average): +- "precise-request": Request included enough specificity (file paths, function names, expected behavior, error messages) that the AI could act correctly on the first attempt. 
+ +- "effective-context": User proactively shared architecture, conventions, prior decisions, or current state that the AI demonstrably used to make better decisions. + +- "productive-correction": When the AI went off track, the user provided a correction that included WHAT was wrong, WHY, and enough context for the AI to redirect effectively on the next response. + +CONTRASTIVE PAIRS: +- vague-request vs missing-context: Was the problem in HOW THE TASK WAS DESCRIBED (vague-request) or WHAT BACKGROUND KNOWLEDGE WAS ABSENT (missing-context)? +- late-constraint vs missing-context: Did the user EVENTUALLY provide it in the same session? Yes → late-constraint. Never → missing-context. +- missing-context vs assumption-not-surfaced: Is this a FACT the user could have copy-pasted (missing-context), or a BELIEF/PREFERENCE they held (assumption-not-surfaced)? +- scope-drift vs missing-acceptance-criteria: Did the user try to do TOO MANY THINGS (scope-drift) or ONE THING WITHOUT DEFINING SUCCESS (missing-acceptance-criteria)? +- unclear-correction vs vague-request: Was this the user's FIRST MESSAGE about this task (vague-request) or a RESPONSE TO AI OUTPUT (unclear-correction)? + +DIMENSION SCORING (0-100): +- context_provision: How well did the user provide relevant background upfront? + 90+: Proactively shared architecture, constraints, conventions. 50-69: Notable gaps causing detours. <30: No context, AI working blind. +- request_specificity: How precise were task requests? + 90+: File paths, expected behavior, scope boundaries. 50-69: Mix of specific and vague. <30: Nearly all requests lacked detail. +- scope_management: How focused was the session? + 90+: Single clear objective, logical progression. 50-69: Some drift but primary goal met. <30: Unfocused, no clear objective. +- information_timing: Were requirements provided when needed? + 90+: All constraints front-loaded before implementation. 50-69: Some important requirements late. 
<30: Requirements drip-fed, constant corrections. +- correction_quality: How well did the user redirect the AI? + 90+: Corrections included what, why, and context. 50-69: Mix of clear and unclear. <30: Corrections gave almost no signal. + Score 75 if no corrections were needed (absence of corrections in a successful session = good prompting). + +EDGE CASES: +- Short sessions (<5 user messages): Score conservatively. Do not penalize for missing elements unnecessary in quick tasks. +- Exploration sessions: Do not penalize for missing acceptance criteria or scope drift. +- Sessions where AI performed well despite vague prompts: Still classify deficits. Impact should be "low" since no visible cost. +- Agentic/delegation sessions: If the user gave a clear high-level directive and the AI autonomously planned and executed successfully, do not penalize for low message count or lack of micro-level specificity. Effective delegation IS good prompting. Focus on the quality of the initial delegation prompt.`; + +export const EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE = ` +EFFECTIVE PATTERN CLASSIFICATION GUIDANCE: + +Each effective pattern captures a technique or approach that contributed to a productive session outcome. + +BASELINE EXCLUSION — do NOT classify these as patterns: +- Routine file reads at session start (Read/Glob/Grep on <5 files before editing) +- Following explicit user instructions (user said "run tests" → running tests is not a pattern) +- Basic tool usage (single file edits, standard CLI commands) +- Trivial self-corrections (typo fixes, minor syntax errors caught immediately) +Only classify behavior that is NOTABLY thorough, strategic, or beyond baseline expectations. + +CATEGORIES — classify the TYPE of effective pattern: +- "structured-planning": Decomposed the task into explicit steps, defined scope boundaries, or established a plan BEFORE writing code. Signal: plan/task-list/scope-definition appears before implementation. 
+- "incremental-implementation": Work progressed in small, verifiable steps with validation between them. Signal: multiple small edits with checks between, not one large batch. +- "verification-workflow": Proactive correctness checks (builds, tests, linters, types) BEFORE considering work complete. Signal: test/build/lint commands when nothing was known broken. +- "systematic-debugging": Methodical investigation using structured techniques (binary search, log insertion, reproduction isolation). Signal: multiple targeted diagnostic steps, not random guessing. +- "self-correction": Recognized a wrong path and pivoted WITHOUT user correction. Signal: explicit acknowledgment of mistake + approach change. NOT this if the user pointed out the error. +- "context-gathering": NOTABLY thorough investigation before changes — reading 5+ files, cross-module exploration, schema/type/config review. Signal: substantial Read/Grep/Glob usage spanning multiple directories before any Edit/Write. +- "domain-expertise": Applied specific framework/API/language knowledge correctly on first attempt without searching. Signal: correct non-obvious API usage with no preceding search and no subsequent error. NOT this if files were read first — that is context-gathering. +- "effective-tooling": Leveraged advanced tool capabilities that multiplied productivity — agent delegation, parallel work, multi-file coordination, strategic mode selection. Signal: use of tool features beyond basic read/write/edit. + +CONTRASTIVE PAIRS: +- structured-planning vs incremental-implementation: Planning = DECIDING what to do (before). Incremental = HOW you execute (during). Can have one without the other. +- context-gathering vs domain-expertise: Gathering = ACTIVE INVESTIGATION (reading files). Expertise = APPLYING EXISTING KNOWLEDGE without investigation. If files were read first → context-gathering. +- verification-workflow vs systematic-debugging: Verification = PROACTIVE (checking working code). 
Debugging = REACTIVE (investigating a failure). +- self-correction vs user-directed: Self-correction = AI caught own mistake unprompted. User said "that's wrong" → NOT self-correction. + +DRIVER — 4-step decision tree (follow IN ORDER): +Step 1: Did user infrastructure enable this? (CLAUDE.md rules, agent configs, hookify hooks, custom commands, system prompts) → "user-driven" +Step 2: Did the user explicitly request this behavior? (asked for plan, requested tests, directed investigation) → "user-driven" +Step 3: Did the AI exhibit this without any user prompting or infrastructure? → "ai-driven" +Step 4: Both made distinct, identifiable contributions → "collaborative" +Use "collaborative" ONLY when you can name what EACH party contributed. If uncertain, prefer the more specific label. + +When no canonical category fits, create a specific kebab-case category (a precise novel category is better than forcing a poor fit).`; diff --git a/cli/src/analysis/prompt-quality-normalize.ts b/cli/src/analysis/prompt-quality-normalize.ts new file mode 100644 index 0000000..9dfa0b2 --- /dev/null +++ b/cli/src/analysis/prompt-quality-normalize.ts @@ -0,0 +1,131 @@ +// Prompt quality category normalization. +// Clusters similar free-form categories to canonical ones during aggregation. +// Delegates to normalize-utils.ts for the shared levenshtein/normalizeCategory algorithm. + +import { CANONICAL_PQ_CATEGORIES, CANONICAL_PQ_STRENGTH_CATEGORIES } from './prompt-constants.js'; +import { normalizeCategory, kebabToTitleCase } from './normalize-utils.js'; + +// Human-readable labels for each canonical category. 
+export const PQ_CATEGORY_LABELS: Record<string, string> = {
+  'vague-request': 'Vague Request',
+  'missing-context': 'Missing Context',
+  'late-constraint': 'Late Constraint',
+  'unclear-correction': 'Unclear Correction',
+  'scope-drift': 'Scope Drift',
+  'missing-acceptance-criteria': 'Missing Acceptance Criteria',
+  'assumption-not-surfaced': 'Assumption Not Surfaced',
+  'precise-request': 'Precise Request',
+  'effective-context': 'Effective Context',
+  'productive-correction': 'Productive Correction',
+};
+
+const STRENGTH_SET = new Set<string>(CANONICAL_PQ_STRENGTH_CATEGORIES);
+
+// Explicit alias map for clustering emergent category variants.
+// Targets don't need to be in CANONICAL_PQ_CATEGORIES —
+// this clusters semantically-equivalent novel categories together.
+// Alias lookup runs AFTER exact canonical match but BEFORE Levenshtein,
+// so well-known emergent variants are clustered deterministically.
+const PQ_ALIASES: Record<string, string> = {
+  // vague-request variants
+  'vague-instructions': 'vague-request',
+  'unclear-request': 'vague-request',
+  'imprecise-prompting': 'vague-request',
+  'ambiguous-request': 'vague-request',
+  'incomplete-request': 'vague-request',
+  'generic-request': 'vague-request',
+
+  // missing-context variants
+  'missing-information': 'missing-context',
+  'insufficient-context': 'missing-context',
+  'no-context': 'missing-context',
+  'lack-of-context': 'missing-context',
+  'missing-background': 'missing-context',
+
+  // late-constraint variants
+  'late-context': 'late-constraint',
+  'late-requirements': 'late-constraint',
+  'piecemeal-requirements': 'late-constraint',
+  'drip-fed-requirements': 'late-constraint',
+  'incremental-requirements': 'late-constraint',
+  'late-specification': 'late-constraint',
+
+  // unclear-correction variants
+  'unclear-feedback': 'unclear-correction',
+  'vague-correction': 'unclear-correction',
+  'unhelpful-correction': 'unclear-correction',
+  'vague-feedback': 'unclear-correction',
+
+  // scope-drift variants
+  'context-drift': 'scope-drift',
+  'objective-bloat': 'scope-drift',
+  'session-bloat': 'scope-drift',
+  'topic-switching': 'scope-drift',
+  'scope-creep': 'scope-drift',
+
+  // missing-acceptance-criteria variants
+  'no-acceptance-criteria': 'missing-acceptance-criteria',
+  'undefined-done': 'missing-acceptance-criteria',
+  'no-definition-of-done': 'missing-acceptance-criteria',
+  'unclear-success-criteria': 'missing-acceptance-criteria',
+
+  // assumption-not-surfaced variants
+  'hidden-assumption': 'assumption-not-surfaced',
+  'unstated-assumption': 'assumption-not-surfaced',
+  'implicit-assumption': 'assumption-not-surfaced',
+  'unspoken-expectation': 'assumption-not-surfaced',
+
+  // precise-request variants (strengths)
+  'clear-request': 'precise-request',
+  'specific-request': 'precise-request',
+  'well-specified-request': 'precise-request',
+  'detailed-request': 'precise-request',
+
+  // effective-context variants (strengths)
+  'good-context': 'effective-context',
+  'upfront-context': 'effective-context',
+  'proactive-context': 'effective-context',
+  'rich-context': 'effective-context',
+
+  // productive-correction variants (strengths)
+  'clear-correction': 'productive-correction',
+  'effective-feedback': 'productive-correction',
+  'helpful-correction': 'productive-correction',
+  'constructive-feedback': 'productive-correction',
+};
+
+/**
+ * Normalize a prompt quality category to the closest canonical category.
+ * Returns the original category if no close match is found.
+ *
+ * Matching rules (in order):
+ * 1. Exact match against canonical list → return as-is
+ * 1.5. Explicit alias match → return alias target (may be non-canonical)
+ * 2. Levenshtein distance <= 2 → return canonical match
+ * 3. Substring match (category contains canonical or vice versa) → return canonical
+ * 4. No match → return original (novel category)
+ *
+ * Note: alias targets in PQ_ALIASES bypass the canonical check intentionally.
+ */ +export function normalizePromptQualityCategory(category: string): string { + return normalizeCategory(category, { + canonicalCategories: CANONICAL_PQ_CATEGORIES, + aliases: PQ_ALIASES, + }); +} + +/** + * Get a human-readable label for a prompt quality category. + * Falls back to Title Case conversion for novel categories. + */ +export function getPQCategoryLabel(category: string): string { + return PQ_CATEGORY_LABELS[category] ?? kebabToTitleCase(category); +} + +/** + * Get the type (deficit or strength) for a prompt quality category. + * Novel categories default to deficit. + */ +export function getPQCategoryType(category: string): 'deficit' | 'strength' { + return STRENGTH_SET.has(category) ? 'strength' : 'deficit'; +} diff --git a/cli/src/analysis/prompt-types.ts b/cli/src/analysis/prompt-types.ts new file mode 100644 index 0000000..dfb5fd8 --- /dev/null +++ b/cli/src/analysis/prompt-types.ts @@ -0,0 +1,143 @@ +// Type definitions for LLM prompt analysis. +// Extracted from prompts.ts — shared by message-format.ts, response-parsers.ts, and analysis.ts. + +// SQLite row format for messages — snake_case with JSON-encoded arrays. +// This matches the shape returned by server/src/routes/messages.ts. +export interface SQLiteMessageRow { + id: string; + session_id: string; + type: 'user' | 'assistant' | 'system'; + content: string; + thinking: string | null; + tool_calls: string; // JSON-encoded ToolCall[] + tool_results: string; // JSON-encoded ToolResult[] + usage: string | null; + timestamp: string; + parent_id: string | null; +} + +/** + * Optional session metadata from V6 columns. + * Passed to prompt generators to add context signals about context compaction + * and slash command usage. Only present when at least one V6 field is non-empty. 
+ */
+export interface SessionMetadata {
+  compactCount?: number; // from sessions.compact_count (user-initiated /compact)
+  autoCompactCount?: number; // from sessions.auto_compact_count (LLM-initiated compaction)
+  slashCommands?: string[]; // from sessions.slash_commands (JSON array of command names)
+}
+
+/**
+ * A structured content block for LLM messages.
+ * Used to enable prompt caching (Anthropic ephemeral cache) and structured multi-part messages.
+ * The `cache_control` field instructs Anthropic to cache everything up to and including this block.
+ * Mirrors server/src/llm/types.ts ContentBlock — keep in sync.
+ */
+export interface ContentBlock {
+  type: 'text';
+  text: string;
+  cache_control?: { type: 'ephemeral' };
+}
+
+export interface AnalysisResponse {
+  facets?: {
+    outcome_satisfaction: string;
+    workflow_pattern: string | null;
+    had_course_correction: boolean;
+    course_correction_reason: string | null;
+    iteration_count: number;
+    friction_points: Array<{
+      _reasoning?: string;
+      category: string;
+      attribution?: string;
+      description: string;
+      severity: string;
+      resolution: string;
+    }>;
+    effective_patterns: Array<{
+      _reasoning?: string;
+      category: string;
+      description: string;
+      confidence: number;
+      driver?: 'user-driven' | 'ai-driven' | 'collaborative';
+    }>;
+  };
+  summary: {
+    title: string;
+    content: string;
+    outcome?: 'success' | 'partial' | 'abandoned' | 'blocked';
+    bullets: string[];
+  };
+  decisions: Array<{
+    title: string;
+    situation?: string;
+    choice?: string;
+    reasoning: string;
+    alternatives?: Array<{ option: string; rejected_because: string }>;
+    trade_offs?: string;
+    revisit_when?: string;
+    confidence?: number;
+    evidence?: string[];
+  }>;
+  learnings: Array<{
+    title: string;
+    symptom?: string;
+    root_cause?: string;
+    takeaway?: string;
+    applies_when?: string;
+    confidence?: number;
+    evidence?: string[];
+  }>;
+}
+
+export interface ParseError {
+  error_type: 'json_parse_error' | 'no_json_found' | 'invalid_structure';
+  error_message: string;
+  response_length: number;
+  response_preview: string;
+}
+
+export type ParseResult<T> =
+  | { success: true; data: T }
+  | { success: false; error: ParseError };
+
+export interface PromptQualityFinding {
+  category: string;
+  type: 'deficit' | 'strength';
+  description: string;
+  message_ref: string;
+  impact: 'high' | 'medium' | 'low';
+  confidence: number;
+  suggested_improvement?: string;
+}
+
+export interface PromptQualityTakeaway {
+  type: 'improve' | 'reinforce';
+  category: string;
+  label: string;
+  message_ref: string;
+  // improve fields
+  original?: string;
+  better_prompt?: string;
+  why?: string;
+  // reinforce fields
+  what_worked?: string;
+  why_effective?: string;
+}
+
+export interface PromptQualityDimensionScores {
+  context_provision: number;
+  request_specificity: number;
+  scope_management: number;
+  information_timing: number;
+  correction_quality: number;
+}
+
+export interface PromptQualityResponse {
+  efficiency_score: number;
+  message_overhead: number;
+  assessment: string;
+  takeaways: PromptQualityTakeaway[];
+  findings: PromptQualityFinding[];
+  dimension_scores: PromptQualityDimensionScores;
+}
diff --git a/cli/src/analysis/prompts.ts b/cli/src/analysis/prompts.ts
new file mode 100644
index 0000000..6d9e11c
--- /dev/null
+++ b/cli/src/analysis/prompts.ts
@@ -0,0 +1,423 @@
+// Prompt template strings and generator functions for LLM session analysis.
+// Types → prompt-types.ts, constants → prompt-constants.ts,
+// formatting → message-format.ts, parsers → response-parsers.ts.
+ +import type { SessionMetadata, ContentBlock } from './prompt-types.js'; +import { + FRICTION_CLASSIFICATION_GUIDANCE, + CANONICAL_FRICTION_CATEGORIES, + CANONICAL_PATTERN_CATEGORIES, + CANONICAL_PQ_DEFICIT_CATEGORIES, + CANONICAL_PQ_STRENGTH_CATEGORIES, + PROMPT_QUALITY_CLASSIFICATION_GUIDANCE, + EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE, +} from './prompt-constants.js'; +import { formatSessionMetaLine } from './message-format.js'; + +// ============================================================================= +// SHARED SYSTEM PROMPT +// A minimal (~100 token) system prompt shared by all analysis calls. +// The full classification guidance and schema examples live in the instruction +// suffix (user[1]), keeping the system prompt cacheable across calls. +// ============================================================================= + +/** + * Shared system prompt for all LLM analysis calls. + * Paired with buildCacheableConversationBlock() + an analysis-specific instruction block. + */ +export const SHARED_ANALYST_SYSTEM_PROMPT = `You are a senior staff engineer analyzing an AI coding session. You will receive the conversation transcript followed by specific extraction instructions. Respond with valid JSON only, wrapped in <json>...</json> tags.`; + +// ============================================================================= +// CACHEABLE CONVERSATION BLOCK +// Wraps the formatted conversation in an Anthropic ephemeral cache block. +// CRITICAL: Must contain ONLY the formatted messages — no project name, no session +// metadata, no per-session variables. This ensures cache hits across sessions. +// ============================================================================= + +/** + * Wrap formatted conversation messages in a cacheable content block. + * The cache_control field instructs Anthropic to cache everything up to
+ * + * Non-Anthropic providers receive this as a ContentBlock[] and use + * flattenContent() to convert it to a plain string. + * + * @param formattedMessages - Output of formatMessagesForAnalysis() + */ +export function buildCacheableConversationBlock(formattedMessages: string): ContentBlock { + return { + type: 'text', + // Trailing double newline ensures the instruction block (user[1]) reads as a + // distinct section when providers flatten content blocks to a single string. + text: `--- CONVERSATION ---\n${formattedMessages}\n--- END CONVERSATION ---\n\n`, + cache_control: { type: 'ephemeral' }, + }; +} + +// ============================================================================= +// SESSION ANALYSIS INSTRUCTIONS +// The instruction suffix for session analysis calls (user[1]). +// Contains the full analyst persona, schema, and quality guidance. +// Per-session variables (project name, summary, meta) go here — NOT in the +// cached conversation block. +// ============================================================================= + +/** + * Build the instruction suffix for session analysis. + * Used as the second content block in the user message, after the cached conversation. + */ +export function buildSessionAnalysisInstructions( + projectName: string, + sessionSummary: string | null, + meta?: SessionMetadata +): string { + return `You are a senior staff engineer writing entries for a team's engineering knowledge base. You've just observed an AI-assisted coding session and your job is to extract the insights that would save another engineer time if they encountered a similar situation 6 months from now. + +Your audience is a developer who has never seen this session but works on the same codebase. They need enough context to understand WHY a decision was made, WHAT specific gotcha was discovered, and WHEN this knowledge applies. + +Project: ${projectName} +${sessionSummary ? 
`Session Summary: ${sessionSummary}\n` : ''}${formatSessionMetaLine(meta)} +=== PART 1: SESSION FACETS === +Extract these FIRST as a holistic session assessment: + +1. outcome_satisfaction: Rate the session outcome. + - "high": Task completed successfully, user satisfied + - "medium": Partial completion or minor issues + - "low": Significant problems, user frustrated + - "abandoned": Session ended without achieving the goal + +2. workflow_pattern: Identify the dominant workflow pattern (or null if unclear). + Recommended values: "plan-then-implement", "iterative-refinement", "debug-fix-verify", "explore-then-build", "direct-execution" + +3. friction_points: Identify up to 5 moments where progress was blocked or slowed (array, max 5). + Each friction point has: + - _reasoning: (REQUIRED) Your reasoning chain for category + attribution. 2-3 sentences max. Walk through the decision tree steps. This field is saved but not shown to users — use it to think before classifying. + - category: Use one of these PREFERRED categories when applicable: ${CANONICAL_FRICTION_CATEGORIES.join(', ')}. Create a new kebab-case category only when none of these fit. + - attribution: "user-actionable" (better user input would have prevented this), "ai-capability" (AI failed despite adequate input), or "environmental" (external constraint) + - description: One neutral sentence describing what happened, with specific details (file names, APIs, errors) + - severity: "high" (blocked progress for multiple turns), "medium" (caused a detour), "low" (minor hiccup) + - resolution: "resolved" (fixed in session), "workaround" (bypassed), "unresolved" (still broken) +${FRICTION_CLASSIFICATION_GUIDANCE} + +4. effective_patterns: Up to 3 techniques or approaches that worked particularly well (array, max 3). + Each has: + - _reasoning: (REQUIRED) Your reasoning chain for category + driver. 2-3 sentences max. Walk through the decision tree steps and baseline exclusion check. 
This field is saved but not shown to users — use it to think before classifying. + - category: Use one of these PREFERRED categories when applicable: structured-planning, incremental-implementation, verification-workflow, systematic-debugging, self-correction, context-gathering, domain-expertise, effective-tooling. Create a new kebab-case category only when none fit. + - description: Specific technique worth repeating (1-2 sentences with concrete detail) + - confidence: 0-100 how confident you are this is genuinely effective + - driver: Who drove this pattern — "user-driven" (user explicitly requested it), "ai-driven" (AI exhibited it without prompting), or "collaborative" (both contributed or emerged from interaction) +${EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE} + +5. had_course_correction: true if the user redirected the AI from a wrong approach, false otherwise +6. course_correction_reason: If had_course_correction is true, briefly explain what was corrected (or null) +7. iteration_count: Number of times the user had to clarify, correct, or re-explain something + +If the session has minimal friction and straightforward execution, use empty arrays for friction_points, set outcome_satisfaction to "high", and iteration_count to 0. + +=== PART 2: INSIGHTS === +Then extract these: + +You will extract: +1. **Summary**: A narrative of what was accomplished and the outcome +2. **Decisions**: Technical choices made — with full situation context, reasoning, rejected alternatives, trade-offs, and conditions for revisiting (max 3) +3. 
**Learnings**: Technical discoveries, gotchas, debugging breakthroughs — with the observable symptom, root cause, and a transferable takeaway (max 5) + +Quality Standards: +- Only include insights you would write in a team knowledge base for future reference +- Each insight MUST reference concrete details: specific file names, library names, error messages, API endpoints, or code patterns +- Do not invent file names, APIs, errors, or details not present in the conversation +- Rate your confidence in each insight's value (0-100). Only include insights you rate 70+. +- It is better to return 0 insights in a category than to include generic or trivial ones +- If a session is straightforward with no notable decisions or learnings, say so in the summary and leave other categories empty + +Length Guidance: +- Fill every field in the schema. An empty "trade_offs" or "revisit_when" is worse than a longer response. +- Total response: stay under 2000 tokens. If you must cut, drop lower-confidence insights rather than compressing high-confidence ones. +- Evidence: 1-3 short quotes per insight, referencing turn labels. +- Prefer precision over brevity — a specific 3-sentence insight beats a vague 1-sentence insight. + +DO NOT include insights like these (too generic/trivial): +- "Used debugging techniques to fix an issue" +- "Made architectural decisions about the codebase" +- "Implemented a new feature" (the summary already covers this) +- "Used React hooks for state management" (too generic without specifics) +- "Fixed a bug in the code" (what bug? what was the root cause?) +- Anything that restates the task without adding transferable knowledge + +Here is an example of an EXCELLENT insight — this is the quality bar: + +EXCELLENT learning: +{ + "title": "Tailwind v4 requires @theme inline{} for CSS variable utilities", + "symptom": "After Tailwind v3→v4 upgrade, custom utilities like bg-primary stopped working. 
Classes present in HTML but no styles applied.", + "root_cause": "Tailwind v4 removed tailwind.config.js theme extension. CSS variables in :root are not automatically available as utilities — must be registered via @theme inline {} in the CSS file.", + "takeaway": "When migrating Tailwind v3→v4 with shadcn/ui: add @theme inline {} mapping CSS variables, add @custom-variant dark for class-based dark mode, replace tailwindcss-animate with tw-animate-css.", + "applies_when": "Any Tailwind v3→v4 migration using CSS variables for theming, especially with shadcn/ui.", + "confidence": 95, + "evidence": ["User#12: 'The colors are all gone after the upgrade'", "Assistant#13: 'Tailwind v4 requires explicit @theme inline registration...'"] +} + +Extract insights in this JSON format: +{ + "facets": { + "outcome_satisfaction": "high | medium | low | abandoned", + "workflow_pattern": "plan-then-implement | iterative-refinement | debug-fix-verify | explore-then-build | direct-execution | null", + "had_course_correction": false, + "course_correction_reason": null, + "iteration_count": 0, + "friction_points": [ + { + "_reasoning": "User said 'fix the auth' without specifying OAuth vs session-based or which file. Step 1: not external — this is about the prompt, not infrastructure. Step 2: user could have specified which auth flow → user-actionable. Category: incomplete-requirements fits better than vague-request because specific constraints (which flow, which file) were missing, not the overall task description.", + "category": "incomplete-requirements", + "attribution": "user-actionable", + "description": "Missing specification of which auth flow (OAuth vs session) caused implementation of wrong provider in auth.ts", + "severity": "medium", + "resolution": "resolved" + }, + { + "_reasoning": "AI applied Express middleware pattern to a Hono route despite conversation showing Hono imports. Step 1: not external. Step 2: user provided clear Hono context in prior messages. 
Step 3: AI failed despite adequate input → ai-capability. Category: knowledge-gap — incorrect framework API knowledge was applied.", + "category": "knowledge-gap", + "attribution": "ai-capability", + "description": "Express-style middleware pattern applied to Hono route despite Hono imports visible in conversation context", + "severity": "high", + "resolution": "resolved" + } + ], + "effective_patterns": [ + { + "_reasoning": "Before editing, AI read 8 files across server/src/routes/ and server/src/llm/ to understand the data flow. Baseline check: 8 files across 2 directories = beyond routine (<5 file) reads. Step 1: no CLAUDE.md rule requiring this. Step 2: user didn't ask for investigation. Step 3: AI explored autonomously → ai-driven. Category: context-gathering (active investigation, not pre-existing knowledge).", + "category": "context-gathering", + "description": "Read 8 files across routes/ and llm/ directories to map the data flow before modifying the aggregation query, preventing a type mismatch that would have required rework", + "confidence": 88, + "driver": "ai-driven" + } + ] + }, + "summary": { + "title": "Brief title describing main accomplishment (max 80 chars)", + "content": "2-4 sentence narrative: what was the goal, what was done, what was the outcome. 
Mention the primary file or component changed.", + "outcome": "success | partial | abandoned | blocked", + "bullets": ["Each bullet names a specific artifact (file, function, endpoint) and what changed"] + }, + "decisions": [ + { + "title": "The specific technical choice made (max 80 chars)", + "situation": "What problem or requirement led to this decision point", + "choice": "What was chosen and how it was implemented", + "reasoning": "Why this choice was made — the key factors that tipped the decision", + "alternatives": [ + {"option": "Name of alternative", "rejected_because": "Why it was not chosen"} + ], + "trade_offs": "What downsides were accepted, what was given up", + "revisit_when": "Under what conditions this decision should be reconsidered (or 'N/A' if permanent)", + "confidence": 85, + "evidence": ["User#4: quoted text...", "Assistant#5: quoted text..."] + } + ], + "learnings": [ + { + "title": "Specific technical discovery or gotcha (max 80 chars)", + "symptom": "What went wrong or was confusing — the observable behavior that triggered investigation", + "root_cause": "The underlying technical reason — why it happened", + "takeaway": "The transferable lesson — what to do or avoid in similar situations, useful outside this project", + "applies_when": "Conditions under which this knowledge is relevant (framework version, configuration, etc.)", + "confidence": 80, + "evidence": ["User#7: quoted text...", "Assistant#8: quoted text..."] + } + ] +} + +Only include insights rated 70+ confidence. If you cannot cite evidence, drop the insight. Return empty arrays for categories with no strong insights. Max 3 decisions, 5 learnings. +Evidence should reference the labeled turns in the conversation (e.g., "User#2", "Assistant#5"). + +Respond with valid JSON only, wrapped in ... tags. 
Do not include any other text.`; +} + +// ============================================================================= +// PROMPT QUALITY INSTRUCTIONS +// The instruction suffix for prompt quality analysis calls (user[1]). +// ============================================================================= + +/** + * Build the instruction suffix for prompt quality analysis. + * Used as the second content block in the user message, after the cached conversation. + */ +export function buildPromptQualityInstructions( + projectName: string, + sessionMeta: { + humanMessageCount: number; + assistantMessageCount: number; + toolExchangeCount: number; + }, + meta?: SessionMetadata +): string { + return `You are a prompt engineering coach helping developers communicate more effectively with AI coding assistants. You review conversations and identify specific moments where better prompting would have saved time — AND moments where the user prompted particularly well. + +You will produce: +1. **Takeaways**: Concrete before/after examples the user can learn from (max 4) +2. **Findings**: Categorized findings for cross-session aggregation (max 8) +3. **Dimension scores**: 5 numeric dimensions for progress tracking +4. **Efficiency score**: 0-100 overall rating +5. **Assessment**: 2-3 sentence summary + +Project: ${projectName} +Session shape: ${sessionMeta.humanMessageCount} user messages, ${sessionMeta.assistantMessageCount} assistant messages, ${sessionMeta.toolExchangeCount} tool exchanges +${formatSessionMetaLine(meta)} +Before evaluating, mentally walk through the conversation and identify: +1. Each time the assistant asked for clarification that could have been avoided +2. Each time the user corrected the assistant's interpretation +3. Each time the user repeated an instruction they gave earlier +4. Whether critical context or requirements were provided late +5. Whether the user discussed the plan/approach before implementation +6. 
Moments where the user's prompt was notably well-crafted +7. If context compactions occurred, note that the AI may have lost context — repeated instructions IMMEDIATELY after a compaction are NOT a user prompting deficit +These are your candidate findings. Only include them if they are genuinely actionable. + +${PROMPT_QUALITY_CLASSIFICATION_GUIDANCE} + +Guidelines: +- Focus on USER messages only — don't critique the assistant's responses +- Be constructive, not judgmental — the goal is to help users improve +- A score of 100 means every user message was perfectly clear and complete +- A score of 50 means about half the messages could have been more efficient +- Include BOTH deficits and strengths — what went right matters as much as what went wrong +- If the user prompted well, say so — don't manufacture issues +- If the session had context compactions, do NOT penalize the user for repeating instructions immediately after a compaction — the AI lost context, not the user. Repetition unrelated to compaction events should still be flagged. 
+ +Length Guidance: +- Max 4 takeaways (ordered: improve first, then reinforce), max 8 findings +- better_prompt must be a complete, usable prompt — not vague meta-advice +- assessment: 2-3 sentences +- Total response: stay under 2500 tokens + +Evaluate the user's prompting quality and respond with this JSON format: +{ + "efficiency_score": 75, + "message_overhead": 3, + "assessment": "2-3 sentence summary of prompting style and efficiency", + "takeaways": [ + { + "type": "improve", + "category": "late-constraint", + "label": "Short human-readable heading", + "message_ref": "User#5", + "original": "The user's original message (abbreviated)", + "better_prompt": "A concrete rewrite with the missing context included", + "why": "One sentence: why the original caused friction" + }, + { + "type": "reinforce", + "category": "precise-request", + "label": "Short human-readable heading", + "message_ref": "User#0", + "what_worked": "What the user did well", + "why_effective": "Why it led to a good outcome" + } + ], + "findings": [ + { + "category": "late-constraint", + "type": "deficit", + "description": "One neutral sentence with specific details", + "message_ref": "User#5", + "impact": "high", + "confidence": 90, + "suggested_improvement": "Concrete rewrite or behavioral change" + }, + { + "category": "precise-request", + "type": "strength", + "description": "One sentence describing what the user did well", + "message_ref": "User#0", + "impact": "medium", + "confidence": 85 + } + ], + "dimension_scores": { + "context_provision": 70, + "request_specificity": 65, + "scope_management": 80, + "information_timing": 55, + "correction_quality": 75 + } +} + +Category values — use these PREFERRED categories: +Deficits: ${CANONICAL_PQ_DEFICIT_CATEGORIES.join(', ')} +Strengths: ${CANONICAL_PQ_STRENGTH_CATEGORIES.join(', ')} +Create a new kebab-case category only when none of these fit. 
+ +Rules: +- message_ref uses the labeled turns in the conversation (e.g., "User#0", "User#5") +- Only include genuinely notable findings, not normal back-and-forth +- Takeaways are the user-facing highlights — max 4, ordered: improve first, then reinforce +- Findings are the full categorized set for aggregation — max 8 +- If the user prompted well, include strength findings and reinforce takeaways — don't manufacture issues +- message_overhead is how many fewer messages the session could have taken with better prompts +- dimension_scores: each 0-100. Score correction_quality as 75 if no corrections were needed. + +Respond with valid JSON only, wrapped in <json>...</json> tags. Do not include any other text.`; +} + +// ============================================================================= +// FACET-ONLY INSTRUCTIONS +// The instruction suffix for facet-only extraction calls (user[1]). +// ============================================================================= + +/** + * Build the instruction suffix for facet-only extraction (backfill path). + * Used as the second content block in the user message, after the cached conversation. + */ +export function buildFacetOnlyInstructions( + projectName: string, + sessionSummary: string | null, + meta?: SessionMetadata +): string { + return `You are assessing an AI coding session to extract structured metadata for cross-session pattern analysis. + +Project: ${projectName} +${sessionSummary ? `Session Summary: ${sessionSummary}\n` : ''}${formatSessionMetaLine(meta)} +Extract session facets — a holistic assessment of how the session went: + +1. outcome_satisfaction: "high" (completed successfully), "medium" (partial), "low" (problems), "abandoned" (gave up) +2. workflow_pattern: The dominant pattern, or null. Values: "plan-then-implement", "iterative-refinement", "debug-fix-verify", "explore-then-build", "direct-execution" +3. friction_points: Up to 5 moments where progress stalled (array).
+ Each: { _reasoning (3-step attribution decision tree reasoning), category (kebab-case, prefer: ${CANONICAL_FRICTION_CATEGORIES.join(', ')}), attribution ("user-actionable"|"ai-capability"|"environmental"), description (one neutral sentence with specific details), severity ("high"|"medium"|"low"), resolution ("resolved"|"workaround"|"unresolved") } +${FRICTION_CLASSIFICATION_GUIDANCE} +4. effective_patterns: Up to 3 things that worked well (array). + Each: { _reasoning (driver decision tree reasoning — check user infrastructure first), category (kebab-case, prefer: ${CANONICAL_PATTERN_CATEGORIES.join(', ')}), description (specific technique, 1-2 sentences), confidence (0-100), driver ("user-driven"|"ai-driven"|"collaborative") } +${EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE} +5. had_course_correction: true/false — did the user redirect the AI? +6. course_correction_reason: Brief explanation if true, null otherwise +7. iteration_count: How many user clarification/correction cycles occurred + +Extract facets in this JSON format: +{ + "outcome_satisfaction": "high | medium | low | abandoned", + "workflow_pattern": "string or null", + "had_course_correction": false, + "course_correction_reason": null, + "iteration_count": 0, + "friction_points": [ + { + "_reasoning": "Reasoning for category + attribution classification", + "category": "kebab-case-category", + "attribution": "user-actionable | ai-capability | environmental", + "description": "One neutral sentence about the gap, with specific details", + "severity": "high | medium | low", + "resolution": "resolved | workaround | unresolved" + } + ], + "effective_patterns": [ + { + "_reasoning": "Reasoning for category + driver classification, including baseline check", + "category": "kebab-case-category", + "description": "technique", + "confidence": 85, + "driver": "user-driven | ai-driven | collaborative" + } + ] +} + +Respond with valid JSON only, wrapped in ... 
tags.`; +} diff --git a/cli/src/analysis/response-parsers.ts b/cli/src/analysis/response-parsers.ts new file mode 100644 index 0000000..542b5d6 --- /dev/null +++ b/cli/src/analysis/response-parsers.ts @@ -0,0 +1,200 @@ +// LLM response parsing utilities. +// Extracted from prompts.ts — handles JSON extraction, repair, and validation. + +import { jsonrepair } from 'jsonrepair'; +import type { AnalysisResponse, ParseError, ParseResult, PromptQualityResponse, PromptQualityDimensionScores } from './prompt-types.js'; + +function buildResponsePreview(text: string, head = 200, tail = 200): string { + if (text.length <= head + tail + 20) return text; + return `${text.slice(0, head)}\n...[${text.length - head - tail} chars omitted]...\n${text.slice(-tail)}`; +} + +export function extractJsonPayload(response: string): string | null { + const tagged = response.match(/<json>\s*([\s\S]*?)\s*<\/json>/i); + if (tagged?.[1]) return tagged[1].trim(); + const jsonMatch = response.match(/\{[\s\S]*\}/); + return jsonMatch ? jsonMatch[0] : null; +} + +/** + * Parse the LLM response into structured insights. + */ +export function parseAnalysisResponse(response: string): ParseResult<AnalysisResponse> { + const response_length = response.length; + + const preview = buildResponsePreview(response); + + const jsonPayload = extractJsonPayload(response); + if (!jsonPayload) { + console.error('No JSON found in analysis response'); + return { + success: false, + error: { error_type: 'no_json_found', error_message: 'No JSON found in analysis response', response_length, response_preview: preview }, + }; + } + + let parsed: AnalysisResponse; + try { + parsed = JSON.parse(jsonPayload) as AnalysisResponse; + } catch { + // Attempt repair — handles trailing commas, unclosed braces, truncated output + try { + parsed = JSON.parse(jsonrepair(jsonPayload)) as AnalysisResponse; + } catch (err) { + const msg = err instanceof Error ?
err.message : String(err); + console.error('Failed to parse analysis response (after jsonrepair):', err); + return { + success: false, + error: { error_type: 'json_parse_error', error_message: msg, response_length, response_preview: preview }, + }; + } + } + + if (!parsed.summary || typeof parsed.summary.title !== 'string') { + console.error('Invalid analysis response structure'); + return { + success: false, + error: { error_type: 'invalid_structure', error_message: 'Missing or invalid summary field', response_length, response_preview: preview }, + }; + } + + // Guard against LLM returning non-array values (e.g. "decisions": "none"). + // || [] alone won't catch truthy non-arrays — Array.isArray is required. + parsed.decisions = Array.isArray(parsed.decisions) ? parsed.decisions : []; + parsed.learnings = Array.isArray(parsed.learnings) ? parsed.learnings : []; + + // Normalize facet arrays before monitors access .some() — a non-array truthy value + // (e.g. LLM returns "friction_points": "none") would throw a TypeError on .some(). + if (parsed.facets) { + if (!Array.isArray(parsed.facets.friction_points)) parsed.facets.friction_points = []; + if (!Array.isArray(parsed.facets.effective_patterns)) parsed.facets.effective_patterns = []; + } + + // Observability: two-tier tooling-limitation monitor. + // Tier 1: _reasoning contains misclassification signals NOT in a negation context → likely wrong category. + // Tier 2: no conflicting signals (or signal was negated) → generic reminder to verify. + // Re-evaluate after ~30 sessions with improved FRICTION_CLASSIFICATION_GUIDANCE. 
+ if (parsed.facets?.friction_points?.some(fp => fp.category === 'tooling-limitation')) { + // Expanded regex covers both literal terms and GPT-4o paraphrasing patterns + const MISCLASS_SIGNALS = /rate.?limit|throttl|quota.?exceed|crash|fail.{0,10}unexpect|lost.?state|context.{0,10}(?:drop|lost|unavail)|wrong.?tool|different.?(?:approach|method)|(?:didn.t|did not|unaware).{0,10}(?:know|capabil)|(?:older|previous).?version|used to (?:work|be)|behavio.?r.?change/i; + const NEGATION_CONTEXT = /\bnot\b|\bnor\b|\bisn.t\b|\bwasn.t\b|\brule[d]? out\b|\brejected?\b|\beliminated?\b|\breclassif/i; + const toolingFps = parsed.facets.friction_points.filter(fp => fp.category === 'tooling-limitation'); + for (const fp of toolingFps) { + if (!fp._reasoning) { + console.warn('[friction-monitor] LLM classified friction as "tooling-limitation" without _reasoning — cannot verify'); + continue; + } + const matchResult = fp._reasoning.match(MISCLASS_SIGNALS); + if (matchResult) { + // Check if the signal appears in a negation context (model correctly eliminating the alternative) + const matchIdx = fp._reasoning.search(MISCLASS_SIGNALS); + const preceding = fp._reasoning.slice(Math.max(0, matchIdx - 40), matchIdx); + if (!NEGATION_CONTEXT.test(preceding)) { + console.warn(`[friction-monitor] Likely misclassification: "tooling-limitation" with reasoning mentioning "${matchResult[0]}" — review category`); + } + // If negated, the model correctly considered and rejected the alternative — no warning + } else { + console.warn('[friction-monitor] LLM classified friction as "tooling-limitation" — verify genuine tool limitation'); + } + } + } + + // Observability: warn when LLM returns effective_pattern without category or driver field, + // or with an unrecognized driver value. + // Catches models that ignore the classification instructions (especially smaller Ollama models). + // Remove after confirming classification quality over ~20 new sessions. 
+ if (parsed.facets?.effective_patterns?.some(ep => !ep.category)) { + console.warn('[pattern-monitor] LLM returned effective_pattern without category field'); + } + if (parsed.facets?.effective_patterns?.some(ep => !ep.driver)) { + console.warn('[pattern-monitor] LLM returned effective_pattern without driver field — driver classification may be incomplete'); + } + const VALID_DRIVERS = new Set(['user-driven', 'ai-driven', 'collaborative']); + if (parsed.facets?.effective_patterns?.some(ep => ep.driver && !VALID_DRIVERS.has(ep.driver))) { + console.warn('[pattern-monitor] LLM returned unexpected driver value — check classification quality'); + } + + // Validation: check for missing _reasoning CoT scratchpad fields. + // These fields ensure the model walks through the attribution/driver decision trees + // before committing to classification values. + // (Monitoring period complete — warn calls removed after confirming CoT compliance) + if (parsed.facets?.friction_points?.some(fp => !fp._reasoning)) { + // Missing _reasoning: classification may lack decision-tree rigor + } + if (parsed.facets?.effective_patterns?.some(ep => !ep._reasoning)) { + // Missing _reasoning: classification may lack decision-tree rigor + } + + return { success: true, data: parsed }; +} + +export function parsePromptQualityResponse(response: string): ParseResult<PromptQualityResponse> { + const response_length = response.length; + const preview = buildResponsePreview(response); + + const jsonPayload = extractJsonPayload(response); + if (!jsonPayload) { + console.error('No JSON found in prompt quality response'); + return { + success: false, + error: { error_type: 'no_json_found', error_message: 'No JSON found in prompt quality response', response_length, response_preview: preview }, + }; + } + + let parsed: PromptQualityResponse; + try { + parsed = JSON.parse(jsonPayload) as PromptQualityResponse; + } catch { + try { + parsed = JSON.parse(jsonrepair(jsonPayload)) as PromptQualityResponse; + } catch (err) { + const
msg = err instanceof Error ? err.message : String(err); + console.error('Failed to parse prompt quality response (after jsonrepair):', msg); + return { + success: false, + error: { error_type: 'json_parse_error', error_message: msg, response_length, response_preview: preview }, + }; + } + } + + if (typeof parsed.efficiency_score !== 'number') { + console.error('Invalid prompt quality response: missing efficiency_score'); + return { + success: false, + error: { error_type: 'invalid_structure', error_message: 'Missing or invalid efficiency_score field', response_length, response_preview: preview }, + }; + } + + // Clamp and default + parsed.efficiency_score = Math.max(0, Math.min(100, Math.round(parsed.efficiency_score))); + parsed.message_overhead = parsed.message_overhead ?? 0; + parsed.assessment = parsed.assessment || ''; + // Guard against LLM returning non-array values (e.g. "findings": "none") — + // || [] alone won't catch truthy non-arrays, and .some() on line 166 would throw. + parsed.takeaways = Array.isArray(parsed.takeaways) ? parsed.takeaways : []; + parsed.findings = Array.isArray(parsed.findings) ? parsed.findings : []; + parsed.dimension_scores = parsed.dimension_scores || { + context_provision: 50, + request_specificity: 50, + scope_management: 50, + information_timing: 50, + correction_quality: 50, + }; + + // Clamp dimension scores + for (const key of Object.keys(parsed.dimension_scores) as Array<keyof PromptQualityDimensionScores>) { + parsed.dimension_scores[key] = Math.max(0, Math.min(100, Math.round(parsed.dimension_scores[key] ?? 50))); + } + + // Validation: check for missing category or unexpected type values in findings.
+ // (Monitoring period complete — warn calls removed after confirming classification quality) + if (parsed.findings.some(f => !f.category)) { + // Finding missing category field + } + + if (parsed.findings.some(f => f.type && f.type !== 'deficit' && f.type !== 'strength')) { + // Finding has unexpected type value — expected deficit or strength + } + + return { success: true, data: parsed }; +} From 09e0806b48eb92fbbfb62d0dd7b67ea8e48db8e1 Mon Sep 17 00:00:00 2001 From: Srikanth Rao M Date: Sat, 28 Mar 2026 17:10:48 +0530 Subject: [PATCH 2/2] refactor(server): convert 9 llm/ modules to re-exports from @code-insights/cli/analysis/* Server files now re-export from the CLI analysis/ package, preserving the exact same public API surface with zero breaking changes. Converted to re-exports: - server/src/llm/prompt-types.ts - server/src/llm/prompt-constants.ts - server/src/llm/prompts.ts - server/src/llm/message-format.ts - server/src/llm/response-parsers.ts - server/src/llm/normalize-utils.ts - server/src/llm/friction-normalize.ts - server/src/llm/pattern-normalize.ts - server/src/llm/prompt-quality-normalize.ts All existing server imports (analysis.ts, facet-extraction.ts, prompt-quality-analysis.ts, analysis-db.ts, shared-aggregation.ts) continue to work unchanged via re-exports. 
Co-Authored-By: Claude Sonnet 4.6 --- server/src/llm/friction-normalize.ts | 59 +-- server/src/llm/message-format.ts | 138 +------ server/src/llm/normalize-utils.ts | 91 +---- server/src/llm/pattern-normalize.ts | 104 +---- server/src/llm/prompt-constants.ts | 201 +--------- server/src/llm/prompt-quality-normalize.ts | 139 +------ server/src/llm/prompt-types.ts | 145 +------ server/src/llm/prompts.ts | 434 +-------------------- server/src/llm/response-parsers.ts | 207 +--------- 9 files changed, 67 insertions(+), 1451 deletions(-) diff --git a/server/src/llm/friction-normalize.ts b/server/src/llm/friction-normalize.ts index 2ade90a..ba129a6 100644 --- a/server/src/llm/friction-normalize.ts +++ b/server/src/llm/friction-normalize.ts @@ -1,56 +1,3 @@ -// Friction category normalization. -// Clusters similar free-form friction categories to canonical ones during aggregation. - -import { CANONICAL_FRICTION_CATEGORIES } from './prompt-constants.js'; -import { normalizeCategory } from './normalize-utils.js'; - -// Explicit alias map for clustering emergent category variants. -// Targets don't need to be in CANONICAL_FRICTION_CATEGORIES — -// this clusters semantically-equivalent novel categories together. -// Insert alias lookup runs AFTER exact canonical match but BEFORE Levenshtein, -// so well-known emergent variants are clustered deterministically. 
-const FRICTION_ALIASES: Record = { - // legacy canonical → new canonical (15→9 taxonomy revision) - 'missing-dependency': 'stale-assumptions', - 'config-drift': 'stale-assumptions', - 'stale-cache': 'stale-assumptions', - 'version-mismatch': 'stale-assumptions', - 'permission-issue': 'stale-assumptions', - 'environment-mismatch': 'stale-assumptions', - 'race-condition': 'wrong-approach', - 'circular-dependency': 'wrong-approach', - 'test-failure': 'wrong-approach', - 'type-error': 'knowledge-gap', - 'api-misunderstanding': 'knowledge-gap', - // agent orchestration variants → cluster under one emergent name - 'agent-lifecycle-issue': 'agent-orchestration-failure', - 'agent-communication-failure': 'agent-orchestration-failure', - 'agent-communication-breakdown': 'agent-orchestration-failure', - 'agent-lifecycle-management': 'agent-orchestration-failure', - 'agent-shutdown-failure': 'agent-orchestration-failure', - // rate limit variants → cluster under one emergent name - 'api-rate-limit': 'rate-limit-hit', - 'rate-limiting': 'rate-limit-hit', - 'rate-limited': 'rate-limit-hit', -}; - -/** - * Normalize a friction category to the closest canonical category. - * Returns the original category if no close match is found. - * - * Matching rules (in order): - * 1. Exact match against canonical list → return as-is - * 1.5. Explicit alias match → return alias target (may be non-canonical) - * 2. Levenshtein distance <= 2 → return canonical match - * 3. Substring match (category contains canonical or vice versa) → return canonical - * 4. No match → return original (novel category) - * - * Note: alias targets in FRICTION_ALIASES bypass the canonical check intentionally. - * e.g., "agent-orchestration-failure" is not canonical but is a valid cluster target. 
- */ -export function normalizeFrictionCategory(category: string): string { - return normalizeCategory(category, { - canonicalCategories: CANONICAL_FRICTION_CATEGORIES, - aliases: FRICTION_ALIASES, - }); -} +// Re-exports from @code-insights/cli/analysis/friction-normalize. +// Moved to CLI package so the CLI can use friction normalization for native analysis (--native mode). +export { normalizeFrictionCategory } from '@code-insights/cli/analysis/friction-normalize'; diff --git a/server/src/llm/message-format.ts b/server/src/llm/message-format.ts index d7792ab..3e0d7e4 100644 --- a/server/src/llm/message-format.ts +++ b/server/src/llm/message-format.ts @@ -1,131 +1,7 @@ -// SQLite message formatting utilities for LLM prompt construction. -// Extracted from prompts.ts — used by prompt generator functions in prompts.ts. - -import type { SQLiteMessageRow, SessionMetadata } from './prompt-types.js'; -import { safeParseJson } from '../utils.js'; - -// Internal types — only used within formatMessagesForAnalysis -interface ParsedToolCall { - name?: string; -} - -interface ParsedToolResult { - output?: string; -} - -/** - * Detect the class of a stored user message from its content string. - * Operates on the DB content field (stringified), not raw JSONL. - * - * This mirrors classifyUserMessage() in cli/src/parser/jsonl.ts but works on - * stored content strings instead of parsed JSONL message objects. The DB stores - * message content as a plain string — tool-results are JSON arrays stringified, - * human text is stored as-is. - * - * Order matters — most specific checks first. - */ -export function classifyStoredUserMessage(content: string): 'human' | 'tool-result' | 'system-artifact' { - // Tool-result: content is a JSON array containing tool_result blocks. - // The DB stores these as stringified JSON arrays starting with '['. 
- if (content.startsWith('[') && content.includes('"tool_result"')) return 'tool-result'; - - // Auto-compact summary: Claude Code uses two known prefixes for LLM-initiated - // context compaction summaries. Both must be checked. - if (content.startsWith('Here is a summary of our conversation')) return 'system-artifact'; - if (content.startsWith('This session is being continued')) return 'system-artifact'; - - // Slash command or skill load: single-line starting with / followed by a lowercase letter. - // Requires content.trim() to be short (≤2 lines) to avoid false-positives on messages - // containing file paths like "/usr/bin/..." as part of a longer instruction. - const trimmed = content.trim(); - if (/^\/[a-z]/.test(trimmed) && trimmed.split('\n').length <= 2) return 'system-artifact'; - - return 'human'; -} - -/** - * Format SQLite message rows for LLM consumption. - * Handles snake_case fields and JSON-encoded tool_calls/tool_results. - * - * User#N indices only increment for genuine human messages. Tool-results and - * system artifacts (auto-compacts, slash commands) receive bracketed labels - * instead. This ensures User#N references in PQ takeaways and evidence fields - * align with actual human turns, not inflated by tool-result rows. - */ -export function formatMessagesForAnalysis(messages: SQLiteMessageRow[]): string { - let userIndex = 0; - let assistantIndex = 0; - - return messages - .map((m) => { - let roleLabel: string; - - if (m.type === 'user') { - const msgClass = classifyStoredUserMessage(m.content); - if (msgClass === 'tool-result') { - roleLabel = '[tool-result]'; - } else if (msgClass === 'system-artifact') { - // Auto-compact summaries use two known prefixes — everything else (slash commands, - // skill loads) is a generic system artifact, not a compaction event. - const isAutoCompact = m.content.startsWith('Here is a summary of our conversation') - || m.content.startsWith('This session is being continued'); - roleLabel = isAutoCompact ? 
'[auto-compact]' : '[system]'; - } else { - // Genuine human message — increment counter - roleLabel = `User#${userIndex++}`; - } - } else if (m.type === 'assistant') { - roleLabel = `Assistant#${assistantIndex++}`; - } else { - roleLabel = 'System'; - } - - // Parse JSON-encoded tool_calls and tool_results via safeParseJson - const toolCalls = safeParseJson(m.tool_calls, []); - const toolResults = safeParseJson(m.tool_results, []); - - const toolInfo = toolCalls.length > 0 - ? `\n[Tools used: ${toolCalls.map(t => t.name || 'unknown').join(', ')}]` - : ''; - - // Include thinking content — capped at 1000 chars to stay within token budget - const thinkingInfo = m.thinking - ? `\n[Thinking: ${m.thinking.slice(0, 1000)}]` - : ''; - - // Include tool results for context — 500 chars per result (error messages need ~300-400 chars) - const resultInfo = toolResults.length > 0 - ? `\n[Tool results: ${toolResults.map(r => (r.output || '').slice(0, 500)).join(' | ')}]` - : ''; - - return `### ${roleLabel}:\n${m.content}${thinkingInfo}${toolInfo}${resultInfo}`; - }) - .join('\n\n'); -} - -/** - * Format a one-line context signals header from V6 session metadata. - * Returns empty string when no signals are present (pre-V6 sessions with NULL columns). - * - * Example output: - * "Context signals: 3 context compactions (2 auto, 1 manual) — session exceeded context window; slash commands used: /review, /test\n" - */ -export function formatSessionMetaLine(meta?: SessionMetadata): string { - if (!meta) return ''; - const parts: string[] = []; - - const totalCompacts = (meta.compactCount ?? 0) + (meta.autoCompactCount ?? 0); - if (totalCompacts > 0) { - const breakdown: string[] = []; - if (meta.autoCompactCount) breakdown.push(`${meta.autoCompactCount} auto`); - if (meta.compactCount) breakdown.push(`${meta.compactCount} manual`); - parts.push(`${totalCompacts} context compaction${totalCompacts > 1 ? 
's' : ''} (${breakdown.join(', ')}) — session exceeded context window`); - } - - if (meta.slashCommands?.length) { - parts.push(`slash commands used: ${meta.slashCommands.join(', ')}`); - } - - if (parts.length === 0) return ''; - return `Context signals: ${parts.join('; ')}\n`; -} +// Re-exports from @code-insights/cli/analysis/message-format. +// Moved to CLI package so the CLI can use message formatting for native analysis (--native mode). +export { + classifyStoredUserMessage, + formatMessagesForAnalysis, + formatSessionMetaLine, +} from '@code-insights/cli/analysis/message-format'; diff --git a/server/src/llm/normalize-utils.ts b/server/src/llm/normalize-utils.ts index d86e598..86ba81f 100644 --- a/server/src/llm/normalize-utils.ts +++ b/server/src/llm/normalize-utils.ts @@ -1,87 +1,4 @@ -// Shared normalization infrastructure for friction, pattern, and prompt-quality categories. -// Each domain provides its own canonical list, alias map, and label map. - -/** Standard Levenshtein distance between two strings */ -export function levenshtein(a: string, b: string): number { - const m = a.length; - const n = b.length; - const dp: number[][] = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0) as number[]); - - for (let i = 0; i <= m; i++) dp[i][0] = i; - for (let j = 0; j <= n; j++) dp[0][j] = j; - - for (let i = 1; i <= m; i++) { - for (let j = 1; j <= n; j++) { - const cost = a[i - 1] === b[j - 1] ? 0 : 1; - dp[i][j] = Math.min( - dp[i - 1][j] + 1, - dp[i][j - 1] + 1, - dp[i - 1][j - 1] + cost - ); - } - } - - return dp[m][n]; -} - -export interface NormalizerConfig { - /** Canonical category strings (lowercase kebab-case) */ - canonicalCategories: readonly string[]; - /** Maps known aliases to their target (may be non-canonical cluster targets) */ - aliases: Record; -} - -/** - * Generic category normalizer. Matching rules (in order): - * 1. Exact match against canonical list → return as-is - * 1.5. 
Explicit alias match → return alias target (may be non-canonical) - * 2. Levenshtein distance <= 2 → return canonical match - * 3. Substring match (shorter >= 5 chars, >= 50% of longer) → return canonical - * 4. No match → return original (novel category) - */ -export function normalizeCategory(category: string, config: NormalizerConfig): string { - const lower = category.toLowerCase(); - - // 1. Exact match - for (const canonical of config.canonicalCategories) { - if (lower === canonical) return canonical; - } - - // 1.5. Explicit alias match - if (config.aliases[lower]) return config.aliases[lower]; - - // 2. Levenshtein distance <= 2 - let bestMatch: string | null = null; - let bestDistance = Infinity; - for (const canonical of config.canonicalCategories) { - const dist = levenshtein(lower, canonical); - if (dist <= 2 && dist < bestDistance) { - bestDistance = dist; - bestMatch = canonical; - } - } - if (bestMatch) return bestMatch; - - // 3. Substring match — only if the shorter string is a significant portion of the longer - // to avoid false positives like "type" matching "type-error" - for (const canonical of config.canonicalCategories) { - const shorter = lower.length < canonical.length ? lower : canonical; - const longer = lower.length < canonical.length ? canonical : lower; - if (shorter.length >= 5 && shorter.length / longer.length >= 0.5 && longer.includes(shorter)) { - return canonical; - } - } - - // 4. No match — novel category - return category; -} - -/** - * Convert kebab-case to Title Case. Shared fallback for category label functions. - */ -export function kebabToTitleCase(kebab: string): string { - return kebab - .split('-') - .map(word => word.charAt(0).toUpperCase() + word.slice(1)) - .join(' '); -} +// Re-exports from @code-insights/cli/analysis/normalize-utils. +// Moved to CLI package so the CLI can use these utilities for native analysis (--native mode). 
+export type { NormalizerConfig } from '@code-insights/cli/analysis/normalize-utils'; +export { levenshtein, normalizeCategory, kebabToTitleCase } from '@code-insights/cli/analysis/normalize-utils'; diff --git a/server/src/llm/pattern-normalize.ts b/server/src/llm/pattern-normalize.ts index ec32d81..1403f2c 100644 --- a/server/src/llm/pattern-normalize.ts +++ b/server/src/llm/pattern-normalize.ts @@ -1,101 +1,3 @@ -// Effective pattern category normalization. -// Clusters similar free-form pattern categories to canonical ones during aggregation. -// Delegates to normalize-utils.ts for the shared levenshtein/normalizeCategory algorithm. - -import { CANONICAL_PATTERN_CATEGORIES } from './prompt-constants.js'; -import { normalizeCategory, kebabToTitleCase } from './normalize-utils.js'; - -// Human-readable labels for each canonical category. -// Used in dashboard display (e.g., "structured-planning" → "Structured Planning"). -export const PATTERN_CATEGORY_LABELS: Record = { - 'structured-planning': 'Structured Planning', - 'incremental-implementation': 'Incremental Implementation', - 'verification-workflow': 'Verification Workflow', - 'systematic-debugging': 'Systematic Debugging', - 'self-correction': 'Self-Correction', - 'context-gathering': 'Context Gathering', - 'domain-expertise': 'Domain Expertise', - 'effective-tooling': 'Effective Tooling', -}; - -// Explicit alias map for clustering emergent category variants. -// Targets don't need to be in CANONICAL_PATTERN_CATEGORIES — -// this clusters semantically-equivalent novel categories together. -// Insert alias lookup runs AFTER exact canonical match but BEFORE Levenshtein, -// so well-known emergent variants are clustered deterministically. 
-const PATTERN_ALIASES: Record = { - // structured-planning variants - 'task-decomposition': 'structured-planning', - 'plan-first': 'structured-planning', - 'upfront-planning': 'structured-planning', - 'phased-approach': 'structured-planning', - 'task-breakdown': 'structured-planning', - 'planning-before-implementation': 'structured-planning', - - // effective-tooling variants - 'agent-delegation': 'effective-tooling', - 'agent-orchestration': 'effective-tooling', - 'specialized-agents': 'effective-tooling', - 'multi-agent': 'effective-tooling', - 'tool-leverage': 'effective-tooling', - - // verification-workflow variants - 'build-test-verify': 'verification-workflow', - 'test-driven-development': 'verification-workflow', - 'tdd': 'verification-workflow', - 'test-first': 'verification-workflow', - 'pre-commit-checks': 'verification-workflow', - - // systematic-debugging variants - 'binary-search-debugging': 'systematic-debugging', - 'methodical-debugging': 'systematic-debugging', - 'log-based-debugging': 'systematic-debugging', - 'debugging-methodology': 'systematic-debugging', - - // self-correction variants - 'course-correction': 'self-correction', - 'pivot-on-failure': 'self-correction', - 'backtracking': 'self-correction', - - // context-gathering variants - 'code-reading-first': 'context-gathering', - 'codebase-exploration': 'context-gathering', - 'understanding-before-changing': 'context-gathering', - - // domain-expertise variants - 'framework-knowledge': 'domain-expertise', - 'types-first': 'domain-expertise', - 'type-driven-development': 'domain-expertise', - 'schema-first': 'domain-expertise', - - // incremental-implementation variants - 'small-steps': 'incremental-implementation', - 'iterative-building': 'incremental-implementation', - 'iterative-development': 'incremental-implementation', -}; - -/** - * Normalize a pattern category to the closest canonical category. - * Returns the original category if no close match is found. 
- * - * Matching rules (in order): - * 1. Exact match against canonical list → return as-is - * 1.5. Explicit alias match → return alias target (may be non-canonical) - * 2. Levenshtein distance <= 2 → return canonical match - * 3. Substring match (category contains canonical or vice versa) → return canonical - * 4. No match → return original (novel category) - */ -export function normalizePatternCategory(category: string): string { - return normalizeCategory(category, { - canonicalCategories: CANONICAL_PATTERN_CATEGORIES, - aliases: PATTERN_ALIASES, - }); -} - -/** - * Get a human-readable label for a pattern category. - * Falls back to Title Case conversion for novel categories. - */ -export function getPatternCategoryLabel(category: string): string { - return PATTERN_CATEGORY_LABELS[category] ?? kebabToTitleCase(category); -} +// Re-exports from @code-insights/cli/analysis/pattern-normalize. +// Moved to CLI package so the CLI can use pattern normalization for native analysis (--native mode). +export { PATTERN_CATEGORY_LABELS, normalizePatternCategory, getPatternCategoryLabel } from '@code-insights/cli/analysis/pattern-normalize'; diff --git a/server/src/llm/prompt-constants.ts b/server/src/llm/prompt-constants.ts index 40d4f8c..8befdf4 100644 --- a/server/src/llm/prompt-constants.ts +++ b/server/src/llm/prompt-constants.ts @@ -1,189 +1,12 @@ -// Canonical category arrays and classification guidance strings for LLM analysis. -// Extracted from prompts.ts — imported by normalizers and prompt generators. - -// Shared guidance for friction category and attribution classification. -// Actor-neutral category definitions describe the gap, not the actor. -// Attribution field captures who contributed to the friction for actionability. 
-export const FRICTION_CLASSIFICATION_GUIDANCE = ` -FRICTION CLASSIFICATION GUIDANCE: - -Each friction point captures WHAT went wrong (category + description), WHO contributed (attribution), and WHY you classified it that way (_reasoning). - -CATEGORIES — classify the TYPE of gap or obstacle: -- "wrong-approach": A strategy was pursued that didn't fit the task — wrong architecture, wrong tool, wrong pattern. Includes choosing a suboptimal tool when a better one was available. -- "knowledge-gap": Incorrect knowledge was applied about a library, API, framework, or language feature. The capability existed but was used wrong. -- "stale-assumptions": Work proceeded from assumptions about current state that were incorrect (stale files, changed config, different environment, tool behavior changed between versions). -- "incomplete-requirements": Instructions were missing critical context, constraints, or acceptance criteria needed to proceed correctly. -- "context-loss": Prior decisions or constraints established earlier in the session were lost or forgotten. -- "scope-creep": Work expanded beyond the boundaries of the stated task. -- "repeated-mistakes": The same or similar error occurred multiple times despite earlier correction. -- "documentation-gap": Relevant docs existed but were inaccessible or unfindable during the session. -- "tooling-limitation": The AI coding tool or its underlying model genuinely could not perform a needed action — missing file system access, unsupported language feature, context window overflow, inability to run a specific command type. Diagnostic: Could a reasonable user prompt or approach have achieved the same result? If the only workaround is unreasonably complex or loses significant fidelity, this IS a tooling-limitation. If a straightforward alternative existed → it is NOT tooling-limitation. 
- RECLASSIFY if any of these apply: - - Rate-limited or throttled → create "rate-limit-hit" instead - - Agent crashed or lost state → use "wrong-approach" or create "agent-orchestration-failure" - - Wrong tool chosen when a better one existed → "wrong-approach" - - User didn't know the tool could do something → "knowledge-gap" - - Tool worked differently than expected → "stale-assumptions" - -DISAMBIGUATION — use these to break ties when two categories seem to fit: -- tooling-limitation vs wrong-approach: Limitation = the tool CANNOT do it (no workaround exists). Wrong-approach = the tool CAN do it but a suboptimal method was chosen. -- tooling-limitation vs knowledge-gap: Limitation = the capability genuinely does not exist. Knowledge-gap = the capability exists but was applied incorrectly. -- tooling-limitation vs stale-assumptions: Limitation = permanent gap in the tool. Stale-assumptions = the tool USED TO work differently or the assumption about current behavior was wrong. -- wrong-approach vs knowledge-gap: Wrong-approach = strategic choice (chose library X over Y). Knowledge-gap = factual error (used library X's API incorrectly). -- incomplete-requirements vs context-loss: Incomplete = the information was NEVER provided. Context-loss = it WAS provided earlier but was forgotten or dropped. - -When no category fits, create a specific kebab-case category. A precise novel category is better than a vague canonical one. - -ATTRIBUTION — 3-step decision tree (follow IN ORDER): -Step 1: Is the cause external to the user-AI interaction? (missing docs, broken tooling, infra outage) → "environmental" -Step 2: Could the USER have prevented this with better input? Evidence: vague prompt, missing context, no constraints, late requirements, ambiguous correction → "user-actionable" -Step 3: User input was clear and the AI still failed → "ai-capability" -When genuinely mixed between user-actionable and ai-capability, lean "user-actionable" — this tool helps users improve. 
- -DESCRIPTION RULES: -- One neutral sentence describing the GAP, not the actor -- Include specific details (file names, APIs, error messages) -- Frame as "Missing X caused Y" NOT "The AI failed to X" or "The user forgot to X" -- Let the attribution field carry the who`; - -export const CANONICAL_FRICTION_CATEGORIES = [ - 'wrong-approach', - 'knowledge-gap', - 'stale-assumptions', - 'incomplete-requirements', - 'context-loss', - 'scope-creep', - 'repeated-mistakes', - 'documentation-gap', - 'tooling-limitation', -] as const; - -export const CANONICAL_PATTERN_CATEGORIES = [ - 'structured-planning', - 'incremental-implementation', - 'verification-workflow', - 'systematic-debugging', - 'self-correction', - 'context-gathering', - 'domain-expertise', - 'effective-tooling', -] as const; - -export const CANONICAL_PQ_DEFICIT_CATEGORIES = [ - 'vague-request', - 'missing-context', - 'late-constraint', - 'unclear-correction', - 'scope-drift', - 'missing-acceptance-criteria', - 'assumption-not-surfaced', -] as const; - -export const CANONICAL_PQ_STRENGTH_CATEGORIES = [ - 'precise-request', - 'effective-context', - 'productive-correction', -] as const; - -export const CANONICAL_PQ_CATEGORIES = [ - ...CANONICAL_PQ_DEFICIT_CATEGORIES, - ...CANONICAL_PQ_STRENGTH_CATEGORIES, -] as const; - -export const PROMPT_QUALITY_CLASSIFICATION_GUIDANCE = ` -PROMPT QUALITY CLASSIFICATION GUIDANCE: - -Each finding captures a specific moment where the user's prompting either caused friction (deficit) or enabled productivity (strength). - -DEFICIT CATEGORIES — classify prompting problems: -- "vague-request": Request lacked specificity needed for the AI to act without guessing. Missing file paths, function names, expected behavior, or concrete details. - NOT this category if the AI had enough context to succeed but failed anyway — that is an AI capability issue, not a prompting issue. 
- -- "missing-context": Critical background knowledge about architecture, conventions, dependencies, or current state was not provided. - NOT this category if the information was available in the codebase and the AI could have found it by reading files — that is an AI context-gathering failure. - -- "late-constraint": A requirement or constraint was provided AFTER the AI had already started implementing a different approach, causing rework. - NOT this category if the constraint was genuinely discovered during implementation (requirements changed). Only classify if the user KNEW the constraint before the session started. - -- "unclear-correction": The user told the AI its output was wrong without explaining what was wrong or why. "That's not right", "try again", "no" without context. - NOT this category if the user gave a brief but sufficient correction ("use map instead of forEach" is clear enough). - -- "scope-drift": The session objective shifted mid-conversation, or multiple unrelated objectives were addressed in one session. - NOT this category if the user is working through logically connected subtasks of one objective. - -- "missing-acceptance-criteria": The user did not define what successful completion looks like, leading to back-and-forth about whether the output meets expectations. - NOT this category for exploratory sessions where the user is discovering what they want. - -- "assumption-not-surfaced": The user held an unstated assumption that the AI could not reasonably infer from code or conversation. - NOT this category if the assumption was reasonable for the AI to make (e.g., standard coding conventions). - -STRENGTH CATEGORIES — classify prompting successes (only when notably above average): -- "precise-request": Request included enough specificity (file paths, function names, expected behavior, error messages) that the AI could act correctly on the first attempt. 
- -- "effective-context": User proactively shared architecture, conventions, prior decisions, or current state that the AI demonstrably used to make better decisions. - -- "productive-correction": When the AI went off track, the user provided a correction that included WHAT was wrong, WHY, and enough context for the AI to redirect effectively on the next response. - -CONTRASTIVE PAIRS: -- vague-request vs missing-context: Was the problem in HOW THE TASK WAS DESCRIBED (vague-request) or WHAT BACKGROUND KNOWLEDGE WAS ABSENT (missing-context)? -- late-constraint vs missing-context: Did the user EVENTUALLY provide it in the same session? Yes → late-constraint. Never → missing-context. -- missing-context vs assumption-not-surfaced: Is this a FACT the user could have copy-pasted (missing-context), or a BELIEF/PREFERENCE they held (assumption-not-surfaced)? -- scope-drift vs missing-acceptance-criteria: Did the user try to do TOO MANY THINGS (scope-drift) or ONE THING WITHOUT DEFINING SUCCESS (missing-acceptance-criteria)? -- unclear-correction vs vague-request: Was this the user's FIRST MESSAGE about this task (vague-request) or a RESPONSE TO AI OUTPUT (unclear-correction)? - -DIMENSION SCORING (0-100): -- context_provision: How well did the user provide relevant background upfront? - 90+: Proactively shared architecture, constraints, conventions. 50-69: Notable gaps causing detours. <30: No context, AI working blind. -- request_specificity: How precise were task requests? - 90+: File paths, expected behavior, scope boundaries. 50-69: Mix of specific and vague. <30: Nearly all requests lacked detail. -- scope_management: How focused was the session? - 90+: Single clear objective, logical progression. 50-69: Some drift but primary goal met. <30: Unfocused, no clear objective. -- information_timing: Were requirements provided when needed? - 90+: All constraints front-loaded before implementation. 50-69: Some important requirements late. 
<30: Requirements drip-fed, constant corrections. -- correction_quality: How well did the user redirect the AI? - 90+: Corrections included what, why, and context. 50-69: Mix of clear and unclear. <30: Corrections gave almost no signal. - Score 75 if no corrections were needed (absence of corrections in a successful session = good prompting). - -EDGE CASES: -- Short sessions (<5 user messages): Score conservatively. Do not penalize for missing elements unnecessary in quick tasks. -- Exploration sessions: Do not penalize for missing acceptance criteria or scope drift. -- Sessions where AI performed well despite vague prompts: Still classify deficits. Impact should be "low" since no visible cost. -- Agentic/delegation sessions: If the user gave a clear high-level directive and the AI autonomously planned and executed successfully, do not penalize for low message count or lack of micro-level specificity. Effective delegation IS good prompting. Focus on the quality of the initial delegation prompt.`; - -export const EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE = ` -EFFECTIVE PATTERN CLASSIFICATION GUIDANCE: - -Each effective pattern captures a technique or approach that contributed to a productive session outcome. - -BASELINE EXCLUSION — do NOT classify these as patterns: -- Routine file reads at session start (Read/Glob/Grep on <5 files before editing) -- Following explicit user instructions (user said "run tests" → running tests is not a pattern) -- Basic tool usage (single file edits, standard CLI commands) -- Trivial self-corrections (typo fixes, minor syntax errors caught immediately) -Only classify behavior that is NOTABLY thorough, strategic, or beyond baseline expectations. - -CATEGORIES — classify the TYPE of effective pattern: -- "structured-planning": Decomposed the task into explicit steps, defined scope boundaries, or established a plan BEFORE writing code. Signal: plan/task-list/scope-definition appears before implementation. 
-- "incremental-implementation": Work progressed in small, verifiable steps with validation between them. Signal: multiple small edits with checks between, not one large batch. -- "verification-workflow": Proactive correctness checks (builds, tests, linters, types) BEFORE considering work complete. Signal: test/build/lint commands when nothing was known broken. -- "systematic-debugging": Methodical investigation using structured techniques (binary search, log insertion, reproduction isolation). Signal: multiple targeted diagnostic steps, not random guessing. -- "self-correction": Recognized a wrong path and pivoted WITHOUT user correction. Signal: explicit acknowledgment of mistake + approach change. NOT this if the user pointed out the error. -- "context-gathering": NOTABLY thorough investigation before changes — reading 5+ files, cross-module exploration, schema/type/config review. Signal: substantial Read/Grep/Glob usage spanning multiple directories before any Edit/Write. -- "domain-expertise": Applied specific framework/API/language knowledge correctly on first attempt without searching. Signal: correct non-obvious API usage with no preceding search and no subsequent error. NOT this if files were read first — that is context-gathering. -- "effective-tooling": Leveraged advanced tool capabilities that multiplied productivity — agent delegation, parallel work, multi-file coordination, strategic mode selection. Signal: use of tool features beyond basic read/write/edit. - -CONTRASTIVE PAIRS: -- structured-planning vs incremental-implementation: Planning = DECIDING what to do (before). Incremental = HOW you execute (during). Can have one without the other. -- context-gathering vs domain-expertise: Gathering = ACTIVE INVESTIGATION (reading files). Expertise = APPLYING EXISTING KNOWLEDGE without investigation. If files were read first → context-gathering. -- verification-workflow vs systematic-debugging: Verification = PROACTIVE (checking working code). 
Debugging = REACTIVE (investigating a failure). -- self-correction vs user-directed: Self-correction = AI caught own mistake unprompted. User said "that's wrong" → NOT self-correction. - -DRIVER — 4-step decision tree (follow IN ORDER): -Step 1: Did user infrastructure enable this? (CLAUDE.md rules, agent configs, hookify hooks, custom commands, system prompts) → "user-driven" -Step 2: Did the user explicitly request this behavior? (asked for plan, requested tests, directed investigation) → "user-driven" -Step 3: Did the AI exhibit this without any user prompting or infrastructure? → "ai-driven" -Step 4: Both made distinct, identifiable contributions → "collaborative" -Use "collaborative" ONLY when you can name what EACH party contributed. If uncertain, prefer the more specific label. - -When no canonical category fits, create a specific kebab-case category (a precise novel category is better than forcing a poor fit).`; +// Re-exports from @code-insights/cli/analysis/prompt-constants. +// Moved to CLI package so the CLI can use these constants for native analysis (--native mode). +export { + FRICTION_CLASSIFICATION_GUIDANCE, + CANONICAL_FRICTION_CATEGORIES, + CANONICAL_PATTERN_CATEGORIES, + CANONICAL_PQ_DEFICIT_CATEGORIES, + CANONICAL_PQ_STRENGTH_CATEGORIES, + CANONICAL_PQ_CATEGORIES, + PROMPT_QUALITY_CLASSIFICATION_GUIDANCE, + EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE, +} from '@code-insights/cli/analysis/prompt-constants'; diff --git a/server/src/llm/prompt-quality-normalize.ts b/server/src/llm/prompt-quality-normalize.ts index 9dfa0b2..8112b9e 100644 --- a/server/src/llm/prompt-quality-normalize.ts +++ b/server/src/llm/prompt-quality-normalize.ts @@ -1,131 +1,8 @@ -// Prompt quality category normalization. -// Clusters similar free-form categories to canonical ones during aggregation. -// Delegates to normalize-utils.ts for the shared levenshtein/normalizeCategory algorithm. 
- -import { CANONICAL_PQ_CATEGORIES, CANONICAL_PQ_STRENGTH_CATEGORIES } from './prompt-constants.js'; -import { normalizeCategory, kebabToTitleCase } from './normalize-utils.js'; - -// Human-readable labels for each canonical category. -export const PQ_CATEGORY_LABELS: Record = { - 'vague-request': 'Vague Request', - 'missing-context': 'Missing Context', - 'late-constraint': 'Late Constraint', - 'unclear-correction': 'Unclear Correction', - 'scope-drift': 'Scope Drift', - 'missing-acceptance-criteria': 'Missing Acceptance Criteria', - 'assumption-not-surfaced': 'Assumption Not Surfaced', - 'precise-request': 'Precise Request', - 'effective-context': 'Effective Context', - 'productive-correction': 'Productive Correction', -}; - -const STRENGTH_SET = new Set(CANONICAL_PQ_STRENGTH_CATEGORIES); - -// Explicit alias map for clustering emergent category variants. -// Targets don't need to be in CANONICAL_PQ_CATEGORIES — -// this clusters semantically-equivalent novel categories together. -// Alias lookup runs AFTER exact canonical match but BEFORE Levenshtein, -// so well-known emergent variants are clustered deterministically. 
-const PQ_ALIASES: Record = { - // vague-request variants - 'vague-instructions': 'vague-request', - 'unclear-request': 'vague-request', - 'imprecise-prompting': 'vague-request', - 'ambiguous-request': 'vague-request', - 'incomplete-request': 'vague-request', - 'generic-request': 'vague-request', - - // missing-context variants - 'missing-information': 'missing-context', - 'insufficient-context': 'missing-context', - 'no-context': 'missing-context', - 'lack-of-context': 'missing-context', - 'missing-background': 'missing-context', - - // late-constraint variants - 'late-context': 'late-constraint', - 'late-requirements': 'late-constraint', - 'piecemeal-requirements': 'late-constraint', - 'drip-fed-requirements': 'late-constraint', - 'incremental-requirements': 'late-constraint', - 'late-specification': 'late-constraint', - - // unclear-correction variants - 'unclear-feedback': 'unclear-correction', - 'vague-correction': 'unclear-correction', - 'unhelpful-correction': 'unclear-correction', - 'vague-feedback': 'unclear-correction', - - // scope-drift variants - 'context-drift': 'scope-drift', - 'objective-bloat': 'scope-drift', - 'session-bloat': 'scope-drift', - 'topic-switching': 'scope-drift', - 'scope-creep': 'scope-drift', - - // missing-acceptance-criteria variants - 'no-acceptance-criteria': 'missing-acceptance-criteria', - 'undefined-done': 'missing-acceptance-criteria', - 'no-definition-of-done': 'missing-acceptance-criteria', - 'unclear-success-criteria': 'missing-acceptance-criteria', - - // assumption-not-surfaced variants - 'hidden-assumption': 'assumption-not-surfaced', - 'unstated-assumption': 'assumption-not-surfaced', - 'implicit-assumption': 'assumption-not-surfaced', - 'unspoken-expectation': 'assumption-not-surfaced', - - // precise-request variants (strengths) - 'clear-request': 'precise-request', - 'specific-request': 'precise-request', - 'well-specified-request': 'precise-request', - 'detailed-request': 'precise-request', - - // 
effective-context variants (strengths) - 'good-context': 'effective-context', - 'upfront-context': 'effective-context', - 'proactive-context': 'effective-context', - 'rich-context': 'effective-context', - - // productive-correction variants (strengths) - 'clear-correction': 'productive-correction', - 'effective-feedback': 'productive-correction', - 'helpful-correction': 'productive-correction', - 'constructive-feedback': 'productive-correction', -}; - -/** - * Normalize a prompt quality category to the closest canonical category. - * Returns the original category if no close match is found. - * - * Matching rules (in order): - * 1. Exact match against canonical list → return as-is - * 1.5. Explicit alias match → return alias target (may be non-canonical) - * 2. Levenshtein distance <= 2 → return canonical match - * 3. Substring match (category contains canonical or vice versa) → return canonical - * 4. No match → return original (novel category) - * - * Note: alias targets in PQ_ALIASES bypass the canonical check intentionally. - */ -export function normalizePromptQualityCategory(category: string): string { - return normalizeCategory(category, { - canonicalCategories: CANONICAL_PQ_CATEGORIES, - aliases: PQ_ALIASES, - }); -} - -/** - * Get a human-readable label for a prompt quality category. - * Falls back to Title Case conversion for novel categories. - */ -export function getPQCategoryLabel(category: string): string { - return PQ_CATEGORY_LABELS[category] ?? kebabToTitleCase(category); -} - -/** - * Get the type (deficit or strength) for a prompt quality category. - * Novel categories default to deficit. - */ -export function getPQCategoryType(category: string): 'deficit' | 'strength' { - return STRENGTH_SET.has(category) ? 'strength' : 'deficit'; -} +// Re-exports from @code-insights/cli/analysis/prompt-quality-normalize. +// Moved to CLI package so the CLI can use PQ normalization for native analysis (--native mode). 
+export { + PQ_CATEGORY_LABELS, + normalizePromptQualityCategory, + getPQCategoryLabel, + getPQCategoryType, +} from '@code-insights/cli/analysis/prompt-quality-normalize'; diff --git a/server/src/llm/prompt-types.ts b/server/src/llm/prompt-types.ts index 7b550a2..38be34f 100644 --- a/server/src/llm/prompt-types.ts +++ b/server/src/llm/prompt-types.ts @@ -1,131 +1,14 @@ -// Type definitions for LLM prompt analysis. -// Extracted from prompts.ts — shared by message-format.ts, response-parsers.ts, and analysis.ts. - -// SQLite row format for messages — snake_case with JSON-encoded arrays. -// This matches the shape returned by server/src/routes/messages.ts. -export interface SQLiteMessageRow { - id: string; - session_id: string; - type: 'user' | 'assistant' | 'system'; - content: string; - thinking: string | null; - tool_calls: string; // JSON-encoded ToolCall[] - tool_results: string; // JSON-encoded ToolResult[] - usage: string | null; - timestamp: string; - parent_id: string | null; -} - -/** - * Optional session metadata from V6 columns. - * Passed to prompt generators to add context signals about context compaction - * and slash command usage. Only present when at least one V6 field is non-empty. 
- */ -export interface SessionMetadata { - compactCount?: number; // from sessions.compact_count (user-initiated /compact) - autoCompactCount?: number; // from sessions.auto_compact_count (LLM-initiated compaction) - slashCommands?: string[]; // from sessions.slash_commands (JSON array of command names) -} - -export interface AnalysisResponse { - facets?: { - outcome_satisfaction: string; - workflow_pattern: string | null; - had_course_correction: boolean; - course_correction_reason: string | null; - iteration_count: number; - friction_points: Array<{ - _reasoning?: string; - category: string; - attribution?: string; - description: string; - severity: string; - resolution: string; - }>; - effective_patterns: Array<{ - _reasoning?: string; - category: string; - description: string; - confidence: number; - driver?: 'user-driven' | 'ai-driven' | 'collaborative'; - }>; - }; - summary: { - title: string; - content: string; - outcome?: 'success' | 'partial' | 'abandoned' | 'blocked'; - bullets: string[]; - }; - decisions: Array<{ - title: string; - situation?: string; - choice?: string; - reasoning: string; - alternatives?: Array<{ option: string; rejected_because: string }>; - trade_offs?: string; - revisit_when?: string; - confidence?: number; - evidence?: string[]; - }>; - learnings: Array<{ - title: string; - symptom?: string; - root_cause?: string; - takeaway?: string; - applies_when?: string; - confidence?: number; - evidence?: string[]; - }>; -} - -export interface ParseError { - error_type: 'json_parse_error' | 'no_json_found' | 'invalid_structure'; - error_message: string; - response_length: number; - response_preview: string; -} - -export type ParseResult = - | { success: true; data: T } - | { success: false; error: ParseError }; - -export interface PromptQualityFinding { - category: string; - type: 'deficit' | 'strength'; - description: string; - message_ref: string; - impact: 'high' | 'medium' | 'low'; - confidence: number; - suggested_improvement?: string; 
-} - -export interface PromptQualityTakeaway { - type: 'improve' | 'reinforce'; - category: string; - label: string; - message_ref: string; - // improve fields - original?: string; - better_prompt?: string; - why?: string; - // reinforce fields - what_worked?: string; - why_effective?: string; -} - -export interface PromptQualityDimensionScores { - context_provision: number; - request_specificity: number; - scope_management: number; - information_timing: number; - correction_quality: number; -} - -export interface PromptQualityResponse { - efficiency_score: number; - message_overhead: number; - assessment: string; - takeaways: PromptQualityTakeaway[]; - findings: PromptQualityFinding[]; - dimension_scores: PromptQualityDimensionScores; -} +// Re-exports from @code-insights/cli/analysis/prompt-types. +// Moved to CLI package so the CLI can use these types for native analysis (--native mode). +export type { + SQLiteMessageRow, + SessionMetadata, + ContentBlock, + AnalysisResponse, + ParseError, + ParseResult, + PromptQualityFinding, + PromptQualityTakeaway, + PromptQualityDimensionScores, + PromptQualityResponse, +} from '@code-insights/cli/analysis/prompt-types'; diff --git a/server/src/llm/prompts.ts b/server/src/llm/prompts.ts index 6d0f228..f9a6968 100644 --- a/server/src/llm/prompts.ts +++ b/server/src/llm/prompts.ts @@ -1,425 +1,9 @@ -// Prompt template strings and generator functions for LLM session analysis. -// Types → prompt-types.ts, constants → prompt-constants.ts, -// formatting → message-format.ts, parsers → response-parsers.ts. 
- -import type { SessionMetadata } from './prompt-types.js'; -import type { ContentBlock } from './types.js'; -import { - FRICTION_CLASSIFICATION_GUIDANCE, - CANONICAL_FRICTION_CATEGORIES, - CANONICAL_PATTERN_CATEGORIES, - CANONICAL_PQ_DEFICIT_CATEGORIES, - CANONICAL_PQ_STRENGTH_CATEGORIES, - PROMPT_QUALITY_CLASSIFICATION_GUIDANCE, - EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE, -} from './prompt-constants.js'; -import { formatSessionMetaLine } from './message-format.js'; - -// ============================================================================= -// SHARED SYSTEM PROMPT -// A minimal (~100 token) system prompt shared by all analysis calls. -// The full classification guidance and schema examples live in the instruction -// suffix (user[1]), keeping the system prompt cacheable across calls. -// ============================================================================= - -/** - * Shared system prompt for all LLM analysis calls. - * Paired with buildCacheableConversationBlock() + an analysis-specific instruction block. - */ -export const SHARED_ANALYST_SYSTEM_PROMPT = `You are a senior staff engineer analyzing an AI coding session. You will receive the conversation transcript followed by specific extraction instructions. Respond with valid JSON only, wrapped in ... tags.`; - -// ============================================================================= -// CACHEABLE CONVERSATION BLOCK -// Wraps the formatted conversation in an Anthropic ephemeral cache block. -// CRITICAL: Must contain ONLY the formatted messages — no project name, no session -// metadata, no per-session variables. This ensures cache hits across sessions. -// ============================================================================= - -/** - * Wrap formatted conversation messages in a cacheable content block. - * The cache_control field instructs Anthropic to cache everything up to - * and including this block (ephemeral, 5-minute TTL). 
- * - * Non-Anthropic providers receive this as a ContentBlock[] and use - * flattenContent() to convert it to a plain string. - * - * @param formattedMessages - Output of formatMessagesForAnalysis() - */ -export function buildCacheableConversationBlock(formattedMessages: string): ContentBlock { - return { - type: 'text', - // Trailing double newline ensures the instruction block (user[1]) reads as a - // distinct section when providers flatten content blocks to a single string. - text: `--- CONVERSATION ---\n${formattedMessages}\n--- END CONVERSATION ---\n\n`, - cache_control: { type: 'ephemeral' }, - }; -} - -// ============================================================================= -// SESSION ANALYSIS INSTRUCTIONS -// The instruction suffix for session analysis calls (user[1]). -// Contains the full analyst persona, schema, and quality guidance. -// Per-session variables (project name, summary, meta) go here — NOT in the -// cached conversation block. -// ============================================================================= - -/** - * Build the instruction suffix for session analysis. - * Used as the second content block in the user message, after the cached conversation. - */ -export function buildSessionAnalysisInstructions( - projectName: string, - sessionSummary: string | null, - meta?: SessionMetadata -): string { - return `You are a senior staff engineer writing entries for a team's engineering knowledge base. You've just observed an AI-assisted coding session and your job is to extract the insights that would save another engineer time if they encountered a similar situation 6 months from now. - -Your audience is a developer who has never seen this session but works on the same codebase. They need enough context to understand WHY a decision was made, WHAT specific gotcha was discovered, and WHEN this knowledge applies. - -Project: ${projectName} -${sessionSummary ? 
`Session Summary: ${sessionSummary}\n` : ''}${formatSessionMetaLine(meta)} -=== PART 1: SESSION FACETS === -Extract these FIRST as a holistic session assessment: - -1. outcome_satisfaction: Rate the session outcome. - - "high": Task completed successfully, user satisfied - - "medium": Partial completion or minor issues - - "low": Significant problems, user frustrated - - "abandoned": Session ended without achieving the goal - -2. workflow_pattern: Identify the dominant workflow pattern (or null if unclear). - Recommended values: "plan-then-implement", "iterative-refinement", "debug-fix-verify", "explore-then-build", "direct-execution" - -3. friction_points: Identify up to 5 moments where progress was blocked or slowed (array, max 5). - Each friction point has: - - _reasoning: (REQUIRED) Your reasoning chain for category + attribution. 2-3 sentences max. Walk through the decision tree steps. This field is saved but not shown to users — use it to think before classifying. - - category: Use one of these PREFERRED categories when applicable: ${CANONICAL_FRICTION_CATEGORIES.join(', ')}. Create a new kebab-case category only when none of these fit. - - attribution: "user-actionable" (better user input would have prevented this), "ai-capability" (AI failed despite adequate input), or "environmental" (external constraint) - - description: One neutral sentence describing what happened, with specific details (file names, APIs, errors) - - severity: "high" (blocked progress for multiple turns), "medium" (caused a detour), "low" (minor hiccup) - - resolution: "resolved" (fixed in session), "workaround" (bypassed), "unresolved" (still broken) -${FRICTION_CLASSIFICATION_GUIDANCE} - -4. effective_patterns: Up to 3 techniques or approaches that worked particularly well (array, max 3). - Each has: - - _reasoning: (REQUIRED) Your reasoning chain for category + driver. 2-3 sentences max. Walk through the decision tree steps and baseline exclusion check. 
This field is saved but not shown to users — use it to think before classifying. - - category: Use one of these PREFERRED categories when applicable: structured-planning, incremental-implementation, verification-workflow, systematic-debugging, self-correction, context-gathering, domain-expertise, effective-tooling. Create a new kebab-case category only when none fit. - - description: Specific technique worth repeating (1-2 sentences with concrete detail) - - confidence: 0-100 how confident you are this is genuinely effective - - driver: Who drove this pattern — "user-driven" (user explicitly requested it), "ai-driven" (AI exhibited it without prompting), or "collaborative" (both contributed or emerged from interaction) -${EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE} - -5. had_course_correction: true if the user redirected the AI from a wrong approach, false otherwise -6. course_correction_reason: If had_course_correction is true, briefly explain what was corrected (or null) -7. iteration_count: Number of times the user had to clarify, correct, or re-explain something - -If the session has minimal friction and straightforward execution, use empty arrays for friction_points, set outcome_satisfaction to "high", and iteration_count to 0. - -=== PART 2: INSIGHTS === -Then extract these: - -You will extract: -1. **Summary**: A narrative of what was accomplished and the outcome -2. **Decisions**: Technical choices made — with full situation context, reasoning, rejected alternatives, trade-offs, and conditions for revisiting (max 3) -3. 
**Learnings**: Technical discoveries, gotchas, debugging breakthroughs — with the observable symptom, root cause, and a transferable takeaway (max 5) - -Quality Standards: -- Only include insights you would write in a team knowledge base for future reference -- Each insight MUST reference concrete details: specific file names, library names, error messages, API endpoints, or code patterns -- Do not invent file names, APIs, errors, or details not present in the conversation -- Rate your confidence in each insight's value (0-100). Only include insights you rate 70+. -- It is better to return 0 insights in a category than to include generic or trivial ones -- If a session is straightforward with no notable decisions or learnings, say so in the summary and leave other categories empty - -Length Guidance: -- Fill every field in the schema. An empty "trade_offs" or "revisit_when" is worse than a longer response. -- Total response: stay under 2000 tokens. If you must cut, drop lower-confidence insights rather than compressing high-confidence ones. -- Evidence: 1-3 short quotes per insight, referencing turn labels. -- Prefer precision over brevity — a specific 3-sentence insight beats a vague 1-sentence insight. - -DO NOT include insights like these (too generic/trivial): -- "Used debugging techniques to fix an issue" -- "Made architectural decisions about the codebase" -- "Implemented a new feature" (the summary already covers this) -- "Used React hooks for state management" (too generic without specifics) -- "Fixed a bug in the code" (what bug? what was the root cause?) -- Anything that restates the task without adding transferable knowledge - -Here is an example of an EXCELLENT insight — this is the quality bar: - -EXCELLENT learning: -{ - "title": "Tailwind v4 requires @theme inline{} for CSS variable utilities", - "symptom": "After Tailwind v3→v4 upgrade, custom utilities like bg-primary stopped working. 
Classes present in HTML but no styles applied.", - "root_cause": "Tailwind v4 removed tailwind.config.js theme extension. CSS variables in :root are not automatically available as utilities — must be registered via @theme inline {} in the CSS file.", - "takeaway": "When migrating Tailwind v3→v4 with shadcn/ui: add @theme inline {} mapping CSS variables, add @custom-variant dark for class-based dark mode, replace tailwindcss-animate with tw-animate-css.", - "applies_when": "Any Tailwind v3→v4 migration using CSS variables for theming, especially with shadcn/ui.", - "confidence": 95, - "evidence": ["User#12: 'The colors are all gone after the upgrade'", "Assistant#13: 'Tailwind v4 requires explicit @theme inline registration...'"] -} - -Extract insights in this JSON format: -{ - "facets": { - "outcome_satisfaction": "high | medium | low | abandoned", - "workflow_pattern": "plan-then-implement | iterative-refinement | debug-fix-verify | explore-then-build | direct-execution | null", - "had_course_correction": false, - "course_correction_reason": null, - "iteration_count": 0, - "friction_points": [ - { - "_reasoning": "User said 'fix the auth' without specifying OAuth vs session-based or which file. Step 1: not external — this is about the prompt, not infrastructure. Step 2: user could have specified which auth flow → user-actionable. Category: incomplete-requirements fits better than vague-request because specific constraints (which flow, which file) were missing, not the overall task description.", - "category": "incomplete-requirements", - "attribution": "user-actionable", - "description": "Missing specification of which auth flow (OAuth vs session) caused implementation of wrong provider in auth.ts", - "severity": "medium", - "resolution": "resolved" - }, - { - "_reasoning": "AI applied Express middleware pattern to a Hono route despite conversation showing Hono imports. Step 1: not external. Step 2: user provided clear Hono context in prior messages. 
Step 3: AI failed despite adequate input → ai-capability. Category: knowledge-gap — incorrect framework API knowledge was applied.", - "category": "knowledge-gap", - "attribution": "ai-capability", - "description": "Express-style middleware pattern applied to Hono route despite Hono imports visible in conversation context", - "severity": "high", - "resolution": "resolved" - } - ], - "effective_patterns": [ - { - "_reasoning": "Before editing, AI read 8 files across server/src/routes/ and server/src/llm/ to understand the data flow. Baseline check: 8 files across 2 directories = beyond routine (<5 file) reads. Step 1: no CLAUDE.md rule requiring this. Step 2: user didn't ask for investigation. Step 3: AI explored autonomously → ai-driven. Category: context-gathering (active investigation, not pre-existing knowledge).", - "category": "context-gathering", - "description": "Read 8 files across routes/ and llm/ directories to map the data flow before modifying the aggregation query, preventing a type mismatch that would have required rework", - "confidence": 88, - "driver": "ai-driven" - } - ] - }, - "summary": { - "title": "Brief title describing main accomplishment (max 80 chars)", - "content": "2-4 sentence narrative: what was the goal, what was done, what was the outcome. 
Mention the primary file or component changed.", - "outcome": "success | partial | abandoned | blocked", - "bullets": ["Each bullet names a specific artifact (file, function, endpoint) and what changed"] - }, - "decisions": [ - { - "title": "The specific technical choice made (max 80 chars)", - "situation": "What problem or requirement led to this decision point", - "choice": "What was chosen and how it was implemented", - "reasoning": "Why this choice was made — the key factors that tipped the decision", - "alternatives": [ - {"option": "Name of alternative", "rejected_because": "Why it was not chosen"} - ], - "trade_offs": "What downsides were accepted, what was given up", - "revisit_when": "Under what conditions this decision should be reconsidered (or 'N/A' if permanent)", - "confidence": 85, - "evidence": ["User#4: quoted text...", "Assistant#5: quoted text..."] - } - ], - "learnings": [ - { - "title": "Specific technical discovery or gotcha (max 80 chars)", - "symptom": "What went wrong or was confusing — the observable behavior that triggered investigation", - "root_cause": "The underlying technical reason — why it happened", - "takeaway": "The transferable lesson — what to do or avoid in similar situations, useful outside this project", - "applies_when": "Conditions under which this knowledge is relevant (framework version, configuration, etc.)", - "confidence": 80, - "evidence": ["User#7: quoted text...", "Assistant#8: quoted text..."] - } - ] -} - -Only include insights rated 70+ confidence. If you cannot cite evidence, drop the insight. Return empty arrays for categories with no strong insights. Max 3 decisions, 5 learnings. -Evidence should reference the labeled turns in the conversation (e.g., "User#2", "Assistant#5"). - -Respond with valid JSON only, wrapped in ... tags. 
Do not include any other text.`; -} - -// ============================================================================= -// PROMPT QUALITY INSTRUCTIONS -// The instruction suffix for prompt quality analysis calls (user[1]). -// ============================================================================= - -/** - * Build the instruction suffix for prompt quality analysis. - * Used as the second content block in the user message, after the cached conversation. - */ -export function buildPromptQualityInstructions( - projectName: string, - sessionMeta: { - humanMessageCount: number; - assistantMessageCount: number; - toolExchangeCount: number; - }, - meta?: SessionMetadata -): string { - return `You are a prompt engineering coach helping developers communicate more effectively with AI coding assistants. You review conversations and identify specific moments where better prompting would have saved time — AND moments where the user prompted particularly well. - -You will produce: -1. **Takeaways**: Concrete before/after examples the user can learn from (max 4) -2. **Findings**: Categorized findings for cross-session aggregation (max 8) -3. **Dimension scores**: 5 numeric dimensions for progress tracking -4. **Efficiency score**: 0-100 overall rating -5. **Assessment**: 2-3 sentence summary - -Project: ${projectName} -Session shape: ${sessionMeta.humanMessageCount} user messages, ${sessionMeta.assistantMessageCount} assistant messages, ${sessionMeta.toolExchangeCount} tool exchanges -${formatSessionMetaLine(meta)} -Before evaluating, mentally walk through the conversation and identify: -1. Each time the assistant asked for clarification that could have been avoided -2. Each time the user corrected the assistant's interpretation -3. Each time the user repeated an instruction they gave earlier -4. Whether critical context or requirements were provided late -5. Whether the user discussed the plan/approach before implementation -6. 
Moments where the user's prompt was notably well-crafted -7. If context compactions occurred, note that the AI may have lost context — repeated instructions IMMEDIATELY after a compaction are NOT a user prompting deficit -These are your candidate findings. Only include them if they are genuinely actionable. - -${PROMPT_QUALITY_CLASSIFICATION_GUIDANCE} - -Guidelines: -- Focus on USER messages only — don't critique the assistant's responses -- Be constructive, not judgmental — the goal is to help users improve -- A score of 100 means every user message was perfectly clear and complete -- A score of 50 means about half the messages could have been more efficient -- Include BOTH deficits and strengths — what went right matters as much as what went wrong -- If the user prompted well, say so — don't manufacture issues -- If the session had context compactions, do NOT penalize the user for repeating instructions immediately after a compaction — the AI lost context, not the user. Repetition unrelated to compaction events should still be flagged. 
- -Length Guidance: -- Max 4 takeaways (ordered: improve first, then reinforce), max 8 findings -- better_prompt must be a complete, usable prompt — not vague meta-advice -- assessment: 2-3 sentences -- Total response: stay under 2500 tokens - -Evaluate the user's prompting quality and respond with this JSON format: -{ - "efficiency_score": 75, - "message_overhead": 3, - "assessment": "2-3 sentence summary of prompting style and efficiency", - "takeaways": [ - { - "type": "improve", - "category": "late-constraint", - "label": "Short human-readable heading", - "message_ref": "User#5", - "original": "The user's original message (abbreviated)", - "better_prompt": "A concrete rewrite with the missing context included", - "why": "One sentence: why the original caused friction" - }, - { - "type": "reinforce", - "category": "precise-request", - "label": "Short human-readable heading", - "message_ref": "User#0", - "what_worked": "What the user did well", - "why_effective": "Why it led to a good outcome" - } - ], - "findings": [ - { - "category": "late-constraint", - "type": "deficit", - "description": "One neutral sentence with specific details", - "message_ref": "User#5", - "impact": "high", - "confidence": 90, - "suggested_improvement": "Concrete rewrite or behavioral change" - }, - { - "category": "precise-request", - "type": "strength", - "description": "One sentence describing what the user did well", - "message_ref": "User#0", - "impact": "medium", - "confidence": 85 - } - ], - "dimension_scores": { - "context_provision": 70, - "request_specificity": 65, - "scope_management": 80, - "information_timing": 55, - "correction_quality": 75 - } -} - -Category values — use these PREFERRED categories: -Deficits: ${CANONICAL_PQ_DEFICIT_CATEGORIES.join(', ')} -Strengths: ${CANONICAL_PQ_STRENGTH_CATEGORIES.join(', ')} -Create a new kebab-case category only when none of these fit. 
- -Rules: -- message_ref uses the labeled turns in the conversation (e.g., "User#0", "User#5") -- Only include genuinely notable findings, not normal back-and-forth -- Takeaways are the user-facing highlights — max 4, ordered: improve first, then reinforce -- Findings are the full categorized set for aggregation — max 8 -- If the user prompted well, include strength findings and reinforce takeaways — don't manufacture issues -- message_overhead is how many fewer messages the session could have taken with better prompts -- dimension_scores: each 0-100. Score correction_quality as 75 if no corrections were needed. - -Respond with valid JSON only, wrapped in ... tags. Do not include any other text.`; -} - -// ============================================================================= -// FACET-ONLY INSTRUCTIONS -// The instruction suffix for facet-only extraction calls (user[1]). -// ============================================================================= - -/** - * Build the instruction suffix for facet-only extraction (backfill path). - * Used as the second content block in the user message, after the cached conversation. - */ -export function buildFacetOnlyInstructions( - projectName: string, - sessionSummary: string | null, - meta?: SessionMetadata -): string { - return `You are assessing an AI coding session to extract structured metadata for cross-session pattern analysis. - -Project: ${projectName} -${sessionSummary ? `Session Summary: ${sessionSummary}\n` : ''}${formatSessionMetaLine(meta)} -Extract session facets — a holistic assessment of how the session went: - -1. outcome_satisfaction: "high" (completed successfully), "medium" (partial), "low" (problems), "abandoned" (gave up) -2. workflow_pattern: The dominant pattern, or null. Values: "plan-then-implement", "iterative-refinement", "debug-fix-verify", "explore-then-build", "direct-execution" -3. friction_points: Up to 5 moments where progress stalled (array). 
- Each: { _reasoning (3-step attribution decision tree reasoning), category (kebab-case, prefer: ${CANONICAL_FRICTION_CATEGORIES.join(', ')}), attribution ("user-actionable"|"ai-capability"|"environmental"), description (one neutral sentence with specific details), severity ("high"|"medium"|"low"), resolution ("resolved"|"workaround"|"unresolved") } -${FRICTION_CLASSIFICATION_GUIDANCE} -4. effective_patterns: Up to 3 things that worked well (array). - Each: { _reasoning (driver decision tree reasoning — check user infrastructure first), category (kebab-case, prefer: ${CANONICAL_PATTERN_CATEGORIES.join(', ')}), description (specific technique, 1-2 sentences), confidence (0-100), driver ("user-driven"|"ai-driven"|"collaborative") } -${EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE} -5. had_course_correction: true/false — did the user redirect the AI? -6. course_correction_reason: Brief explanation if true, null otherwise -7. iteration_count: How many user clarification/correction cycles occurred - -Extract facets in this JSON format: -{ - "outcome_satisfaction": "high | medium | low | abandoned", - "workflow_pattern": "string or null", - "had_course_correction": false, - "course_correction_reason": null, - "iteration_count": 0, - "friction_points": [ - { - "_reasoning": "Reasoning for category + attribution classification", - "category": "kebab-case-category", - "attribution": "user-actionable | ai-capability | environmental", - "description": "One neutral sentence about the gap, with specific details", - "severity": "high | medium | low", - "resolution": "resolved | workaround | unresolved" - } - ], - "effective_patterns": [ - { - "_reasoning": "Reasoning for category + driver classification, including baseline check", - "category": "kebab-case-category", - "description": "technique", - "confidence": 85, - "driver": "user-driven | ai-driven | collaborative" - } - ] -} - -Respond with valid JSON only, wrapped in ... 
tags.`; -} - +// Re-exports from @code-insights/cli/analysis/prompts. +// Moved to CLI package so the CLI can use prompt builders for native analysis (--native mode). +export { + SHARED_ANALYST_SYSTEM_PROMPT, + buildCacheableConversationBlock, + buildSessionAnalysisInstructions, + buildPromptQualityInstructions, + buildFacetOnlyInstructions, +} from '@code-insights/cli/analysis/prompts'; diff --git a/server/src/llm/response-parsers.ts b/server/src/llm/response-parsers.ts index 542b5d6..6af7d40 100644 --- a/server/src/llm/response-parsers.ts +++ b/server/src/llm/response-parsers.ts @@ -1,200 +1,7 @@ -// LLM response parsing utilities. -// Extracted from prompts.ts — handles JSON extraction, repair, and validation. - -import { jsonrepair } from 'jsonrepair'; -import type { AnalysisResponse, ParseError, ParseResult, PromptQualityResponse, PromptQualityDimensionScores } from './prompt-types.js'; - -function buildResponsePreview(text: string, head = 200, tail = 200): string { - if (text.length <= head + tail + 20) return text; - return `${text.slice(0, head)}\n...[${text.length - head - tail} chars omitted]...\n${text.slice(-tail)}`; -} - -export function extractJsonPayload(response: string): string | null { - const tagged = response.match(/\s*([\s\S]*?)\s*<\/json>/i); - if (tagged?.[1]) return tagged[1].trim(); - const jsonMatch = response.match(/\{[\s\S]*\}/); - return jsonMatch ? jsonMatch[0] : null; -} - -/** - * Parse the LLM response into structured insights. 
- */ -export function parseAnalysisResponse(response: string): ParseResult { - const response_length = response.length; - - const preview = buildResponsePreview(response); - - const jsonPayload = extractJsonPayload(response); - if (!jsonPayload) { - console.error('No JSON found in analysis response'); - return { - success: false, - error: { error_type: 'no_json_found', error_message: 'No JSON found in analysis response', response_length, response_preview: preview }, - }; - } - - let parsed: AnalysisResponse; - try { - parsed = JSON.parse(jsonPayload) as AnalysisResponse; - } catch { - // Attempt repair — handles trailing commas, unclosed braces, truncated output - try { - parsed = JSON.parse(jsonrepair(jsonPayload)) as AnalysisResponse; - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - console.error('Failed to parse analysis response (after jsonrepair):', err); - return { - success: false, - error: { error_type: 'json_parse_error', error_message: msg, response_length, response_preview: preview }, - }; - } - } - - if (!parsed.summary || typeof parsed.summary.title !== 'string') { - console.error('Invalid analysis response structure'); - return { - success: false, - error: { error_type: 'invalid_structure', error_message: 'Missing or invalid summary field', response_length, response_preview: preview }, - }; - } - - // Guard against LLM returning non-array values (e.g. "decisions": "none"). - // || [] alone won't catch truthy non-arrays — Array.isArray is required. - parsed.decisions = Array.isArray(parsed.decisions) ? parsed.decisions : []; - parsed.learnings = Array.isArray(parsed.learnings) ? parsed.learnings : []; - - // Normalize facet arrays before monitors access .some() — a non-array truthy value - // (e.g. LLM returns "friction_points": "none") would throw a TypeError on .some(). 
- if (parsed.facets) { - if (!Array.isArray(parsed.facets.friction_points)) parsed.facets.friction_points = []; - if (!Array.isArray(parsed.facets.effective_patterns)) parsed.facets.effective_patterns = []; - } - - // Observability: two-tier tooling-limitation monitor. - // Tier 1: _reasoning contains misclassification signals NOT in a negation context → likely wrong category. - // Tier 2: no conflicting signals (or signal was negated) → generic reminder to verify. - // Re-evaluate after ~30 sessions with improved FRICTION_CLASSIFICATION_GUIDANCE. - if (parsed.facets?.friction_points?.some(fp => fp.category === 'tooling-limitation')) { - // Expanded regex covers both literal terms and GPT-4o paraphrasing patterns - const MISCLASS_SIGNALS = /rate.?limit|throttl|quota.?exceed|crash|fail.{0,10}unexpect|lost.?state|context.{0,10}(?:drop|lost|unavail)|wrong.?tool|different.?(?:approach|method)|(?:didn.t|did not|unaware).{0,10}(?:know|capabil)|(?:older|previous).?version|used to (?:work|be)|behavio.?r.?change/i; - const NEGATION_CONTEXT = /\bnot\b|\bnor\b|\bisn.t\b|\bwasn.t\b|\brule[d]? 
out\b|\brejected?\b|\beliminated?\b|\breclassif/i; - const toolingFps = parsed.facets.friction_points.filter(fp => fp.category === 'tooling-limitation'); - for (const fp of toolingFps) { - if (!fp._reasoning) { - console.warn('[friction-monitor] LLM classified friction as "tooling-limitation" without _reasoning — cannot verify'); - continue; - } - const matchResult = fp._reasoning.match(MISCLASS_SIGNALS); - if (matchResult) { - // Check if the signal appears in a negation context (model correctly eliminating the alternative) - const matchIdx = fp._reasoning.search(MISCLASS_SIGNALS); - const preceding = fp._reasoning.slice(Math.max(0, matchIdx - 40), matchIdx); - if (!NEGATION_CONTEXT.test(preceding)) { - console.warn(`[friction-monitor] Likely misclassification: "tooling-limitation" with reasoning mentioning "${matchResult[0]}" — review category`); - } - // If negated, the model correctly considered and rejected the alternative — no warning - } else { - console.warn('[friction-monitor] LLM classified friction as "tooling-limitation" — verify genuine tool limitation'); - } - } - } - - // Observability: warn when LLM returns effective_pattern without category or driver field, - // or with an unrecognized driver value. - // Catches models that ignore the classification instructions (especially smaller Ollama models). - // Remove after confirming classification quality over ~20 new sessions. 
- if (parsed.facets?.effective_patterns?.some(ep => !ep.category)) { - console.warn('[pattern-monitor] LLM returned effective_pattern without category field'); - } - if (parsed.facets?.effective_patterns?.some(ep => !ep.driver)) { - console.warn('[pattern-monitor] LLM returned effective_pattern without driver field — driver classification may be incomplete'); - } - const VALID_DRIVERS = new Set(['user-driven', 'ai-driven', 'collaborative']); - if (parsed.facets?.effective_patterns?.some(ep => ep.driver && !VALID_DRIVERS.has(ep.driver))) { - console.warn('[pattern-monitor] LLM returned unexpected driver value — check classification quality'); - } - - // Validation: check for missing _reasoning CoT scratchpad fields. - // These fields ensure the model walks through the attribution/driver decision trees - // before committing to classification values. - // (Monitoring period complete — warn calls removed after confirming CoT compliance) - if (parsed.facets?.friction_points?.some(fp => !fp._reasoning)) { - // Missing _reasoning: classification may lack decision-tree rigor - } - if (parsed.facets?.effective_patterns?.some(ep => !ep._reasoning)) { - // Missing _reasoning: classification may lack decision-tree rigor - } - - return { success: true, data: parsed }; -} - -export function parsePromptQualityResponse(response: string): ParseResult { - const response_length = response.length; - const preview = buildResponsePreview(response); - - const jsonPayload = extractJsonPayload(response); - if (!jsonPayload) { - console.error('No JSON found in prompt quality response'); - return { - success: false, - error: { error_type: 'no_json_found', error_message: 'No JSON found in prompt quality response', response_length, response_preview: preview }, - }; - } - - let parsed: PromptQualityResponse; - try { - parsed = JSON.parse(jsonPayload) as PromptQualityResponse; - } catch { - try { - parsed = JSON.parse(jsonrepair(jsonPayload)) as PromptQualityResponse; - } catch (err) { - const 
msg = err instanceof Error ? err.message : String(err); - console.error('Failed to parse prompt quality response (after jsonrepair):', msg); - return { - success: false, - error: { error_type: 'json_parse_error', error_message: msg, response_length, response_preview: preview }, - }; - } - } - - if (typeof parsed.efficiency_score !== 'number') { - console.error('Invalid prompt quality response: missing efficiency_score'); - return { - success: false, - error: { error_type: 'invalid_structure', error_message: 'Missing or invalid efficiency_score field', response_length, response_preview: preview }, - }; - } - - // Clamp and default - parsed.efficiency_score = Math.max(0, Math.min(100, Math.round(parsed.efficiency_score))); - parsed.message_overhead = parsed.message_overhead ?? 0; - parsed.assessment = parsed.assessment || ''; - // Guard against LLM returning non-array values (e.g. "findings": "none") — - // || [] alone won't catch truthy non-arrays, and .some() on line 166 would throw. - parsed.takeaways = Array.isArray(parsed.takeaways) ? parsed.takeaways : []; - parsed.findings = Array.isArray(parsed.findings) ? parsed.findings : []; - parsed.dimension_scores = parsed.dimension_scores || { - context_provision: 50, - request_specificity: 50, - scope_management: 50, - information_timing: 50, - correction_quality: 50, - }; - - // Clamp dimension scores - for (const key of Object.keys(parsed.dimension_scores) as Array) { - parsed.dimension_scores[key] = Math.max(0, Math.min(100, Math.round(parsed.dimension_scores[key] ?? 50))); - } - - // Validation: check for missing category or unexpected type values in findings. 
- // (Monitoring period complete — warn calls removed after confirming classification quality) - if (parsed.findings.some(f => !f.category)) { - // Finding missing category field - } - - if (parsed.findings.some(f => f.type && f.type !== 'deficit' && f.type !== 'strength')) { - // Finding has unexpected type value — expected deficit or strength - } - - return { success: true, data: parsed }; -} +// Re-exports from @code-insights/cli/analysis/response-parsers. +// Moved to CLI package so the CLI can use response parsers for native analysis (--native mode). +export { + extractJsonPayload, + parseAnalysisResponse, + parsePromptQualityResponse, +} from '@code-insights/cli/analysis/response-parsers';