From 4acd3b79759dcc8c30b4459d04c93ac42ed66044 Mon Sep 17 00:00:00 2001
From: Srikanth Rao M
Date: Sun, 29 Mar 2026 10:04:54 +0530
Subject: [PATCH 1/8] feat(cli): add AnalysisRunner interface and RunAnalysis types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Defines the abstraction layer between the insights command and LLM backends.
Adding a future runner (e.g. CursorNativeRunner) requires only implementing
this interface — no changes to the calling code.

Co-Authored-By: Claude Sonnet 4.6
---
 cli/src/analysis/runner-types.ts | 35 ++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 cli/src/analysis/runner-types.ts

diff --git a/cli/src/analysis/runner-types.ts b/cli/src/analysis/runner-types.ts
new file mode 100644
index 0000000..fc65aee
--- /dev/null
+++ b/cli/src/analysis/runner-types.ts
@@ -0,0 +1,35 @@
+/**
+ * AnalysisRunner interface — the abstraction between the `insights` command
+ * and the actual LLM backend (native claude -p, or configured provider).
+ *
+ * Adding a new runner (e.g. CursorNativeRunner) requires only implementing
+ * this interface — no changes to the `insights` command.
+ */
+
+export interface AnalysisRunner {
+  readonly name: string;
+  runAnalysis(params: RunAnalysisParams): Promise<RunAnalysisResult>;
+}
+
+export interface RunAnalysisParams {
+  systemPrompt: string;
+  userPrompt: string;
+  /** JSON schema file content for structured output (used by native mode via --json-schema). */
+  jsonSchema?: object;
+}
+
+export interface RunAnalysisResult {
+  rawJson: string;
+  durationMs: number;
+  /**
+   * Token counts.
+   * Native mode: always 0 — tokens are counted as part of the overall Claude Code session.
+   * Provider mode: actual token counts from the LLM API response.
+   */
+  inputTokens: number;
+  outputTokens: number;
+  cacheCreationTokens?: number;
+  cacheReadTokens?: number;
+  model: string;
+  provider: string;
+}
From a39d2ca2bf7dce932c3bf1d7296b463d62c04782 Mon Sep 17 00:00:00 2001
From: Srikanth Rao M
Date: Sun, 29 Mar 2026 10:05:01 +0530
Subject: [PATCH 2/8] feat(cli): add ClaudeNativeRunner using execFileSync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Executes analysis via `claude -p` non-interactive mode. Uses execFileSync
(not exec) to prevent shell injection. Temp files cleaned up in finally
block. Tokens are 0 — counted as part of the overall Claude Code session.

Co-Authored-By: Claude Sonnet 4.6
---
 .../analysis/__tests__/native-runner.test.ts | 150 ++++++++++++++++++
 cli/src/analysis/native-runner.ts | 85 ++++++++++
 2 files changed, 235 insertions(+)
 create mode 100644 cli/src/analysis/__tests__/native-runner.test.ts
 create mode 100644 cli/src/analysis/native-runner.ts

diff --git a/cli/src/analysis/__tests__/native-runner.test.ts b/cli/src/analysis/__tests__/native-runner.test.ts
new file mode 100644
index 0000000..d2f3b8f
--- /dev/null
+++ b/cli/src/analysis/__tests__/native-runner.test.ts
@@ -0,0 +1,150 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { join } from 'path';
+import { tmpdir } from 'os';
+
+// Mock child_process and fs before importing the module under test.
+vi.mock('child_process', () => ({
+  execFileSync: vi.fn(),
+}));
+vi.mock('fs', () => ({
+  writeFileSync: vi.fn(),
+  unlinkSync: vi.fn(),
+}));
+
+import { execFileSync } from 'child_process';
+import { writeFileSync, unlinkSync } from 'fs';
+import { ClaudeNativeRunner } from '../native-runner.js';
+
+const mockExecFileSync = vi.mocked(execFileSync);
+const mockWriteFileSync = vi.mocked(writeFileSync);
+const mockUnlinkSync = vi.mocked(unlinkSync);
+
+describe('ClaudeNativeRunner.validate()', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('does not throw when claude is in PATH', () => {
+    mockExecFileSync.mockReturnValueOnce(Buffer.from('claude 1.0.0'));
+    expect(() => ClaudeNativeRunner.validate()).not.toThrow();
+    expect(mockExecFileSync).toHaveBeenCalledWith('claude', ['--version'], { stdio: 'pipe' });
+  });
+
+  it('throws a helpful message when claude is not found', () => {
+    mockExecFileSync.mockImplementationOnce(() => { throw new Error('ENOENT'); });
+    expect(() => ClaudeNativeRunner.validate()).toThrow(/claude CLI not found in PATH/);
+  });
+});
+
+describe('ClaudeNativeRunner.runAnalysis()', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('calls execFileSync with correct args (no schema)', async () => {
+    mockExecFileSync.mockReturnValueOnce('{"summary": {"title": "test", "content": "c", "bullets": []}}' as unknown as Buffer);
+    const runner = new ClaudeNativeRunner();
+
+    await runner.runAnalysis({
+      systemPrompt: 'You are an analyst.',
+      userPrompt: 'Analyze this session.',
+    });
+
+    expect(mockExecFileSync).toHaveBeenCalledWith(
+      'claude',
+      expect.arrayContaining(['-p', '--output-format', 'json', '--append-system-prompt-file', expect.stringContaining('ci-prompt-'), '--bare']),
+      expect.objectContaining({
+        input: 'Analyze this session.',
+        encoding: 'utf-8',
+        timeout: 120_000,
+        maxBuffer: 10 * 1024 * 1024,
+      })
+    );
+
+    // --json-schema flag must NOT appear when jsonSchema is not provided
+    const callArgs = mockExecFileSync.mock.calls[0][1] as string[];
+    expect(callArgs).not.toContain('--json-schema');
+  });
+
+  it('includes --json-schema arg when jsonSchema is provided', async () => {
+    mockExecFileSync.mockReturnValueOnce('{"summary": {"title": "t", "content": "c", "bullets": []}}' as unknown as Buffer);
+    const runner = new ClaudeNativeRunner();
+
+    await runner.runAnalysis({
+      systemPrompt: 'system',
+      userPrompt: 'user',
+      jsonSchema: { type: 'object', properties: {} },
+    });
+
+    const callArgs = mockExecFileSync.mock.calls[0][1] as string[];
+    expect(callArgs).toContain('--json-schema');
+
+    // Schema file path should be in args
+    const schemaIndex = callArgs.indexOf('--json-schema');
+    expect(callArgs[schemaIndex + 1]).toContain('ci-schema-');
+  });
+
+  it('returns correct result shape with zero tokens', async () => {
+    const rawJson = '{"summary": {"title": "T", "content": "C", "bullets": []}}';
+    mockExecFileSync.mockReturnValueOnce(rawJson as unknown as Buffer);
+    const runner = new ClaudeNativeRunner();
+
+    const result = await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' });
+
+    expect(result.rawJson).toBe(rawJson);
+    expect(result.inputTokens).toBe(0);
+    expect(result.outputTokens).toBe(0);
+    expect(result.model).toBe('claude-native');
+    expect(result.provider).toBe('claude-code-native');
+    expect(result.durationMs).toBeGreaterThanOrEqual(0);
+  });
+
+  it('writes system prompt to a temp file', async () => {
+    mockExecFileSync.mockReturnValueOnce('' as unknown as Buffer);
+    const runner = new ClaudeNativeRunner();
+
+    await runner.runAnalysis({ systemPrompt: 'SYSTEM_CONTENT', userPrompt: 'u' }).catch(() => {});
+
+    expect(mockWriteFileSync).toHaveBeenCalledWith(
+      expect.stringContaining('ci-prompt-'),
+      'SYSTEM_CONTENT',
+      'utf-8'
+    );
+  });
+
+  it('cleans up temp files when execFileSync succeeds', async () => {
+    mockExecFileSync.mockReturnValueOnce('{}' as unknown as Buffer);
+    const runner = new ClaudeNativeRunner();
+
+    await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' });
+
+    expect(mockUnlinkSync).toHaveBeenCalledWith(expect.stringContaining('ci-prompt-'));
+  });
+
+  it('cleans up temp files even when execFileSync throws', async () => {
+    mockExecFileSync.mockImplementationOnce(() => { throw new Error('timeout'); });
+    const runner = new ClaudeNativeRunner();
+
+    await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' })).rejects.toThrow('timeout');
+
+    expect(mockUnlinkSync).toHaveBeenCalledWith(expect.stringContaining('ci-prompt-'));
+  });
+
+  it('cleans up both temp files when schema is provided and execFileSync throws', async () => {
+    mockExecFileSync.mockImplementationOnce(() => { throw new Error('fail'); });
+    const runner = new ClaudeNativeRunner();
+
+    await expect(
+      runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u', jsonSchema: { type: 'object' } })
+    ).rejects.toThrow('fail');
+
+    const unlinkCalls = mockUnlinkSync.mock.calls.map(c => c[0] as string);
+    expect(unlinkCalls.some(p => p.includes('ci-prompt-'))).toBe(true);
+    expect(unlinkCalls.some(p => p.includes('ci-schema-'))).toBe(true);
+  });
+
+  it('has the correct runner name', () => {
+    const runner = new ClaudeNativeRunner();
+    expect(runner.name).toBe('claude-code-native');
+  });
+});
diff --git a/cli/src/analysis/native-runner.ts b/cli/src/analysis/native-runner.ts
new file mode 100644
index 0000000..ff11138
--- /dev/null
+++ b/cli/src/analysis/native-runner.ts
@@ -0,0 +1,85 @@
+/**
+ * ClaudeNativeRunner — executes analysis via `claude -p` (non-interactive mode).
+ *
+ * Uses execFileSync (NOT exec) to prevent shell injection: arguments are passed
+ * as an array, never interpolated into a shell command string.
+ *
+ * Token counts are 0 because native-mode tokens are counted as part of the
+ * overall Claude Code session — Code Insights incurs no separate cost.
+ */
+
+import { execFileSync } from 'child_process';
+import { writeFileSync, unlinkSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import type { AnalysisRunner, RunAnalysisParams, RunAnalysisResult } from './runner-types.js';
+
+export class ClaudeNativeRunner implements AnalysisRunner {
+  readonly name = 'claude-code-native';
+
+  /**
+   * Validate that the `claude` CLI is available in PATH.
+   * Call this once before running analysis to give the user a clear error
+   * instead of a cryptic ENOENT from execFileSync.
+   */
+  static validate(): void {
+    try {
+      execFileSync('claude', ['--version'], { stdio: 'pipe' });
+    } catch {
+      throw new Error(
+        'claude CLI not found in PATH. --native requires Claude Code to be installed.\n' +
+        'Install it from: https://claude.ai/download'
+      );
+    }
+  }
+
+  async runAnalysis(params: RunAnalysisParams): Promise<RunAnalysisResult> {
+    const start = Date.now();
+    const ts = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2)}`; // unique per call and process
+
+    // System prompt goes to a temp file that claude -p reads via --append-system-prompt-file
+    // (avoids command-line length limits). Writes happen inside the try so a failed write is cleaned up too.
+    const promptFile = join(tmpdir(), `ci-prompt-${ts}.txt`);
+    let schemaFile: string | undefined;
+
+    try {
+      writeFileSync(promptFile, params.systemPrompt, 'utf-8');
+      if (params.jsonSchema) {
+        schemaFile = join(tmpdir(), `ci-schema-${ts}.json`);
+        writeFileSync(schemaFile, JSON.stringify(params.jsonSchema), 'utf-8');
+      }
+
+      const args = [
+        '-p',
+        '--output-format', 'json',
+        '--append-system-prompt-file', promptFile,
+        '--bare',
+      ];
+      if (schemaFile) {
+        args.push('--json-schema', schemaFile);
+      }
+
+      const rawOutput = execFileSync('claude', args, {
+        input: params.userPrompt,
+        encoding: 'utf-8',
+        timeout: 120_000, // 2-minute hard limit per analysis call
+        maxBuffer: 10 * 1024 * 1024, // 10 MB
+      });
+
+      return {
+        rawJson: rawOutput,
+        durationMs: Date.now() - start,
+        inputTokens: 0,
+        outputTokens: 0,
+        model: 'claude-native',
+        provider: 'claude-code-native',
+      };
+    } finally {
+      // Always clean up temp files, even if a write or execFileSync throws.
+      try { unlinkSync(promptFile); } catch { /* ignore — file may not exist */ }
+      if (schemaFile) {
+        try { unlinkSync(schemaFile); } catch { /* ignore */ }
+      }
+    }
+  }
+}
From 2feea1e8e1a472a467933f7886d11802a97479c1 Mon Sep 17 00:00:00 2001
From: Srikanth Rao M
Date: Sun, 29 Mar 2026 10:05:07 +0530
Subject: [PATCH 3/8] feat(cli): add ProviderRunner wrapping configured LLM provider

Delegates analysis to the configured provider (OpenAI, Anthropic, Gemini,
Ollama). Inlines provider dispatch in CLI to avoid a circular dependency
with the server package (server -> cli). All providers use only Node.js
built-in fetch.
Co-Authored-By: Claude Sonnet 4.6
---
 .../__tests__/provider-runner.test.ts | 179 +++++++++++++
 cli/src/analysis/provider-runner.ts | 241 ++++++++++++++++++
 2 files changed, 420 insertions(+)
 create mode 100644 cli/src/analysis/__tests__/provider-runner.test.ts
 create mode 100644 cli/src/analysis/provider-runner.ts

diff --git a/cli/src/analysis/__tests__/provider-runner.test.ts b/cli/src/analysis/__tests__/provider-runner.test.ts
new file mode 100644
index 0000000..a7be38f
--- /dev/null
+++ b/cli/src/analysis/__tests__/provider-runner.test.ts
@@ -0,0 +1,179 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import type { LLMProviderConfig } from '../../types.js';
+
+// Mock loadConfig so tests don't read ~/.code-insights/config.json
+vi.mock('../../utils/config.js', () => ({
+  loadConfig: vi.fn(),
+}));
+
+// Mock global fetch so tests don't make real HTTP calls
+const mockFetch = vi.fn();
+vi.stubGlobal('fetch', mockFetch);
+
+import { loadConfig } from '../../utils/config.js';
+import { ProviderRunner } from '../provider-runner.js';
+
+const mockLoadConfig = vi.mocked(loadConfig);
+
+// Helper — build a minimal LLMProviderConfig
+function makeConfig(overrides: Partial<LLMProviderConfig> = {}): LLMProviderConfig {
+  return {
+    provider: 'openai',
+    model: 'gpt-4o',
+    apiKey: 'sk-test',
+    ...overrides,
+  } as LLMProviderConfig;
+}
+
+// Helper — build a fetch Response mock
+function makeFetchResponse(body: unknown, status = 200): Response {
+  return {
+    ok: status >= 200 && status < 300,
+    status,
+    json: () => Promise.resolve(body),
+    text: () => Promise.resolve(JSON.stringify(body)),
+  } as unknown as Response;
+}
+
+describe('ProviderRunner.fromConfig()', () => {
+  beforeEach(() => vi.clearAllMocks());
+
+  it('throws when LLM is not configured', () => {
+    mockLoadConfig.mockReturnValue(null);
+    expect(() => ProviderRunner.fromConfig()).toThrow(/LLM not configured/);
+  });
+
+  it('throws when apiKey is missing for non-ollama providers', () => {
+    mockLoadConfig.mockReturnValue({
+      dashboard: { llm: makeConfig({ apiKey: undefined }) },
+    } as ReturnType<typeof loadConfig>);
+    expect(() => ProviderRunner.fromConfig()).toThrow(/requires an API key/);
+  });
+
+  it('creates a runner from valid config', () => {
+    mockLoadConfig.mockReturnValue({
+      dashboard: { llm: makeConfig() },
+    } as ReturnType<typeof loadConfig>);
+    const runner = ProviderRunner.fromConfig();
+    expect(runner).toBeInstanceOf(ProviderRunner);
+    expect(runner.name).toBe('openai');
+  });
+
+  it('accepts ollama config without apiKey', () => {
+    mockLoadConfig.mockReturnValue({
+      dashboard: { llm: makeConfig({ provider: 'ollama', apiKey: undefined, model: 'llama3' }) },
+    } as ReturnType<typeof loadConfig>);
+    const runner = ProviderRunner.fromConfig();
+    expect(runner.name).toBe('ollama');
+  });
+});
+
+describe('ProviderRunner.runAnalysis() — OpenAI', () => {
+  beforeEach(() => vi.clearAllMocks());
+
+  it('calls OpenAI endpoint with correct payload', async () => {
+    mockFetch.mockResolvedValueOnce(makeFetchResponse({
+      choices: [{ message: { content: '{"summary": {"title": "T", "content": "C", "bullets": []}}' } }],
+      usage: { prompt_tokens: 100, completion_tokens: 50 },
+    }));
+
+    const runner = new ProviderRunner(makeConfig());
+    await runner.runAnalysis({ systemPrompt: 'sys', userPrompt: 'user' });
+
+    expect(mockFetch).toHaveBeenCalledWith(
+      'https://api.openai.com/v1/chat/completions',
+      expect.objectContaining({
+        method: 'POST',
+        headers: expect.objectContaining({ 'Authorization': 'Bearer sk-test' }),
+      })
+    );
+
+    const body = JSON.parse((mockFetch.mock.calls[0][1] as RequestInit).body as string);
+    expect(body.model).toBe('gpt-4o');
+    expect(body.messages).toEqual([
+      { role: 'system', content: 'sys' },
+      { role: 'user', content: 'user' },
+    ]);
+  });
+
+  it('returns rawJson, token counts, model and provider', async () => {
+    const rawJson = '{"summary": {"title": "T", "content": "C", "bullets": []}}';
+    mockFetch.mockResolvedValueOnce(makeFetchResponse({
+      choices: [{ message: { content: rawJson } }],
+      usage: { prompt_tokens: 200, completion_tokens: 80 },
+    }));
+
+    const runner = new ProviderRunner(makeConfig());
+    const result = await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' });
+
+    expect(result.rawJson).toBe(rawJson);
+    expect(result.inputTokens).toBe(200);
+    expect(result.outputTokens).toBe(80);
+    expect(result.model).toBe('gpt-4o');
+    expect(result.provider).toBe('openai');
+    expect(result.durationMs).toBeGreaterThanOrEqual(0);
+  });
+
+  it('throws on non-2xx response', async () => {
+    mockFetch.mockResolvedValueOnce(makeFetchResponse(
+      { error: { message: 'Invalid API key.' } },
+      401
+    ));
+
+    const runner = new ProviderRunner(makeConfig());
+    await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' }))
+      .rejects.toThrow('Invalid API key.');
+  });
+});
+
+describe('ProviderRunner.runAnalysis() — Anthropic', () => {
+  beforeEach(() => vi.clearAllMocks());
+
+  it('calls Anthropic endpoint with correct headers', async () => {
+    mockFetch.mockResolvedValueOnce(makeFetchResponse({
+      content: [{ text: '{"facets": null}' }],
+      usage: { input_tokens: 300, output_tokens: 60, cache_creation_input_tokens: 50, cache_read_input_tokens: 100 },
+    }));
+
+    const runner = new ProviderRunner(makeConfig({ provider: 'anthropic', model: 'claude-opus-4-5', apiKey: 'ak-test' }));
+    const result = await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' });
+
+    expect(mockFetch).toHaveBeenCalledWith(
+      'https://api.anthropic.com/v1/messages',
+      expect.objectContaining({
+        headers: expect.objectContaining({
+          'x-api-key': 'ak-test',
+          'anthropic-beta': 'prompt-caching-2024-07-31',
+        }),
+      })
+    );
+
+    expect(result.inputTokens).toBe(300);
+    expect(result.outputTokens).toBe(60);
+    expect(result.cacheCreationTokens).toBe(50);
+    expect(result.cacheReadTokens).toBe(100);
+  });
+});
+
+describe('ProviderRunner — jsonSchema param', () => {
+  beforeEach(() => vi.clearAllMocks());
+
+  it('does not pass jsonSchema to the LLM API (only used by NativeRunner)', async () => {
+    // ProviderRunner ignores jsonSchema — the LLM API enforces structure differently.
+    mockFetch.mockResolvedValueOnce(makeFetchResponse({
+      choices: [{ message: { content: '{}' } }],
+    }));
+
+    const runner = new ProviderRunner(makeConfig());
+    await runner.runAnalysis({
+      systemPrompt: 's',
+      userPrompt: 'u',
+      jsonSchema: { type: 'object', properties: {} },
+    });
+
+    const body = JSON.parse((mockFetch.mock.calls[0][1] as RequestInit).body as string);
+    // jsonSchema must NOT appear in the request body to the LLM provider
+    expect(body).not.toHaveProperty('json_schema');
+    expect(body).not.toHaveProperty('jsonSchema');
+  });
+});
diff --git a/cli/src/analysis/provider-runner.ts b/cli/src/analysis/provider-runner.ts
new file mode 100644
index 0000000..2ab84ed
--- /dev/null
+++ b/cli/src/analysis/provider-runner.ts
@@ -0,0 +1,241 @@
+/**
+ * ProviderRunner — delegates analysis to the configured LLM provider
+ * (OpenAI, Anthropic, Gemini, or Ollama).
+ *
+ * Design note: The CLI cannot import from @code-insights/server (server depends
+ * on CLI — importing in the other direction would create a circular dependency).
+ * All LLM providers use only Node.js built-in `fetch` (Node 18+), so this module
+ * inlines the minimal provider dispatch that mirrors server/src/llm/client.ts.
+ * If the server LLM client grows substantially (new providers, streaming, etc.),
+ * that work is tracked in Issue #240.
+ */
+
+import { loadConfig } from '../utils/config.js';
+import type { LLMProviderConfig } from '../types.js';
+import type { AnalysisRunner, RunAnalysisParams, RunAnalysisResult } from './runner-types.js';
+
+// ── Minimal LLM types (mirrors server/src/llm/types.ts) ──────────────────────
+
+interface LLMMessage {
+  role: 'system' | 'user' | 'assistant';
+  content: string;
+}
+
+interface LLMResponse {
+  content: string;
+  usage?: {
+    inputTokens: number;
+    outputTokens: number;
+    cacheCreationTokens?: number;
+    cacheReadTokens?: number;
+  };
+}
+
+type LLMChatFn = (messages: LLMMessage[]) => Promise<LLMResponse>;
+
+// ── Provider implementations ──────────────────────────────────────────────────
+
+function makeOpenAIChat(apiKey: string, model: string): LLMChatFn {
+  return async (messages) => {
+    const response = await fetch('https://api.openai.com/v1/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${apiKey}`,
+      },
+      body: JSON.stringify({ model, messages, temperature: 0.7, max_tokens: 8192 }),
+    });
+    if (!response.ok) {
+      const err = await response.json().catch(() => ({})) as { error?: { message?: string } };
+      throw new Error(err.error?.message || `OpenAI API error (HTTP ${response.status})`);
+    }
+    const data = await response.json() as {
+      choices: Array<{ message: { content: string } }>;
+      usage?: { prompt_tokens: number; completion_tokens: number };
+    };
+    return {
+      content: data.choices[0]?.message?.content || '',
+      usage: data.usage
+        ? { inputTokens: data.usage.prompt_tokens, outputTokens: data.usage.completion_tokens }
+        : undefined,
+    };
+  };
+}
+
+function makeAnthropicChat(apiKey: string, model: string): LLMChatFn {
+  return async (messages) => {
+    const systemMsg = messages.find(m => m.role === 'system');
+    const chatMsgs = messages.filter(m => m.role !== 'system');
+    const response = await fetch('https://api.anthropic.com/v1/messages', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'x-api-key': apiKey,
+        'anthropic-version': '2023-06-01',
+        'anthropic-beta': 'prompt-caching-2024-07-31',
+      },
+      body: JSON.stringify({
+        model,
+        max_tokens: 8192,
+        system: systemMsg?.content,
+        messages: chatMsgs.map(m => ({ role: m.role, content: m.content })),
+      }),
+    });
+    if (!response.ok) {
+      const err = await response.json().catch(() => ({})) as { error?: { message?: string } };
+      throw new Error(err.error?.message || `Anthropic API error (HTTP ${response.status})`);
+    }
+    const data = await response.json() as {
+      content: Array<{ text: string }>;
+      usage?: {
+        input_tokens: number;
+        output_tokens: number;
+        cache_creation_input_tokens?: number;
+        cache_read_input_tokens?: number;
+      };
+    };
+    return {
+      content: data.content[0]?.text || '',
+      usage: data.usage ? {
+        inputTokens: data.usage.input_tokens,
+        outputTokens: data.usage.output_tokens,
+        ...(data.usage.cache_creation_input_tokens !== undefined && {
+          cacheCreationTokens: data.usage.cache_creation_input_tokens,
+        }),
+        ...(data.usage.cache_read_input_tokens !== undefined && {
+          cacheReadTokens: data.usage.cache_read_input_tokens,
+        }),
+      } : undefined,
+    };
+  };
+}
+
+function makeGeminiChat(apiKey: string, model: string): LLMChatFn {
+  return async (messages) => {
+    const systemMsg = messages.find(m => m.role === 'system');
+    const chatMsgs = messages.filter(m => m.role !== 'system');
+    const body: Record<string, unknown> = {
+      contents: chatMsgs.map(m => ({
+        role: m.role === 'assistant' ? 'model' : 'user',
+        parts: [{ text: m.content }],
+      })),
+      generationConfig: { temperature: 0.7, maxOutputTokens: 8192, responseMimeType: 'application/json' },
+    };
+    if (systemMsg) {
+      body.systemInstruction = { parts: [{ text: systemMsg.content }] };
+    }
+    const response = await fetch(
+      `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${apiKey}`,
+      { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body) }
+    );
+    if (!response.ok) {
+      const err = await response.json().catch(() => ({})) as { error?: { message?: string } };
+      throw new Error(err.error?.message || `Gemini API error (HTTP ${response.status})`);
+    }
+    const data = await response.json() as {
+      candidates: Array<{ content: { parts: Array<{ text: string }> } }>;
+      usageMetadata?: { promptTokenCount: number; candidatesTokenCount: number };
+    };
+    return {
+      content: data.candidates[0]?.content?.parts[0]?.text || '',
+      usage: data.usageMetadata ? {
+        inputTokens: data.usageMetadata.promptTokenCount,
+        outputTokens: data.usageMetadata.candidatesTokenCount,
+      } : undefined,
+    };
+  };
+}
+
+function makeOllamaChat(model: string, baseUrl?: string): LLMChatFn {
+  const url = (baseUrl || 'http://localhost:11434').replace(/\/+$/, ''); // trim trailing '/' so the path is not '//api/chat'
+  return async (messages) => {
+    const response = await fetch(`${url}/api/chat`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ model, messages, stream: false, options: { temperature: 0.7 } }),
+    });
+    if (!response.ok) {
+      const detail = await response.text().catch(() => '');
+      throw new Error(`Ollama API error (HTTP ${response.status})${detail ? ` - ${detail}` : ''}`);
+    }
+    const data = await response.json() as {
+      message?: { content: string };
+      prompt_eval_count?: number;
+      eval_count?: number;
+    };
+    return {
+      content: data.message?.content || '',
+      usage: { inputTokens: data.prompt_eval_count || 0, outputTokens: data.eval_count || 0 },
+    };
+  };
+}
+
+function makeChatFn(config: LLMProviderConfig): LLMChatFn {
+  switch (config.provider) {
+    case 'openai': return makeOpenAIChat(config.apiKey ?? '', config.model);
+    case 'anthropic': return makeAnthropicChat(config.apiKey ?? '', config.model);
+    case 'gemini': return makeGeminiChat(config.apiKey ?? '', config.model);
+    case 'ollama': return makeOllamaChat(config.model, config.baseUrl);
+    default: throw new Error(`Unknown LLM provider: ${(config as LLMProviderConfig).provider}`);
+  }
+}
+
+// ── ProviderRunner ────────────────────────────────────────────────────────────
+
+export class ProviderRunner implements AnalysisRunner {
+  readonly name: string;
+  private readonly chat: LLMChatFn;
+  private readonly _model: string;
+  private readonly _provider: string;
+
+  constructor(config: LLMProviderConfig) {
+    this.name = config.provider;
+    this._model = config.model;
+    this._provider = config.provider;
+    this.chat = makeChatFn(config);
+  }
+
+  /**
+   * Create a ProviderRunner from the current CLI config.
+   * Throws if LLM is not configured, or a non-ollama provider has no API key.
+   */
+  static fromConfig(): ProviderRunner {
+    const config = loadConfig();
+    const llm = config?.dashboard?.llm;
+    if (!llm) {
+      throw new Error('LLM not configured. Run `code-insights config llm` to configure a provider.');
+    }
+    if (llm.provider !== 'ollama' && !llm.apiKey) {
+      throw new Error(
+        `LLM provider '${llm.provider}' requires an API key. Run \`code-insights config llm\` to set it.`
+      );
+    }
+    return new ProviderRunner(llm);
+  }
+
+  async runAnalysis(params: RunAnalysisParams): Promise<RunAnalysisResult> {
+    const start = Date.now();
+
+    const messages: LLMMessage[] = [
+      { role: 'system', content: params.systemPrompt },
+      { role: 'user', content: params.userPrompt },
+    ];
+
+    const response = await this.chat(messages);
+
+    return {
+      rawJson: response.content,
+      durationMs: Date.now() - start,
+      inputTokens: response.usage?.inputTokens ?? 0,
+      outputTokens: response.usage?.outputTokens ?? 0,
+      ...(response.usage?.cacheCreationTokens !== undefined && {
+        cacheCreationTokens: response.usage.cacheCreationTokens,
+      }),
+      ...(response.usage?.cacheReadTokens !== undefined && {
+        cacheReadTokens: response.usage.cacheReadTokens,
+      }),
+      model: this._model,
+      provider: this._provider,
+    };
+  }
+}
From 2e15c855d480f5a3696618410744dca77a696928 Mon Sep 17 00:00:00 2001
From: Srikanth Rao M
Date: Sun, 29 Mar 2026 10:05:15 +0530
Subject: [PATCH 4/8] feat(cli): add hand-maintained JSON schemas for session-analysis and prompt-quality

Flat JSON schemas for claude -p --json-schema structured output.
Derived from AnalysisResponse and PromptQualityResponse in prompt-types.ts.
Schema sync test validates required properties match TypeScript types.
Co-Authored-By: Claude Sonnet 4.6
---
 .../schemas/__tests__/schema-sync.test.ts | 138 ++++++++++++++
 cli/src/analysis/schemas/prompt-quality.json | 132 +++++++++++++
 .../analysis/schemas/session-analysis.json | 177 ++++++++++++++++++
 3 files changed, 447 insertions(+)
 create mode 100644 cli/src/analysis/schemas/__tests__/schema-sync.test.ts
 create mode 100644 cli/src/analysis/schemas/prompt-quality.json
 create mode 100644 cli/src/analysis/schemas/session-analysis.json

diff --git a/cli/src/analysis/schemas/__tests__/schema-sync.test.ts b/cli/src/analysis/schemas/__tests__/schema-sync.test.ts
new file mode 100644
index 0000000..fc16876
--- /dev/null
+++ b/cli/src/analysis/schemas/__tests__/schema-sync.test.ts
@@ -0,0 +1,138 @@
+/**
+ * Schema sync test — ensures hand-maintained JSON schemas stay in sync with
+ * the TypeScript types in prompt-types.ts.
+ *
+ * Why: The JSON schemas are used by `claude -p --json-schema` for structured output.
+ * If someone adds a field to AnalysisResponse or PromptQualityResponse but forgets
+ * to update the schema (or vice versa), this test fails in CI.
+ *
+ * Coverage: top-level required properties, plus the `required` lists of
+ * `facets` and `dimension_scores`. Other nested shapes are not validated here —
+ * the LLM and response parsers provide the runtime validation layer for them.
+ */
+
+import { describe, it, expect } from 'vitest';
+import { readFileSync } from 'fs';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const schemasDir = join(__dirname, '..');
+
+function loadSchema(filename: string): { required?: string[]; properties?: Record<string, unknown> } {
+  const raw = readFileSync(join(schemasDir, filename), 'utf-8');
+  return JSON.parse(raw);
+}
+
+// ── AnalysisResponse top-level required fields ────────────────────────────────
+// Source of truth: AnalysisResponse interface in cli/src/analysis/prompt-types.ts
+// Update this list when you add/remove top-level properties from AnalysisResponse.
+const ANALYSIS_RESPONSE_TOP_LEVEL_REQUIRED = [
+  'facets',
+  'summary',
+  'decisions',
+  'learnings',
+] as const;
+
+// ── AnalysisResponse.facets required fields ───────────────────────────────────
+const ANALYSIS_FACETS_REQUIRED = [
+  'outcome_satisfaction',
+  'workflow_pattern',
+  'had_course_correction',
+  'course_correction_reason',
+  'iteration_count',
+  'friction_points',
+  'effective_patterns',
+] as const;
+
+// ── PromptQualityResponse top-level required fields ───────────────────────────
+// Source of truth: PromptQualityResponse interface in cli/src/analysis/prompt-types.ts
+// Update this list when you add/remove top-level properties from PromptQualityResponse.
+const PROMPT_QUALITY_RESPONSE_TOP_LEVEL_REQUIRED = [
+  'efficiency_score',
+  'message_overhead',
+  'assessment',
+  'takeaways',
+  'findings',
+  'dimension_scores',
+] as const;
+
+// ── PromptQualityDimensionScores required fields ──────────────────────────────
+const DIMENSION_SCORES_REQUIRED = [
+  'context_provision',
+  'request_specificity',
+  'scope_management',
+  'information_timing',
+  'correction_quality',
+] as const;
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+describe('session-analysis.json schema sync', () => {
+  const schema = loadSchema('session-analysis.json');
+
+  it('has all AnalysisResponse top-level required fields', () => {
+    const schemaRequired = schema.required ?? [];
+    for (const field of ANALYSIS_RESPONSE_TOP_LEVEL_REQUIRED) {
+      expect(schemaRequired, `Missing required field '${field}' in session-analysis.json`).toContain(field);
+    }
+  });
+
+  it('has no extra top-level required fields not in AnalysisResponse', () => {
+    const schemaRequired = schema.required ?? [];
+    for (const field of schemaRequired) {
+      expect(
+        ANALYSIS_RESPONSE_TOP_LEVEL_REQUIRED as readonly string[],
+        `Extra required field '${field}' in session-analysis.json not present in AnalysisResponse`
+      ).toContain(field);
+    }
+  });
+
+  it('has all facets required fields', () => {
+    const facetsSchema = (schema.properties?.facets as { required?: string[] }) ?? {};
+    const facetsRequired = facetsSchema.required ?? [];
+    for (const field of ANALYSIS_FACETS_REQUIRED) {
+      expect(facetsRequired, `Missing facets required field '${field}' in session-analysis.json`).toContain(field);
+    }
+  });
+
+  it('schema file is valid JSON', () => {
+    // If loadSchema didn't throw, the file is valid JSON.
+    expect(schema).toBeDefined();
+    expect(typeof schema).toBe('object');
+  });
+});
+
+describe('prompt-quality.json schema sync', () => {
+  const schema = loadSchema('prompt-quality.json');
+
+  it('has all PromptQualityResponse top-level required fields', () => {
+    const schemaRequired = schema.required ?? [];
+    for (const field of PROMPT_QUALITY_RESPONSE_TOP_LEVEL_REQUIRED) {
+      expect(schemaRequired, `Missing required field '${field}' in prompt-quality.json`).toContain(field);
+    }
+  });
+
+  it('has no extra top-level required fields not in PromptQualityResponse', () => {
+    const schemaRequired = schema.required ?? [];
+    for (const field of schemaRequired) {
+      expect(
+        PROMPT_QUALITY_RESPONSE_TOP_LEVEL_REQUIRED as readonly string[],
+        `Extra required field '${field}' in prompt-quality.json not present in PromptQualityResponse`
+      ).toContain(field);
+    }
+  });
+
+  it('has all dimension_scores required fields', () => {
+    const dimSchema = (schema.properties?.dimension_scores as { required?: string[] }) ?? {};
+    const dimRequired = dimSchema.required ?? [];
+    for (const field of DIMENSION_SCORES_REQUIRED) {
+      expect(dimRequired, `Missing dimension_scores field '${field}' in prompt-quality.json`).toContain(field);
+    }
+  });
+
+  it('schema file is valid JSON', () => {
+    expect(schema).toBeDefined();
+    expect(typeof schema).toBe('object');
+  });
+});
diff --git a/cli/src/analysis/schemas/prompt-quality.json b/cli/src/analysis/schemas/prompt-quality.json
new file mode 100644
index 0000000..8441b70
--- /dev/null
+++ b/cli/src/analysis/schemas/prompt-quality.json
@@ -0,0 +1,132 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "PromptQualityResponse",
+  "description": "Hand-maintained JSON schema for claude -p --json-schema. Derived from PromptQualityResponse in prompt-types.ts.
Keep in sync when prompt-types.ts changes.", + "type": "object", + "required": [ + "efficiency_score", + "message_overhead", + "assessment", + "takeaways", + "findings", + "dimension_scores" + ], + "properties": { + "efficiency_score": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "Overall prompt quality score (0-100)" + }, + "message_overhead": { + "type": "number", + "minimum": 0, + "description": "Estimated message overhead (wasted turns)" + }, + "assessment": { + "type": "string", + "description": "Short narrative summary of prompt quality" + }, + "takeaways": { + "type": "array", + "description": "User-facing before/after examples shown on Insights page", + "items": { + "type": "object", + "required": ["type", "category", "label", "message_ref"], + "properties": { + "type": { + "type": "string", + "enum": ["improve", "reinforce"] + }, + "category": { "type": "string" }, + "label": { "type": "string" }, + "message_ref": { "type": "string" }, + "original": { "type": "string" }, + "better_prompt": { "type": "string" }, + "why": { "type": "string" }, + "what_worked": { "type": "string" }, + "why_effective": { "type": "string" } + } + } + }, + "findings": { + "type": "array", + "description": "Categorized findings for Reflect aggregation", + "items": { + "type": "object", + "required": ["category", "type", "description", "message_ref", "impact", "confidence"], + "properties": { + "category": { + "type": "string", + "enum": [ + "vague-request", + "missing-context", + "late-constraint", + "unclear-correction", + "scope-drift", + "missing-acceptance-criteria", + "assumption-not-surfaced", + "precise-request", + "effective-context", + "productive-correction" + ] + }, + "type": { + "type": "string", + "enum": ["deficit", "strength"] + }, + "description": { "type": "string" }, + "message_ref": { "type": "string" }, + "impact": { + "type": "string", + "enum": ["high", "medium", "low"] + }, + "confidence": { + "type": "number", + "minimum": 0, + 
"maximum": 100 + }, + "suggested_improvement": { "type": "string" } + } + } + }, + "dimension_scores": { + "type": "object", + "description": "Five-axis scores (0-100) for prompt quality dimensions", + "required": [ + "context_provision", + "request_specificity", + "scope_management", + "information_timing", + "correction_quality" + ], + "properties": { + "context_provision": { + "type": "number", + "minimum": 0, + "maximum": 100 + }, + "request_specificity": { + "type": "number", + "minimum": 0, + "maximum": 100 + }, + "scope_management": { + "type": "number", + "minimum": 0, + "maximum": 100 + }, + "information_timing": { + "type": "number", + "minimum": 0, + "maximum": 100 + }, + "correction_quality": { + "type": "number", + "minimum": 0, + "maximum": 100 + } + } + } + } +} diff --git a/cli/src/analysis/schemas/session-analysis.json b/cli/src/analysis/schemas/session-analysis.json new file mode 100644 index 0000000..de3e235 --- /dev/null +++ b/cli/src/analysis/schemas/session-analysis.json @@ -0,0 +1,177 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SessionAnalysisResponse", + "description": "Hand-maintained JSON schema for claude -p --json-schema. Derived from AnalysisResponse in prompt-types.ts. 
Keep in sync when prompt-types.ts changes.", + "type": "object", + "required": ["facets", "summary", "decisions", "learnings"], + "properties": { + "facets": { + "type": "object", + "required": [ + "outcome_satisfaction", + "workflow_pattern", + "had_course_correction", + "course_correction_reason", + "iteration_count", + "friction_points", + "effective_patterns" + ], + "properties": { + "outcome_satisfaction": { + "type": "string", + "description": "Session outcome satisfaction level" + }, + "workflow_pattern": { + "type": ["string", "null"], + "description": "Primary workflow pattern observed" + }, + "had_course_correction": { + "type": "boolean", + "description": "Whether the session had a significant course correction" + }, + "course_correction_reason": { + "type": ["string", "null"], + "description": "Reason for course correction if it occurred" + }, + "iteration_count": { + "type": "integer", + "minimum": 0, + "description": "Number of meaningful iterations in the session" + }, + "friction_points": { + "type": "array", + "items": { + "type": "object", + "required": ["category", "description", "severity", "resolution"], + "properties": { + "_reasoning": { + "type": "string", + "description": "CoT scratchpad — saved for prompt tuning" + }, + "category": { + "type": "string", + "enum": [ + "wrong-approach", + "knowledge-gap", + "stale-assumptions", + "incomplete-requirements", + "context-loss", + "scope-creep", + "repeated-mistakes", + "documentation-gap", + "tooling-limitation" + ] + }, + "attribution": { + "type": "string", + "enum": ["user-actionable", "ai-capability", "environmental"] + }, + "description": { "type": "string" }, + "severity": { + "type": "string", + "enum": ["high", "medium", "low"] + }, + "resolution": { "type": "string" } + } + } + }, + "effective_patterns": { + "type": "array", + "items": { + "type": "object", + "required": ["category", "description", "confidence"], + "properties": { + "_reasoning": { + "type": "string", + "description": 
"CoT scratchpad — saved for prompt tuning" + }, + "category": { + "type": "string", + "enum": [ + "structured-planning", + "incremental-implementation", + "verification-workflow", + "systematic-debugging", + "self-correction", + "context-gathering", + "domain-expertise", + "effective-tooling" + ] + }, + "description": { "type": "string" }, + "confidence": { + "type": "number", + "minimum": 0, + "maximum": 100 + }, + "driver": { + "type": "string", + "enum": ["user-driven", "ai-driven", "collaborative"] + } + } + } + } + } + }, + "summary": { + "type": "object", + "required": ["title", "content", "bullets"], + "properties": { + "title": { "type": "string" }, + "content": { "type": "string" }, + "outcome": { + "type": "string", + "enum": ["success", "partial", "abandoned", "blocked"] + }, + "bullets": { + "type": "array", + "items": { "type": "string" } + } + } + }, + "decisions": { + "type": "array", + "items": { + "type": "object", + "required": ["title", "reasoning"], + "properties": { + "title": { "type": "string" }, + "situation": { "type": "string" }, + "choice": { "type": "string" }, + "reasoning": { "type": "string" }, + "alternatives": { + "type": "array", + "items": { + "type": "object", + "required": ["option", "rejected_because"], + "properties": { + "option": { "type": "string" }, + "rejected_because": { "type": "string" } + } + } + }, + "trade_offs": { "type": "string" }, + "revisit_when": { "type": "string" }, + "confidence": { "type": "number", "minimum": 0, "maximum": 100 }, + "evidence": { "type": "array", "items": { "type": "string" } } + } + } + }, + "learnings": { + "type": "array", + "items": { + "type": "object", + "required": ["title"], + "properties": { + "title": { "type": "string" }, + "symptom": { "type": "string" }, + "root_cause": { "type": "string" }, + "takeaway": { "type": "string" }, + "applies_when": { "type": "string" }, + "confidence": { "type": "number", "minimum": 0, "maximum": 100 }, + "evidence": { "type": "array", "items": { 
"type": "string" } } + } + } + } + } +} From 7961cbbfb30a7e782eebf3384e53f4012272826c Mon Sep 17 00:00:00 2001 From: Srikanth Rao M Date: Sun, 29 Mar 2026 10:05:21 +0530 Subject: [PATCH 5/8] feat(cli): add package.json exports for runner modules and JSON schemas Adds ./analysis/runner-types, ./analysis/native-runner, ./analysis/provider-runner, and schema exports. Updates build script to copy JSON schema files to dist/. Co-Authored-By: Claude Sonnet 4.6 --- cli/package.json | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cli/package.json b/cli/package.json index 142c594..be38c41 100644 --- a/cli/package.json +++ b/cli/package.json @@ -24,13 +24,18 @@ "./analysis/normalize-utils": "./dist/analysis/normalize-utils.js", "./analysis/friction-normalize": "./dist/analysis/friction-normalize.js", "./analysis/pattern-normalize": "./dist/analysis/pattern-normalize.js", - "./analysis/prompt-quality-normalize": "./dist/analysis/prompt-quality-normalize.js" + "./analysis/prompt-quality-normalize": "./dist/analysis/prompt-quality-normalize.js", + "./analysis/runner-types": "./dist/analysis/runner-types.js", + "./analysis/native-runner": "./dist/analysis/native-runner.js", + "./analysis/provider-runner": "./dist/analysis/provider-runner.js", + "./analysis/schemas/session-analysis.json": "./dist/analysis/schemas/session-analysis.json", + "./analysis/schemas/prompt-quality.json": "./dist/analysis/schemas/prompt-quality.json" }, "bin": { "code-insights": "./dist/index.js" }, "scripts": { - "build": "tsc", + "build": "tsc && node -e \"const{cpSync,mkdirSync}=require('fs');mkdirSync('dist/analysis/schemas',{recursive:true});cpSync('src/analysis/schemas/session-analysis.json','dist/analysis/schemas/session-analysis.json');cpSync('src/analysis/schemas/prompt-quality.json','dist/analysis/schemas/prompt-quality.json');\"", "dev": "tsc --watch", "start": "node dist/index.js", "test": "vitest run", From c56fc9599ccf275eacf7179a99efa305b62e1763 Mon Sep 17 00:00:00 
2001 From: Srikanth Rao M Date: Sun, 29 Mar 2026 10:15:41 +0530 Subject: [PATCH 6/8] fix(cli): parse claude -p event envelope in ClaudeNativeRunner claude -p --output-format json returns a JSON array of typed events. The actual LLM text lives in the result event's result field, not in the raw output. Extract it via extractResultFromEnvelope(), check is_error, and throw with a clear message on claude-level failures. Also adds random suffix to temp file names to prevent concurrent collisions. Tests updated to mock the envelope format throughout. Co-Authored-By: Claude Sonnet 4.6 --- .../analysis/__tests__/native-runner.test.ts | 119 +++++++++++++++--- cli/src/analysis/native-runner.ts | 62 ++++++++- 2 files changed, 158 insertions(+), 23 deletions(-) diff --git a/cli/src/analysis/__tests__/native-runner.test.ts b/cli/src/analysis/__tests__/native-runner.test.ts index d2f3b8f..f5c728d 100644 --- a/cli/src/analysis/__tests__/native-runner.test.ts +++ b/cli/src/analysis/__tests__/native-runner.test.ts @@ -1,6 +1,4 @@ -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { join } from 'path'; -import { tmpdir } from 'os'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; // Mock child_process and fs before importing the module under test. vi.mock('child_process', () => ({ @@ -19,10 +17,29 @@ const mockExecFileSync = vi.mocked(execFileSync); const mockWriteFileSync = vi.mocked(writeFileSync); const mockUnlinkSync = vi.mocked(unlinkSync); +// ── Helpers ─────────────────────────────────────────────────────────────────── + +/** + * Build the JSON envelope that `claude -p --output-format json` actually returns. + * The LLM text lives in the result event's `result` field. 
+ */ +function makeEnvelope(llmText: string, isError = false): string { + return JSON.stringify([ + { type: 'system', subtype: 'init', session_id: 'test-session' }, + { type: 'assistant', message: { content: [{ type: 'text', text: llmText }] } }, + { + type: 'result', + subtype: isError ? 'error_during_execution' : 'success', + result: llmText, + is_error: isError, + }, + ]); +} + +// ── validate() ──────────────────────────────────────────────────────────────── + describe('ClaudeNativeRunner.validate()', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); + beforeEach(() => vi.clearAllMocks()); it('does not throw when claude is in PATH', () => { mockExecFileSync.mockReturnValueOnce(Buffer.from('claude 1.0.0')); @@ -36,16 +53,17 @@ describe('ClaudeNativeRunner.validate()', () => { }); }); +// ── runAnalysis() ───────────────────────────────────────────────────────────── + describe('ClaudeNativeRunner.runAnalysis()', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); + beforeEach(() => vi.clearAllMocks()); it('calls execFileSync with correct args (no schema)', async () => { - mockExecFileSync.mockReturnValueOnce('{"summary": {"title": "test", "content": "c", "bullets": []}}' as unknown as Buffer); + const llmJson = '{"summary": {"title": "test", "content": "c", "bullets": []}}'; + mockExecFileSync.mockReturnValueOnce(makeEnvelope(llmJson) as unknown as Buffer); const runner = new ClaudeNativeRunner(); - const result = await runner.runAnalysis({ + await runner.runAnalysis({ systemPrompt: 'You are an analyst.', userPrompt: 'Analyze this session.', }); @@ -67,7 +85,8 @@ describe('ClaudeNativeRunner.runAnalysis()', () => { }); it('includes --json-schema arg when jsonSchema is provided', async () => { - mockExecFileSync.mockReturnValueOnce('{"summary": {"title": "t", "content": "c", "bullets": []}}' as unknown as Buffer); + const llmJson = '{"summary": {"title": "t", "content": "c", "bullets": []}}'; + mockExecFileSync.mockReturnValueOnce(makeEnvelope(llmJson) 
as unknown as Buffer); const runner = new ClaudeNativeRunner(); await runner.runAnalysis({ @@ -79,19 +98,30 @@ describe('ClaudeNativeRunner.runAnalysis()', () => { const callArgs = mockExecFileSync.mock.calls[0][1] as string[]; expect(callArgs).toContain('--json-schema'); - // Schema file path should be in args const schemaIndex = callArgs.indexOf('--json-schema'); expect(callArgs[schemaIndex + 1]).toContain('ci-schema-'); }); + it('extracts rawJson from the result event (not the full envelope)', async () => { + const llmJson = '{"summary": {"title": "T", "content": "C", "bullets": []}}'; + mockExecFileSync.mockReturnValueOnce(makeEnvelope(llmJson) as unknown as Buffer); + const runner = new ClaudeNativeRunner(); + + const result = await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' }); + + // Must be the extracted LLM text, not the raw event array + expect(result.rawJson).toBe(llmJson); + expect(result.rawJson).not.toContain('"type":"result"'); + }); + it('returns correct result shape with zero tokens', async () => { - const rawJson = '{"summary": {"title": "T", "content": "C", "bullets": []}}'; - mockExecFileSync.mockReturnValueOnce(rawJson as unknown as Buffer); + const llmJson = '{"summary": {"title": "T", "content": "C", "bullets": []}}'; + mockExecFileSync.mockReturnValueOnce(makeEnvelope(llmJson) as unknown as Buffer); const runner = new ClaudeNativeRunner(); const result = await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' }); - expect(result.rawJson).toBe(rawJson); + expect(result.rawJson).toBe(llmJson); expect(result.inputTokens).toBe(0); expect(result.outputTokens).toBe(0); expect(result.model).toBe('claude-native'); @@ -99,11 +129,41 @@ describe('ClaudeNativeRunner.runAnalysis()', () => { expect(result.durationMs).toBeGreaterThanOrEqual(0); }); + it('throws when is_error is true on the result event', async () => { + const errorMsg = 'Context window exceeded'; + mockExecFileSync.mockReturnValueOnce(makeEnvelope(errorMsg, true) as 
unknown as Buffer); + const runner = new ClaudeNativeRunner(); + + await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' })) + .rejects.toThrow(/claude -p reported an error/); + }); + + it('throws when output is not a JSON array', async () => { + mockExecFileSync.mockReturnValueOnce('not json at all' as unknown as Buffer); + const runner = new ClaudeNativeRunner(); + + await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' })) + .rejects.toThrow(/non-JSON output/); + }); + + it('throws when JSON array has no result event', async () => { + const noResultEnvelope = JSON.stringify([ + { type: 'system', subtype: 'init' }, + { type: 'assistant', message: {} }, + ]); + mockExecFileSync.mockReturnValueOnce(noResultEnvelope as unknown as Buffer); + const runner = new ClaudeNativeRunner(); + + await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' })) + .rejects.toThrow(/no result event/); + }); + it('writes system prompt to a temp file', async () => { - mockExecFileSync.mockReturnValueOnce('' as unknown as Buffer); + const llmJson = '{"summary": {"title": "T", "content": "C", "bullets": []}}'; + mockExecFileSync.mockReturnValueOnce(makeEnvelope(llmJson) as unknown as Buffer); const runner = new ClaudeNativeRunner(); - await runner.runAnalysis({ systemPrompt: 'SYSTEM_CONTENT', userPrompt: 'u' }).catch(() => {}); + await runner.runAnalysis({ systemPrompt: 'SYSTEM_CONTENT', userPrompt: 'u' }); expect(mockWriteFileSync).toHaveBeenCalledWith( expect.stringContaining('ci-prompt-'), @@ -112,8 +172,29 @@ describe('ClaudeNativeRunner.runAnalysis()', () => { ); }); + it('temp file names include a random suffix to prevent collisions', async () => { + // Run twice and verify the file IDs differ + const llmJson = '{}'; + mockExecFileSync + .mockReturnValueOnce(makeEnvelope(llmJson) as unknown as Buffer) + .mockReturnValueOnce(makeEnvelope(llmJson) as unknown as Buffer); + + const runner = new ClaudeNativeRunner(); + await runner.runAnalysis({ 
systemPrompt: 's', userPrompt: 'u' }); + await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' }); + + const promptFiles = (mockWriteFileSync.mock.calls as unknown as [string, string, string][]) + .filter(([path]) => path.includes('ci-prompt-')) + .map(([path]) => path); + + expect(promptFiles).toHaveLength(2); + // The two file paths must differ (random suffix) + expect(promptFiles[0]).not.toBe(promptFiles[1]); + }); + it('cleans up temp files when execFileSync succeeds', async () => { - mockExecFileSync.mockReturnValueOnce('{}' as unknown as Buffer); + const llmJson = '{"summary": {"title": "T", "content": "C", "bullets": []}}'; + mockExecFileSync.mockReturnValueOnce(makeEnvelope(llmJson) as unknown as Buffer); const runner = new ClaudeNativeRunner(); await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' }); diff --git a/cli/src/analysis/native-runner.ts b/cli/src/analysis/native-runner.ts index ff11138..69592f7 100644 --- a/cli/src/analysis/native-runner.ts +++ b/cli/src/analysis/native-runner.ts @@ -14,6 +14,55 @@ import { tmpdir } from 'os'; import { join } from 'path'; import type { AnalysisRunner, RunAnalysisParams, RunAnalysisResult } from './runner-types.js'; +// `claude -p --output-format json` returns a JSON array of typed event objects. +// We care only about the final result event. +interface ClaudeEvent { + type: string; + subtype?: string; +} + +interface ClaudeResultEvent extends ClaudeEvent { + type: 'result'; + subtype: 'success' | 'error_max_turns' | 'error_during_execution'; + result: string; + is_error: boolean; +} + +function isResultEvent(e: ClaudeEvent): e is ClaudeResultEvent { + return e.type === 'result'; +} + +/** + * Extract the LLM text payload from a `claude -p --output-format json` response. + * The output is an array of event objects; the actual content lives in the + * `result` event's `result` field. 
+ */ +function extractResultFromEnvelope(rawOutput: string): string { + let events: ClaudeEvent[]; + try { + events = JSON.parse(rawOutput) as ClaudeEvent[]; + } catch { + throw new Error( + `claude -p returned non-JSON output. Output preview: ${rawOutput.slice(0, 200)}` + ); + } + + if (!Array.isArray(events)) { + throw new Error('claude -p output was JSON but not an array of events as expected.'); + } + + const resultEvent = events.find(isResultEvent); + if (!resultEvent) { + throw new Error('claude -p output contained no result event. Events: ' + JSON.stringify(events.map(e => e.type))); + } + + if (resultEvent.is_error) { + throw new Error(`claude -p reported an error: ${resultEvent.result}`); + } + + return resultEvent.result; +} + export class ClaudeNativeRunner implements AnalysisRunner { readonly name = 'claude-code-native'; @@ -35,16 +84,17 @@ export class ClaudeNativeRunner implements AnalysisRunner { async runAnalysis(params: RunAnalysisParams): Promise<RunAnalysisResult> { const start = Date.now(); - const ts = Date.now(); + // Include a random suffix to avoid collisions if two analyses run concurrently. + const fileId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; // Write system prompt to a temp file — claude -p reads it via --append-system-prompt-file. // Temp file avoids command-line length limits and shell escaping issues. - const promptFile = join(tmpdir(), `ci-prompt-${ts}.txt`); + const promptFile = join(tmpdir(), `ci-prompt-${fileId}.txt`); writeFileSync(promptFile, params.systemPrompt, 'utf-8'); let schemaFile: string | undefined; if (params.jsonSchema) { - schemaFile = join(tmpdir(), `ci-schema-${ts}.json`); + schemaFile = join(tmpdir(), `ci-schema-${fileId}.json`); writeFileSync(schemaFile, JSON.stringify(params.jsonSchema), 'utf-8'); } @@ -66,8 +116,12 @@ export class ClaudeNativeRunner implements AnalysisRunner { maxBuffer: 10 * 1024 * 1024, // 10 MB }); + // claude -p --output-format json wraps the response in an event array.
+ // Extract the actual LLM text from the result event. + const rawJson = extractResultFromEnvelope(rawOutput); + return { - rawJson: rawOutput, + rawJson, durationMs: Date.now() - start, inputTokens: 0, outputTokens: 0, From 3f10671fe72927e83606a389f9e896ccbce8445c Mon Sep 17 00:00:00 2001 From: Srikanth Rao M Date: Sun, 29 Mar 2026 10:15:48 +0530 Subject: [PATCH 7/8] test(cli): add Gemini + Ollama dispatch tests to ProviderRunner Adds coverage for all four configured providers. Also adds a comment explaining why LLMMessage.content is intentionally narrower (string only) than the server type that allows ContentBlock[]. Co-Authored-By: Claude Sonnet 4.6 --- .../__tests__/provider-runner.test.ts | 86 +++++++++++++++++++ cli/src/analysis/provider-runner.ts | 3 + 2 files changed, 89 insertions(+) diff --git a/cli/src/analysis/__tests__/provider-runner.test.ts b/cli/src/analysis/__tests__/provider-runner.test.ts index a7be38f..4b671d3 100644 --- a/cli/src/analysis/__tests__/provider-runner.test.ts +++ b/cli/src/analysis/__tests__/provider-runner.test.ts @@ -155,6 +155,92 @@ describe('ProviderRunner.runAnalysis() — Anthropic', () => { }); }); +describe('ProviderRunner.runAnalysis() — Gemini', () => { + beforeEach(() => vi.clearAllMocks()); + + it('calls Gemini endpoint with correct URL and payload', async () => { + const rawJson = '{"summary": {"title": "G", "content": "C", "bullets": []}}'; + mockFetch.mockResolvedValueOnce(makeFetchResponse({ + candidates: [{ content: { parts: [{ text: rawJson }] } }], + usageMetadata: { promptTokenCount: 150, candidatesTokenCount: 40 }, + })); + + const runner = new ProviderRunner(makeConfig({ provider: 'gemini', model: 'gemini-1.5-flash', apiKey: 'gk-test' })); + const result = await runner.runAnalysis({ systemPrompt: 'sys', userPrompt: 'user' }); + + const [url, init] = mockFetch.mock.calls[0] as [string, RequestInit]; + expect(url).toContain('generativelanguage.googleapis.com'); + expect(url).toContain('gk-test'); + 
expect(url).toContain('gemini-1.5-flash'); + + const body = JSON.parse(init.body as string); + // System message routed to systemInstruction, not contents + expect(body.systemInstruction).toEqual({ parts: [{ text: 'sys' }] }); + expect(body.contents[0].parts[0].text).toBe('user'); + + expect(result.rawJson).toBe(rawJson); + expect(result.inputTokens).toBe(150); + expect(result.outputTokens).toBe(40); + expect(result.provider).toBe('gemini'); + }); + + it('throws on Gemini API error', async () => { + mockFetch.mockResolvedValueOnce(makeFetchResponse( + { error: { message: 'API key not valid.' } }, + 400 + )); + + const runner = new ProviderRunner(makeConfig({ provider: 'gemini', model: 'gemini-1.5-flash', apiKey: 'bad' })); + await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' })) + .rejects.toThrow('API key not valid.'); + }); +}); + +describe('ProviderRunner.runAnalysis() — Ollama', () => { + beforeEach(() => vi.clearAllMocks()); + + it('calls Ollama endpoint with correct payload', async () => { + const rawJson = '{"summary": {"title": "O", "content": "C", "bullets": []}}'; + mockFetch.mockResolvedValueOnce(makeFetchResponse({ + message: { content: rawJson }, + prompt_eval_count: 80, + eval_count: 30, + })); + + const runner = new ProviderRunner(makeConfig({ provider: 'ollama', model: 'llama3', apiKey: undefined })); + const result = await runner.runAnalysis({ systemPrompt: 'sys', userPrompt: 'user' }); + + const [url, init] = mockFetch.mock.calls[0] as [string, RequestInit]; + expect(url).toBe('http://localhost:11434/api/chat'); + + const body = JSON.parse(init.body as string); + expect(body.model).toBe('llama3'); + expect(body.stream).toBe(false); + + expect(result.rawJson).toBe(rawJson); + expect(result.inputTokens).toBe(80); + expect(result.outputTokens).toBe(30); + expect(result.provider).toBe('ollama'); + }); + + it('uses custom baseUrl when provided', async () => { + mockFetch.mockResolvedValueOnce(makeFetchResponse({ + message: { content: 
'{}' }, + })); + + const runner = new ProviderRunner(makeConfig({ + provider: 'ollama', + model: 'mistral', + apiKey: undefined, + baseUrl: 'http://my-ollama:11434', + })); + await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' }); + + const [url] = mockFetch.mock.calls[0] as [string]; + expect(url).toBe('http://my-ollama:11434/api/chat'); + }); +}); + describe('ProviderRunner — jsonSchema param', () => { beforeEach(() => vi.clearAllMocks()); diff --git a/cli/src/analysis/provider-runner.ts b/cli/src/analysis/provider-runner.ts index 2ab84ed..52f5f83 100644 --- a/cli/src/analysis/provider-runner.ts +++ b/cli/src/analysis/provider-runner.ts @@ -18,6 +18,9 @@ import type { AnalysisRunner, RunAnalysisParams, RunAnalysisResult } from './run interface LLMMessage { role: 'system' | 'user' | 'assistant'; + // Intentionally narrower than server/src/llm/types.ts LLMMessage (which allows ContentBlock[]). + // ProviderRunner always sends plain strings — prompt caching via ContentBlock[] is a + // dashboard/API concern. The insights CLI command builds simple system+user pairs. 
content: string; } From 1bdb9dd4640ee4693d69a5c7da5ed2c09f766ca2 Mon Sep 17 00:00:00 2001 From: Srikanth Rao M Date: Sun, 29 Mar 2026 10:41:35 +0530 Subject: [PATCH 8/8] test(cli): add coverage gap tests for native-runner, provider-runner, schema-sync native-runner: JSON-object-not-array, empty array, error_max_turns subtype provider-runner: Anthropic system message extraction, missing usage defaults to 0, unknown provider throws schema-sync: friction_points item required fields, effective_patterns item required fields, findings item required fields Co-Authored-By: Claude Sonnet 4.6 --- .../analysis/__tests__/native-runner.test.ts | 28 +++++++++++++ .../__tests__/provider-runner.test.ts | 42 +++++++++++++++++++ .../schemas/__tests__/schema-sync.test.ts | 27 ++++++++++++ 3 files changed, 97 insertions(+) diff --git a/cli/src/analysis/__tests__/native-runner.test.ts b/cli/src/analysis/__tests__/native-runner.test.ts index f5c728d..2b4c419 100644 --- a/cli/src/analysis/__tests__/native-runner.test.ts +++ b/cli/src/analysis/__tests__/native-runner.test.ts @@ -146,6 +146,22 @@ describe('ClaudeNativeRunner.runAnalysis()', () => { .rejects.toThrow(/non-JSON output/); }); + it('throws when output is JSON but not an array', async () => { + mockExecFileSync.mockReturnValueOnce('{"type":"result"}' as unknown as Buffer); + const runner = new ClaudeNativeRunner(); + + await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' })) + .rejects.toThrow(/not an array/); + }); + + it('throws when event array is empty', async () => { + mockExecFileSync.mockReturnValueOnce('[]' as unknown as Buffer); + const runner = new ClaudeNativeRunner(); + + await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' })) + .rejects.toThrow(/no result event/); + }); + it('throws when JSON array has no result event', async () => { const noResultEnvelope = JSON.stringify([ { type: 'system', subtype: 'init' }, @@ -158,6 +174,18 @@ describe('ClaudeNativeRunner.runAnalysis()', () => { 
.rejects.toThrow(/no result event/); }); + it('throws on error_max_turns subtype', async () => { + const envelope = JSON.stringify([ + { type: 'system', subtype: 'init' }, + { type: 'result', subtype: 'error_max_turns', result: 'Max turns reached', is_error: true }, + ]); + mockExecFileSync.mockReturnValueOnce(envelope as unknown as Buffer); + const runner = new ClaudeNativeRunner(); + + await expect(runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' })) + .rejects.toThrow(/claude -p reported an error.*Max turns/); + }); + it('writes system prompt to a temp file', async () => { const llmJson = '{"summary": {"title": "T", "content": "C", "bullets": []}}'; mockExecFileSync.mockReturnValueOnce(makeEnvelope(llmJson) as unknown as Buffer); diff --git a/cli/src/analysis/__tests__/provider-runner.test.ts b/cli/src/analysis/__tests__/provider-runner.test.ts index 4b671d3..749b41c 100644 --- a/cli/src/analysis/__tests__/provider-runner.test.ts +++ b/cli/src/analysis/__tests__/provider-runner.test.ts @@ -155,6 +155,48 @@ describe('ProviderRunner.runAnalysis() — Anthropic', () => { }); }); +describe('ProviderRunner.runAnalysis() — Anthropic message shaping', () => { + beforeEach(() => vi.clearAllMocks()); + + it('extracts system message from messages array for Anthropic', async () => { + mockFetch.mockResolvedValueOnce(makeFetchResponse({ + content: [{ text: '{}' }], + usage: { input_tokens: 10, output_tokens: 5 }, + })); + + const runner = new ProviderRunner(makeConfig({ provider: 'anthropic', model: 'claude-sonnet-4-20250514', apiKey: 'ak' })); + await runner.runAnalysis({ systemPrompt: 'BE HELPFUL', userPrompt: 'analyze' }); + + const body = JSON.parse((mockFetch.mock.calls[0][1] as RequestInit).body as string); + expect(body.system).toBe('BE HELPFUL'); + expect(body.messages).toEqual([{ role: 'user', content: 'analyze' }]); + }); +}); + +describe('ProviderRunner.runAnalysis() — missing usage', () => { + beforeEach(() => vi.clearAllMocks()); + + it('returns zero 
tokens when usage is missing from OpenAI response', async () => { + mockFetch.mockResolvedValueOnce(makeFetchResponse({ + choices: [{ message: { content: '{}' } }], + // no usage field + })); + + const runner = new ProviderRunner(makeConfig()); + const result = await runner.runAnalysis({ systemPrompt: 's', userPrompt: 'u' }); + + expect(result.inputTokens).toBe(0); + expect(result.outputTokens).toBe(0); + }); +}); + +describe('ProviderRunner — constructor', () => { + it('throws on unknown provider', () => { + expect(() => new ProviderRunner({ provider: 'unknown' as never, model: 'x', apiKey: 'k' })) + .toThrow(/Unknown LLM provider/); + }); +}); + describe('ProviderRunner.runAnalysis() — Gemini', () => { beforeEach(() => vi.clearAllMocks()); diff --git a/cli/src/analysis/schemas/__tests__/schema-sync.test.ts b/cli/src/analysis/schemas/__tests__/schema-sync.test.ts index fc16876..2cac794 100644 --- a/cli/src/analysis/schemas/__tests__/schema-sync.test.ts +++ b/cli/src/analysis/schemas/__tests__/schema-sync.test.ts @@ -96,6 +96,23 @@ describe('session-analysis.json schema sync', () => { } }); + it('friction_points items have required category, description, severity, resolution fields', () => { + const fpSchema = (schema.properties?.facets as { properties?: Record<string, { items?: { required?: string[] } }> }) + ?.properties?.friction_points?.items; + expect(fpSchema?.required).toContain('category'); + expect(fpSchema?.required).toContain('description'); + expect(fpSchema?.required).toContain('severity'); + expect(fpSchema?.required).toContain('resolution'); + }); + + it('effective_patterns items have required category, description, confidence fields', () => { + const epSchema = (schema.properties?.facets as { properties?: Record<string, { items?: { required?: string[] } }> }) + ?.properties?.effective_patterns?.items; + expect(epSchema?.required).toContain('category'); + expect(epSchema?.required).toContain('description'); + expect(epSchema?.required).toContain('confidence'); + }); + it('schema file is valid JSON', () => { // If loadSchema didn't throw,
the file is valid JSON. expect(schema).toBeDefined(); @@ -131,6 +148,16 @@ describe('prompt-quality.json schema sync', () => { } }); + it('findings items have required category, type, description, message_ref, impact, confidence fields', () => { + const fSchema = (schema.properties?.findings as { items?: { required?: string[] } })?.items; + expect(fSchema?.required).toContain('category'); + expect(fSchema?.required).toContain('type'); + expect(fSchema?.required).toContain('description'); + expect(fSchema?.required).toContain('message_ref'); + expect(fSchema?.required).toContain('impact'); + expect(fSchema?.required).toContain('confidence'); + }); + it('schema file is valid JSON', () => { expect(schema).toBeDefined(); expect(typeof schema).toBe('object');