diff --git a/src/codex-cache.ts b/src/codex-cache.ts new file mode 100644 index 0000000..d408cb5 --- /dev/null +++ b/src/codex-cache.ts @@ -0,0 +1,143 @@ +import { readFile, mkdir, stat, open, rename, unlink } from 'fs/promises' +import { existsSync } from 'fs' +import { randomBytes } from 'crypto' +import { join } from 'path' +import { homedir } from 'os' + +import type { ParsedProviderCall } from './providers/types.js' + +const CODEX_CACHE_VERSION = 1 +const CACHE_FILE = 'codex-results.json' + +type FileFingerprint = { mtimeMs: number; sizeBytes: number } + +type FileEntry = { + mtimeMs: number + sizeBytes: number + project: string + calls: ParsedProviderCall[] +} + +type ResultCache = { + version: number + files: Record<string, FileEntry> +} + +function getCacheDir(): string { + return process.env['CODEBURN_CACHE_DIR'] ?? join(homedir(), '.cache', 'codeburn') +} + +function getCachePath(): string { + return join(getCacheDir(), CACHE_FILE) +} + +let memCache: ResultCache | null = null + +async function loadCache(): Promise<ResultCache> { + if (memCache) return memCache + try { + const raw = await readFile(getCachePath(), 'utf-8') + const cache = JSON.parse(raw) as ResultCache + if (cache.version === CODEX_CACHE_VERSION && cache.files && typeof cache.files === 'object') { + memCache = cache + return cache + } + } catch {} + memCache = { version: CODEX_CACHE_VERSION, files: {} } + return memCache +} + +function getEntry(cache: ResultCache, filePath: string, fp: FileFingerprint): FileEntry | null { + if (!Object.hasOwn(cache.files, filePath)) return null + const entry = cache.files[filePath] + if (entry && entry.mtimeMs === fp.mtimeMs && entry.sizeBytes === fp.sizeBytes) { + return entry + } + return null +} + +export async function readCachedCodexResults( + filePath: string, +): Promise<ParsedProviderCall[] | null> { + try { + const s = await stat(filePath) + const cache = await loadCache() + const entry = getEntry(cache, filePath, { mtimeMs: s.mtimeMs, sizeBytes: s.size }) + return entry?.calls ?? 
null + } catch {} + return null +} + +export async function getCachedCodexProject( + filePath: string, +): Promise<string | null> { + try { + const s = await stat(filePath) + const cache = await loadCache() + const entry = getEntry(cache, filePath, { mtimeMs: s.mtimeMs, sizeBytes: s.size }) + return entry?.project ?? null + } catch {} + return null +} + +export async function fingerprintFile( + filePath: string, +): Promise<FileFingerprint | null> { + try { + const s = await stat(filePath) + return { mtimeMs: s.mtimeMs, sizeBytes: s.size } + } catch { + return null + } +} + +export async function writeCachedCodexResults( + filePath: string, + project: string, + calls: ParsedProviderCall[], + fingerprint: FileFingerprint, +): Promise<void> { + try { + const cache = await loadCache() + cache.files[filePath] = { + mtimeMs: fingerprint.mtimeMs, + sizeBytes: fingerprint.sizeBytes, + project, + calls, + } + } catch {} +} + +export async function flushCodexCache(): Promise<void> { + if (!memCache) return + try { + // Evict entries for files that no longer exist on disk + const paths = Object.keys(memCache.files) + for (const p of paths) { + try { + await stat(p) + } catch { + delete memCache.files[p] + } + } + + const dir = getCacheDir() + if (!existsSync(dir)) await mkdir(dir, { recursive: true }) + const finalPath = getCachePath() + const tempPath = `${finalPath}.${randomBytes(8).toString('hex')}.tmp` + const payload = JSON.stringify(memCache) + const handle = await open(tempPath, 'w', 0o600) + try { + await handle.writeFile(payload, { encoding: 'utf-8' }) + await handle.sync() + } finally { + await handle.close() + } + try { + await rename(tempPath, finalPath) + } catch (err) { + try { await unlink(tempPath) } catch {} + throw err + } + } catch {} +} diff --git a/src/parser.ts b/src/parser.ts index ab4eacd..530a99b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -3,6 +3,7 @@ import { basename, join } from 'path' import { readSessionLines } from './fs-utils.js' import { calculateCost, getShortModelName } from 
'./models.js' import { discoverAllSessions, getProvider } from './providers/index.js' +import { flushCodexCache } from './codex-cache.js' import type { ParsedProviderCall } from './providers/types.js' import type { AssistantMessageContent, @@ -402,36 +403,40 @@ async function parseProviderSources( const sessionMap = new Map() - for (const source of sources) { - if (dateRange) { - try { - const s = await stat(source.path) - if (s.mtimeMs < dateRange.start.getTime()) continue - } catch { /* fall through; treat unknown stat as "may contain data" */ } - } - const parser = provider.createSessionParser( - { path: source.path, project: source.project, provider: providerName }, - seenKeys, - ) - - for await (const call of parser.parse()) { + try { + for (const source of sources) { if (dateRange) { - if (!call.timestamp) continue - const ts = new Date(call.timestamp) - if (ts < dateRange.start || ts > dateRange.end) continue + try { + const s = await stat(source.path) + if (s.mtimeMs < dateRange.start.getTime()) continue + } catch { /* fall through; treat unknown stat as "may contain data" */ } } + const parser = provider.createSessionParser( + { path: source.path, project: source.project, provider: providerName }, + seenKeys, + ) + + for await (const call of parser.parse()) { + if (dateRange) { + if (!call.timestamp) continue + const ts = new Date(call.timestamp) + if (ts < dateRange.start || ts > dateRange.end) continue + } - const turn = providerCallToTurn(call) - const classified = classifyTurn(turn) - const key = `${providerName}:${call.sessionId}:${source.project}` + const turn = providerCallToTurn(call) + const classified = classifyTurn(turn) + const key = `${providerName}:${call.sessionId}:${source.project}` - const existing = sessionMap.get(key) - if (existing) { - existing.turns.push(classified) - } else { - sessionMap.set(key, { project: source.project, turns: [classified] }) + const existing = sessionMap.get(key) + if (existing) { + 
existing.turns.push(classified) + } else { + sessionMap.set(key, { project: source.project, turns: [classified] }) + } } } + } finally { + if (providerName === 'codex') await flushCodexCache() } const projectMap = new Map() diff --git a/src/providers/codex.ts b/src/providers/codex.ts index 2eac408..dc21aa8 100644 --- a/src/providers/codex.ts +++ b/src/providers/codex.ts @@ -1,9 +1,10 @@ -import { readdir, stat } from 'fs/promises' +import { readdir, stat, open } from 'fs/promises' import { basename, join } from 'path' import { homedir } from 'os' import { readSessionFile } from '../fs-utils.js' import { calculateCost } from '../models.js' +import { readCachedCodexResults, writeCachedCodexResults, getCachedCodexProject, fingerprintFile } from '../codex-cache.js' import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js' const modelDisplayNames: Record<string, string> = { @@ -69,14 +70,21 @@ function sanitizeProject(cwd: string): string { } async function readFirstLine(filePath: string): Promise<CodexEntry | null> { - const content = await readSessionFile(filePath) - if (content === null) return null - const line = content.split('\n')[0] - if (!line?.trim()) return null + let fh try { + fh = await open(filePath, 'r') + const buf = Buffer.alloc(16384) + const { bytesRead } = await fh.read(buf, 0, 16384, 0) + if (bytesRead === 0) return null + const text = buf.toString('utf-8', 0, bytesRead) + const nl = text.indexOf('\n') + const line = nl >= 0 ? 
text.slice(0, nl) : text + if (!line.trim()) return null return JSON.parse(line) as CodexEntry } catch { return null + } finally { + await fh?.close() } } @@ -121,6 +129,12 @@ async function discoverSessionsInDir(codexDir: string): Promise const s = await stat(filePath).catch(() => null) if (!s?.isFile()) continue + const cachedProject = await getCachedCodexProject(filePath) + if (cachedProject) { + sources.push({ path: filePath, project: cachedProject, provider: 'codex' }) + continue + } + const { valid, meta } = await isValidCodexSession(filePath) if (!valid || !meta) continue @@ -145,6 +159,19 @@ function resolveModel(info: CodexEntry['payload'], sessionModel?: string): strin function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser { return { async *parse(): AsyncGenerator<ParsedProviderCall> { + const cached = await readCachedCodexResults(source.path) + if (cached) { + for (const call of cached) { + if (seenKeys.has(call.deduplicationKey)) continue + seenKeys.add(call.deduplicationKey) + yield call + } + return + } + + const fp = await fingerprintFile(source.path) + if (!fp) return + const content = await readSessionFile(source.path) if (content === null) return const lines = content.split('\n').filter(l => l.trim()) @@ -157,6 +184,7 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars let prevReasoning = 0 let pendingTools: string[] = [] let pendingUserMessage = '' + const results: ParsedProviderCall[] = [] for (const line of lines) { let entry: CodexEntry @@ -258,7 +286,7 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars 0, ) - yield { + results.push({ provider: 'codex', model, inputTokens: uncachedInputTokens, @@ -276,12 +304,18 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars deduplicationKey: dedupKey, userMessage: pendingUserMessage, sessionId, - } + }) pendingTools = [] pendingUserMessage = '' } } + + await writeCachedCodexResults(source.path, source.project, results, fp) + + for 
(const call of results) { + yield call + } }, } }