Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions src/fs-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ import { createInterface } from 'readline'
// 128 MiB size cap, in bytes.
// NOTE(review): presumably the limit applied to whole-file reads of a session
// file; the reader that enforces it is not visible here, so confirm.
export const MAX_SESSION_FILE_BYTES = 128 * 1024 * 1024
// 8 MiB, in bytes.
// NOTE(review): presumably the file size at which readers switch from a
// whole-file read to streaming; its usage is not visible here, so confirm.
export const STREAM_THRESHOLD_BYTES = 8 * 1024 * 1024

// Line-by-line streaming has bounded memory (one line at a time) and is not
// constrained by V8's string limit, so it can safely handle multi-GB session
// files. The cap here is purely a sanity check against pathological inputs;
// real Codex sessions for heavy users have been observed at 250+ MB and will
// continue to grow as context windows expand.
// The value is 2 GiB, expressed in bytes.
export const MAX_STREAM_SESSION_FILE_BYTES = 2 * 1024 * 1024 * 1024

/**
 * Reports whether verbose diagnostics are switched on.
 *
 * The flag is read from the `CODEBURN_VERBOSE` environment variable on every
 * call, and only the exact string `'1'` counts as enabled.
 *
 * @returns `true` when `CODEBURN_VERBOSE` is exactly `'1'`, otherwise `false`.
 */
function verbose(): boolean {
  const flag = process.env.CODEBURN_VERBOSE
  return flag === '1'
}
Expand Down Expand Up @@ -78,8 +85,10 @@ export async function* readSessionLines(filePath: string): AsyncGenerator<string
return
}

if (size > MAX_SESSION_FILE_BYTES) {
warn(`skipped oversize file ${filePath} (${size} bytes > cap ${MAX_SESSION_FILE_BYTES})`)
if (size > MAX_STREAM_SESSION_FILE_BYTES) {
warn(
`skipped oversize file ${filePath} (${size} bytes > stream cap ${MAX_STREAM_SESSION_FILE_BYTES})`,
)
return
}

Expand Down
21 changes: 16 additions & 5 deletions src/providers/codex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { createInterface } from 'readline'
import { basename, join } from 'path'
import { homedir } from 'os'

import { readSessionFile } from '../fs-utils.js'
import { readSessionLines } from '../fs-utils.js'
import { calculateCost } from '../models.js'
import { readCachedCodexResults, writeCachedCodexResults, getCachedCodexProject, fingerprintFile } from '../codex-cache.js'
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
Expand Down Expand Up @@ -201,9 +201,6 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
const fp = await fingerprintFile(source.path)
if (!fp) return

const content = await readSessionFile(source.path)
if (content === null) return
const lines = content.split('\n').filter(l => l.trim())
let sessionModel: string | undefined
let sessionId = ''
let prevCumulativeTotal = 0
Expand All @@ -215,9 +212,18 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
let pendingUserMessage = ''
let pendingOutputChars = 0
let estCounter = 0
let sawAnyLine = false
const results: ParsedProviderCall[] = []

for (const line of lines) {
// Stream the session file line by line. Heavy Codex sessions can exceed
// 250 MB on disk; reading the entire file into a string would either hit
// the readSessionFile cap or push V8 toward its 512 MB string limit
// after split('\n'). readSessionLines streams via readline so memory
// stays bounded to the longest line.
for await (const rawLine of readSessionLines(source.path)) {
sawAnyLine = true
const line = rawLine.trim()
if (!line) continue
let entry: CodexEntry
try {
entry = JSON.parse(line) as CodexEntry
Expand Down Expand Up @@ -391,6 +397,11 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
}
}

// If the stream yielded nothing the file was unreadable, oversized, or
// empty. Skip cache write so a transient failure can't pin an empty
// result set against a fingerprint that would otherwise be re-parsed.
if (!sawAnyLine) return

await writeCachedCodexResults(source.path, source.project, results, fp)

for (const call of results) {
Expand Down
Loading