diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 36f5a6264..ba48e743e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,7 @@ on: env: NODE_VERSION_PRIMARY: '24' + NODE_VERSION_SERVER: '22' jobs: # ── Lint ────────────────────────────────────────────────────────────────── @@ -188,7 +189,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 with: - node-version: ${{ env.NODE_VERSION_PRIMARY }} + node-version: ${{ env.NODE_VERSION_SERVER }} cache: 'npm' cache-dependency-path: package-lock.json - run: npm ci diff --git a/shared/supervision-config.ts b/shared/supervision-config.ts index 95e2f4227..bcde0c4ad 100644 --- a/shared/supervision-config.ts +++ b/shared/supervision-config.ts @@ -55,6 +55,8 @@ export const SUPERVISION_DEFAULT_TIMEOUT_MS = 12_000; export const SUPERVISION_DEFAULT_MAX_PARSE_RETRIES = 1; export const SUPERVISION_DEFAULT_AUDIT_MODE: SupervisionAuditMode = 'audit'; export const SUPERVISION_DEFAULT_MAX_AUDIT_LOOPS = 2; +export const SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK = 2; +export const SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL = 0; export const SUPERVISION_DEFAULT_PROMPT_VERSION = SUPERVISION_CONTRACT_IDS.DECISION; export const SUPERVISION_DEFAULT_TASK_RUN_PROMPT_VERSION = SUPERVISION_CONTRACT_IDS.TASK_RUN_STATUS; @@ -107,6 +109,8 @@ export type SessionSupervisionSnapshotIssue = | 'invalid_global_custom_instructions' | 'invalid_preset' | 'invalid_max_parse_retries' + | 'invalid_max_auto_continue_streak' + | 'invalid_max_auto_continue_total' | 'missing_audit_mode' | 'invalid_audit_mode' | 'invalid_max_audit_loops' @@ -126,6 +130,8 @@ export interface SupervisorDefaultConfig { model: string; timeoutMs: number; promptVersion: string; + maxAutoContinueStreak: number; + maxAutoContinueTotal: number; /** * Optional global supervision custom instructions. 
Free text appended to the * supervisor prompt for every Auto-enabled session that does not set @@ -165,6 +171,8 @@ export interface SessionSupervisionSnapshot extends SupervisorDefaultConfig { */ globalCustomInstructions?: string; maxParseRetries: number; + maxAutoContinueStreak: number; + maxAutoContinueTotal: number; auditMode: SupervisionAuditMode; maxAuditLoops: number; taskRunPromptVersion: string; @@ -186,6 +194,12 @@ function normalizePositiveInteger(value: unknown, fallback: number, minimum = 1) return int >= minimum ? int : fallback; } +function normalizeNonNegativeInteger(value: unknown, fallback: number): number { + if (typeof value !== 'number' || !Number.isFinite(value)) return fallback; + const int = Math.floor(value); + return int >= 0 ? int : fallback; +} + export function isSupportedSupervisionBackend(value: string | null | undefined): value is SharedContextRuntimeBackend { const trimmed = trimString(value); return !!trimmed && SUPERVISION_SUPPORTED_BACKENDS.includes(trimmed as SharedContextRuntimeBackend); @@ -238,6 +252,8 @@ export function normalizeSupervisorDefaultConfig( model, timeoutMs: normalizePositiveInteger(merged.timeoutMs, SUPERVISION_DEFAULT_TIMEOUT_MS, 1), promptVersion: trimString(merged.promptVersion) ?? SUPERVISION_DEFAULT_PROMPT_VERSION, + maxAutoContinueStreak: normalizeNonNegativeInteger(merged.maxAutoContinueStreak, SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK), + maxAutoContinueTotal: normalizeNonNegativeInteger(merged.maxAutoContinueTotal, SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL), ...(customInstructions ? { customInstructions } : {}), ...(preset ? 
{ preset } : {}), }; @@ -296,17 +312,46 @@ export function getSessionSupervisionSnapshotIssues( if (record.globalCustomInstructions != null && typeof record.globalCustomInstructions !== 'string') { issues.push('invalid_global_custom_instructions'); } - if (typeof record.maxParseRetries !== 'number' || !Number.isFinite(record.maxParseRetries) || Math.floor(record.maxParseRetries) < 1) { + if ( + record.maxParseRetries != null + && (typeof record.maxParseRetries !== 'number' || !Number.isFinite(record.maxParseRetries) || Math.floor(record.maxParseRetries) < 1) + ) { issues.push('invalid_max_parse_retries'); } + if ( + record.maxAutoContinueStreak != null + && ( + typeof record.maxAutoContinueStreak !== 'number' + || !Number.isFinite(record.maxAutoContinueStreak) + || Math.floor(record.maxAutoContinueStreak) < 0 + ) + ) { + issues.push('invalid_max_auto_continue_streak'); + } + if ( + record.maxAutoContinueTotal != null + && ( + typeof record.maxAutoContinueTotal !== 'number' + || !Number.isFinite(record.maxAutoContinueTotal) + || Math.floor(record.maxAutoContinueTotal) < 0 + ) + ) { + issues.push('invalid_max_auto_continue_total'); + } if (mode === SUPERVISION_MODE.SUPERVISED_AUDIT) { - if (record.auditMode == null || record.auditMode === '') issues.push('missing_audit_mode'); - else if (!isSupportedSupervisionAuditMode(String(record.auditMode))) issues.push('invalid_audit_mode'); - if (typeof record.maxAuditLoops !== 'number' || !Number.isFinite(record.maxAuditLoops) || Math.floor(record.maxAuditLoops) < 1) { + if (record.auditMode != null && record.auditMode !== '' && !isSupportedSupervisionAuditMode(String(record.auditMode))) { + issues.push('invalid_audit_mode'); + } + if ( + record.maxAuditLoops != null + && (typeof record.maxAuditLoops !== 'number' || !Number.isFinite(record.maxAuditLoops) || Math.floor(record.maxAuditLoops) < 1) + ) { issues.push('invalid_max_audit_loops'); } - if (!trimString(record.taskRunPromptVersion)) 
issues.push('invalid_task_run_prompt_version'); + if (record.taskRunPromptVersion != null && !trimString(record.taskRunPromptVersion)) { + issues.push('invalid_task_run_prompt_version'); + } } return issues; @@ -329,6 +374,8 @@ export function normalizeSessionSupervisionSnapshot( : false; const globalCustomInstructions = trimString(merged.globalCustomInstructions); const maxParseRetries = normalizePositiveInteger(merged.maxParseRetries, SUPERVISION_DEFAULT_MAX_PARSE_RETRIES, 1); + const maxAutoContinueStreak = normalizeNonNegativeInteger(merged.maxAutoContinueStreak, SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK); + const maxAutoContinueTotal = normalizeNonNegativeInteger(merged.maxAutoContinueTotal, SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL); const auditMode = isSupportedSupervisionAuditMode(merged.auditMode) ? merged.auditMode : SUPERVISION_DEFAULT_AUDIT_MODE; const maxAuditLoops = normalizePositiveInteger(merged.maxAuditLoops, SUPERVISION_DEFAULT_MAX_AUDIT_LOOPS, 1); return { @@ -340,6 +387,8 @@ export function normalizeSessionSupervisionSnapshot( ...(customInstructionsOverride ? { customInstructionsOverride: true } : {}), ...(globalCustomInstructions ? { globalCustomInstructions } : {}), maxParseRetries, + maxAutoContinueStreak, + maxAutoContinueTotal, auditMode, maxAuditLoops, taskRunPromptVersion: trimString(merged.taskRunPromptVersion) ?? 
SUPERVISION_DEFAULT_TASK_RUN_PROMPT_VERSION, @@ -479,6 +528,8 @@ export const TASK_RUN_PROMPT_VERSION = SUPERVISION_DEFAULT_TASK_RUN_PROMPT_VERSI export const DEFAULT_SUPERVISION_AUDIT_MODE = SUPERVISION_DEFAULT_AUDIT_MODE; export const DEFAULT_SUPERVISION_MAX_AUDIT_LOOPS = SUPERVISION_DEFAULT_MAX_AUDIT_LOOPS; export const DEFAULT_SUPERVISION_MAX_PARSE_RETRIES = SUPERVISION_DEFAULT_MAX_PARSE_RETRIES; +export const DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK = SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK; +export const DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL = SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL; export function parseTaskRunTerminalStateDetailsFromText(text: string): ParsedTaskRunTerminalState { const matches = [...text.matchAll(//g)]; diff --git a/src/agent/providers/claude-code-sdk.ts b/src/agent/providers/claude-code-sdk.ts index d10c0e75e..4856b247a 100644 --- a/src/agent/providers/claude-code-sdk.ts +++ b/src/agent/providers/claude-code-sdk.ts @@ -25,7 +25,7 @@ import type { ProviderContextPayload } from '../../../shared/context-types.js'; import type { TransportAttachment } from '../../../shared/transport-attachments.js'; import logger from '../../util/logger.js'; import { CLAUDE_SDK_EFFORT_LEVELS, type TransportEffortLevel } from '../../../shared/effort-levels.js'; -import { normalizeTransportCwd, resolveExecutableForSpawn } from '../transport-paths.js'; +import { normalizeTransportCwd, resolveClaudeCodePathForSdk, resolveExecutableForSpawn } from '../transport-paths.js'; const CLAUDE_BIN = 'claude'; const DEFAULT_PERMISSION_MODE: PermissionMode = 'bypassPermissions'; @@ -114,7 +114,7 @@ export class ClaudeCodeSdkProvider implements TransportProvider { private statusCallbacks: Array<(sessionId: string, status: ProviderStatusUpdate) => void> = []; async connect(config: ProviderConfig): Promise { - const binaryPath = this.resolveBinaryPath(config); + const binaryPath = this.getConfiguredBinaryPath(config); const resolved = 
resolveExecutableForSpawn(binaryPath); await access(resolved.executable, fsConstants.X_OK).catch(async () => { const { execFile } = await import('node:child_process'); @@ -300,11 +300,7 @@ export class ClaudeCodeSdkProvider implements TransportProvider { } : {}), }; options.spawnClaudeCodeProcess = (req: { command: string; args: string[]; cwd?: string; env?: Record; signal?: AbortSignal }) => { - const finalCommand = this.windowsSpawnOverride?.executable ?? req.command; - const finalArgs = this.windowsSpawnOverride - ? [...this.windowsSpawnOverride.prependArgs, ...req.args] - : req.args; - const child = spawn(finalCommand, finalArgs, { + const child = spawn(req.command, req.args, { cwd: req.cwd, env: req.env, signal: req.signal, @@ -575,21 +571,13 @@ export class ClaudeCodeSdkProvider implements TransportProvider { return DEFAULT_PERMISSION_MODE; } + private getConfiguredBinaryPath(config: ProviderConfig | null): string { + return typeof config?.binaryPath === 'string' && config.binaryPath.trim() ? config.binaryPath : CLAUDE_BIN; + } + private resolveBinaryPath(config: ProviderConfig | null): string { - const raw = typeof config?.binaryPath === 'string' && config.binaryPath.trim() ? config.binaryPath : CLAUDE_BIN; - // For windows + .cmd shim, the SDK can't spawn this directly. - // We pass it to the SDK as a marker, then override spawn via - // spawnClaudeCodeProcess (see send()). - if (process.platform === 'win32') { - const resolved = resolveExecutableForSpawn(raw); - this.windowsSpawnOverride = resolved.prependArgs.length > 0 - ? 
{ executable: resolved.executable, prependArgs: resolved.prependArgs } - : null; - return resolved.executable; - } - return raw; + return resolveClaudeCodePathForSdk(this.getConfiguredBinaryPath(config)); } - private windowsSpawnOverride: { executable: string; prependArgs: string[] } | null = null; private emitSessionInfo(sessionId: string, info: SessionInfoUpdate): void { for (const cb of this.sessionInfoCallbacks) cb(sessionId, info); diff --git a/src/agent/transport-paths.ts b/src/agent/transport-paths.ts index 383bc4673..511cec638 100644 --- a/src/agent/transport-paths.ts +++ b/src/agent/transport-paths.ts @@ -31,7 +31,10 @@ export function resolveBinaryOnWindows(name: string): string { // OS delimiter (':' on Linux), which breaks tests that fake // `process.platform = 'win32'` on a posix CI runner. const WIN_DELIMITER = ';'; - const pathDirs = (process.env.PATH ?? '').split(WIN_DELIMITER).filter(Boolean); + const pathDirs = uniqueNonEmpty([ + ...(process.env.PATH ?? '').split(WIN_DELIMITER), + ...getWindowsGlobalCliDirs(), + ]); const pathExtRaw = process.env.PATHEXT ?? '.COM;.EXE;.BAT;.CMD'; const exts = pathExtRaw.split(WIN_DELIMITER).filter(Boolean); const hasExt = exts.some((e) => name.toLowerCase().endsWith(e.toLowerCase())); @@ -50,6 +53,33 @@ export function resolveBinaryOnWindows(name: string): string { return name; } +function uniqueNonEmpty(values: Array): string[] { + return [...new Set(values.filter((value): value is string => typeof value === 'string' && value.trim().length > 0))]; +} + +function getWindowsGlobalCliDirs(): string[] { + return uniqueNonEmpty([ + process.env.APPDATA ? path.join(process.env.APPDATA, 'npm') : undefined, + process.env.USERPROFILE ? path.join(process.env.USERPROFILE, 'AppData', 'Roaming', 'npm') : undefined, + ]); +} + +function getWindowsClaudeInstallCandidates(name: string): string[] { + const basename = path.basename(name); + const hasExt = /\.[^\\/]+$/.test(basename); + const fileNames = hasExt ? 
[basename] : [basename, `${basename}.exe`, `${basename}.cmd`, `${basename}.bat`]; + const dirs = uniqueNonEmpty([ + ...getWindowsGlobalCliDirs(), + process.env.LOCALAPPDATA ? path.join(process.env.LOCALAPPDATA, 'Programs', 'Claude') : undefined, + process.env.LOCALAPPDATA ? path.join(process.env.LOCALAPPDATA, 'Programs', 'Claude Code') : undefined, + process.env.ProgramFiles ? path.join(process.env.ProgramFiles, 'Claude') : undefined, + process.env.ProgramFiles ? path.join(process.env.ProgramFiles, 'Claude Code') : undefined, + process.env['ProgramFiles(x86)'] ? path.join(process.env['ProgramFiles(x86)'], 'Claude') : undefined, + process.env['ProgramFiles(x86)'] ? path.join(process.env['ProgramFiles(x86)'], 'Claude Code') : undefined, + ]); + return dirs.flatMap((dir) => fileNames.map((fileName) => path.join(dir, fileName))); +} + export function resolveBinaryWithWindowsFallbacks(name: string, windowsCandidates: string[] = []): string { if (process.platform !== 'win32') return name; for (const candidate of windowsCandidates) { @@ -58,6 +88,23 @@ export function resolveBinaryWithWindowsFallbacks(name: string, windowsCandidate return resolveBinaryOnWindows(name); } +/** Resolve a CLI path suitable for passing to an SDK option like + * `pathToClaudeCodeExecutable`. + * + * On Windows, npm global installs usually expose `claude.cmd`, but SDKs that + * spawn the path directly without `shell: true` need either a real `.exe` or + * the underlying `.js` entrypoint. This helper converts npm shims to their + * script path and also searches common per-user install locations when the + * daemon service PATH is sparse. */ +export function resolveClaudeCodePathForSdk(name = 'claude'): string { + if (process.platform !== 'win32') return name; + const resolved = resolveBinaryWithWindowsFallbacks(name, getWindowsClaudeInstallCandidates(name)); + if (/\.(cmd|bat)$/i.test(resolved)) { + return parseNpmCmdShim(resolved) ?? 
resolved; + } + return resolved; +} + /** Result of resolving a binary that may be an npm .cmd shim. * When the resolved path is a real .exe, just `{ executable }`. * When it's a Windows .cmd shim, returns the underlying node script so diff --git a/src/context/summary-compressor.ts b/src/context/summary-compressor.ts index 9e696f3a3..ba30a8f6b 100644 --- a/src/context/summary-compressor.ts +++ b/src/context/summary-compressor.ts @@ -15,6 +15,7 @@ import type { TransportProvider, ProviderError } from '../agent/transport-provid import type { AgentMessage } from '../../shared/agent-message.js'; import { randomUUID } from 'node:crypto'; import logger from '../util/logger.js'; +import { resolveClaudeCodePathForSdk } from '../agent/transport-paths.js'; import { resolveProcessingProviderSessionConfig, type ProcessingBackendSelection as CompressionBackendSelection, @@ -482,9 +483,13 @@ async function sendViaSdkQuery(prompt: string): Promise { delete process.env.CLAUDECODE; try { let result = ''; + const pathToClaudeCodeExecutable = resolveClaudeCodePathForSdk(); for await (const msg of query({ prompt: COMPRESSOR_SYSTEM_PROMPT + '\n\n' + prompt, - options: { maxTurns: 1 }, + options: { + maxTurns: 1, + pathToClaudeCodeExecutable, + }, })) { if (msg.type === 'assistant') { const content = (msg as { message?: { content?: unknown } }).message?.content; diff --git a/src/daemon/gemini-watcher.ts b/src/daemon/gemini-watcher.ts index 0e78d6c0f..fddb9f5b2 100644 --- a/src/daemon/gemini-watcher.ts +++ b/src/daemon/gemini-watcher.ts @@ -2,9 +2,10 @@ * Watches Gemini CLI conversation JSON files for structured events. 
*/ -import { watch, readdir, stat, readFile } from 'fs/promises'; +import { watch, readdir, stat, readFile, open } from 'fs/promises'; import { join } from 'path'; import { homedir } from 'os'; +import { createHash } from 'crypto'; import { timelineEmitter } from './timeline-emitter.js'; import { capturePane } from '../agent/tmux.js'; import { detectStatus } from '../agent/detect.js'; @@ -20,6 +21,9 @@ const POLL_INTERVAL_MS = 1500; // Balanced: responsive enough without causing st const IDLE_LOCK_MS = 2000; // After emitting idle, ignore terminal noise for this long const RUNNING_LOCK_MS = 3000; // After emitting running, don't transition to idle for this long const RETRY_DELAY_MS = 100; +const FULL_READ_LIMIT_BYTES = 4 * 1024 * 1024; // 4 MiB +const TAIL_READ_LIMIT_BYTES = 2 * 1024 * 1024; // 2 MiB +const OVERSIZED_TAIL_MESSAGE_LIMIT = 48; const pendingGeminiFileTools = new Map(); const completedGeminiFileTools = new Set(); const MAX_TRACKED_GEMINI_FILE_TOOLS = 512; @@ -94,6 +98,93 @@ async function findLatestSessionFile(excludeClaimed = true): Promise= 0) { + objects.push(chunk.slice(start, i + 1)); + start = -1; + } + } + } + + return objects; +} + +async function readOversizedConversationTail(filePath: string, fileSize: number): Promise { + let fh: Awaited> | null = null; + try { + fh = await open(filePath, 'r'); + const readSize = Math.min(fileSize, TAIL_READ_LIMIT_BYTES); + const buf = Buffer.allocUnsafe(readSize); + const { bytesRead } = await fh.read(buf, 0, readSize, fileSize - readSize); + if (bytesRead === 0) return null; + + const chunk = buf.subarray(0, bytesRead).toString('utf8'); + const objects = extractTopLevelJsonObjects(chunk); + const messages = objects + .flatMap((raw) => { + try { + const parsed = JSON.parse(raw) as Record; + const type = parsed.type; + if (type === 'user' || type === 'gemini' || type === 'info') return [parsed]; + } catch { + /* ignore malformed tail fragments */ + } + return []; + }) + .slice(-OVERSIZED_TAIL_MESSAGE_LIMIT); 
+ + if (messages.length === 0) return null; + return { + lastUpdated: '', + messages, + truncated: true, + }; + } catch { + return null; + } finally { + if (fh) await fh.close().catch(() => {}); + } +} + // ── Message parsing ──────────────────────────────────────────────────────────── function parseMessage(sessionName: string, msg: any, hist?: any, streaming = false): void { @@ -256,6 +347,8 @@ export interface WatcherState { turnHadAssistantText?: boolean; /** Prevent repeated retrack attempts for the same no-text running→idle turn. */ noTextRetrackAttempted?: boolean; + /** Tail fingerprints used when the conversation file is too large for full parse. */ + _oversizedRecentMessageIds?: string[]; } const watchers = new Map(); @@ -472,6 +565,10 @@ function inferConversationStatus(conv: any): 'running' | 'idle' | null { async function readConversation(filePath: string, sessionName?: string): Promise { for (let attempt = 0; attempt < 3; attempt++) { try { + const fileStat = await stat(filePath); + if (fileStat.size > FULL_READ_LIMIT_BYTES) { + return await readOversizedConversationTail(filePath, fileStat.size); + } const raw = await readFile(filePath, 'utf8'); if (!raw.trim()) continue; return JSON.parse(raw); @@ -484,6 +581,28 @@ async function readConversation(filePath: string, sessionName?: string): Promise return null; } +function buildGeminiMessageFingerprint(msg: any): string { + return createHash('sha1') + .update(JSON.stringify({ + type: msg?.type ?? null, + timestamp: msg?.timestamp ?? null, + content: msg?.content ?? null, + thoughts: msg?.thoughts ?? null, + toolCalls: msg?.toolCalls ?? null, + tokens: msg?.tokens ?? null, + model: msg?.model ?? 
null, + })) + .digest('hex'); +} + +function arraysEqual(left: string[], right: string[]): boolean { + if (left.length !== right.length) return false; + for (let i = 0; i < left.length; i++) { + if (left[i] !== right[i]) return false; + } + return true; +} + export async function pollTick(sessionName: string, state: WatcherState): Promise { // Re-entrancy guard — prevent overlapping pollTick from fs.watch + poll timer if (state.polling) return; @@ -541,19 +660,51 @@ async function pollTickInner(sessionName: string, state: WatcherState): Promise< const conversationStatus = inferConversationStatus(conv); state.lastConversationStatus = conversationStatus; - if (conv.lastUpdated === state.lastUpdated && conv.messages.length === state.seenCount) { + const isTruncatedTail = conv.truncated === true; + const recentMessageIds = isTruncatedTail + ? conv.messages.map((msg: any) => buildGeminiMessageFingerprint(msg)) + : []; + const previousRecentMessageIds = state._oversizedRecentMessageIds ?? []; + + if (!isTruncatedTail && conv.lastUpdated === state.lastUpdated && conv.messages.length === state.seenCount) { // JSON unchanged — terminal spinner is ground truth. // terminalThinkingCheck handles all state transitions via assertSpinnerGate. await terminalThinkingCheck(sessionName, state); return; } - const lastIdx = conv.messages.length - 1; - const isUpdate = conv.messages.length === state.seenCount && lastIdx >= 0; - const messagesToProcess = isUpdate ? [conv.messages[lastIdx]] : conv.messages.slice(state.seenCount); + let lastIdx = conv.messages.length - 1; + let isUpdate = conv.messages.length === state.seenCount && lastIdx >= 0; + let messagesToProcess = isUpdate ? 
[conv.messages[lastIdx]] : conv.messages.slice(state.seenCount); + + if (isTruncatedTail) { + const sameExceptLast = recentMessageIds.length > 0 + && previousRecentMessageIds.length === recentMessageIds.length + && arraysEqual(previousRecentMessageIds.slice(0, -1), recentMessageIds.slice(0, -1)) + && previousRecentMessageIds[previousRecentMessageIds.length - 1] !== recentMessageIds[recentMessageIds.length - 1]; + if (sameExceptLast) { + lastIdx = conv.messages.length - 1; + isUpdate = lastIdx >= 0; + messagesToProcess = lastIdx >= 0 ? [conv.messages[lastIdx]] : []; + } else { + const previousSet = new Set(previousRecentMessageIds); + messagesToProcess = conv.messages.filter((_: any, index: number) => !previousSet.has(recentMessageIds[index] ?? '')); + isUpdate = false; + lastIdx = conv.messages.length - 1; + } + if (messagesToProcess.length === 0 && arraysEqual(previousRecentMessageIds, recentMessageIds)) { + await terminalThinkingCheck(sessionName, state); + return; + } + } - state.seenCount = conv.messages.length; - state.lastUpdated = conv.lastUpdated; + state.seenCount = isTruncatedTail + ? Math.max(state.seenCount, conv.messages.length) + : conv.messages.length; + state.lastUpdated = isTruncatedTail + ? `__oversized__:${state._lastMtimeMs ?? 0}:${state._lastSize ?? 0}` + : conv.lastUpdated; + state._oversizedRecentMessageIds = isTruncatedTail ? 
recentMessageIds : undefined; state._terminalThinkingEmitted = false; // Reset: JSON has content now, terminal-based thinking no longer needed for (let i = 0; i < messagesToProcess.length; i++) { @@ -619,6 +770,7 @@ function activateFile(sessionName: string, state: WatcherState, newFile: string) state._lastIno = undefined; state._readFailCount = 0; state._terminalThinkingEmitted = false; + state._oversizedRecentMessageIds = undefined; state.idleConfirmCount = 0; if (state.idleDebounceTimer) { clearTimeout(state.idleDebounceTimer); state.idleDebounceTimer = undefined; } @@ -645,8 +797,11 @@ export async function startWatching(sessionName: string, sessionUuid: string): P const conv = await readConversation(found, sessionName); if (conv) { state.seenCount = conv.messages.length; - state.lastUpdated = conv.lastUpdated; + state.lastUpdated = conv.truncated ? '__oversized__' : conv.lastUpdated; state.lastConversationStatus = inferConversationStatus(conv); + if (conv.truncated) { + state._oversizedRecentMessageIds = conv.messages.map((msg: any) => buildGeminiMessageFingerprint(msg)); + } // Seed _lastMsgLen so the first pollTick "changed file" path doesn't // treat a metadata-only update (lastUpdated timestamp) as new content. const lastMsg = conv.messages[conv.messages.length - 1]; @@ -724,8 +879,11 @@ export async function startWatchingDiscovered( claimedFiles.set(fullPath, sessionName); state.sessionUuid = conv.sessionId; state.seenCount = conv.messages?.length ?? 0; - state.lastUpdated = conv.lastUpdated ?? ''; + state.lastUpdated = conv.truncated ? '__oversized__' : (conv.lastUpdated ?? ''); state.lastConversationStatus = inferConversationStatus(conv); + if (conv.truncated) { + state._oversizedRecentMessageIds = conv.messages.map((msg: any) => buildGeminiMessageFingerprint(msg)); + } const lm = conv.messages?.[conv.messages.length - 1]; state._lastMsgLen = typeof lm?.content === 'string' ? 
lm.content.length : -1; // Persist to local session store so daemon restarts can use the UUID diff --git a/src/daemon/supervision-automation.ts b/src/daemon/supervision-automation.ts index b3e5bac88..1e3ccc565 100644 --- a/src/daemon/supervision-automation.ts +++ b/src/daemon/supervision-automation.ts @@ -11,6 +11,8 @@ import { getCachedGlobalCustomInstructions } from './supervisor-defaults-cache.j import logger from '../util/logger.js'; import { SUPERVISION_CONTRACT_IDS, + SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK, + SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL, SUPERVISION_MODE, SUPERVISION_UNAVAILABLE_REASONS, extractSessionSupervisionSnapshot, @@ -53,18 +55,6 @@ function enrichSnapshotWithGlobalDefaults( type TaskRunPhase = 'execution' | 'auditing'; -/** - * Hard cap on auto-dispatched continue turns per task-run. - * - * Was 8 historically — but even when the supervisor returned specific-looking - * `continue` verdicts, running 8 cycles before handing back to the user - * amplified any residual ambiguity into a frustrating back-and-forth. Per - * user direction (issue: "不断拉扯"), we now allow AT MOST 2 auto-continue - * dispatches before escalating to `ask_human`. If two concrete nextActions - * didn't close the gap, the pattern is stuck in a loop the supervisor can't - * resolve autonomously — surface it to the human. - */ -const MAX_AUTO_CONTINUE_STEPS = 2; const SUPERVISION_WAITING_LABEL = 'Supervised: analyzing completion...'; const SUPERVISION_AUDIT_WAITING_LABEL = 'Supervised: running automated audit...'; const SUPERVISION_COMPLETE_LABEL = 'Supervised: task looks complete.'; @@ -82,6 +72,8 @@ interface ActiveTaskRunState { userText: string; phase: TaskRunPhase; continueLoops: number; + continueStreakCount: number; + lastContinueBucket?: string; evaluating: boolean; sawAssistantOutput: boolean; lastAssistantText?: string; @@ -133,6 +125,37 @@ function trimString(value: unknown): string | undefined { return typeof value === 'string' && value.trim() ? 
value.trim() : undefined; } +function normalizeContinueBucketText(value: string | undefined): string { + return (value ?? '') + .toLowerCase() + .replace(/[`"'“”‘’]/g, '') + .replace(/\s+/g, ' ') + .trim(); +} + +function classifyContinueBucket(decision: { nextAction?: string; gap?: string; reason: string }): string { + const text = normalizeContinueBucketText([ + decision.nextAction, + decision.gap, + decision.reason, + ].filter((entry): entry is string => typeof entry === 'string' && entry.trim().length > 0).join(' ')); + if (!text) return 'generic'; + + const categories: Array<{ key: string; pattern: RegExp }> = [ + { key: 'commit_push', pattern: /\b(commit|push|git push|git commit|merge|sync|提交|推送|合并)\b/iu }, + { key: 'test_verify', pattern: /\b(test|tests|testing|verify|verification|validate|validation|regression|vitest|pytest|jest|检查|验证|测试|回归)\b/iu }, + { key: 'audit_review', pattern: /\b(audit|review|审计|审核|评审)\b/iu }, + { key: 'fix_repair', pattern: /\b(fix|repair|bug|regression|修复|返工|rework)\b/iu }, + { key: 'implement_code', pattern: /\b(implement|code|edit|change|update|refactor|write|add|实现|修改|编写|补充|重构)\b/iu }, + { key: 'docs_spec', pattern: /\b(doc|docs|documentation|spec|openspec|proposal|design|文档|规范|设计|proposal)\b/iu }, + { key: 'deploy_restart', pattern: /\b(deploy|release|restart|daemon|发布|部署|重启)\b/iu }, + { key: 'investigate', pattern: /\b(check|inspect|investigate|diagnose|analyze|look into|查看|排查|分析|调查)\b/iu }, + ]; + const matched = categories.find((entry) => entry.pattern.test(text)); + if (matched) return matched.key; + return text.slice(0, 120); +} + function formatUnavailableReason(reason: SupervisionUnavailableReason | undefined): string | null { switch (reason) { case SUPERVISION_UNAVAILABLE_REASONS.PROVIDER_NOT_CONNECTED: @@ -645,6 +668,7 @@ class SupervisionAutomation { userText: text, phase: 'execution', continueLoops: 0, + continueStreakCount: 0, evaluating: false, sawAssistantOutput: false, reworkDispatches: 0, @@ -772,11 +796,29 @@ 
class SupervisionAutomation { return; } case 'continue': { - if (latest.continueLoops >= MAX_AUTO_CONTINUE_STEPS) { - this.emitWarning(run.sessionName, `Automation reached the auto-continue limit (${MAX_AUTO_CONTINUE_STEPS}); handing control back to the human.`); + const continueBucket = classifyContinueBucket({ + reason: decision.reason, + nextAction: decision.nextAction, + gap: decision.gap, + }); + const nextStreakCount = latest.lastContinueBucket === continueBucket + ? latest.continueStreakCount + 1 + : 1; + const maxAutoContinueStreak = latest.snapshot.maxAutoContinueStreak ?? SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK; + const maxAutoContinueTotal = latest.snapshot.maxAutoContinueTotal ?? SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL; + + if (maxAutoContinueStreak > 0 && nextStreakCount > maxAutoContinueStreak) { + this.emitWarning(run.sessionName, `Automation reached the repeated auto-continue limit (${maxAutoContinueStreak}) for ${continueBucket}; handing control back to the human.`); + this.finishRun(run.sessionName, 'needs_input'); + return; + } + if (maxAutoContinueTotal > 0 && latest.continueLoops >= maxAutoContinueTotal) { + this.emitWarning(run.sessionName, `Automation reached the auto-continue hard limit (${maxAutoContinueTotal}); handing control back to the human.`); this.finishRun(run.sessionName, 'needs_input'); return; } + latest.lastContinueBucket = continueBucket; + latest.continueStreakCount = nextStreakCount; // Forward the full decision so the continue prompt can lead with // the supervisor's concrete nextAction. 
Without this, the target // agent only sees the reason and has to infer what to do next — diff --git a/src/daemon/timeline-store.ts b/src/daemon/timeline-store.ts index 713ce9f5c..fd3a1b4a3 100644 --- a/src/daemon/timeline-store.ts +++ b/src/daemon/timeline-store.ts @@ -4,7 +4,7 @@ * Storage: ~/.imcodes/timeline/{sessionName}.jsonl */ -import { mkdirSync, appendFileSync, readFileSync, writeFileSync, readdirSync, statSync, unlinkSync, openSync, readSync, fstatSync, closeSync } from 'fs'; +import { mkdirSync, appendFileSync, writeFileSync, readdirSync, statSync, unlinkSync, openSync, readSync, fstatSync, closeSync } from 'fs'; import { join } from 'path'; import { homedir } from 'os'; import type { TimelineEvent } from './timeline-event.js'; @@ -142,20 +142,13 @@ class TimelineStore { */ truncate(sessionName: string, keepLast = MAX_EVENTS_PER_FILE): void { const filePath = this.filePath(sessionName); - let raw: string; - try { - raw = readFileSync(filePath, 'utf-8'); - } catch { - return; - } - - const lines = raw.trimEnd().split('\n').filter(l => l.length > 0); - if (lines.length <= keepLast) return; + const newestFirst = readTailLines(filePath, keepLast + 1); + if (newestFirst.length <= keepLast) return; - const kept = lines.slice(lines.length - keepLast); + const kept = newestFirst.slice(0, keepLast).reverse(); try { writeFileSync(filePath, kept.join('\n') + '\n'); - logger.info({ sessionName, before: lines.length, after: kept.length }, 'TimelineStore: truncated'); + logger.info({ sessionName, after: kept.length }, 'TimelineStore: truncated'); } catch (err) { logger.debug({ err, sessionName }, 'TimelineStore: truncate write failed'); } diff --git a/test/agent/transport-paths.test.ts b/test/agent/transport-paths.test.ts index 2e97d8d0b..58b43092e 100644 --- a/test/agent/transport-paths.test.ts +++ b/test/agent/transport-paths.test.ts @@ -2,7 +2,13 @@ import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest'; import * as fs from 'node:fs'; import path 
from 'node:path'; -import { normalizeTransportCwd, resolveBinaryOnWindows, parseNpmCmdShim, resolveExecutableForSpawn } from '../../src/agent/transport-paths.js'; +import { + normalizeTransportCwd, + resolveBinaryOnWindows, + parseNpmCmdShim, + resolveExecutableForSpawn, + resolveClaudeCodePathForSdk, +} from '../../src/agent/transport-paths.js'; describe('normalizeTransportCwd', () => { it('returns an absolute cwd on non-Windows hosts', () => { @@ -112,6 +118,31 @@ describe('resolveBinaryOnWindows', () => { fs.rmSync(tmpDir, { recursive: true, force: true }); } }); + + it('falls back to APPDATA npm shims when PATH is missing', () => { + const origPlatform = process.platform; + const origPath = process.env.PATH; + const origAppData = process.env.APPDATA; + const origPathExt = process.env.PATHEXT; + const tmpDir = fs.mkdtempSync(path.join(require('os').tmpdir(), 'resolve-appdata-test-')); + const npmDir = path.join(tmpDir, 'npm'); + fs.mkdirSync(npmDir, { recursive: true }); + fs.writeFileSync(path.join(npmDir, 'claude.cmd'), '@echo off\r\necho hi\r\n'); + Object.defineProperty(process, 'platform', { value: 'win32' }); + process.env.PATH = ''; + process.env.APPDATA = tmpDir; + process.env.PATHEXT = '.com;.exe;.bat;.cmd'; + try { + const resolved = resolveBinaryOnWindows('claude'); + expect(resolved.toLowerCase().endsWith(path.join('npm', 'claude.cmd').toLowerCase())).toBe(true); + } finally { + Object.defineProperty(process, 'platform', { value: origPlatform }); + process.env.PATH = origPath; + if (origAppData === undefined) delete process.env.APPDATA; else process.env.APPDATA = origAppData; + if (origPathExt === undefined) delete process.env.PATHEXT; else process.env.PATHEXT = origPathExt; + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); }); describe('resolveExecutableForSpawn', () => { @@ -194,3 +225,37 @@ describe('resolveExecutableForSpawn', () => { } }); }); + +describe('resolveClaudeCodePathForSdk', () => { + it('returns the underlying js 
entrypoint for npm cmd shims', () => { + const origPlatform = process.platform; + const origPath = process.env.PATH; + const origPathExt = process.env.PATHEXT; + const origAppData = process.env.APPDATA; + const tmpDir = fs.mkdtempSync(path.join(require('os').tmpdir(), 'claude-sdk-path-test-')); + const npmDir = path.join(tmpDir, 'npm'); + const scriptDir = path.join(npmDir, 'node_modules', '@anthropic-ai', 'claude-code', 'bin'); + fs.mkdirSync(scriptDir, { recursive: true }); + fs.writeFileSync(path.join(scriptDir, 'claude.js'), '#!/usr/bin/env node\n'); + fs.writeFileSync( + path.join(npmDir, 'claude.cmd'), + '@ECHO off\r\n' + + 'CALL :find_dp0\r\n' + + 'endLocal & goto #_undefined_# 2>NUL || title %COMSPEC% & "%_prog%" "%dp0%\\node_modules\\@anthropic-ai\\claude-code\\bin\\claude.js" %*\r\n', + ); + Object.defineProperty(process, 'platform', { value: 'win32' }); + process.env.PATH = ''; + process.env.APPDATA = tmpDir; + process.env.PATHEXT = '.com;.exe;.bat;.cmd'; + try { + const resolved = resolveClaudeCodePathForSdk(); + expect(resolved.replace(/\\/g, '/')).toContain('node_modules/@anthropic-ai/claude-code/bin/claude.js'); + } finally { + Object.defineProperty(process, 'platform', { value: origPlatform }); + process.env.PATH = origPath; + if (origAppData === undefined) delete process.env.APPDATA; else process.env.APPDATA = origAppData; + if (origPathExt === undefined) delete process.env.PATHEXT; else process.env.PATHEXT = origPathExt; + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); +}); diff --git a/test/context/summary-compressor-serial.test.ts b/test/context/summary-compressor-serial.test.ts index d131a44c7..531d449b8 100644 --- a/test/context/summary-compressor-serial.test.ts +++ b/test/context/summary-compressor-serial.test.ts @@ -1,4 +1,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; import type { CompressionInput } from 
'../../src/context/summary-compressor.js'; /** @@ -155,4 +158,51 @@ describe('summary-compressor — concurrent compressWithSdk calls serialize', () state.order.filter((e) => e.startsWith('start:')).length, ); }); + + it('passes a resolved Claude path into direct SDK compression on Windows', async () => { + const origPlatform = process.platform; + const origPath = process.env.PATH; + const origPathExt = process.env.PATHEXT; + const origAppData = process.env.APPDATA; + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'summary-compressor-claude-path-')); + const npmDir = path.join(tmpDir, 'npm'); + const scriptDir = path.join(npmDir, 'node_modules', '@anthropic-ai', 'claude-code', 'bin'); + fs.mkdirSync(scriptDir, { recursive: true }); + fs.writeFileSync(path.join(scriptDir, 'claude.js'), '#!/usr/bin/env node\n'); + fs.writeFileSync( + path.join(npmDir, 'claude.cmd'), + '@ECHO off\r\n' + + 'CALL :find_dp0\r\n' + + 'endLocal & goto #_undefined_# 2>NUL || title %COMSPEC% & "%_prog%" "%dp0%\\node_modules\\@anthropic-ai\\claude-code\\bin\\claude.js" %*\r\n', + ); + + Object.defineProperty(process, 'platform', { value: 'win32' }); + process.env.PATH = ''; + process.env.APPDATA = tmpDir; + process.env.PATHEXT = '.com;.exe;.bat;.cmd'; + + queryMock.mockImplementation(async function* (arg: { options?: Record }) { + expect(String(arg.options?.pathToClaudeCodeExecutable ?? 
'').replace(/\\/g, '/')) + .toContain('node_modules/@anthropic-ai/claude-code/bin/claude.js'); + yield { + type: 'assistant', + message: { + content: [{ type: 'text', text: 'SUMMARY' }], + }, + }; + }); + + try { + const { compressWithSdk } = await import('../../src/context/summary-compressor.js'); + const result = await compressWithSdk(makeInput('windows-path')); + expect(result.summary).toBe('SUMMARY'); + expect(result.fromSdk).toBe(true); + } finally { + Object.defineProperty(process, 'platform', { value: origPlatform }); + process.env.PATH = origPath; + if (origAppData === undefined) delete process.env.APPDATA; else process.env.APPDATA = origAppData; + if (origPathExt === undefined) delete process.env.PATHEXT; else process.env.PATHEXT = origPathExt; + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); }); diff --git a/test/daemon/codex-watcher-tail-history.test.ts b/test/daemon/codex-watcher-tail-history.test.ts new file mode 100644 index 000000000..528cbe0f2 --- /dev/null +++ b/test/daemon/codex-watcher-tail-history.test.ts @@ -0,0 +1,123 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; + +const mocks = vi.hoisted(() => ({ + emit: vi.fn(), +})); + +vi.mock('../../src/daemon/timeline-emitter.js', () => ({ + timelineEmitter: { + emit: mocks.emit, + }, +})); + +vi.mock('../../src/util/logger.js', () => ({ + default: { debug: vi.fn(), warn: vi.fn(), info: vi.fn(), error: vi.fn() }, +})); + +vi.mock('../../src/daemon/memory-inject.js', () => ({ + buildSessionBootstrapContext: vi.fn(async () => ''), + buildCodexMemoryEntry: vi.fn(() => ''), + readProcessedMemoryItems: vi.fn(async () => []), +})); + +vi.mock('../../src/context/shared-context-flags.js', () => ({ + legacyInjectionDisabled: vi.fn(() => true), +})); + +vi.mock('../../src/daemon/memory-context-timeline.js', () => ({ + 
buildMemoryContextTimelinePayload: vi.fn(() => null), +})); + +vi.mock('../../src/store/session-store.js', () => ({ + updateSessionState: vi.fn(), +})); + +vi.mock('../../src/util/model-context.js', () => ({ + resolveContextWindow: vi.fn(() => 200000), +})); + +vi.mock('../../src/daemon/watcher-controls.js', () => ({ + registerWatcherControl: vi.fn(), + unregisterWatcherControl: vi.fn(), +})); + +describe('codex-watcher tail history replay', () => { + const originalHome = process.env.HOME; + const originalUserProfile = process.env.USERPROFILE; + let tempHome: string; + + beforeEach(() => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-21T12:00:00Z')); + mocks.emit.mockClear(); + tempHome = mkdtempSync(join(tmpdir(), 'imcodes-codex-tail-')); + process.env.HOME = tempHome; + process.env.USERPROFILE = tempHome; + }); + + afterEach(async () => { + const { stopWatching, resetParseStateForTests } = await import('../../src/daemon/codex-watcher.js'); + stopWatching('codex-tail-session'); + resetParseStateForTests(); + vi.useRealTimers(); + vi.resetModules(); + if (originalHome === undefined) delete process.env.HOME; + else process.env.HOME = originalHome; + if (originalUserProfile === undefined) delete process.env.USERPROFILE; + else process.env.USERPROFILE = originalUserProfile; + rmSync(tempHome, { recursive: true, force: true }); + }); + + it('replays the tail of oversized rollout history instead of the head', async () => { + const sessionDir = join(tempHome, '.codex', 'sessions', '2026', '04', '21'); + mkdirSync(sessionDir, { recursive: true }); + const projectDir = join(tempHome, 'project'); + mkdirSync(projectDir, { recursive: true }); + const rolloutPath = join(sessionDir, 'rollout-2026-04-21T12-00-00-12345678-1234-1234-1234-123456789abc.jsonl'); + + const largePayload = 'x'.repeat(2048); + const lines = [ + JSON.stringify({ + timestamp: '2026-04-21T12:00:00.000Z', + type: 'session_meta', + payload: { + id: '12345678-1234-1234-1234-123456789abc', + cwd: 
projectDir, + cli_version: '0.118.0', + source: 'cli', + model_provider: 'openai', + }, + }), + ...Array.from({ length: 700 }, (_, index) => JSON.stringify({ + timestamp: `2026-04-21T12:${String(Math.floor(index / 60)).padStart(2, '0')}:${String(index % 60).padStart(2, '0')}.000Z`, + type: 'event_msg', + payload: { + type: 'user_message', + message: `tail-message-${index}:${largePayload}`, + images: [], + local_images: [], + }, + })), + ]; + writeFileSync(rolloutPath, lines.join('\n') + '\n', 'utf8'); + + const { startWatching, stopWatching } = await import('../../src/daemon/codex-watcher.js'); + await startWatching('codex-tail-session', projectDir); + + await vi.waitFor(() => { + expect(mocks.emit.mock.calls.length).toBeGreaterThan(0); + }); + + const payloads = mocks.emit.mock.calls + .filter((call) => call[1] === 'user.message') + .map((call) => String((call[2] as { text?: unknown }).text ?? '')); + + expect(payloads.some((text) => text.startsWith('tail-message-699:'))).toBe(true); + expect(payloads.some((text) => text.startsWith('tail-message-0:'))).toBe(false); + + stopWatching('codex-tail-session'); + }); +}); diff --git a/test/daemon/gemini-watcher-tracking.test.ts b/test/daemon/gemini-watcher-tracking.test.ts index de26d18fd..34d7ef171 100644 --- a/test/daemon/gemini-watcher-tracking.test.ts +++ b/test/daemon/gemini-watcher-tracking.test.ts @@ -22,6 +22,7 @@ vi.mock('../../src/store/session-store.js', () => ({ vi.mock('fs/promises', () => ({ readFile: vi.fn(), + open: vi.fn(), stat: vi.fn(), readdir: vi.fn().mockResolvedValue([]), watch: vi.fn(), @@ -197,4 +198,31 @@ describe('Gemini watcher — rotation preserves message processing', () => { expect(state.seenCount).toBe(6); expect(state.lastUpdated).toBe('ts-rotated'); }); + + it('tail-parses oversized conversation files instead of full readFile', async () => { + const tailChunk = [ + ',{"type":"user","content":[{"text":"hello from user"}],"timestamp":"2026-01-01T00:00:00Z"}', + 
',{"type":"gemini","content":"reply from gemini","timestamp":"2026-01-01T00:00:01Z"}', + ']}', + ].join(''); + const tailBuffer = Buffer.from(tailChunk, 'utf8'); + const fileHandle = { + read: vi.fn(async (buffer: Buffer) => { + tailBuffer.copy(buffer, 0); + return { bytesRead: tailBuffer.length, buffer }; + }), + close: vi.fn(async () => undefined), + }; + + vi.mocked(fs.stat).mockResolvedValue({ mtimeMs: 4000, size: 10 * 1024 * 1024, ino: 3 } as any); + vi.mocked(fs.open).mockResolvedValue(fileHandle as any); + + const state = makeState(); + await pollTick('test', state); + + expect(fs.readFile).not.toHaveBeenCalled(); + expect(fs.open).toHaveBeenCalledWith('/tmp/session.json', 'r'); + expect(state._oversizedRecentMessageIds).toHaveLength(2); + expect(state.lastConversationStatus).toBe('idle'); + }); }); diff --git a/test/daemon/supervision-automation.test.ts b/test/daemon/supervision-automation.test.ts index d420de40e..ea6471ad3 100644 --- a/test/daemon/supervision-automation.test.ts +++ b/test/daemon/supervision-automation.test.ts @@ -64,7 +64,12 @@ async function cleanupProjectDir() { projectDir = null; } -async function seedSession(mode: 'supervised' | 'supervised_audit' = 'supervised_audit', withOpenSpecChange = false, maxAuditLoops = 2) { +async function seedSession( + mode: 'supervised' | 'supervised_audit' = 'supervised_audit', + withOpenSpecChange = false, + maxAuditLoops = 2, + overrides: Record = {}, +) { const snapshot = normalizeSessionSupervisionSnapshot({ mode: mode === 'supervised' ? 
SUPERVISION_MODE.SUPERVISED : SUPERVISION_MODE.SUPERVISED_AUDIT, backend: 'codex-sdk', @@ -75,6 +80,7 @@ async function seedSession(mode: 'supervised' | 'supervised_audit' = 'supervised auditMode: 'audit', maxAuditLoops, taskRunPromptVersion: 'task_run_status_v1', + ...overrides, }); const seededProjectDir = await seedProjectDir(withOpenSpecChange); upsertSession({ @@ -200,6 +206,103 @@ describe('SupervisionAutomation', () => { ])); }); + it('stops after the configured repeated continue streak for the same bucket', async () => { + const snapshot = await seedSession('supervised', false, 2, { + maxAutoContinueStreak: 2, + maxAutoContinueTotal: 0, + }); + mockSupervisionDecide + .mockResolvedValueOnce({ decision: 'continue', reason: 'write tests for the missing cases', confidence: 0.7 }) + .mockResolvedValueOnce({ decision: 'continue', reason: 'write tests for edge cases too', confidence: 0.7 }) + .mockResolvedValueOnce({ decision: 'continue', reason: 'write tests for regressions as well', confidence: 0.7 }); + + supervisionAutomation.init(); + supervisionAutomation.registerTaskIntent('deck_supervision_brain', 'cmd-streak', 'implement the feature', snapshot); + beginRun('cmd-streak', 'implement the feature'); + + completeTurn('implemented the code'); + await sleep(25); + completeTurn('added a first batch of tests'); + await sleep(25); + completeTurn('added another batch of tests'); + await sleep(25); + + expect(mockTransportRuntime.send).toHaveBeenCalledTimes(2); + expect(supervisionAutomation.getActiveRun('deck_supervision_brain')).toBeUndefined(); + expect(timelineEmitter.replay('deck_supervision_brain', 0).events).toEqual(expect.arrayContaining([ + expect.objectContaining({ + type: 'assistant.text', + payload: expect.objectContaining({ + automationKind: 'supervision-warning', + text: '⚠️ Automation reached the repeated auto-continue limit (2) for test_verify; handing control back to the human.', + }), + }), + ])); + }); + + it('allows different continue types until 
the hard total limit is reached', async () => { + const snapshot = await seedSession('supervised', false, 2, { + maxAutoContinueStreak: 2, + maxAutoContinueTotal: 2, + }); + mockSupervisionDecide + .mockResolvedValueOnce({ decision: 'continue', reason: 'write missing tests', confidence: 0.7 }) + .mockResolvedValueOnce({ decision: 'continue', reason: 'restart the daemon to pick up the config', confidence: 0.7 }) + .mockResolvedValueOnce({ decision: 'continue', reason: 'inspect the logs again', confidence: 0.7 }); + + supervisionAutomation.init(); + supervisionAutomation.registerTaskIntent('deck_supervision_brain', 'cmd-total', 'implement the feature', snapshot); + beginRun('cmd-total', 'implement the feature'); + + completeTurn('implemented the code'); + await sleep(25); + completeTurn('added tests'); + await sleep(25); + completeTurn('restarted the daemon'); + await sleep(25); + + expect(mockTransportRuntime.send).toHaveBeenCalledTimes(2); + expect(supervisionAutomation.getActiveRun('deck_supervision_brain')).toBeUndefined(); + expect(timelineEmitter.replay('deck_supervision_brain', 0).events).toEqual(expect.arrayContaining([ + expect.objectContaining({ + type: 'assistant.text', + payload: expect.objectContaining({ + automationKind: 'supervision-warning', + text: '⚠️ Automation reached the auto-continue hard limit (2); handing control back to the human.', + }), + }), + ])); + }); + + it('treats zero auto-continue limits as unlimited', async () => { + const snapshot = await seedSession('supervised', false, 2, { + maxAutoContinueStreak: 0, + maxAutoContinueTotal: 0, + }); + mockSupervisionDecide + .mockResolvedValueOnce({ decision: 'continue', reason: 'write missing tests', confidence: 0.7 }) + .mockResolvedValueOnce({ decision: 'continue', reason: 'write more missing tests', confidence: 0.7 }) + .mockResolvedValueOnce({ decision: 'continue', reason: 'write final missing tests', confidence: 0.7 }); + + supervisionAutomation.init(); + 
supervisionAutomation.registerTaskIntent('deck_supervision_brain', 'cmd-unlimited', 'implement the feature', snapshot); + beginRun('cmd-unlimited', 'implement the feature'); + + completeTurn('implemented the code'); + await sleep(25); + completeTurn('added a first batch of tests'); + await sleep(25); + completeTurn('added a second batch of tests'); + await sleep(25); + + expect(mockTransportRuntime.send).toHaveBeenCalledTimes(3); + expect(supervisionAutomation.getActiveRun('deck_supervision_brain')).toMatchObject({ + continueLoops: 3, + continueStreakCount: 3, + lastContinueBucket: 'test_verify', + }); + }); + it('emits and clears a supervision waiting status around completion evaluation', async () => { const snapshot = await seedSession('supervised'); diff --git a/test/daemon/timeline-store.tail-truncate.test.ts b/test/daemon/timeline-store.tail-truncate.test.ts new file mode 100644 index 000000000..b81ba08a7 --- /dev/null +++ b/test/daemon/timeline-store.tail-truncate.test.ts @@ -0,0 +1,81 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { join } from 'path'; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; + +describe('timeline-store truncate', () => { + const originalHome = process.env.HOME; + const originalUserProfile = process.env.USERPROFILE; + let tempHome: string | null = null; + + afterEach(() => { + vi.restoreAllMocks(); + vi.resetModules(); + if (originalHome === undefined) delete process.env.HOME; + else process.env.HOME = originalHome; + if (originalUserProfile === undefined) delete process.env.USERPROFILE; + else process.env.USERPROFILE = originalUserProfile; + if (tempHome) rmSync(tempHome, { recursive: true, force: true }); + tempHome = null; + }); + + it('keeps the newest lines without readFileSync on oversized history files', async () => { + tempHome = mkdtempSync(join(tmpdir(), 'imcodes-timeline-store-')); + process.env.HOME = tempHome; + 
process.env.USERPROFILE = tempHome; + + vi.doMock('fs', async () => { + const actual = await vi.importActual('fs'); + return { + ...actual, + readFileSync: vi.fn(() => { + throw new Error('timelineStore.truncate should not call readFileSync'); + }), + }; + }); + const { timelineStore } = await import('../../src/daemon/timeline-store.js'); + + const filePath = join(tempHome, '.imcodes', 'timeline', 'oversized_session.jsonl'); + mkdirSync(join(tempHome, '.imcodes', 'timeline'), { recursive: true }); + const lines = Array.from({ length: 6002 }, (_, index) => JSON.stringify({ + seq: index, + payload: 'x'.repeat(512), + })); + writeFileSync(filePath, lines.join('\n') + '\n', 'utf8'); + + timelineStore.truncate('oversized_session', 5000); + + const kept = readFileSync(filePath, 'utf8').trimEnd().split('\n'); + expect(kept).toHaveLength(5000); + expect(JSON.parse(kept[0]!).seq).toBe(1002); + expect(JSON.parse(kept[kept.length - 1]!).seq).toBe(6001); + }); + + it('reads the tail of oversized timeline history and reports the latest event from the tail', async () => { + tempHome = mkdtempSync(join(tmpdir(), 'imcodes-timeline-store-')); + process.env.HOME = tempHome; + process.env.USERPROFILE = tempHome; + + const { timelineStore } = await import('../../src/daemon/timeline-store.js'); + + const filePath = join(tempHome, '.imcodes', 'timeline', 'tail_read_session.jsonl'); + mkdirSync(join(tempHome, '.imcodes', 'timeline'), { recursive: true }); + const lines = Array.from({ length: 6200 }, (_, index) => JSON.stringify({ + sessionId: 'tail_read_session', + seq: index + 1, + epoch: 1, + ts: index + 1, + type: 'assistant.text', + payload: { text: `message-${index}` }, + })); + writeFileSync(filePath, lines.join('\n') + '\n', 'utf8'); + + const events = timelineStore.read('tail_read_session', { limit: 50 }); + expect(events).toHaveLength(50); + expect(events[0]?.seq).toBe(6151); + expect(events[events.length - 1]?.seq).toBe(6200); + + const latest = 
timelineStore.getLatest('tail_read_session'); + expect(latest).toEqual({ epoch: 1, seq: 6200 }); + }); +}); diff --git a/test/supervision-config.test.ts b/test/supervision-config.test.ts index dbeea6404..4ee86bbc8 100644 --- a/test/supervision-config.test.ts +++ b/test/supervision-config.test.ts @@ -4,6 +4,8 @@ import { DEFAULT_PRIMARY_CONTEXT_MODEL } from '../shared/context-model-defaults. import { AUDIT_VERDICT_MARKERS, DEFAULT_SUPERVISION_BACKEND, + DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK, + DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL, SUPERVISION_AUDIT_MODES, SUPERVISION_CONTRACT_IDS, SUPERVISION_DEFAULT_AUDIT_MODE, @@ -42,6 +44,8 @@ describe('supervision config helpers', () => { expect(config.model).toBe(CODEX_MODEL_IDS[0]); expect(config.timeoutMs).toBe(DEFAULT_SUPERVISION_TIMEOUT_MS); expect(config.promptVersion).toBe(SUPERVISION_DEFAULT_PROMPT_VERSION); + expect(config.maxAutoContinueStreak).toBe(DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK); + expect(config.maxAutoContinueTotal).toBe(DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL); }); it('falls back to the backend default model when the model is invalid', () => { @@ -79,11 +83,52 @@ describe('supervision config helpers', () => { expect(snapshot.promptVersion).toBe(SUPERVISION_CONTRACT_IDS.DECISION_REPAIR); expect(snapshot.customInstructions).toBe('Prefer tests before complete.'); expect(snapshot.maxParseRetries).toBe(2); + expect(snapshot.maxAutoContinueStreak).toBe(DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK); + expect(snapshot.maxAutoContinueTotal).toBe(DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL); expect(snapshot.auditMode).toBe('audit>plan'); expect(snapshot.maxAuditLoops).toBe(3); expect(snapshot.taskRunPromptVersion).toBe(SUPERVISION_CONTRACT_IDS.TASK_RUN_STATUS); }); + it('accepts zero auto-continue limits and preserves them in snapshots', () => { + const snapshot = normalizeSessionSupervisionSnapshot({ + mode: SUPERVISION_MODE.SUPERVISED, + backend: 'codex-sdk', + model: CODEX_MODEL_IDS[0], + 
maxAutoContinueStreak: 0, + maxAutoContinueTotal: 0, + }); + + expect(snapshot.maxAutoContinueStreak).toBe(0); + expect(snapshot.maxAutoContinueTotal).toBe(0); + }); + + it('parses sparse persisted snapshots by filling optional tuning defaults', () => { + const snapshot = extractSessionSupervisionSnapshot({ + supervision: { + mode: SUPERVISION_MODE.SUPERVISED_AUDIT, + backend: 'codex-sdk', + model: CODEX_MODEL_IDS[0], + timeoutMs: 12_000, + promptVersion: SUPERVISION_CONTRACT_IDS.DECISION, + }, + }); + + expect(snapshot).toMatchObject({ + mode: SUPERVISION_MODE.SUPERVISED_AUDIT, + backend: 'codex-sdk', + model: CODEX_MODEL_IDS[0], + timeoutMs: 12_000, + promptVersion: SUPERVISION_CONTRACT_IDS.DECISION, + maxParseRetries: 1, + maxAutoContinueStreak: DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK, + maxAutoContinueTotal: DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL, + auditMode: SUPERVISION_DEFAULT_AUDIT_MODE, + maxAuditLoops: 2, + taskRunPromptVersion: SUPERVISION_DEFAULT_TASK_RUN_PROMPT_VERSION, + }); + }); + it('flags invalid persisted supervision snapshots instead of silently activating normalized automation', () => { const transportConfig = { keep: true, @@ -95,6 +140,8 @@ describe('supervision config helpers', () => { promptVersion: '', customInstructions: { invalid: true }, maxParseRetries: 0, + maxAutoContinueStreak: -1, + maxAutoContinueTotal: -1, auditMode: 'not-an-audit-mode' as never, maxAuditLoops: 0, taskRunPromptVersion: '', @@ -245,6 +292,22 @@ describe('supervision config helpers', () => { expect(issues).toContain('invalid_custom_instructions_override'); }); + it('surfaces invalid auto-continue limit issues for negative values', () => { + const issues = getSessionSupervisionSnapshotIssues({ + mode: SUPERVISION_MODE.SUPERVISED, + backend: 'codex-sdk', + model: CODEX_MODEL_IDS[0], + timeoutMs: 12_000, + promptVersion: SUPERVISION_DEFAULT_PROMPT_VERSION, + maxParseRetries: 1, + maxAutoContinueStreak: -1, + maxAutoContinueTotal: -2, + }); + + 
expect(issues).toContain('invalid_max_auto_continue_streak'); + expect(issues).toContain('invalid_max_auto_continue_total'); + }); + it('round-trips globalCustomInstructions cache on the session snapshot', () => { const snapshot = normalizeSessionSupervisionSnapshot({ mode: SUPERVISION_MODE.SUPERVISED, diff --git a/web/src/app.tsx b/web/src/app.tsx index 28c5f0afb..b0d767b62 100644 --- a/web/src/app.tsx +++ b/web/src/app.tsx @@ -224,11 +224,11 @@ export function App() { const [mobileHideTabBar, setMobileHideTabBar] = useState(() => localStorage.getItem('mobile_hide_tab_bar') === '1'); const [sidebarCollapsed, setSidebarCollapsed] = useState(() => loadSidebarCollapsed()); const handleToggleSidebar = useCallback(() => { - setSidebarCollapsed((prev) => { - saveSidebarCollapsed(!prev); - return !prev; - }); + setSidebarCollapsed((prev) => !prev); }, []); + useEffect(() => { + saveSidebarCollapsed(sidebarCollapsed); + }, [sidebarCollapsed]); const [showDesktopFileBrowser, setShowDesktopFileBrowser] = useState(false); const [showDesktopLocalWebPreview, setShowDesktopLocalWebPreview] = useState(false); const [localWebPreviewPort, setLocalWebPreviewPort] = useState(''); diff --git a/web/src/components/SessionControls.tsx b/web/src/components/SessionControls.tsx index 7f7aded04..123f6aef8 100644 --- a/web/src/components/SessionControls.tsx +++ b/web/src/components/SessionControls.tsx @@ -121,10 +121,33 @@ type ModelChoice = 'opus[1M]' | 'sonnet' | 'haiku'; const INLINE_PASTE_TEXT_CHAR_LIMIT = 1200; +type ComposerAttachment = { path: string; name: string }; + function buildPastedTextFileName(now = new Date()): string { const compact = now.toISOString().replace(/[:.]/g, '-'); return `pasted-text-${compact}.txt`; } + +function parseStoredComposerAttachments(raw: string | null): ComposerAttachment[] { + if (!raw) return []; + try { + const parsed = JSON.parse(raw); + if (!Array.isArray(parsed)) return []; + return parsed.flatMap((entry) => { + if (!entry || typeof entry !== 
'object') return []; + const path = typeof (entry as { path?: unknown }).path === 'string' + ? (entry as { path: string }).path.trim() + : ''; + const name = typeof (entry as { name?: unknown }).name === 'string' + ? (entry as { name: string }).name.trim() + : ''; + if (!path || !name) return []; + return [{ path, name }]; + }); + } catch { + return []; + } +} type CodexModelChoice = 'gpt-5.4' | 'gpt-5.4-mini' | 'gpt-5.2'; type QwenModelChoice = string; type P2pMode = string; // 'solo' | single modes | combo pipelines like 'brainstorm>discuss>plan' | typeof P2P_CONFIG_MODE @@ -458,13 +481,14 @@ export function SessionControls({ ws, activeSession, inputRef, onAfterAction, on // History navigation state const histIdxRef = useRef(-1); // -1 = not navigating; 0 = most recent const draftRef = useRef(''); // saved unsent text while navigating + const attachmentDraftRef = useRef([]); // File upload state const fileInputRef = useRef(null); const [uploading, setUploading] = useState(false); const [uploadProgress, setUploadProgress] = useState(0); const [uploadError, setUploadError] = useState(null); const [sendWarning, setSendWarning] = useState(null); - const [attachments, setAttachments] = useState>([]); + const [attachments, setAttachments] = useState([]); const sendWarningTimerRef = useRef | null>(null); const [localTransportConfig, setLocalTransportConfig] = useState | null>(activeSession?.transportConfig ?? null); @@ -509,6 +533,7 @@ export function SessionControls({ ws, activeSession, inputRef, onAfterAction, on // Persist input draft across unmount/remount (sub-session minimize/restore) const draftKey = activeSession ? `rcc_draft_${activeSession.name}` : null; + const attachmentDraftKey = activeSession ? 
`rcc_draft_attachments_${activeSession.name}` : null; useEffect(() => { if (!draftKey || !divRef.current) return; const saved = sessionStorage.getItem(draftKey); @@ -522,6 +547,38 @@ export function SessionControls({ ws, activeSession, inputRef, onAfterAction, on }; }, [draftKey]); // eslint-disable-line react-hooks/exhaustive-deps + useEffect(() => { + if (!attachmentDraftKey) { + setAttachments([]); + attachmentDraftRef.current = []; + return; + } + const saved = parseStoredComposerAttachments(sessionStorage.getItem(attachmentDraftKey)); + setAttachments(saved); + attachmentDraftRef.current = saved; + return () => { + try { + if (attachmentDraftRef.current.length > 0) { + sessionStorage.setItem(attachmentDraftKey, JSON.stringify(attachmentDraftRef.current)); + } + else sessionStorage.removeItem(attachmentDraftKey); + } catch { + /* ignore */ + } + }; + }, [attachmentDraftKey]); + + useEffect(() => { + attachmentDraftRef.current = attachments; + if (!attachmentDraftKey) return; + try { + if (attachments.length > 0) sessionStorage.setItem(attachmentDraftKey, JSON.stringify(attachments)); + else sessionStorage.removeItem(attachmentDraftKey); + } catch { + /* ignore */ + } + }, [attachmentDraftKey, attachments]); + useEffect(() => () => { if (sendWarningTimerRef.current) clearTimeout(sendWarningTimerRef.current); }, []); @@ -884,6 +941,8 @@ export function SessionControls({ ws, activeSession, inputRef, onAfterAction, on model: defaults.model, timeoutMs: defaults.timeoutMs, promptVersion: defaults.promptVersion, + maxAutoContinueStreak: defaults.maxAutoContinueStreak, + maxAutoContinueTotal: defaults.maxAutoContinueTotal, }; } @@ -1506,6 +1565,7 @@ export function SessionControls({ ws, activeSession, inputRef, onAfterAction, on histIdxRef.current = -1; draftRef.current = ''; if (draftKey) sessionStorage.removeItem(draftKey); + if (attachmentDraftKey) sessionStorage.removeItem(attachmentDraftKey); } return; } @@ -1547,8 +1607,9 @@ export function SessionControls({ ws, 
activeSession, inputRef, onAfterAction, on histIdxRef.current = -1; draftRef.current = ''; if (draftKey) sessionStorage.removeItem(draftKey); + if (attachmentDraftKey) sessionStorage.removeItem(attachmentDraftKey); } - }, [activeSession, draftKey, editingQueuedMessageId, onRemoveQuote, onSend, quickData, quotes, sendQueuedMessageMutation, sendSessionMessage]); + }, [activeSession, attachmentDraftKey, draftKey, editingQueuedMessageId, onRemoveQuote, onSend, quickData, quotes, sendQueuedMessageMutation, sendSessionMessage]); const handleQueuedMessageEdit = useCallback((entry: { clientMessageId: string; text: string }) => { if (!isEditableQueuedEntry(entry)) return; diff --git a/web/src/components/SessionSettingsDialog.tsx b/web/src/components/SessionSettingsDialog.tsx index 88d1bd712..eb3ddb653 100644 --- a/web/src/components/SessionSettingsDialog.tsx +++ b/web/src/components/SessionSettingsDialog.tsx @@ -9,6 +9,8 @@ import { SESSION_AGENT_TYPES, TRANSPORT_SESSION_AGENT_TYPES, type SessionAgentTy import type { SharedContextRuntimeBackend } from '@shared/context-types.js'; import { doesSharedContextBackendSupportPresets, isKnownSharedContextModelForBackend } from '@shared/shared-context-runtime-config.js'; import { + DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK, + DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL, buildTransportConfigWithSupervision, DEFAULT_SUPERVISION_MAX_AUDIT_LOOPS, DEFAULT_SUPERVISION_MAX_PARSE_RETRIES, @@ -76,6 +78,8 @@ type SupervisionDraft = { */ customInstructionsOverride?: boolean; maxParseRetries?: number; + maxAutoContinueStreak?: number; + maxAutoContinueTotal?: number; auditMode?: SupervisionAuditMode; maxAuditLoops?: number; taskRunPromptVersion?: string; @@ -88,7 +92,7 @@ type SupervisionDraft = { // to decide merging. 
type SupervisionRuntimeDraft = Pick< SupervisionDraft, - 'backend' | 'model' | 'preset' | 'timeoutMs' | 'promptVersion' | 'customInstructions' + 'backend' | 'model' | 'preset' | 'timeoutMs' | 'promptVersion' | 'customInstructions' | 'maxAutoContinueStreak' | 'maxAutoContinueTotal' >; function timeoutMsToUiSeconds(timeoutMs: number | undefined): number { @@ -530,6 +534,8 @@ export function SessionSettingsDialog({ preset: resolvedDefaults.preset, timeoutMs: resolvedDefaults.timeoutMs, promptVersion: resolvedDefaults.promptVersion, + maxAutoContinueStreak: prev.maxAutoContinueStreak ?? resolvedDefaults.maxAutoContinueStreak ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK, + maxAutoContinueTotal: prev.maxAutoContinueTotal ?? resolvedDefaults.maxAutoContinueTotal ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL, maxParseRetries: prev.maxParseRetries ?? DEFAULT_SUPERVISION_MAX_PARSE_RETRIES, maxAuditLoops: prev.maxAuditLoops ?? DEFAULT_SUPERVISION_MAX_AUDIT_LOOPS, taskRunPromptVersion: prev.taskRunPromptVersion ?? TASK_RUN_PROMPT_VERSION, @@ -550,6 +556,8 @@ export function SessionSettingsDialog({ const supervisionCustomInstructions = typeof supervision.customInstructions === 'string' ? supervision.customInstructions : ''; const supervisionCustomInstructionsOverride = supervision.customInstructionsOverride === true; const supervisionParseRetries = supervision.maxParseRetries ?? DEFAULT_SUPERVISION_MAX_PARSE_RETRIES; + const supervisionAutoContinueStreak = supervision.maxAutoContinueStreak ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK; + const supervisionAutoContinueTotal = supervision.maxAutoContinueTotal ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL; const supervisionAuditMode = supervision.auditMode; const supervisionAuditLoops = supervision.maxAuditLoops ?? DEFAULT_SUPERVISION_MAX_AUDIT_LOOPS; const taskRunPromptVersion = supervision.taskRunPromptVersion ?? 
TASK_RUN_PROMPT_VERSION; @@ -561,6 +569,8 @@ export function SessionSettingsDialog({ const supervisorDefaultsPromptVersion = supervisorDefaults.promptVersion ?? SUPERVISION_PROMPT_VERSION; const supervisorDefaultsModelOptions = supervisorDefaultsBackend ? getSupervisionModelOptions(supervisorDefaultsBackend) : []; const supervisorDefaultsCustomInstructions = typeof supervisorDefaults.customInstructions === 'string' ? supervisorDefaults.customInstructions : ''; + const supervisorDefaultsAutoContinueStreak = supervisorDefaults.maxAutoContinueStreak ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK; + const supervisorDefaultsAutoContinueTotal = supervisorDefaults.maxAutoContinueTotal ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL; const supervisionPreset = typeof supervision.preset === 'string' ? supervision.preset : ''; const supervisorDefaultsPreset = typeof supervisorDefaults.preset === 'string' ? supervisorDefaults.preset : ''; // Gate preset picker visibility: needs a ws channel to fetch presets, a @@ -605,6 +615,8 @@ export function SessionSettingsDialog({ ? { globalCustomInstructions: supervisorDefaultsCustomInstructions.trim() } : {}), maxParseRetries: supervisionParseRetries, + maxAutoContinueStreak: supervisionAutoContinueStreak, + maxAutoContinueTotal: supervisionAutoContinueTotal, ...(isAuditMode ? { auditMode: supervisionAuditMode, @@ -618,6 +630,8 @@ export function SessionSettingsDialog({ supervision.mode, supervisionAuditLoops, supervisionAuditMode, + supervisionAutoContinueStreak, + supervisionAutoContinueTotal, supervisionBackend, supervisionCustomInstructions, supervisionCustomInstructionsOverride, @@ -681,6 +695,8 @@ export function SessionSettingsDialog({ timeoutMs: prev.timeoutMs ?? DEFAULT_SUPERVISION_TIMEOUT_MS, promptVersion: prev.promptVersion ?? SUPERVISION_PROMPT_VERSION, customInstructions: prev.customInstructions, + maxAutoContinueStreak: prev.maxAutoContinueStreak ?? 
DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK, + maxAutoContinueTotal: prev.maxAutoContinueTotal ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL, maxParseRetries: prev.maxParseRetries ?? DEFAULT_SUPERVISION_MAX_PARSE_RETRIES, auditMode: prev.auditMode, maxAuditLoops: prev.maxAuditLoops ?? DEFAULT_SUPERVISION_MAX_AUDIT_LOOPS, @@ -695,6 +711,8 @@ export function SessionSettingsDialog({ timeoutMs: prev.timeoutMs ?? DEFAULT_SUPERVISION_TIMEOUT_MS, promptVersion: prev.promptVersion ?? SUPERVISION_PROMPT_VERSION, customInstructions: prev.customInstructions, + maxAutoContinueStreak: prev.maxAutoContinueStreak ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK, + maxAutoContinueTotal: prev.maxAutoContinueTotal ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL, maxParseRetries: prev.maxParseRetries ?? DEFAULT_SUPERVISION_MAX_PARSE_RETRIES, auditMode: prev.auditMode, maxAuditLoops: prev.maxAuditLoops ?? DEFAULT_SUPERVISION_MAX_AUDIT_LOOPS, @@ -708,6 +726,8 @@ export function SessionSettingsDialog({ timeoutMs: prev.timeoutMs ?? DEFAULT_SUPERVISION_TIMEOUT_MS, promptVersion: prev.promptVersion ?? SUPERVISION_PROMPT_VERSION, customInstructions: prev.customInstructions, + maxAutoContinueStreak: prev.maxAutoContinueStreak ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK, + maxAutoContinueTotal: prev.maxAutoContinueTotal ?? DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL, maxParseRetries: prev.maxParseRetries ?? DEFAULT_SUPERVISION_MAX_PARSE_RETRIES, taskRunPromptVersion: prev.taskRunPromptVersion ?? TASK_RUN_PROMPT_VERSION, }; @@ -745,6 +765,8 @@ export function SessionSettingsDialog({ model: supervisorDefaultsModel.trim(), timeoutMs: supervisorDefaultsTimeout, promptVersion: supervisorDefaultsPromptVersion, + maxAutoContinueStreak: supervisorDefaultsAutoContinueStreak, + maxAutoContinueTotal: supervisorDefaultsAutoContinueTotal, // Optional free-text global supervision instructions. Empty string // is normalized to undefined by the shared helper. 
customInstructions: supervisorDefaultsCustomInstructions.trim() || undefined, @@ -836,9 +858,44 @@ export function SessionSettingsDialog({ onBackendChange={(nextBackend) => { setSupervisorDefaults((prev) => ({ ...prev, ...updateRuntimeDraft(prev, nextBackend) })); }} - onModelChange={(model) => setSupervisorDefaults((prev) => ({ ...prev, model }))} - onTimeoutChange={(seconds) => setSupervisorDefaults((prev) => ({ ...prev, timeoutMs: timeoutUiSecondsToMs(seconds) }))} - /> + onModelChange={(model) => setSupervisorDefaults((prev) => ({ ...prev, model }))} + onTimeoutChange={(seconds) => setSupervisorDefaults((prev) => ({ ...prev, timeoutMs: timeoutUiSecondsToMs(seconds) }))} + /> + +
+
+
{t('session.supervision.maxAutoContinueStreak')}
+ { + const value = Number.parseInt((e.target as HTMLInputElement).value, 10); + setSupervisorDefaults((prev) => ({ ...prev, maxAutoContinueStreak: Number.isFinite(value) && value >= 0 ? value : DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK })); + }} + style={{ width: '100%' }} + disabled={saving} + /> +
{t('session.supervision.maxAutoContinueStreakHelp')}
+
+
+
{t('session.supervision.maxAutoContinueTotal')}
+ { + const value = Number.parseInt((e.target as HTMLInputElement).value, 10); + setSupervisorDefaults((prev) => ({ ...prev, maxAutoContinueTotal: Number.isFinite(value) && value >= 0 ? value : DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL })); + }} + style={{ width: '100%' }} + disabled={saving} + /> +
{t('session.supervision.maxAutoContinueTotalHelp')}
+
+
{showDefaultsPresetPicker && ( setSupervision((prev) => ({ ...prev, timeoutMs: timeoutUiSecondsToMs(seconds) }))} /> +
+
+
{t('session.supervision.maxAutoContinueStreak')}
+ { + const value = Number.parseInt((e.target as HTMLInputElement).value, 10); + setSupervision((prev) => ({ ...prev, maxAutoContinueStreak: Number.isFinite(value) && value >= 0 ? value : DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_STREAK })); + }} + style={{ width: '100%' }} + disabled={saving} + /> +
{t('session.supervision.maxAutoContinueStreakHelp')}
+
+ +
+
{t('session.supervision.maxAutoContinueTotal')}
+ { + const value = Number.parseInt((e.target as HTMLInputElement).value, 10); + setSupervision((prev) => ({ ...prev, maxAutoContinueTotal: Number.isFinite(value) && value >= 0 ? value : DEFAULT_SUPERVISION_MAX_AUTO_CONTINUE_TOTAL })); + }} + style={{ width: '100%' }} + disabled={saving} + /> +
{t('session.supervision.maxAutoContinueTotalHelp')}
+
+
+ {showSessionPresetPicker && ( {t('session.supervision.summaryTimeout', { value: `${supervisionTimeoutSeconds} s` })} +
+ {t('session.supervision.summaryContinueLimits', { + streak: supervisionAutoContinueStreak, + total: supervisionAutoContinueTotal, + })} +
{t('session.supervision.summaryCustomInstructions', { value: supervisionCustomInstructions.trim() diff --git a/web/src/components/Sidebar.tsx b/web/src/components/Sidebar.tsx index 7dc473733..1197e004d 100644 --- a/web/src/components/Sidebar.tsx +++ b/web/src/components/Sidebar.tsx @@ -219,7 +219,8 @@ export function Sidebar({ collapsed, serverId, pinnedPanels: _pinnedPanels, onDr /** Load persisted collapsed state from localStorage */ export function loadSidebarCollapsed(): boolean { try { - return localStorage.getItem(LS_COLLAPSED) === 'true'; + const value = localStorage.getItem(LS_COLLAPSED); + return value === '1' || value === 'true'; } catch { return false; } @@ -228,6 +229,6 @@ export function loadSidebarCollapsed(): boolean { /** Persist collapsed state to localStorage */ export function saveSidebarCollapsed(collapsed: boolean): void { try { - localStorage.setItem(LS_COLLAPSED, String(collapsed)); + localStorage.setItem(LS_COLLAPSED, collapsed ? '1' : '0'); } catch { /* ignore */ } } diff --git a/web/src/hooks/useTimeline.ts b/web/src/hooks/useTimeline.ts index 1bbb8b095..bcc86ed2c 100644 --- a/web/src/hooks/useTimeline.ts +++ b/web/src/hooks/useTimeline.ts @@ -140,6 +140,8 @@ const ECHO_WINDOW_MS = 500; const USER_MSG_DEDUP_WINDOW_MS = 5_000; const PROVISIONAL_TRANSPORT_HISTORY_PREFIX = 'transport-history:'; const OPTIMISTIC_EVENT_ID_PREFIX = 'optimistic:'; +const TIMELINE_SNAPSHOT_STORAGE_PREFIX = 'rcc_timeline_snapshot:'; +const MAX_PERSISTED_SNAPSHOT_EVENTS = 50; // If no confirmation arrives within this window we auto-flip the pending bubble to // "failed" so the user can retry rather than stare at a perpetual spinner. 
const OPTIMISTIC_TIMEOUT_MS = 30_000; @@ -165,6 +167,7 @@ function getCachedEvents(cacheKey: string): TimelineEvent[] | undefined { function setCachedEvents(cacheKey: string, events: TimelineEvent[]): void { eventsCache.set(cacheKey, events); markCacheAccess(cacheKey); + persistTimelineSnapshot(cacheKey, events); const listeners = cacheListeners.get(cacheKey); if (listeners) { for (const listener of listeners) listener(events); @@ -210,6 +213,45 @@ function scopeCacheKey(serverId: string | null | undefined, sessionId: string): return serverId ? `${serverId}:${sessionId}` : sessionId; } +function getTimelineSnapshotStorageKey(cacheKey: string): string { + return `${TIMELINE_SNAPSHOT_STORAGE_PREFIX}${cacheKey}`; +} + +function loadPersistedTimelineSnapshot(cacheKey: string): TimelineEvent[] { + try { + const raw = localStorage.getItem(getTimelineSnapshotStorageKey(cacheKey)); + if (!raw) return []; + const parsed = JSON.parse(raw); + if (!Array.isArray(parsed)) return []; + return parsed.filter((event): event is TimelineEvent => ( + !!event + && typeof event === 'object' + && typeof (event as TimelineEvent).eventId === 'string' + && typeof (event as TimelineEvent).type === 'string' + && typeof (event as TimelineEvent).sessionId === 'string' + && typeof (event as TimelineEvent).ts === 'number' + && typeof (event as TimelineEvent).payload === 'object' + )); + } catch { + return []; + } +} + +function persistTimelineSnapshot(cacheKey: string, events: TimelineEvent[]): void { + try { + if (events.length === 0) { + localStorage.removeItem(getTimelineSnapshotStorageKey(cacheKey)); + return; + } + const tail = events.length > MAX_PERSISTED_SNAPSHOT_EVENTS + ? 
events.slice(events.length - MAX_PERSISTED_SNAPSHOT_EVENTS) + : events; + localStorage.setItem(getTimelineSnapshotStorageKey(cacheKey), JSON.stringify(tail)); + } catch { + // best-effort + } +} + function isProvisionalTransportHistoryEvent(event: TimelineEvent): boolean { return event.eventId.startsWith(PROVISIONAL_TRANSPORT_HISTORY_PREFIX); } @@ -300,6 +342,19 @@ export function __resetTimelineCacheForTests(): void { lastHttpBackfillOkAt.clear(); } +export function __clearPersistedTimelineSnapshotsForTests(): void { + try { + const keys: string[] = []; + for (let i = 0; i < localStorage.length; i++) { + const key = localStorage.key(i); + if (key?.startsWith(TIMELINE_SNAPSHOT_STORAGE_PREFIX)) keys.push(key); + } + for (const key of keys) localStorage.removeItem(key); + } catch { + // ignore + } +} + /** * Test-only entry point for the same wipe the app does on long-hide / * pageshow restore. Exposed so tests can verify the cooldown actually @@ -363,6 +418,7 @@ export function useTimeline( // IDB + memory cache key: scope by serverId to prevent cross-server pollution // when different servers share the same session name (e.g. deck_cd_brain). const cacheKey = sessionId ? scopeCacheKey(serverId, sessionId) : sessionId; + const wsConnected = !!ws?.connected; const cacheKeyRef = useRef(cacheKey); cacheKeyRef.current = cacheKey; const [events, setEvents] = useState([]); @@ -409,7 +465,7 @@ export function useTimeline( if (memCached && memCached.length > 0) { setEvents(memCached); setLoading(false); - if (ws?.connected) { + if (wsConnected) { setRefreshing(true); historyRequestIdRef.current = ws.sendTimelineHistoryRequest(sessionId, MAX_MEMORY_EVENTS); } @@ -421,11 +477,26 @@ export function useTimeline( return () => { cancelled = true; }; } + // 1.5 Synchronous localStorage snapshot — instant restore across full page + // reloads before IndexedDB/network complete. This is intentionally only a + // tail snapshot for first paint; IndexedDB remains the fuller local source. 
+ const localSnapshot = loadPersistedTimelineSnapshot(cacheKey!); + if (localSnapshot.length > 0) { + setCachedEvents(cacheKey!, localSnapshot); + setEvents((prev) => (prev === localSnapshot ? prev : localSnapshot)); + setLoading(false); + if (wsConnected) { + setRefreshing(true); + historyRequestIdRef.current = ws.sendTimelineHistoryRequest(sessionId, MAX_MEMORY_EVENTS); + } + fireHttpBackfillRef.current(200, { cooldownMs: MOUNT_BACKFILL_COOLDOWN_MS }); + } + // 2. Already loaded this session — skip reload (prevents flash-of-empty on minimize/restore) if (historyLoadedRef.current === cacheKey) { setLoading(false); // Just request incremental updates - if (ws?.connected) { + if (wsConnected) { setRefreshing(true); historyRequestIdRef.current = ws.sendTimelineHistoryRequest(sessionId, MAX_MEMORY_EVENTS); } @@ -437,7 +508,7 @@ export function useTimeline( } // 3. IndexedDB cache → daemon history (first load for this session in this page session) - setLoading(true); + if (localSnapshot.length === 0) setLoading(true); const load = async () => { const db = sharedDb; if (!db) return; @@ -456,7 +527,7 @@ export function useTimeline( setEvents((prev) => (prev === restored ? prev : restored)); setLoading(false); historyLoadedRef.current = cacheKeyRef.current; - if (ws?.connected) { + if (wsConnected) { setRefreshing(true); historyRequestIdRef.current = ws.sendTimelineHistoryRequest(sessionId, MAX_MEMORY_EVENTS); } @@ -469,7 +540,7 @@ export function useTimeline( seqRef.current = 0; if (cancelled) return; setEvents([]); - if (ws?.connected) { + if (wsConnected) { historyRequestIdRef.current = ws.sendTimelineHistoryRequest(sessionId); } else { setLoading(false); @@ -490,7 +561,7 @@ export function useTimeline( }; load().catch(() => {}); return () => { cancelled = true; }; - }, [sessionId, ws]); + }, [cacheKey, sessionId, ws, wsConnected]); // Map of commandId → optimistic eventId for O(1) lookup on command.ack / dedup. 
const optimisticIdsByCommandRef = useRef(new Map()); diff --git a/web/src/i18n/locales/en.json b/web/src/i18n/locales/en.json index 5a4b6d9c2..59eda797d 100644 --- a/web/src/i18n/locales/en.json +++ b/web/src/i18n/locales/en.json @@ -226,12 +226,17 @@ }, "selectAuditMode": "Select audit mode", "maxAuditLoops": "Max audit loops", + "maxAutoContinueStreak": "Max repeated auto-continues", + "maxAutoContinueStreakHelp": "Stops Auto after this many consecutive continue decisions of the same type. 0 = unlimited.", + "maxAutoContinueTotal": "Hard max auto-continues", + "maxAutoContinueTotalHelp": "Absolute cap on all auto-continue dispatches for one task run. 0 = unlimited.", "summaryTitle": "Resolved supervision", "summaryMode": "Mode: {{value}}", "summaryBackendModel": "Backend: {{backend}} · Model: {{model}}", "summaryUnset": "unset", "summaryDisabled": "Supervision off", "summaryTimeout": "Timeout: {{value}}", + "summaryContinueLimits": "Continue limits: streak {{streak}} · hard {{total}}", "summaryCustomInstructions": "Custom instructions: {{value}}", "summaryCustomInstructionsSet": "set", "summaryAudit": "Audit: {{auditMode}} · loops {{loops}}", diff --git a/web/src/i18n/locales/es.json b/web/src/i18n/locales/es.json index 783422afc..d1306bccd 100644 --- a/web/src/i18n/locales/es.json +++ b/web/src/i18n/locales/es.json @@ -226,12 +226,17 @@ }, "selectAuditMode": "Selecciona modo de auditoría", "maxAuditLoops": "Máx. ciclos de auditoría", + "maxAutoContinueStreak": "Máx. auto-continuaciones repetidas", + "maxAutoContinueStreakHelp": "Detiene Auto tras este número de decisiones continue consecutivas del mismo tipo. 0 = sin límite.", + "maxAutoContinueTotal": "Máx. duro de auto-continuaciones", + "maxAutoContinueTotalHelp": "Límite absoluto de todos los auto-continue en una sola ejecución de tarea. 
0 = sin límite.", "summaryTitle": "Supervisión resuelta", "summaryMode": "Modo: {{value}}", "summaryBackendModel": "Backend: {{backend}} · Modelo: {{model}}", "summaryUnset": "sin configurar", "summaryDisabled": "Supervisión desactivada", "summaryTimeout": "Tiempo límite: {{value}}", + "summaryContinueLimits": "Límites de continue: racha {{streak}} · duro {{total}}", "summaryCustomInstructions": "Instrucciones personalizadas: {{value}}", "summaryCustomInstructionsSet": "configuradas", "summaryAudit": "Auditoría: {{auditMode}} · ciclos {{loops}}", diff --git a/web/src/i18n/locales/ja.json b/web/src/i18n/locales/ja.json index 6530ed4c0..f99f0cc9b 100644 --- a/web/src/i18n/locales/ja.json +++ b/web/src/i18n/locales/ja.json @@ -226,12 +226,17 @@ }, "selectAuditMode": "監査モードを選択", "maxAuditLoops": "監査ループ上限", + "maxAutoContinueStreak": "同種 auto-continue 上限", + "maxAutoContinueStreakHelp": "同じ種類の continue が連続してこの回数に達すると Auto を停止します。0 は無制限です。", + "maxAutoContinueTotal": "auto-continue ハード上限", + "maxAutoContinueTotalHelp": "1 回のタスク実行で送れる auto-continue 全体の絶対上限です。0 は無制限です。", "summaryTitle": "解決済み監督設定", "summaryMode": "モード: {{value}}", "summaryBackendModel": "バックエンド: {{backend}} · モデル: {{model}}", "summaryUnset": "未設定", "summaryDisabled": "監督オフ", "summaryTimeout": "タイムアウト: {{value}}", + "summaryContinueLimits": "continue 制限: 同種 {{streak}} ・ ハード {{total}}", "summaryCustomInstructions": "カスタム指示: {{value}}", "summaryCustomInstructionsSet": "設定済み", "summaryAudit": "監査: {{auditMode}} · ループ {{loops}}", diff --git a/web/src/i18n/locales/ko.json b/web/src/i18n/locales/ko.json index ce32ced69..9da10c50c 100644 --- a/web/src/i18n/locales/ko.json +++ b/web/src/i18n/locales/ko.json @@ -226,12 +226,17 @@ }, "selectAuditMode": "감사 모드 선택", "maxAuditLoops": "최대 감사 루프", + "maxAutoContinueStreak": "동일 유형 auto-continue 한도", + "maxAutoContinueStreakHelp": "같은 유형의 continue가 연속으로 이 횟수에 도달하면 Auto를 중단합니다. 
0은 무제한입니다.", + "maxAutoContinueTotal": "auto-continue 하드 한도", + "maxAutoContinueTotalHelp": "한 번의 작업 실행에서 보낼 수 있는 전체 auto-continue 절대 상한입니다. 0은 무제한입니다.", "summaryTitle": "해결된 감독 설정", "summaryMode": "모드: {{value}}", "summaryBackendModel": "백엔드: {{backend}} · 모델: {{model}}", "summaryUnset": "설정 안 됨", "summaryDisabled": "감독 꺼짐", "summaryTimeout": "제한 시간: {{value}}", + "summaryContinueLimits": "continue 제한: 동일 유형 {{streak}} · 하드 {{total}}", "summaryCustomInstructions": "사용자 지정 지침: {{value}}", "summaryCustomInstructionsSet": "설정됨", "summaryAudit": "감사: {{auditMode}} · 루프 {{loops}}회", diff --git a/web/src/i18n/locales/ru.json b/web/src/i18n/locales/ru.json index c8a6bc930..4bf99a7a5 100644 --- a/web/src/i18n/locales/ru.json +++ b/web/src/i18n/locales/ru.json @@ -226,12 +226,17 @@ }, "selectAuditMode": "Выберите режим аудита", "maxAuditLoops": "Макс. циклов аудита", + "maxAutoContinueStreak": "Макс. повторяющихся auto-continue", + "maxAutoContinueStreakHelp": "Останавливает Auto после такого числа подряд идущих continue одного типа. 0 = без лимита.", + "maxAutoContinueTotal": "Жесткий лимит auto-continue", + "maxAutoContinueTotalHelp": "Абсолютный предел всех auto-continue в рамках одного прогона задачи. 
0 = без лимита.", "summaryTitle": "Итоговый контроль", "summaryMode": "Режим: {{value}}", "summaryBackendModel": "Бэкенд: {{backend}} · Модель: {{model}}", "summaryUnset": "не задано", "summaryDisabled": "Контроль выключен", "summaryTimeout": "Тайм-аут: {{value}}", + "summaryContinueLimits": "Лимиты continue: серия {{streak}} · жесткий {{total}}", "summaryCustomInstructions": "Пользовательские инструкции: {{value}}", "summaryCustomInstructionsSet": "заданы", "summaryAudit": "Аудит: {{auditMode}} · циклов {{loops}}", diff --git a/web/src/i18n/locales/zh-CN.json b/web/src/i18n/locales/zh-CN.json index 5f0ad3c5d..844e7b27f 100644 --- a/web/src/i18n/locales/zh-CN.json +++ b/web/src/i18n/locales/zh-CN.json @@ -226,12 +226,17 @@ }, "selectAuditMode": "选择审计模式", "maxAuditLoops": "最大审计循环", + "maxAutoContinueStreak": "同类自动继续上限", + "maxAutoContinueStreakHelp": "同一种 continue 类型连续命中到这个次数后停止自动继续。0 表示不限制。", + "maxAutoContinueTotal": "自动继续硬上限", + "maxAutoContinueTotalHelp": "单次任务运行内所有 auto-continue 的绝对上限。0 表示不限制。", "summaryTitle": "已解析的监督配置", "summaryMode": "模式:{{value}}", "summaryBackendModel": "后端:{{backend}} · 模型:{{model}}", "summaryUnset": "未设置", "summaryDisabled": "监督已关闭", "summaryTimeout": "超时:{{value}}", + "summaryContinueLimits": "继续限制:同类 {{streak}} · 硬上限 {{total}}", "summaryCustomInstructions": "自定义提示词:{{value}}", "summaryCustomInstructionsSet": "已设置", "summaryAudit": "审计:{{auditMode}} · 循环 {{loops}} 次", diff --git a/web/src/i18n/locales/zh-TW.json b/web/src/i18n/locales/zh-TW.json index 905193909..fd291b0ae 100644 --- a/web/src/i18n/locales/zh-TW.json +++ b/web/src/i18n/locales/zh-TW.json @@ -226,12 +226,17 @@ }, "selectAuditMode": "選擇稽核模式", "maxAuditLoops": "最大稽核循環", + "maxAutoContinueStreak": "同類自動繼續上限", + "maxAutoContinueStreakHelp": "同一種 continue 類型連續達到此次數後停止自動繼續。0 表示不限制。", + "maxAutoContinueTotal": "自動繼續硬上限", + "maxAutoContinueTotalHelp": "單次任務執行內所有 auto-continue 的絕對上限。0 表示不限制。", "summaryTitle": "已解析的監督設定", "summaryMode": "模式:{{value}}", "summaryBackendModel": 
"後端:{{backend}} · 模型:{{model}}", "summaryUnset": "未設定", "summaryDisabled": "監督已關閉", "summaryTimeout": "逾時:{{value}}", + "summaryContinueLimits": "繼續限制:同類 {{streak}} · 硬上限 {{total}}", "summaryCustomInstructions": "自訂提示詞:{{value}}", "summaryCustomInstructionsSet": "已設定", "summaryAudit": "稽核:{{auditMode}} · 循環 {{loops}} 次", diff --git a/web/test/components/SessionControls.test.tsx b/web/test/components/SessionControls.test.tsx index e73148034..3dd1e2bd2 100644 --- a/web/test/components/SessionControls.test.tsx +++ b/web/test/components/SessionControls.test.tsx @@ -1941,7 +1941,7 @@ afterEach(() => { })); }); - it('falls back to Settings when heavy mode needs audit config', async () => { + it('upgrades supervised mode to audit mode with default audit config', async () => { const ws = makeWs(); const onSettings = vi.fn(); render( @@ -1971,9 +1971,18 @@ afterEach(() => { fireEvent.click(screen.getByRole('button', { name: /supervised_audit$/i })); await waitFor(() => { - expect(onSettings).toHaveBeenCalled(); + expect(patchSessionMock).toHaveBeenCalledWith('srv1', 'codex-sdk-session', expect.objectContaining({ + transportConfig: expect.objectContaining({ + supervision: expect.objectContaining({ + mode: 'supervised_audit', + auditMode: 'audit', + maxAuditLoops: 2, + taskRunPromptVersion: 'task_run_status_v1', + }), + }), + })); }); - expect(patchSessionMock).not.toHaveBeenCalled(); + expect(onSettings).not.toHaveBeenCalled(); }); it('falls back to Settings when heavy mode snapshot is present but audit config is invalid', async () => { @@ -2496,6 +2505,55 @@ afterEach(() => { }); }); + it('restores uploaded attachment badges when switching back to the same main session', async () => { + uploadFileMock.mockResolvedValue({ attachment: { daemonPath: '/tmp/persisted-attachment.txt' } }); + const ws = makeWs(); + const { rerender } = render( + , + ); + + const input = screen.getByRole('textbox') as HTMLDivElement; + fireEvent.paste(input, { + clipboardData: { + getData: (type: 
string) => type === 'text/plain' ? 'x'.repeat(1300) : '', + }, + }); + + await waitFor(() => { + expect(document.querySelector('.attachment-badge-name')?.textContent).toMatch(/^pasted-text-.*\.txt$/); + }); + const badgeName = document.querySelector('.attachment-badge-name')?.textContent ?? ''; + + rerender( + , + ); + + expect(document.querySelector('.attachment-badge-name')).toBeNull(); + + rerender( + , + ); + + await waitFor(() => { + expect(document.querySelector('.attachment-badge-name')?.textContent).toBe(badgeName); + }); + }); + it('blocks oversized plain-text paste when upload context is unavailable', async () => { render( ({ if (params?.value && typeof params.value === 'string') return `${leaf}:${params.value}`; if (params?.backend && params?.model) return `${leaf}:${params.backend}:${params.model}`; if (params?.auditMode && params?.loops != null) return `${leaf}:${params.auditMode}:${params.loops}`; + if (params?.streak != null && params?.total != null) return `${leaf}:${params.streak}:${params.total}`; if (params?.promptVersion) return `${leaf}:${params.promptVersion}`; return leaf; }, @@ -34,6 +35,16 @@ vi.mock('../../src/api.js', () => ({ import { SessionSettingsDialog } from '../../src/components/SessionSettingsDialog.js'; +function inputForLabel(label: string, index = 0): HTMLInputElement { + const labels = screen.getAllByText(label); + const container = labels[index]?.parentElement; + const input = container?.querySelector('input'); + if (!(input instanceof HTMLInputElement)) { + throw new Error(`Missing input for label ${label} at index ${index}`); + } + return input; +} + describe('SessionSettingsDialog supervision', () => { beforeEach(() => { vi.clearAllMocks(); @@ -139,6 +150,8 @@ describe('SessionSettingsDialog supervision', () => { model: CODEX_MODEL_IDS[0], timeoutMs: 18_000, promptVersion: 'supervision_decision_v1', + maxAutoContinueStreak: 4, + maxAutoContinueTotal: 9, }); render( @@ -161,6 +174,8 @@ describe('SessionSettingsDialog 
supervision', () => { fireEvent.change(screen.getAllByRole('combobox')[3]!, { target: { value: 'supervised' } }); expect(screen.getAllByDisplayValue('18').length).toBeGreaterThanOrEqual(2); + expect(screen.getAllByDisplayValue('4').length).toBeGreaterThanOrEqual(2); + expect(screen.getAllByDisplayValue('9').length).toBeGreaterThanOrEqual(2); fireEvent.click(screen.getByRole('button', { name: /save/i })); await waitFor(() => { @@ -195,6 +210,8 @@ describe('SessionSettingsDialog supervision', () => { promptVersion: 'supervision_decision_v1', customInstructions: 'Always prefer adding tests before claiming completion.', maxParseRetries: 1, + maxAutoContinueStreak: 2, + maxAutoContinueTotal: 8, auditMode: 'review>plan', maxAuditLoops: 3, taskRunPromptVersion: 'task_run_status_v1', @@ -208,11 +225,63 @@ describe('SessionSettingsDialog supervision', () => { expect(screen.getByText('summaryMode:supervised_audit')).toBeDefined(); expect(screen.getByText(`summaryBackendModel:codex_sdk:${CODEX_MODEL_IDS[0]}`)).toBeDefined(); expect(screen.getByText('summaryTimeout:9 s')).toBeDefined(); + expect(screen.getByText('summaryContinueLimits:2:8')).toBeDefined(); expect(screen.getByText('summaryCustomInstructions:summaryCustomInstructionsSet')).toBeDefined(); expect(screen.getByText('summaryAudit:review_plan:3')).toBeDefined(); expect(screen.getByText('summaryMeta:supervision_decision_v1')).toBeDefined(); }); + it('saves global auto-continue defaults together with the session override', async () => { + fetchSupervisorDefaultsMock.mockResolvedValue({ + backend: 'codex-sdk', + model: CODEX_MODEL_IDS[0], + timeoutMs: 12_000, + promptVersion: 'supervision_decision_v1', + maxAutoContinueStreak: 2, + maxAutoContinueTotal: 8, + }); + + render( + , + ); + + await waitFor(() => { + expect(fetchSupervisorDefaultsMock).toHaveBeenCalled(); + }); + + fireEvent.input(inputForLabel('maxAutoContinueStreak', 0), { target: { value: '5' } }); + fireEvent.input(inputForLabel('maxAutoContinueTotal', 0), 
{ target: { value: '11' } }); + fireEvent.change(screen.getAllByRole('combobox')[3]!, { target: { value: 'supervised' } }); + fireEvent.input(inputForLabel('maxAutoContinueStreak', 1), { target: { value: '3' } }); + fireEvent.input(inputForLabel('maxAutoContinueTotal', 1), { target: { value: '6' } }); + fireEvent.click(screen.getByRole('button', { name: /save/i })); + + await waitFor(() => { + expect(saveSupervisorDefaultsMock).toHaveBeenCalledWith(expect.objectContaining({ + maxAutoContinueStreak: 5, + maxAutoContinueTotal: 11, + })); + expect(patchSessionMock).toHaveBeenCalledWith('srv-1', 'deck_proj_brain', expect.objectContaining({ + transportConfig: expect.objectContaining({ + supervision: expect.objectContaining({ + maxAutoContinueStreak: 3, + maxAutoContinueTotal: 6, + }), + }), + })); + }); + }); + it('persists qwen preset selection via the preset picker when ws fetches presets', async () => { // Stub ws that records sent messages and lets the test dispatch a preset list. // Pattern (Set of handlers + `act`-wrapped dispatch) mirrors the existing diff --git a/web/test/sidebar-persistence.test.ts b/web/test/sidebar-persistence.test.ts new file mode 100644 index 000000000..cb8fcabf7 --- /dev/null +++ b/web/test/sidebar-persistence.test.ts @@ -0,0 +1,40 @@ +/** + * @vitest-environment jsdom + */ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('react-i18next', () => ({ + initReactI18next: { + type: '3rdParty', + init: () => {}, + }, + useTranslation: () => ({ + t: (key: string) => key, + }), +})); + +import { loadSidebarCollapsed, saveSidebarCollapsed } from '../src/components/Sidebar.js'; + +describe('sidebar collapsed persistence', () => { + beforeEach(() => { + localStorage.clear(); + }); + + it('loads both legacy and current persisted truthy values', () => { + localStorage.setItem('sidebar_collapsed', 'true'); + expect(loadSidebarCollapsed()).toBe(true); + + localStorage.setItem('sidebar_collapsed', '1'); + 
expect(loadSidebarCollapsed()).toBe(true); + }); + + it('persists collapsed state using stable 1/0 values', () => { + saveSidebarCollapsed(true); + expect(localStorage.getItem('sidebar_collapsed')).toBe('1'); + expect(loadSidebarCollapsed()).toBe(true); + + saveSidebarCollapsed(false); + expect(localStorage.getItem('sidebar_collapsed')).toBe('0'); + expect(loadSidebarCollapsed()).toBe(false); + }); +}); diff --git a/web/test/supervision-api.test.ts b/web/test/supervision-api.test.ts index b77c61418..f7c56cff0 100644 --- a/web/test/supervision-api.test.ts +++ b/web/test/supervision-api.test.ts @@ -2,7 +2,11 @@ * @vitest-environment jsdom */ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { SUPERVISION_USER_DEFAULT_PREF_KEY } from '@shared/supervision-config.js'; +import { + SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK, + SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL, + SUPERVISION_USER_DEFAULT_PREF_KEY, +} from '@shared/supervision-config.js'; import { CODEX_MODEL_IDS } from '../../src/shared/models/options.js'; import { fetchSupervisorDefaults, @@ -48,6 +52,8 @@ describe('supervision API helpers', () => { model: CODEX_MODEL_IDS[0], timeoutMs: 20_000, promptVersion: 'custom_prompt_v1', + maxAutoContinueStreak: SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK, + maxAutoContinueTotal: SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL, }); expect(fetchMock).toHaveBeenCalledWith( @@ -69,6 +75,8 @@ describe('supervision API helpers', () => { model: 'qwen3-coder-plus', timeoutMs: 15_000, promptVersion: 'supervision_decision_v1', + maxAutoContinueStreak: SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK, + maxAutoContinueTotal: SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL, }); expect(fetchMock).toHaveBeenCalledWith( @@ -81,6 +89,8 @@ describe('supervision API helpers', () => { model: 'qwen3-coder-plus', timeoutMs: 15_000, promptVersion: 'supervision_decision_v1', + maxAutoContinueStreak: SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_STREAK, + maxAutoContinueTotal: 
SUPERVISION_DEFAULT_MAX_AUTO_CONTINUE_TOTAL, }, }), }), diff --git a/web/test/use-timeline-cache.test.ts b/web/test/use-timeline-cache.test.ts index b1239449a..f8795204f 100644 --- a/web/test/use-timeline-cache.test.ts +++ b/web/test/use-timeline-cache.test.ts @@ -8,6 +8,7 @@ import type { ServerMessage, TimelineEvent, WsClient } from '../src/ws-client.js import { TimelineDB } from '../src/timeline-db.js'; import { mergeTimelineEvents } from '../../src/shared/timeline/merge.js'; import { + __clearPersistedTimelineSnapshotsForTests, __getTimelineCacheKeysForTests, __getSharedTimelineBaseForTests, __resetTimelineCacheForTests, @@ -33,6 +34,7 @@ function makeEvents(sessionId: string, count: number): TimelineEvent[] { describe('useTimeline global cache bounds', () => { beforeEach(() => { __resetTimelineCacheForTests(); + __clearPersistedTimelineSnapshotsForTests(); cleanup(); }); @@ -303,6 +305,69 @@ describe('useTimeline global cache bounds', () => { }); }); + it('renders immediately from the persisted local snapshot before IndexedDB resolves', async () => { + const sessionName = `deck_local_snapshot_${Date.now()}`; + const serverId = `srv-${Date.now()}`; + + ingestTimelineEventForCache({ + eventId: `${sessionName}-snap-1`, + sessionId: sessionName, + ts: 1, + epoch: 1, + seq: 1, + source: 'daemon', + confidence: 'high', + type: 'assistant.text', + payload: { text: 'snapshot history' }, + }, serverId); + + __resetTimelineCacheForTests(); + vi.spyOn(TimelineDB.prototype, 'open').mockImplementation(() => new Promise(() => {})); + + function Probe() { + const { events, loading } = useTimeline(sessionName, null, serverId); + return h('div', { 'data-testid': 'probe', 'data-loading': String(loading) }, events.map((event) => String(event.payload.text ?? 
'')).join('|')); + } + + render(h(Probe)); + + await waitFor(() => { + expect(screen.getByTestId('probe').textContent).toBe('snapshot history'); + expect(screen.getByTestId('probe').getAttribute('data-loading')).toBe('false'); + }); + }); + + it('requests timeline history when the socket connects after the first mount', async () => { + const sessionName = `deck_late_connect_${Date.now()}`; + const serverId = `srv-${Date.now()}`; + let connected = false; + const sendTimelineHistoryRequest = vi.fn(() => 'history-late-connect'); + + const ws: WsClient = { + get connected() { + return connected; + }, + onMessage: () => () => {}, + sendTimelineHistoryRequest, + } as unknown as WsClient; + + function Probe({ tick }: { tick: number }) { + const { loading } = useTimeline(sessionName, ws, serverId); + return h('div', { 'data-testid': 'probe', 'data-tick': String(tick) }, String(loading)); + } + + const view = render(h(Probe, { tick: 0 })); + + expect(sendTimelineHistoryRequest).not.toHaveBeenCalled(); + + connected = true; + view.rerender(h(Probe, { tick: 1 })); + + await waitFor(() => { + expect(sendTimelineHistoryRequest).toHaveBeenCalledWith(sessionName); + }); + }); + it('renders immediately from globally ingested timeline events before the first history request returns', async () => { const sessionName = `deck_sub_codex_sdk_${Date.now()}`; const serverId = `srv-${Date.now()}`;