From 7331589276027c97be4c3656c67fe1eeb11dc3e1 Mon Sep 17 00:00:00 2001 From: fey Date: Sat, 21 Feb 2026 16:40:44 +0000 Subject: [PATCH] Use chibi's native autodestruct TTL instead of explicit destroy_context Replace the explicit destroy_context call in ChibiSession.close() with destroy_after_seconds_inactive=43200 (12h) set at session start via the set_system_prompt invocation flags. Chibi's GC then handles cleanup automatically, which is more robust as it survives crashes where close() never runs. Adds chibi-harness.test.ts covering TTL presence on set_system_prompt, absence on send_prompt, no destroy_context on close, and unique context names per session. --- packages/autonav/src/harness/chibi-harness.ts | 35 ++-- .../tests/harness/chibi-harness.test.ts | 150 ++++++++++++++++++ 2 files changed, 171 insertions(+), 14 deletions(-) create mode 100644 packages/autonav/tests/harness/chibi-harness.test.ts diff --git a/packages/autonav/src/harness/chibi-harness.ts b/packages/autonav/src/harness/chibi-harness.ts index 0c24a16..8bb09d5 100644 --- a/packages/autonav/src/harness/chibi-harness.ts +++ b/packages/autonav/src/harness/chibi-harness.ts @@ -14,9 +14,13 @@ * and error objects (type: "error", message: string) * * Session lifecycle: - * 1. set_system_prompt (separate invocation, no output) + * 1. set_system_prompt (separate invocation, no output) — also sets destroy_after_seconds_inactive * 2. send_prompt (separate invocation, streams JSONL output) * 3. For multi-turn: send_prompt again with same context name + * + * Context cleanup: + * Contexts self-destruct after CONTEXT_TTL_SECONDS of inactivity (chibi GC handles this). + * No explicit destroy_context call is made. NOTE: the TTL is only registered when a system prompt is configured.
*/ import { execFileSync, spawn, type ChildProcess } from "node:child_process"; @@ -31,6 +35,9 @@ import type { ToolDefinition } from "./tool-server.js"; import { createEphemeralHome, type EphemeralHome } from "./ephemeral-home.js"; import { wrapCommand, isSandboxEnabled } from "./sandbox.js"; +/** Auto-destroy context after 12 hours of inactivity. */ +const CONTEXT_TTL_SECONDS = 12 * 60 * 60; + const CHIBI_TOOL_MARKER = "__chibi_tools__" as const; interface ChibiToolServer { @@ -41,12 +48,16 @@ interface ChibiToolServer { /** * Build the chibi-json input object for a command. + * + * When destroyAfterSecondsInactive is set, it is passed in `flags` so chibi + * registers the TTL on the context entry at touch time. */ function buildInput( command: Record | string, context: string, projectRoot?: string, home?: string, + destroyAfterSecondsInactive?: number, ): string { const input: Record = { command, context }; if (projectRoot) { @@ -55,6 +66,9 @@ function buildInput( if (home) { input.home = home; } + if (destroyAfterSecondsInactive !== undefined) { + input.flags = { destroy_after_seconds_inactive: destroyAfterSecondsInactive }; + } return JSON.stringify(input); } @@ -241,13 +255,16 @@ class ChibiSession implements HarnessSession { } } - // Set system prompt (separate synchronous invocation) + // Set system prompt (separate synchronous invocation). + // Also registers the inactivity TTL on the context entry so chibi GC + // auto-destroys it after CONTEXT_TTL_SECONDS of disuse. 
if (config.systemPrompt) { const input = buildInput( { set_system_prompt: { prompt: config.systemPrompt } }, this.contextName, config.cwd, this.ephemeralHome.homePath, + CONTEXT_TTL_SECONDS, ); try { runSync(input, { env: this.extraEnv, sandboxConfig: this.sandboxConfig }); @@ -407,18 +424,8 @@ class ChibiSession implements HarnessSession { }); } - // Clean up the context - try { - const destroyInput = buildInput( - { destroy_context: { name: this.contextName } }, - this.contextName, - undefined, - this.ephemeralHome?.homePath, - ); - runSync(destroyInput, { env: this.extraEnv, sandboxConfig: this.sandboxConfig }); - } catch { - // Best-effort cleanup - } + // Context cleanup is left to chibi's GC: when a system prompt was configured, the TTL registered + // at session start makes the context auto-destruct after CONTEXT_TTL_SECONDS of inactivity. // Clean up ephemeral home directory this.ephemeralHome?.cleanup(); diff --git a/packages/autonav/tests/harness/chibi-harness.test.ts b/packages/autonav/tests/harness/chibi-harness.test.ts new file mode 100644 index 0000000..1929c6f --- /dev/null +++ b/packages/autonav/tests/harness/chibi-harness.test.ts @@ -0,0 +1,150 @@ +/** + * Chibi Harness Tests + * + * Unit tests for the ChibiHarness and ChibiSession implementation. + * Mocks child_process to verify chibi-json invocations without requiring + * chibi to be installed.
+ * + * Key behaviours under test: + * - destroy_after_seconds_inactive is set on set_system_prompt invocation + * - destroy_context is NOT called on close() + * - Context name is randomised per session + * - buildInput correctly serialises flags + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { EventEmitter } from "node:events"; + +// ---- mock node:child_process ---- + +const mockExecFileSync = vi.fn(); +const mockSpawn = vi.fn(); + +vi.mock("node:child_process", () => ({ + execFileSync: (...args: unknown[]) => mockExecFileSync(...args), + spawn: (...args: unknown[]) => mockSpawn(...args), +})); + +// ---- mock ephemeral-home ---- + +vi.mock("../../src/harness/ephemeral-home.js", () => ({ + createEphemeralHome: vi.fn(() => ({ + homePath: "/tmp/ephemeral-home", + cleanup: vi.fn(), + })), +})); + +// ---- mock sandbox ---- + +vi.mock("../../src/harness/sandbox.js", () => ({ + wrapCommand: (_cmd: string, args: string[]) => ({ command: "chibi-json", args }), + isSandboxEnabled: vi.fn(() => false), +})); + +// ---- helpers ---- + +function makeChildProcess() { + const child = new EventEmitter() as ReturnType; + const stdout = new EventEmitter(); + const stderr = new EventEmitter(); + (child as Record).stdout = stdout; + (child as Record).stderr = stderr; + (child as Record).stdin = { write: vi.fn(), end: vi.fn() }; + (child as Record).exitCode = null; + (child as Record).kill = vi.fn(() => { + (child as Record).exitCode = 0; + child.emit("exit", 0); + }); + return child; +} + +// ---- tests ---- + +describe("ChibiHarness", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockSpawn.mockReturnValue(makeChildProcess()); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("passes destroy_after_seconds_inactive on set_system_prompt invocation", async () => { + const { ChibiHarness } = await import("../../src/harness/chibi-harness.js"); + const harness = new ChibiHarness(); + + harness.run( + { systemPrompt: "You 
are a test navigator.", cwd: "/tmp/nav" }, + "hello", + ); + + expect(mockExecFileSync).toHaveBeenCalledOnce(); + const [, , opts] = mockExecFileSync.mock.calls[0]; + const input = JSON.parse(opts.input as string); + + expect(input.flags).toBeDefined(); + expect(input.flags.destroy_after_seconds_inactive).toBe(12 * 60 * 60); + }); + + it("does not pass destroy_after_seconds_inactive on send_prompt invocations", async () => { + const { ChibiHarness } = await import("../../src/harness/chibi-harness.js"); + const harness = new ChibiHarness(); + + harness.run( + { systemPrompt: "You are a test navigator.", cwd: "/tmp/nav" }, + "hello", + ); + + expect(mockSpawn).toHaveBeenCalledOnce(); + const [, , spawnOpts] = mockSpawn.mock.calls[0]; + // spawn receives no input arg — chibi-json reads from child.stdin + // Verify via the stdin.write call instead + const child = mockSpawn.mock.results[0].value; + const stdinWrite = (child.stdin as { write: ReturnType }).write; + expect(stdinWrite).toHaveBeenCalledOnce(); + const input = JSON.parse(stdinWrite.mock.calls[0][0] as string); + + expect(input.flags?.destroy_after_seconds_inactive).toBeUndefined(); + void spawnOpts; // suppress unused warning + }); + + it("does not call destroy_context on close()", async () => { + const child = makeChildProcess(); + // Pre-set exitCode so close() doesn't wait for the exit event + (child as Record).exitCode = 0; + mockSpawn.mockReturnValue(child); + + const { ChibiHarness } = await import("../../src/harness/chibi-harness.js"); + const harness = new ChibiHarness(); + const session = harness.run( + { systemPrompt: "You are a test navigator.", cwd: "/tmp/nav" }, + "hello", + ); + + mockExecFileSync.mockClear(); + await session.close(); + + // execFileSync is used for synchronous commands (set_system_prompt, destroy_context). + // After clearing post-setup, no further calls should occur. 
+ expect(mockExecFileSync).not.toHaveBeenCalled(); + }); + + it("uses a unique context name per session", async () => { + const { ChibiHarness } = await import("../../src/harness/chibi-harness.js"); + const harness = new ChibiHarness(); + + mockSpawn.mockReturnValue(makeChildProcess()); + harness.run({ systemPrompt: "nav", cwd: "/tmp/nav" }, "q1"); + + mockSpawn.mockReturnValue(makeChildProcess()); + harness.run({ systemPrompt: "nav", cwd: "/tmp/nav" }, "q2"); + + const ctx1 = JSON.parse(mockExecFileSync.mock.calls[0][2].input as string).context as string; + const ctx2 = JSON.parse(mockExecFileSync.mock.calls[1][2].input as string).context as string; + + expect(ctx1).toMatch(/^autonav-/); + expect(ctx2).toMatch(/^autonav-/); + expect(ctx1).not.toBe(ctx2); + }); +});