Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions config/evolution.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ reflection:
effort: "high"
max_budget_usd: 0.50

# LLM Judge Configuration
judges:
# "auto" enables when ANTHROPIC_API_KEY is available
# "always" enables unconditionally
# "never" disables unconditionally
enabled: "auto"
# Safety net against runaway costs (daily reset)
cost_cap_usd_per_day: 50.0
# Maximum golden suite entries (prune oldest when exceeded)
max_golden_suite_size: 50

# Directory paths (relative to project root)
paths:
config_dir: "phantom-config"
Expand Down
1 change: 1 addition & 0 deletions src/evolution/__tests__/application.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ function testConfig(): EvolutionConfig {
cadence: { reflection_interval: 1, consolidation_interval: 10, full_review_interval: 50, drift_check_interval: 20 },
gates: { drift_threshold: 0.7, max_file_lines: 200, auto_rollback_threshold: 0.1, auto_rollback_window: 5 },
reflection: { model: "claude-sonnet-4-20250514", effort: "high", max_budget_usd: 0.5 },
judges: { enabled: "auto", cost_cap_usd_per_day: 50.0, max_golden_suite_size: 50 },
paths: {
config_dir: TEST_DIR,
constitution: `${TEST_DIR}/constitution.md`,
Expand Down
1 change: 1 addition & 0 deletions src/evolution/__tests__/constitution.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ function testConfig(): EvolutionConfig {
cadence: { reflection_interval: 1, consolidation_interval: 10, full_review_interval: 50, drift_check_interval: 20 },
gates: { drift_threshold: 0.7, max_file_lines: 200, auto_rollback_threshold: 0.1, auto_rollback_window: 5 },
reflection: { model: "claude-sonnet-4-20250514", effort: "high", max_budget_usd: 0.5 },
judges: { enabled: "auto", cost_cap_usd_per_day: 50.0, max_golden_suite_size: 50 },
paths: {
config_dir: TEST_DIR,
constitution: `${TEST_DIR}/constitution.md`,
Expand Down
199 changes: 199 additions & 0 deletions src/evolution/__tests__/cost-cap.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { EvolutionEngine } from "../engine.ts";
import type { SessionSummary } from "../types.ts";

const TEST_DIR = "/tmp/phantom-test-cost-cap";
const CONFIG_PATH = `${TEST_DIR}/config/evolution.yaml`;

let savedApiKey: string | undefined;

/**
 * Creates a fresh fixture tree under TEST_DIR for an EvolutionEngine test run.
 *
 * Writes an evolution config (judges set to "never", with the supplied daily
 * cost cap) to CONFIG_PATH, plus the constitution, persona, profile, strategy,
 * memory and meta files that the config's `paths` section points at.
 *
 * @param costCap - Value interpolated into `judges.cost_cap_usd_per_day`.
 */
function setupTestEnv(costCap: number): void {
	const configRoot = `${TEST_DIR}/phantom-config`;
	const metaDir = `${configRoot}/meta`;
	const strategiesDir = `${configRoot}/strategies`;
	const memoryDir = `${configRoot}/memory`;

	for (const dir of [`${TEST_DIR}/config`, metaDir, strategiesDir, memoryDir]) {
		mkdirSync(dir, { recursive: true });
	}

	const yamlLines = [
		"cadence:",
		" reflection_interval: 1",
		" consolidation_interval: 10",
		"gates:",
		" drift_threshold: 0.7",
		" max_file_lines: 200",
		" auto_rollback_threshold: 0.1",
		" auto_rollback_window: 5",
		"judges:",
		' enabled: "never"',
		` cost_cap_usd_per_day: ${costCap}`,
		" max_golden_suite_size: 50",
		"paths:",
		` config_dir: "${configRoot}"`,
		` constitution: "${configRoot}/constitution.md"`,
		` version_file: "${metaDir}/version.json"`,
		` metrics_file: "${metaDir}/metrics.json"`,
		` evolution_log: "${metaDir}/evolution-log.jsonl"`,
		` golden_suite: "${metaDir}/golden-suite.jsonl"`,
		` session_log: "${memoryDir}/session-log.jsonl"`,
	];

	// Version record for a config that has never evolved.
	const initialVersion = {
		version: 0,
		parent: null,
		timestamp: new Date().toISOString(),
		changes: [],
		metrics_at_change: { session_count: 0, success_rate_7d: 0, correction_rate_7d: 0 },
	};

	// All counters and rates start at zero; no session or evolution has happened yet.
	const initialMetrics = {
		session_count: 0,
		success_count: 0,
		failure_count: 0,
		correction_count: 0,
		evolution_count: 0,
		rollback_count: 0,
		last_session_at: null,
		last_evolution_at: null,
		success_rate_7d: 0,
		correction_rate_7d: 0,
		sessions_since_consolidation: 0,
	};

	// [path, contents] pairs, written in order.
	const fixtures: Array<[string, string]> = [
		[CONFIG_PATH, yamlLines.join("\n")],
		[`${configRoot}/constitution.md`, "# Constitution\n1. Be honest.\n"],
		[`${configRoot}/persona.md`, ""],
		[`${configRoot}/user-profile.md`, "# User Profile\n"],
		[`${configRoot}/domain-knowledge.md`, ""],
		[`${strategiesDir}/task-patterns.md`, ""],
		[`${strategiesDir}/tool-preferences.md`, ""],
		[`${strategiesDir}/error-recovery.md`, ""],
		[`${memoryDir}/session-log.jsonl`, ""],
		[`${metaDir}/version.json`, JSON.stringify(initialVersion)],
		[`${metaDir}/metrics.json`, JSON.stringify(initialMetrics)],
		[`${metaDir}/evolution-log.jsonl`, ""],
		[`${metaDir}/golden-suite.jsonl`, ""],
	];

	for (const [filePath, contents] of fixtures) {
		writeFileSync(filePath, contents, "utf-8");
	}
}

/**
 * Builds a SessionSummary fixture for the cost-cap tests.
 *
 * The default session is a successful CLI session whose only user message is
 * "No, use TypeScript not JavaScript". Any field can be replaced via `overrides`.
 *
 * @param overrides - Fields that take precedence over the defaults.
 * @returns A new summary object; `session_id` embeds the current timestamp.
 */
function makeSession(overrides: Partial<SessionSummary> = {}): SessionSummary {
	const defaults: SessionSummary = {
		session_id: `session-${Date.now()}`,
		session_key: "cli:main",
		user_id: "user-1",
		user_messages: ["No, use TypeScript not JavaScript"],
		assistant_messages: ["Got it."],
		tools_used: [],
		files_tracked: [],
		outcome: "success",
		cost_usd: 0.05,
		started_at: "2026-03-25T10:00:00Z",
		ended_at: "2026-03-25T10:05:00Z",
	};

	// Later spread wins, so caller-supplied fields replace the defaults.
	return { ...defaults, ...overrides };
}

/**
 * Cost-cap suite: checks that `judges.cost_cap_usd_per_day` is read from the
 * YAML config, that it falls back to 50 when the `judges` section is absent,
 * and that the engine still applies changes through the heuristic path when
 * judges are disabled.
 *
 * ANTHROPIC_API_KEY is snapshotted before each test and restored afterwards.
 */
describe("Cost Cap", () => {
	beforeEach(() => {
		// Snapshot the key so afterEach can put the environment back as it was.
		savedApiKey = process.env.ANTHROPIC_API_KEY;
	});

	afterEach(() => {
		if (savedApiKey === undefined) {
			// Remove the variable outright. Assigning `undefined` is not a removal:
			// Node.js coerces values written to process.env to strings, which can leave
			// the literal text "undefined" behind as a truthy key for later tests.
			Reflect.deleteProperty(process.env, "ANTHROPIC_API_KEY");
		} else {
			process.env.ANTHROPIC_API_KEY = savedApiKey;
		}
		rmSync(TEST_DIR, { recursive: true, force: true });
	});

	test("cost cap config is parsed from YAML", () => {
		setupTestEnv(10.0);
		const engine = new EvolutionEngine(CONFIG_PATH);
		const config = engine.getEvolutionConfig();
		expect(config.judges.cost_cap_usd_per_day).toBe(10.0);
	});

	test("cost cap defaults to 50 when not configured", () => {
		// Build the standard fixture tree with a cap that is deliberately NOT 50, then
		// replace the config with one that has no `judges` section. If the overwrite
		// below were skipped, the assertion would see 1 and fail.
		setupTestEnv(1.0);

		writeFileSync(
			CONFIG_PATH,
			[
				"paths:",
				` config_dir: "${TEST_DIR}/phantom-config"`,
				` constitution: "${TEST_DIR}/phantom-config/constitution.md"`,
				` version_file: "${TEST_DIR}/phantom-config/meta/version.json"`,
				` metrics_file: "${TEST_DIR}/phantom-config/meta/metrics.json"`,
				` evolution_log: "${TEST_DIR}/phantom-config/meta/evolution-log.jsonl"`,
				` golden_suite: "${TEST_DIR}/phantom-config/meta/golden-suite.jsonl"`,
				` session_log: "${TEST_DIR}/phantom-config/memory/session-log.jsonl"`,
			].join("\n"),
			"utf-8",
		);

		const engine = new EvolutionEngine(CONFIG_PATH);
		expect(engine.getEvolutionConfig().judges.cost_cap_usd_per_day).toBe(50.0);
	});

	test("engine uses heuristic path when judges are disabled", async () => {
		setupTestEnv(50.0);
		const engine = new EvolutionEngine(CONFIG_PATH);

		// setupTestEnv writes judges.enabled: "never", so the engine must report no LLM judges.
		expect(engine.usesLLMJudges()).toBe(false);

		const result = await engine.afterSession(makeSession());
		// The heuristic path is still expected to apply at least one change.
		expect(result.changes_applied.length).toBeGreaterThan(0);

		// The default session's user message mentions TypeScript; it should reach the profile.
		const userProfile = readFileSync(`${TEST_DIR}/phantom-config/user-profile.md`, "utf-8");
		expect(userProfile).toContain("TypeScript");
	});
});
2 changes: 2 additions & 0 deletions src/evolution/__tests__/engine.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ function setupTestEnvironment(): void {
" auto_rollback_window: 5",
"reflection:",
' model: "claude-sonnet-4-20250514"',
"judges:",
' enabled: "never"',
"paths:",
` config_dir: "${TEST_DIR}/phantom-config"`,
` constitution: "${TEST_DIR}/phantom-config/constitution.md"`,
Expand Down
116 changes: 116 additions & 0 deletions src/evolution/__tests__/golden-suite-cap.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { mkdirSync, rmSync, writeFileSync } from "node:fs";
import type { EvolutionConfig } from "../config.ts";
import { addCase, loadSuite, pruneSuite } from "../golden-suite.ts";
import type { GoldenCase } from "../types.ts";

const TEST_DIR = "/tmp/phantom-test-golden-cap";

/**
 * Returns an in-memory EvolutionConfig whose paths all live under TEST_DIR.
 *
 * Judges are set to "auto" with a 50 USD daily cap and a golden-suite size
 * limit of 50.
 */
function testConfig(): EvolutionConfig {
	const metaDir = `${TEST_DIR}/meta`;
	const memoryDir = `${TEST_DIR}/memory`;

	const config: EvolutionConfig = {
		cadence: {
			reflection_interval: 1,
			consolidation_interval: 10,
			full_review_interval: 50,
			drift_check_interval: 20,
		},
		gates: {
			drift_threshold: 0.7,
			max_file_lines: 200,
			auto_rollback_threshold: 0.1,
			auto_rollback_window: 5,
		},
		reflection: {
			model: "claude-sonnet-4-20250514",
			effort: "high",
			max_budget_usd: 0.5,
		},
		judges: {
			enabled: "auto",
			cost_cap_usd_per_day: 50.0,
			max_golden_suite_size: 50,
		},
		paths: {
			config_dir: TEST_DIR,
			constitution: `${TEST_DIR}/constitution.md`,
			version_file: `${metaDir}/version.json`,
			metrics_file: `${metaDir}/metrics.json`,
			evolution_log: `${metaDir}/evolution-log.jsonl`,
			golden_suite: `${metaDir}/golden-suite.jsonl`,
			session_log: `${memoryDir}/session-log.jsonl`,
		},
	};

	return config;
}

/**
 * Builds a GoldenCase fixture whose identifying fields are derived from `index`.
 *
 * @param index - Embedded in the id, description, lesson and session id.
 * @param daysAgo - How many calendar days before now `created_at` is set to.
 * @returns A new golden case with an ISO-8601 `created_at` timestamp.
 */
function makeGoldenCase(index: number, daysAgo = 0): GoldenCase {
	// Shift the day-of-month back in local time; setDate rolls over month boundaries.
	const createdAt = new Date();
	createdAt.setDate(createdAt.getDate() - daysAgo);

	const goldenCase: GoldenCase = {
		id: `golden-${index}`,
		description: `Correction ${index}`,
		lesson: `Lesson for correction ${index}`,
		session_id: `session-${index}`,
		created_at: createdAt.toISOString(),
	};
	return goldenCase;
}

/**
 * Golden-suite cap suite: exercises pruneSuite against a JSONL suite file that
 * is recreated empty before every test and removed afterwards.
 */
describe("Golden Suite Cap", () => {
	/**
	 * Adds `count` golden cases with ids 0..count-1 to the suite.
	 * `ageOf` maps a case index to how many days old that case should be.
	 */
	const seedCases = (config: EvolutionConfig, count: number, ageOf: (index: number) => number = () => 0): void => {
		for (let index = 0; index < count; index += 1) {
			addCase(config, makeGoldenCase(index, ageOf(index)));
		}
	};

	beforeEach(() => {
		mkdirSync(`${TEST_DIR}/meta`, { recursive: true });
		writeFileSync(`${TEST_DIR}/meta/golden-suite.jsonl`, "", "utf-8");
	});

	afterEach(() => {
		rmSync(TEST_DIR, { recursive: true, force: true });
	});

	test("pruneSuite is a no-op when suite is under the cap", () => {
		const config = testConfig();
		seedCases(config, 5);

		expect(pruneSuite(config, 50)).toBe(0);
		expect(loadSuite(config)).toHaveLength(5);
	});

	test("pruneSuite removes oldest entries when suite exceeds cap", () => {
		const config = testConfig();
		// Case i is i days old, so golden-0 is the newest and golden-9 the oldest.
		seedCases(config, 10, (index) => index);
		expect(loadSuite(config)).toHaveLength(10);

		const removedCount = pruneSuite(config, 5);
		expect(removedCount).toBe(5);

		const survivors = loadSuite(config);
		expect(survivors).toHaveLength(5);

		// Only the five newest cases (ids 0-4) may survive.
		const survivorIds = survivors.map((entry) => Number.parseInt(entry.id.replace("golden-", ""), 10));
		for (const id of survivorIds) {
			expect(id).toBeLessThan(5);
		}
	});

	test("pruneSuite with max_golden_suite_size defaults to 50", () => {
		// Reads back the cap that the test config declares.
		const { judges } = testConfig();
		expect(judges.max_golden_suite_size).toBe(50);
	});

	test("pruneSuite handles empty suite", () => {
		const config = testConfig();
		expect(pruneSuite(config, 50)).toBe(0);
	});

	test("pruneSuite handles suite at exactly the cap", () => {
		const config = testConfig();
		seedCases(config, 5);

		expect(pruneSuite(config, 5)).toBe(0);
		expect(loadSuite(config)).toHaveLength(5);
	});

	test("pruneSuite keeps newest entries when exceeding cap by 1", () => {
		const config = testConfig();
		// Insert the 10-day-old case first, then the one created today.
		addCase(config, makeGoldenCase(0, 10));
		addCase(config, makeGoldenCase(1, 0));

		const removedCount = pruneSuite(config, 1);
		expect(removedCount).toBe(1);

		const survivors = loadSuite(config);
		expect(survivors).toHaveLength(1);
		// The case created today must be the one that is kept.
		expect(survivors[0].id).toBe("golden-1");
	});
});
Loading
Loading