Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 63 additions & 6 deletions src/agents/judge.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import { resolve } from "node:path";
import { trace } from "../trace.js";
import type { DomainSpec, JudgeResult, ReviewerResult, WorkerResult } from "./types.js";

/** Returns a promise that settles with no value once a timer of `ms` milliseconds fires. */
const delay = (ms: number): Promise<void> =>
  new Promise<void>((done) => {
    setTimeout(done, ms);
  });
import { isStub } from "../stub.js";
import { runLayer1 } from "../validation/layer1.js";
import { runLayer2 } from "../validation/layer2.js";
import type { DomainSpec, JudgeResult, Platform, ReviewerResult, WorkerResult } from "./types.js";

export type JudgeInput = {
domain: DomainSpec;
Expand All @@ -11,8 +13,49 @@ export type JudgeInput = {
reviewer: ReviewerResult;
};

/** Per-platform outcome of the Layer 1 and Layer 2 validations, as assembled by `evaluate`. */
type PlatformReport = {
/** Platform the report belongs to (copied from the worker result). */
platform: Platform;
/** Whether the Layer 1 run reported a pass. */
layer1Pass: boolean;
/** Number of findings returned by Layer 1; traced as "leftover tokens" on failure. */
layer1Findings: number;
/** Whether the Layer 2 run reported a pass. */
layer2Pass: boolean;
/** Command string reported by Layer 2; echoed in the per-platform trace line. */
layer2Command: string;
/** Layer 2 duration in milliseconds; divided by 1000 and shown as seconds in the trace. */
layer2DurationMs: number;
};

/**
 * Judges the generated output of the three platform workers.
 *
 * When `isStub("judge")` is true the scripted stub judge is returned instead.
 * Otherwise the rails, ios and android worker results are evaluated
 * concurrently, one trace line is emitted per platform, and the run passes
 * only if every platform passes both Layer 1 and Layer 2. Layer 3 is not
 * wired yet and is always reported as skipped.
 *
 * @param input - Judge input; its `rails`, `ios` and `android` worker results are evaluated.
 * @returns The overall verdict and a one-line summary of per-layer pass counts.
 */
export async function runJudge(input: JudgeInput): Promise<JudgeResult> {
  if (isStub("judge")) return runStubJudge();

  trace("judge", "Layer 1 (structural) — scanning for leftover tokens");
  trace("judge", "Layer 2 (runtime) — validating toolchains load");

  const reports = await Promise.all([
    evaluate(input.rails),
    evaluate(input.ios),
    evaluate(input.android),
  ]);

  for (const r of reports) {
    const l1 = r.layer1Pass ? "PASS" : `FAIL (${r.layer1Findings} leftover tokens)`;
    const l2 = r.layer2Pass ? `PASS (${(r.layer2DurationMs / 1000).toFixed(1)}s)` : "FAIL";
    trace("judge", `${r.platform}: Layer 1 ${l1} · Layer 2 ${l2} [${r.layer2Command}]`);
  }

  trace("judge", "Layer 3 (semantic, Opus 4.7 vision judge) — not yet wired; treating as skipped");

  const overallPass = reports.every((r) => r.layer1Pass && r.layer2Pass);
  const l1Total = reports.filter((r) => r.layer1Pass).length;
  const l2Total = reports.filter((r) => r.layer2Pass).length;
  // Derive the denominator from the reports so the summary stays correct if
  // the set of evaluated platforms changes.
  const total = reports.length;

  return {
    overallPass,
    summary: `Layer 1 ${l1Total}/${total} pass · Layer 2 ${l2Total}/${total} pass · Layer 3 skipped`,
  };
}

/** Returns a promise that settles with no value once a timer of `ms` milliseconds fires. */
const delay = (ms: number): Promise<void> =>
  new Promise<void>((done) => {
    setTimeout(done, ms);
  });

async function runStubJudge(): Promise<JudgeResult> {
trace("judge", "(stub mode)");
trace("judge", "Layer 1 (structural): ripgrep leftover tokens");
await delay(200);
trace("judge", "Layer 1: PASS");
Expand All @@ -24,9 +67,23 @@ export async function runJudge(input: JudgeInput): Promise<JudgeResult> {
trace("judge", "Layer 3 (semantic): Opus 4.7 judge, median of 3 runs");
await delay(300);
trace("judge", "Layer 3: PASS (semantic score above threshold)");
return { overallPass: true, summary: "Layer 1/2/3 PASS" };
}

/**
 * Runs the Layer 1 and Layer 2 validations for one platform's generated project.
 *
 * The worker's `outDir` is resolved against the current working directory and
 * both layers are started concurrently against that directory.
 *
 * @param worker - Worker result supplying the platform, its output directory
 *   and the `renamedFrom` tokens handed to Layer 1 as forbidden tokens.
 * @returns The per-platform report built from both layer results.
 */
async function evaluate(worker: WorkerResult): Promise<PlatformReport> {
  const outDir = resolve(process.cwd(), worker.outDir);

  const [layer1, layer2] = await Promise.all([
    runLayer1({ projectDir: outDir, forbiddenTokens: worker.renamedFrom }),
    runLayer2({ platform: worker.platform, outDir }),
  ]);

  // Only PlatformReport fields belong here; the overall verdict and summary
  // are computed by the caller from all platform reports.
  return {
    platform: worker.platform,
    layer1Pass: layer1.pass,
    layer1Findings: layer1.findings.length,
    layer2Pass: layer2.pass,
    layer2Command: layer2.command,
    layer2DurationMs: layer2.durationMs,
  };
}
1 change: 1 addition & 0 deletions src/agents/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export type WorkerResult = {
platform: Platform;
outDir: string;
filesTouched: number;
renamedFrom: readonly string[];
};

export type ReviewerResult = {
Expand Down
4 changes: 3 additions & 1 deletion src/agents/workers/android.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ export async function runAndroidWorker(domain: DomainSpec): Promise<WorkerResult
await copyFiltered(substrate, outDir);

const productPairs = buildProductRenamePairs(domain.slug);
const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan];
const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan].filter((p) => p.from !== p.to);
const plan = renamePlan.map((p) => `${p.from}->${p.to}`).join(", ");
trace("android", `running scripts/ruby/rename.rb: ${plan}`);

Expand All @@ -63,6 +63,7 @@ export async function runAndroidWorker(domain: DomainSpec): Promise<WorkerResult
platform: "android",
outDir: `./out/${domain.slug}/android`,
filesTouched: renameStats.files_changed + renameStats.files_renamed,
renamedFrom: renamePlan.map((p) => p.from),
};
}

Expand Down Expand Up @@ -133,5 +134,6 @@ async function runStubAndroidWorker(domain: DomainSpec): Promise<WorkerResult> {
platform: "android",
outDir: `./out/${domain.slug}/android`,
filesTouched: 81,
renamedFrom: domain.renamePlan.map((p) => p.from),
};
}
4 changes: 3 additions & 1 deletion src/agents/workers/ios.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export async function runIosWorker(domain: DomainSpec): Promise<WorkerResult> {
await copyFiltered(substrate, outDir);

const productPairs = buildProductRenamePairs(domain.slug);
const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan];
const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan].filter((p) => p.from !== p.to);
const plan = renamePlan.map((p) => `${p.from}->${p.to}`).join(", ");
trace("ios", `running scripts/ruby/rename.rb: ${plan}`);

Expand All @@ -54,6 +54,7 @@ export async function runIosWorker(domain: DomainSpec): Promise<WorkerResult> {
platform: "ios",
outDir: `./out/${domain.slug}/ios`,
filesTouched: renameStats.files_changed + renameStats.files_renamed,
renamedFrom: renamePlan.map((p) => p.from),
};
}

Expand Down Expand Up @@ -123,5 +124,6 @@ async function runStubIosWorker(domain: DomainSpec): Promise<WorkerResult> {
platform: "ios",
outDir: `./out/${domain.slug}/ios`,
filesTouched: 63,
renamedFrom: domain.renamePlan.map((p) => p.from),
};
}
4 changes: 3 additions & 1 deletion src/agents/workers/rails.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export async function runRailsWorker(domain: DomainSpec): Promise<WorkerResult>
await copyFiltered(substrate, outDir);

const productPairs = buildProductRenamePairs(domain.slug);
const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan];
const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan].filter((p) => p.from !== p.to);
const plan = renamePlan.map((p) => `${p.from}->${p.to}`).join(", ");
trace("rails", `running scripts/ruby/rename.rb: ${plan}`);

Expand All @@ -54,6 +54,7 @@ export async function runRailsWorker(domain: DomainSpec): Promise<WorkerResult>
platform: "rails",
outDir: `./out/${domain.slug}/rails`,
filesTouched: renameStats.files_changed + renameStats.files_renamed,
renamedFrom: renamePlan.map((p) => p.from),
};
}

Expand Down Expand Up @@ -123,5 +124,6 @@ async function runStubRailsWorker(domain: DomainSpec): Promise<WorkerResult> {
platform: "rails",
outDir: `./out/${domain.slug}/rails`,
filesTouched: 47,
renamedFrom: domain.renamePlan.map((p) => p.from),
};
}
Loading