From 70345db3b47d9e8de62fedfeb2013dbe8f7757ee Mon Sep 17 00:00:00 2001 From: dadachi Date: Wed, 22 Apr 2026 09:12:25 +0900 Subject: [PATCH] Wire Layer 1 + Layer 2 through the judge sub-agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit runJudge now actually runs runLayer1 and runLayer2 against each generated project (parallel via Promise.all) and aggregates into a JudgeResult. No more hardcoded PASS from the stub — the pipeline's final gate is real. - types.ts: WorkerResult gains `renamedFrom: readonly string[]` so the judge can run Layer 1 against the exact token set that platform's worker was supposed to eliminate (product pairs + domain pairs). - All three workers populate renamedFrom from their final rename plan, filtering out identity pairs (from === to), which are no-ops and would otherwise trigger false Layer 1 positives. - judge.ts: new evaluate() helper per platform; stub path preserved behind isStub("judge") for CI + offline iteration. Caught a real planner bug along the way: real Opus 4.7 occasionally emits identity rename pairs (e.g. `Completed → Completed` for vet-clinic-queue). Previously these would survive into the worker's renamedFrom and Layer 1 would flag every legitimate `Completed` occurrence as a leftover. Identity filter in each worker defuses it. Verified end-to-end on a real walk-in clinic queue run: Layer 1 3/3 pass · Layer 2 3/3 pass · Layer 3 skipped · overall: PASS · rails 35.6s (cold bundle install), ios 1.9s, android 1.1s. Layer 3 stays stubbed (skipped) pending vision-judge implementation. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/agents/judge.ts | 69 ++++++++++++++++++++++++++++++++--- src/agents/types.ts | 1 + src/agents/workers/android.ts | 4 +- src/agents/workers/ios.ts | 4 +- src/agents/workers/rails.ts | 4 +- 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/src/agents/judge.ts b/src/agents/judge.ts index be5a2ce..18443d6 100644 --- a/src/agents/judge.ts +++ b/src/agents/judge.ts @@ -1,7 +1,9 @@ +import { resolve } from "node:path"; import { trace } from "../trace.js"; -import type { DomainSpec, JudgeResult, ReviewerResult, WorkerResult } from "./types.js"; - -const delay = (ms: number): Promise => new Promise((r) => { setTimeout(r, ms); }); +import { isStub } from "../stub.js"; +import { runLayer1 } from "../validation/layer1.js"; +import { runLayer2 } from "../validation/layer2.js"; +import type { DomainSpec, JudgeResult, Platform, ReviewerResult, WorkerResult } from "./types.js"; export type JudgeInput = { domain: DomainSpec; @@ -11,8 +13,49 @@ export type JudgeInput = { reviewer: ReviewerResult; }; +type PlatformReport = { + platform: Platform; + layer1Pass: boolean; + layer1Findings: number; + layer2Pass: boolean; + layer2Command: string; + layer2DurationMs: number; +}; + export async function runJudge(input: JudgeInput): Promise { - void input; + if (isStub("judge")) return runStubJudge(); + + trace("judge", "Layer 1 (structural) — scanning for leftover tokens"); + trace("judge", "Layer 2 (runtime) — validating toolchains load"); + + const reports = await Promise.all([ + evaluate(input.rails), + evaluate(input.ios), + evaluate(input.android), + ]); + + for (const r of reports) { + const l1 = r.layer1Pass ? "PASS" : `FAIL (${r.layer1Findings} leftover tokens)`; + const l2 = r.layer2Pass ? 
`PASS (${(r.layer2DurationMs / 1000).toFixed(1)}s)` : "FAIL"; + trace("judge", `${r.platform}: Layer 1 ${l1} · Layer 2 ${l2} [${r.layer2Command}]`); + } + + trace("judge", "Layer 3 (semantic, Opus 4.7 vision judge) — not yet wired; treating as skipped"); + + const overallPass = reports.every((r) => r.layer1Pass && r.layer2Pass); + const l1Total = reports.filter((r) => r.layer1Pass).length; + const l2Total = reports.filter((r) => r.layer2Pass).length; + + return { + overallPass, + summary: `Layer 1 ${l1Total}/3 pass · Layer 2 ${l2Total}/3 pass · Layer 3 skipped`, + }; +} + +const delay = (ms: number): Promise => new Promise((r) => { setTimeout(r, ms); }); + +async function runStubJudge(): Promise { + trace("judge", "(stub mode)"); trace("judge", "Layer 1 (structural): ripgrep leftover tokens"); await delay(200); trace("judge", "Layer 1: PASS"); @@ -24,9 +67,23 @@ export async function runJudge(input: JudgeInput): Promise { trace("judge", "Layer 3 (semantic): Opus 4.7 judge, median of 3 runs"); await delay(300); trace("judge", "Layer 3: PASS (semantic score above threshold)"); + return { overallPass: true, summary: "Layer 1/2/3 PASS" }; +} + +async function evaluate(worker: WorkerResult): Promise { + const outDir = resolve(process.cwd(), worker.outDir); + + const [layer1, layer2] = await Promise.all([ + runLayer1({ projectDir: outDir, forbiddenTokens: worker.renamedFrom }), + runLayer2({ platform: worker.platform, outDir }), + ]); return { - overallPass: true, - summary: "Layer 1/2/3 PASS", + platform: worker.platform, + layer1Pass: layer1.pass, + layer1Findings: layer1.findings.length, + layer2Pass: layer2.pass, + layer2Command: layer2.command, + layer2DurationMs: layer2.durationMs, }; } diff --git a/src/agents/types.ts b/src/agents/types.ts index 636e1d6..1043f74 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -32,6 +32,7 @@ export type WorkerResult = { platform: Platform; outDir: string; filesTouched: number; + renamedFrom: readonly string[]; }; 
export type ReviewerResult = { diff --git a/src/agents/workers/android.ts b/src/agents/workers/android.ts index a0c5949..6cef756 100644 --- a/src/agents/workers/android.ts +++ b/src/agents/workers/android.ts @@ -42,7 +42,7 @@ export async function runAndroidWorker(domain: DomainSpec): Promise p.from !== p.to); const plan = renamePlan.map((p) => `${p.from}->${p.to}`).join(", "); trace("android", `running scripts/ruby/rename.rb: ${plan}`); @@ -63,6 +63,7 @@ export async function runAndroidWorker(domain: DomainSpec): Promise p.from), }; } @@ -133,5 +134,6 @@ async function runStubAndroidWorker(domain: DomainSpec): Promise { platform: "android", outDir: `./out/${domain.slug}/android`, filesTouched: 81, + renamedFrom: domain.renamePlan.map((p) => p.from), }; } diff --git a/src/agents/workers/ios.ts b/src/agents/workers/ios.ts index 079865e..65e8563 100644 --- a/src/agents/workers/ios.ts +++ b/src/agents/workers/ios.ts @@ -33,7 +33,7 @@ export async function runIosWorker(domain: DomainSpec): Promise { await copyFiltered(substrate, outDir); const productPairs = buildProductRenamePairs(domain.slug); - const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan]; + const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan].filter((p) => p.from !== p.to); const plan = renamePlan.map((p) => `${p.from}->${p.to}`).join(", "); trace("ios", `running scripts/ruby/rename.rb: ${plan}`); @@ -54,6 +54,7 @@ export async function runIosWorker(domain: DomainSpec): Promise { platform: "ios", outDir: `./out/${domain.slug}/ios`, filesTouched: renameStats.files_changed + renameStats.files_renamed, + renamedFrom: renamePlan.map((p) => p.from), }; } @@ -123,5 +124,6 @@ async function runStubIosWorker(domain: DomainSpec): Promise { platform: "ios", outDir: `./out/${domain.slug}/ios`, filesTouched: 63, + renamedFrom: domain.renamePlan.map((p) => p.from), }; } diff --git a/src/agents/workers/rails.ts b/src/agents/workers/rails.ts index 9930a26..cb700e5 
100644 --- a/src/agents/workers/rails.ts +++ b/src/agents/workers/rails.ts @@ -33,7 +33,7 @@ export async function runRailsWorker(domain: DomainSpec): Promise await copyFiltered(substrate, outDir); const productPairs = buildProductRenamePairs(domain.slug); - const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan]; + const renamePlan: readonly RenamePair[] = [...productPairs, ...domain.renamePlan].filter((p) => p.from !== p.to); const plan = renamePlan.map((p) => `${p.from}->${p.to}`).join(", "); trace("rails", `running scripts/ruby/rename.rb: ${plan}`); @@ -54,6 +54,7 @@ export async function runRailsWorker(domain: DomainSpec): Promise platform: "rails", outDir: `./out/${domain.slug}/rails`, filesTouched: renameStats.files_changed + renameStats.files_renamed, + renamedFrom: renamePlan.map((p) => p.from), }; } @@ -123,5 +124,6 @@ async function runStubRailsWorker(domain: DomainSpec): Promise { platform: "rails", outDir: `./out/${domain.slug}/rails`, filesTouched: 47, + renamedFrom: domain.renamePlan.map((p) => p.from), }; }