rafters-studio · ssilvius · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/apps/web/src/api/app.ts b/apps/web/src/api/app.ts
@@ -7,6 +7,7 @@ import { loadSession } from "./middleware/auth";
 import { authRoutes } from "./routes/auth";
 import { colorRoutes } from "./routes/color";
 import { ctrlRoutes } from "./routes/ctrl/index";
+import { uncertaintyRoutes } from "./routes/uncertainty";
 import type { HonoEnv } from "./types";
 
 const app = new Hono<HonoEnv>()
@@ -15,7 +16,8 @@ const app = new Hono<HonoEnv>()
   .route("/auth", authRoutes)
   .use("/*", loadSession)
   .route("/color", colorRoutes)
-  .route("/ctrl", ctrlRoutes);
+  .route("/ctrl", ctrlRoutes)
+  .route("/uncertainty", uncertaintyRoutes);
 
 export type AppType = typeof app;
 export default app;
diff --git a/apps/web/src/api/lib/uncertainty/cohort.ts b/apps/web/src/api/lib/uncertainty/cohort.ts
@@ -0,0 +1,76 @@
+/** Milliseconds in one day. */
+export const DAY_MS = 86_400_000;
+/** Default orphan TTL, in days, for callers that do not supply one. */
+export const DEFAULT_ORPHAN_TTL_DAYS = 30;
+/** Width of one confidence bucket. */
+export const BUCKET_WIDTH = 0.1;
+/** Number of confidence buckets. */
+export const BUCKET_COUNT = 10;
+
+/**
+ * Lower edge of the bucket that contains `confidence`.
+ *
+ * Derived from `bucketIndex` so the two helpers cannot disagree about which
+ * bucket a value falls into: values >= 1 map to 0.9 and negative values to 0.
+ */
+export function bucketLower(confidence: number): number {
+  return bucketIndex(confidence) / BUCKET_COUNT;
+}
+
+/**
+ * Builds the cohort identifier `surface|model|modelVersion|bucket`, where
+ * `bucket` is the bucket's lower edge printed with one decimal (e.g. `0.8`).
+ *
+ * NOTE(review): the parts are joined with `|` without escaping, so a `|` inside
+ * any part would make keys ambiguous; confirm inputs can never contain it.
+ */
+export function cohortKey(
+  surface: string,
+  model: string,
+  modelVersion: string,
+  confidence: number,
+): string {
+  return `${surface}|${model}|${modelVersion}|${bucketLower(confidence).toFixed(1)}`;
+}
+
+/**
+ * Mean squared difference between claimed confidences and observed correctness.
+ *
+ * Returns 0 when the arrays are empty or differ in length. 0 is also the score
+ * of a perfect cohort, so callers that must tell those cases apart need to
+ * check the sample sizes themselves.
+ */
+export function brierScore(claimed: readonly number[], correctness: readonly number[]): number {
+  if (claimed.length === 0 || claimed.length !== correctness.length) return 0;
+  let sum = 0;
+  for (let i = 0; i < claimed.length; i++) {
+    const diff = claimed[i] - correctness[i];
+    sum += diff * diff;
+  }
+  return sum / claimed.length;
+}
+
+/**
+ * Zero-based index of the bucket that contains `confidence`.
+ *
+ * Values >= 1 clamp to the last bucket and negative values clamp to 0.
+ */
+export function bucketIndex(confidence: number): number {
+  if (confidence >= 1) return BUCKET_COUNT - 1;
+  if (confidence < 0) return 0;
+  return Math.floor(confidence * BUCKET_COUNT);
+}
+
+/**
+ * Lower and upper edges of the bucket at `index`.
+ *
+ * Edges are computed by dividing by `BUCKET_COUNT` instead of multiplying by
+ * `BUCKET_WIDTH`: `3 * 0.1` is `0.30000000000000004`, which would disagree
+ * with `bucketLower` (0.3) and with the neighbouring bucket's edge.
+ */
+export function bucketBounds(index: number): { lower: number; upper: number } {
+  return {
+    lower: index / BUCKET_COUNT,
+    upper: (index + 1) / BUCKET_COUNT,
+  };
+}
diff --git a/apps/web/src/api/routes/uncertainty.ts b/apps/web/src/api/routes/uncertainty.ts
@@ -0,0 +1,166 @@
+import { zValidator } from "@hono/zod-validator";
+import { and, count, eq, sql } from "drizzle-orm";
+import { Hono } from "hono";
+import { uuidv7 } from "uuidv7";
+import { z } from "zod";
+import { createDb } from "../../db/client";
+import { uncertaintyCalibrationSnapshot, uncertaintyPrediction } from "../../db/schema/uncertainty";
+import { outcomeLabelSchema, surfaceSchema } from "../../db/schema/uncertainty.zod";
+import { cohortKey, DAY_MS, DEFAULT_ORPHAN_TTL_DAYS } from "../lib/uncertainty/cohort";
+import { requireAuth } from "../middleware/auth";
+import type { HonoEnv } from "../types";
+
+/** Request body for `POST /predictions`. */
+const emitBodySchema = z.object({
+  surface: surfaceSchema,
+  feature_key: z.string().min(1).max(128),
+  input_fingerprint: z.string().min(1).max(128),
+  model: z.string().min(1).max(64),
+  model_version: z.string().min(1).max(64),
+  claimed_confidence: z.number().min(0).max(1),
+  prediction_payload: z.unknown(),
+  orphan_ttl_days: z.number().int().min(1).max(365).optional(),
+});
+
+/** Request body for `PUT /predictions/:id/witness`. */
+const witnessBodySchema = z.object({
+  outcome_label: outcomeLabelSchema,
+  outcome_correctness: z.number().min(0).max(1),
+  outcome_payload: z.unknown().optional(),
+});
+
+/** Query parameters for `GET /calibration`. */
+const calibrationQuerySchema = z.object({
+  surface: surfaceSchema,
+  model: z.string().min(1).max(64),
+  model_version: z.string().min(1).max(64),
+});
+
+/**
+ * Escapes the LIKE wildcards `%` and `_`, plus the escape character `\` itself, so the value
+ * matches literally inside a `LIKE ... ESCAPE '\'` clause.
+ */
+function escapeLike(value: string): string {
+  return value.replace(/[\\%_]/g, "\\$&");
+}
+
+/** Uncertainty routes; every handler runs behind `requireAuth`. */
+const uncertaintyRoutes = new Hono<HonoEnv>()
+  .use("*", requireAuth)
+  // Records a new prediction in the "emitted" state; responds 201 with its id and the
+  // orphan deadline as epoch milliseconds.
+  .post("/predictions", zValidator("json", emitBodySchema), async (c) => {
+    const body = c.req.valid("json");
+    const db = createDb(c.env.DB);
+
+    const id = uuidv7();
+    const now = Date.now();
+    const ttlDays = body.orphan_ttl_days ?? DEFAULT_ORPHAN_TTL_DAYS;
+    const orphanAfter = now + ttlDays * DAY_MS;
+
+    await db.insert(uncertaintyPrediction).values({
+      id,
+      surface: body.surface,
+      featureKey: body.feature_key,
+      inputFingerprint: body.input_fingerprint,
+      model: body.model,
+      modelVersion: body.model_version,
+      claimedConfidence: body.claimed_confidence,
+      predictionPayload: body.prediction_payload,
+      state: "emitted",
+      createdAt: new Date(now),
+      orphanAfter: new Date(orphanAfter),
+      cohortKey: cohortKey(body.surface, body.model, body.model_version, body.claimed_confidence),
+    });
+
+    return c.json({ id, orphan_after: orphanAfter }, 201);
+  })
+  // Attaches an outcome to a prediction. Responds 404 for an unknown id and 409 unless the
+  // prediction is still "emitted".
+  .put(
+    "/predictions/:id/witness",
+    zValidator("param", z.object({ id: z.string().min(1) })),
+    zValidator("json", witnessBodySchema),
+    async (c) => {
+      const { id } = c.req.valid("param");
+      const body = c.req.valid("json");
+      const db = createDb(c.env.DB);
+
+      const [prediction] = await db
+        .select({ state: uncertaintyPrediction.state })
+        .from(uncertaintyPrediction)
+        .where(eq(uncertaintyPrediction.id, id))
+        .limit(1);
+
+      if (!prediction) {
+        return c.json({ error: "Prediction not found" }, 404);
+      }
+
+      if (prediction.state === "witnessed") {
+        return c.json({ error: "Prediction already witnessed" }, 409);
+      }
+
+      // The update below only touches "emitted" rows, so any other state must be rejected
+      // here instead of being reported back as a successful witness.
+      if (prediction.state !== "emitted") {
+        return c.json({ error: `Prediction is ${prediction.state}, not emitted` }, 409);
+      }
+
+      const updated = await db
+        .update(uncertaintyPrediction)
+        .set({
+          state: "witnessed",
+          outcomeLabel: body.outcome_label,
+          outcomeCorrectness: body.outcome_correctness,
+          outcomePayload: body.outcome_payload ?? null,
+          witnessedAt: new Date(),
+        })
+        .where(and(eq(uncertaintyPrediction.id, id), eq(uncertaintyPrediction.state, "emitted")))
+        .returning({ id: uncertaintyPrediction.id });
+
+      // No row returned means the state changed between the read above and this write.
+      if (updated.length === 0) {
+        return c.json({ error: "Prediction is no longer emitted" }, 409);
+      }
+
+      return c.json({ id, state: "witnessed" as const });
+    },
+  )
+  // Returns the calibration snapshot rows of one surface/model/version, ordered by bucket.
+  .get("/calibration", zValidator("query", calibrationQuerySchema), async (c) => {
+    const q = c.req.valid("query");
+    const db = createDb(c.env.DB);
+
+    // The prefix is caller-supplied, so escape it: an unescaped `_` or `%` would act as a
+    // wildcard and pull in snapshots of other cohorts.
+    const cohortPrefix = `${q.surface}|${q.model}|${q.model_version}|`;
+    const pattern = `${escapeLike(cohortPrefix)}%`;
+    const rows = await db
+      .select()
+      .from(uncertaintyCalibrationSnapshot)
+      .where(sql`${uncertaintyCalibrationSnapshot.cohortKey} LIKE ${pattern} ESCAPE '\\'`)
+      .orderBy(uncertaintyCalibrationSnapshot.bucketLower);
+
+    return c.json({ buckets: rows });
+  })
+  // Returns, per surface, the number of orphaned, emitted and witnessed predictions plus the total.
+  .get("/orphans", async (c) => {
+    const db = createDb(c.env.DB);
+
+    const rows = await db
+      .select({
+        surface: uncertaintyPrediction.surface,
+        orphan_count: count(sql`CASE WHEN ${uncertaintyPrediction.state} = 'orphaned' THEN 1 END`),
+        emitted_count: count(sql`CASE WHEN ${uncertaintyPrediction.state} = 'emitted' THEN 1 END`),
+        witnessed_count: count(
+          sql`CASE WHEN ${uncertaintyPrediction.state} = 'witnessed' THEN 1 END`,
+        ),
+        total: count(),
+      })
+      .from(uncertaintyPrediction)
+      .groupBy(uncertaintyPrediction.surface);
+
+    return c.json({ surfaces: rows });
+  });
+
+export { uncertaintyRoutes };
diff --git a/apps/web/src/db/client.ts b/apps/web/src/db/client.ts
@@ -1,8 +1,9 @@
 import { type DrizzleD1Database, drizzle } from "drizzle-orm/d1";
 import * as authSchema from "./schema/auth";
 import * as auditSchema from "./schema/audit";
+import * as uncertaintySchema from "./schema/uncertainty";
 
-const schema = { ...authSchema, ...auditSchema };
+const schema = { ...authSchema, ...auditSchema, ...uncertaintySchema };
 
 export type Database = DrizzleD1Database<typeof schema>;
 

diff --git a/apps/web/src/pages/api/[...slug].ts b/apps/web/src/pages/api/[...slug].ts
@@ -4,6 +4,7 @@ import { Hono } from "hono";
 import { createAuth } from "../../api/auth";
 import { colorRoutes } from "../../api/routes/color";
 import { ctrlRoutes } from "../../api/routes/ctrl/index";
+import { uncertaintyRoutes } from "../../api/routes/uncertainty";
 import { loadSession } from "../../api/middleware/auth";
 import { requestLogger } from "../../lib/logging/middleware";
 import type { HonoEnv } from "../../api/types";
@@ -26,6 +27,7 @@ app.use("/*", loadSession);
 
 app.route("/color", colorRoutes);
 app.route("/ctrl", ctrlRoutes);
+app.route("/uncertainty", uncertaintyRoutes);
 
 const handle: APIRoute = (context) => app.fetch(context.request, env);
 

diff --git a/apps/web/tests/api/routes/uncertainty.test.ts b/apps/web/tests/api/routes/uncertainty.test.ts
@@ -0,0 +1,157 @@
+// Unit tests for the cohort helpers and the uncertainty zod schemas. The HTTP handlers in
+// routes/uncertainty.ts are not exercised here.
+import { describe, expect, it } from "vitest";
+import {
+  brierScore,
+  bucketBounds,
+  bucketIndex,
+  bucketLower,
+  cohortKey,
+} from "../../../src/api/lib/uncertainty/cohort";
+import {
+  insertUncertaintyPredictionSchema,
+  outcomeLabelSchema,
+  predictionStateSchema,
+} from "../../../src/db/schema/uncertainty.zod";
+
+describe("bucketLower", () => {
+  it("floors confidence to 0.1 buckets", () => {
+    expect(bucketLower(0.0)).toBe(0);
+    expect(bucketLower(0.34)).toBeCloseTo(0.3, 5);
+    expect(bucketLower(0.5)).toBeCloseTo(0.5, 5);
+    expect(bucketLower(0.99)).toBeCloseTo(0.9, 5);
+  });
+
+  it("clamps a perfect-confidence call into the top bucket", () => {
+    expect(bucketLower(1)).toBe(0.9);
+  });
+
+  it("clamps negative confidence to the bottom bucket", () => {
+    expect(bucketLower(-0.2)).toBe(0);
+  });
+});
+
+describe("cohortKey", () => {
+  it("composes surface, model, version, and bucket lower bound", () => {
+    expect(cohortKey("rafters.color", "claude-sonnet-4-7", "2026-04-01", 0.82)).toBe(
+      "rafters.color|claude-sonnet-4-7|2026-04-01|0.8",
+    );
+  });
+
+  it("collapses confidences in the same bucket to the same cohort", () => {
+    const a = cohortKey("eavesdrop.classify", "kimi-k2", "v1", 0.71);
+    const b = cohortKey("eavesdrop.classify", "kimi-k2", "v1", 0.79);
+    expect(a).toBe(b);
+  });
+
+  it("separates different surfaces into different cohorts", () => {
+    const a = cohortKey("rafters.color", "m", "v", 0.5);
+    const b = cohortKey("mail.deliverability", "m", "v", 0.5);
+    expect(a).not.toBe(b);
+  });
+
+  it("clamps out-of-range confidence into the edge buckets", () => {
+    expect(cohortKey("rafters.color", "m", "v", 1)).toBe("rafters.color|m|v|0.9");
+    expect(cohortKey("rafters.color", "m", "v", -0.2)).toBe("rafters.color|m|v|0.0");
+  });
+});
+
+describe("bucketIndex + bucketBounds", () => {
+  it("indexes 0..9 across [0, 1)", () => {
+    expect(bucketIndex(0)).toBe(0);
+    expect(bucketIndex(0.5)).toBe(5);
+    expect(bucketIndex(0.99)).toBe(9);
+  });
+
+  it("clamps perfect confidence into the top bucket", () => {
+    expect(bucketIndex(1)).toBe(9);
+  });
+
+  it("clamps negative confidence into the bottom bucket", () => {
+    expect(bucketIndex(-0.2)).toBe(0);
+  });
+
+  it("returns matching [lower, upper) bounds", () => {
+    expect(bucketBounds(0)).toEqual({ lower: 0, upper: 0.1 });
+    const seventh = bucketBounds(7);
+    expect(seventh.lower).toBeCloseTo(0.7, 5);
+    expect(seventh.upper).toBeCloseTo(0.8, 5);
+  });
+});
+
+describe("brierScore", () => {
+  it("returns 0 for an empty cohort", () => {
+    expect(brierScore([], [])).toBe(0);
+  });
+
+  it("scores a perfectly calibrated cohort at 0", () => {
+    expect(brierScore([1, 0, 1], [1, 0, 1])).toBe(0);
+  });
+
+  it("scores a worst-case cohort at 1", () => {
+    expect(brierScore([1, 0], [0, 1])).toBe(1);
+  });
+
+  it("computes mean squared error", () => {
+    // claimed = 0.8, actual = 0.5 -> diff^2 = 0.09 (single-row mean)
+    expect(brierScore([0.8], [0.5])).toBeCloseTo(0.09, 5);
+  });
+
+  it("returns 0 when the arrays differ in length", () => {
+    expect(brierScore([0.8, 0.6], [1])).toBe(0);
+  });
+});
+
+describe("insertUncertaintyPredictionSchema", () => {
+  it("accepts a well-formed emit", () => {
+    const result = insertUncertaintyPredictionSchema.safeParse({
+      surface: "rafters.color",
+      featureKey: "oklch.lowChromaHighLightness",
+      inputFingerprint: "abc123",
+      model: "claude-sonnet-4-7",
+      modelVersion: "2026-04-01",
+      claimedConfidence: 0.82,
+      predictionPayload: { name: "parchment" },
+      state: "emitted",
+      orphanAfter: new Date(),
+      cohortKey: "rafters.color|claude-sonnet-4-7|2026-04-01|0.8",
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it("rejects out-of-range confidence", () => {
+    const result = insertUncertaintyPredictionSchema.safeParse({
+      surface: "rafters.color",
+      featureKey: "x",
+      inputFingerprint: "y",
+      model: "m",
+      modelVersion: "v",
+      claimedConfidence: 1.5,
+      predictionPayload: {},
+      state: "emitted",
+      orphanAfter: new Date(),
+      cohortKey: "k",
+    });
+    expect(result.success).toBe(false);
+  });
+});
+
+describe("predictionStateSchema", () => {
+  it("accepts the four lifecycle states", () => {
+    for (const s of ["emitted", "witnessed", "orphaned", "retired"] as const) {
+      expect(predictionStateSchema.safeParse(s).success).toBe(true);
+    }
+  });
+
+  it("rejects an unknown state", () => {
+    expect(predictionStateSchema.safeParse("calibrated").success).toBe(false);
+  });
+});
+
+describe("outcomeLabelSchema", () => {
+  it("accepts the per-surface outcome labels", () => {
+    for (const l of ["accepted", "rejected", "edited", "ignored", "custom"] as const) {
+      expect(outcomeLabelSchema.safeParse(l).success).toBe(true);
+    }
+  });
+});