diff --git a/.github/ci-config.json b/.github/ci-config.json index 75a2b30c6..0b42631f0 100644 --- a/.github/ci-config.json +++ b/.github/ci-config.json @@ -25,7 +25,12 @@ "pr_preview": false }, "prediction-swarm": { - "deploys": ["swarm-twitter", "swarm-verifier", "swarm-filter"], + "deploys": [ + "swarm-twitter", + "swarm-judge", + "swarm-verifier", + "swarm-filter" + ], "pr_preview": false, "preview_domain": "predictionswarm.com" }, @@ -33,6 +38,10 @@ "pr_preview": false, "prod_only": true }, + "swarm-judge": { + "pr_preview": false, + "prod_only": true + }, "swarm-verifier": { "pr_preview": false, "prod_only": true diff --git a/apps/torus-wallet/next-env.d.ts b/apps/torus-wallet/next-env.d.ts index c4b7818fb..9edff1c7c 100644 --- a/apps/torus-wallet/next-env.d.ts +++ b/apps/torus-wallet/next-env.d.ts @@ -1,6 +1,6 @@ /// /// -import "./.next/dev/types/routes.d.ts"; +import "./.next/types/routes.d.ts"; // NOTE: This file should not be edited // see https://nextjs.org/docs/app/api-reference/config/typescript for more information. diff --git a/atlas/migrations/prediction_swarm/20260105202138.sql b/atlas/migrations/prediction_swarm/20260105202138.sql new file mode 100644 index 000000000..8b76b041b --- /dev/null +++ b/atlas/migrations/prediction_swarm/20260105202138.sql @@ -0,0 +1,65 @@ +-- Create "verification_claim" table +CREATE TABLE "public"."verification_claim" ( + "id" uuid NOT NULL DEFAULT uuidv7(), + "parsed_prediction_id" uuid NOT NULL DEFAULT uuidv7(), + "verifier_agent_id" character varying(256) NOT NULL, + "verifier_agent_signature" text NOT NULL, + "claim_outcome" boolean NOT NULL, + "confidence" numeric NOT NULL, + "reasoning" text NOT NULL, + "sources" jsonb NULL, + "timeframe_start_utc" timestamptz NULL, + "timeframe_end_utc" timestamptz NULL, + "timeframe_precision" character varying(32) NULL, + "created_at" timestamptz NOT NULL DEFAULT now(), + "updated_at" timestamptz NOT NULL DEFAULT now(), + "deleted_at" timestamptz NULL, + PRIMARY KEY ("id"), + CONSTRAINT "verification_claim_unique_verifier" UNIQUE ("parsed_prediction_id", "verifier_agent_id"), + CONSTRAINT "verification_claim_parsed_prediction_id_parsed_prediction_id_fk" FOREIGN KEY ("parsed_prediction_id") REFERENCES "public"."parsed_prediction" ("id") ON UPDATE NO ACTION ON DELETE CASCADE +); +-- Create index "verification_claim_created_at_idx" to table: "verification_claim" +CREATE INDEX "verification_claim_created_at_idx" ON "public"."verification_claim" ("created_at"); +-- Create index "verification_claim_parsed_prediction_id_idx" to table: "verification_claim" +CREATE INDEX "verification_claim_parsed_prediction_id_idx" ON "public"."verification_claim" ("parsed_prediction_id"); +-- Create index "verification_claim_verifier_agent_id_idx" to table: "verification_claim" +CREATE INDEX "verification_claim_verifier_agent_id_idx" ON "public"."verification_claim" ("verifier_agent_id"); +-- Modify "verdict" table +ALTER TABLE "public"."verdict" ADD COLUMN "accepted_claim_id" uuid NULL DEFAULT uuidv7(), ADD CONSTRAINT "verdict_accepted_claim_id_verification_claim_id_fk" FOREIGN KEY ("accepted_claim_id") REFERENCES "public"."verification_claim" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION; +-- Create "verifier_feedback" table +CREATE TABLE "public"."verifier_feedback" ( + "id" uuid NOT NULL DEFAULT uuidv7(), + "parsed_prediction_id" uuid NOT NULL DEFAULT uuidv7(), + "verifier_agent_id" character varying(256) NOT NULL, + "verifier_agent_signature" text NOT NULL, + "failure_cause" "public"."failure_cause_enum" NOT NULL, + 
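+  -- Free-text explanation accompanying failure_cause, provided by the verifier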
"reason" text NOT NULL, + "created_at" timestamptz NOT NULL DEFAULT now(), + "updated_at" timestamptz NOT NULL DEFAULT now(), + "deleted_at" timestamptz NULL, + PRIMARY KEY ("id"), + CONSTRAINT "verifier_feedback_unique" UNIQUE ("parsed_prediction_id", "verifier_agent_id"), + CONSTRAINT "verifier_feedback_parsed_prediction_id_parsed_prediction_id_fk" FOREIGN KEY ("parsed_prediction_id") REFERENCES "public"."parsed_prediction" ("id") ON UPDATE NO ACTION ON DELETE CASCADE +); +-- Create index "verifier_feedback_agent_idx" to table: "verifier_feedback" +CREATE INDEX "verifier_feedback_agent_idx" ON "public"."verifier_feedback" ("verifier_agent_id"); +-- Create index "verifier_feedback_failure_cause_idx" to table: "verifier_feedback" +CREATE INDEX "verifier_feedback_failure_cause_idx" ON "public"."verifier_feedback" ("failure_cause"); +-- Create index "verifier_feedback_prediction_idx" to table: "verifier_feedback" +CREATE INDEX "verifier_feedback_prediction_idx" ON "public"."verifier_feedback" ("parsed_prediction_id"); +-- Create "verifier_topic_registration" table +CREATE TABLE "public"."verifier_topic_registration" ( + "id" uuid NOT NULL DEFAULT uuidv7(), + "verifier_agent_id" character varying(256) NOT NULL, + "topic_id" uuid NOT NULL DEFAULT uuidv7(), + "created_at" timestamptz NOT NULL DEFAULT now(), + "updated_at" timestamptz NOT NULL DEFAULT now(), + "deleted_at" timestamptz NULL, + PRIMARY KEY ("id"), + CONSTRAINT "verifier_topic_unique" UNIQUE ("verifier_agent_id", "topic_id"), + CONSTRAINT "verifier_topic_registration_topic_id_prediction_topic_id_fk" FOREIGN KEY ("topic_id") REFERENCES "public"."prediction_topic" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION +); +-- Create index "verifier_topic_agent_idx" to table: "verifier_topic_registration" +CREATE INDEX "verifier_topic_agent_idx" ON "public"."verifier_topic_registration" ("verifier_agent_id"); +-- Create index "verifier_topic_topic_idx" to table: "verifier_topic_registration" +CREATE INDEX "verifier_topic_topic_idx" ON "public"."verifier_topic_registration" ("topic_id"); diff --git a/atlas/migrations/prediction_swarm/atlas.sum b/atlas/migrations/prediction_swarm/atlas.sum index 9d36ec8d4..7506d15e6 100644 --- a/atlas/migrations/prediction_swarm/atlas.sum +++ b/atlas/migrations/prediction_swarm/atlas.sum @@ -1,4 +1,4 @@ -h1:zgOElHPnrTchPb+gb2wPJygPOHJwa/MVSSghPyDnX7A= +h1:8hG9KyqunMoyK3ggRLphPfYFaoH2ZAgKRy/dyvci3v4= 20251118085426_baseline.sql h1:yYt/F7QkBCF92mhKeZN8/T6ljcYJN+1s3M/gBNv5MxI= 20251120161044_rename_goal.sql h1:V0EM07MmeCsc4naTIRBXiLxYtEbHQ0AQI4GzxmkUfxU= 20251120161127.sql h1:27i5PAjofmV/DRHMWKybpF5ncgpQTwdaMFUay959YSg= @@ -11,3 +11,4 @@ h1:zgOElHPnrTchPb+gb2wPJygPOHJwa/MVSSghPyDnX7A= 20251126135959.sql h1:ouWxe6r2q3MV48Z18j7vX4m6hDypY8XOpv8V9/1HdpU= 20251128161908.sql h1:qcYrYrusemufBSPM9Mbj/On++3GrjxfItjfTwEOGOus= 20251130132124.sql h1:kkYoKrMn7ITv2IuWGxrLBaCkpPC6zqt4C+ZVYw1BlA0= +20260105202138.sql h1:56oSvxIQl+Ca6uecSbIpxST7pSFNNnNvhDyvPQ7/vGk= diff --git a/atlas/migrations/webapps/20260105111457_open_verification.sql b/atlas/migrations/webapps/20260105111457_open_verification.sql new file mode 100644 index 000000000..c8934689f --- /dev/null +++ b/atlas/migrations/webapps/20260105111457_open_verification.sql @@ -0,0 +1,36 @@ +-- Open verification: verification claims and topic registration + +CREATE TABLE IF NOT EXISTS verification_claim ( + id UUID PRIMARY KEY DEFAULT uuidv7(), + parsed_prediction_id UUID NOT NULL REFERENCES parsed_prediction(id) ON DELETE CASCADE, + verifier_agent_id VARCHAR(256) NOT NULL, + 
verifier_agent_signature TEXT NOT NULL, + claim_outcome BOOLEAN NOT NULL, + confidence DECIMAL NOT NULL, + reasoning TEXT NOT NULL, + sources JSONB, + timeframe_start_utc TIMESTAMP WITH TIME ZONE, + timeframe_end_utc TIMESTAMP WITH TIME ZONE, + timeframe_precision VARCHAR(32), + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + deleted_at TIMESTAMP WITH TIME ZONE +); + +CREATE TABLE IF NOT EXISTS verifier_topic_registration ( + id UUID PRIMARY KEY DEFAULT uuidv7(), + verifier_agent_id VARCHAR(256) NOT NULL, + topic_id UUID NOT NULL REFERENCES prediction_topic(id), + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + deleted_at TIMESTAMP WITH TIME ZONE, + UNIQUE(verifier_agent_id, topic_id) +); + +ALTER TABLE verdict ADD COLUMN IF NOT EXISTS accepted_claim_id UUID REFERENCES verification_claim(id); + +CREATE INDEX IF NOT EXISTS verification_claim_parsed_prediction_id_idx ON verification_claim(parsed_prediction_id); +CREATE INDEX IF NOT EXISTS verification_claim_verifier_agent_id_idx ON verification_claim(verifier_agent_id); +CREATE INDEX IF NOT EXISTS verification_claim_created_at_idx ON verification_claim(created_at); +CREATE INDEX IF NOT EXISTS verifier_topic_agent_idx ON verifier_topic_registration(verifier_agent_id); +CREATE INDEX IF NOT EXISTS verifier_topic_topic_idx ON verifier_topic_registration(topic_id); diff --git a/atlas/migrations/webapps/20260105140000_verifier_feedback.sql b/atlas/migrations/webapps/20260105140000_verifier_feedback.sql new file mode 100644 index 000000000..71d6e5f5d --- /dev/null +++ b/atlas/migrations/webapps/20260105140000_verifier_feedback.sql @@ -0,0 +1,17 @@ +-- Verifier feedback table for per-verifier feedback on predictions +CREATE TABLE IF NOT EXISTS verifier_feedback ( + id UUID PRIMARY KEY DEFAULT uuidv7(), + parsed_prediction_id UUID NOT NULL REFERENCES parsed_prediction(id) ON DELETE CASCADE, + verifier_agent_id VARCHAR(256) NOT NULL, + verifier_agent_signature TEXT NOT NULL, + failure_cause failure_cause_enum NOT NULL, + reason TEXT NOT NULL, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + deleted_at TIMESTAMP WITH TIME ZONE, + UNIQUE(parsed_prediction_id, verifier_agent_id) +); + +CREATE INDEX IF NOT EXISTS verifier_feedback_prediction_idx ON verifier_feedback(parsed_prediction_id); +CREATE INDEX IF NOT EXISTS verifier_feedback_agent_idx ON verifier_feedback(verifier_agent_id); +CREATE INDEX IF NOT EXISTS verifier_feedback_failure_cause_idx ON verifier_feedback(failure_cause); diff --git a/atlas/migrations/webapps/20260105160000_verification_claim_unique.sql b/atlas/migrations/webapps/20260105160000_verification_claim_unique.sql new file mode 100644 index 000000000..3922e846d --- /dev/null +++ b/atlas/migrations/webapps/20260105160000_verification_claim_unique.sql @@ -0,0 +1,5 @@ +-- Add unique constraint to verification_claim table +-- Ensures each verifier can only submit one claim per prediction +ALTER TABLE verification_claim +ADD CONSTRAINT verification_claim_unique_verifier +UNIQUE (parsed_prediction_id, verifier_agent_id); diff --git a/packages/db/src/schema/index.ts b/packages/db/src/schema/index.ts index fc490aaaf..871e0c648 100644 --- a/packages/db/src/schema/index.ts +++ b/packages/db/src/schema/index.ts @@ -7,4 +7,6 @@ export * from "./memory"; export * from "./swarm-filter"; +export * from "./swarm-verifier"; + 
export * from "./context-schemas"; diff --git a/packages/db/src/schema/memory.ts b/packages/db/src/schema/memory.ts index f36f94389..0c6f65917 100644 --- a/packages/db/src/schema/memory.ts +++ b/packages/db/src/schema/memory.ts @@ -289,11 +289,77 @@ export const predictionTopicSchema = createTable( ], ); -// ==== Verdicts ==== +// ==== Verification Claims ==== + +/** + * Source evidence for verification claims + */ +export interface ClaimSource { + url: string; + title?: string; + snippet?: string; + retrievedAt: string; + archiveUrl?: string; +} + +/** + * Claims submitted by open verifiers asserting a prediction's outcome. + * Each verifier can submit one claim per prediction. + * The verdict system evaluates claims to produce final verdicts. + */ +export const verificationClaimSchema = createTable( + "verification_claim", + { + id: uuidv7("id").primaryKey(), + parsedPredictionId: uuidv7("parsed_prediction_id") + .notNull() + .references(() => parsedPredictionSchema.id, { onDelete: "cascade" }), + verifierAgentId: ss58Address("verifier_agent_id").notNull(), + verifierAgentSignature: text("verifier_agent_signature").notNull(), + claimOutcome: boolean("claim_outcome").notNull(), + confidence: decimal("confidence").notNull(), + reasoning: text("reasoning").notNull(), + sources: jsonb("sources").$type(), + timeframeStartUtc: timestampz("timeframe_start_utc"), + timeframeEndUtc: timestampz("timeframe_end_utc"), + timeframePrecision: varchar("timeframe_precision", { length: 32 }), + ...timeFields(), + }, + (t) => [ + unique("verification_claim_unique_verifier").on( + t.parsedPredictionId, + t.verifierAgentId, + ), + index("verification_claim_parsed_prediction_id_idx").on( + t.parsedPredictionId, + ), + index("verification_claim_verifier_agent_id_idx").on(t.verifierAgentId), + index("verification_claim_created_at_idx").on(t.createdAt), + ], +); + +/** + * Tracks which topics a verifier has registered to verify. + * Used for weighting claims from topic specialists. + */ +export const verifierTopicRegistrationSchema = createTable( + "verifier_topic_registration", + { + id: uuidv7("id").primaryKey(), + verifierAgentId: ss58Address("verifier_agent_id").notNull(), + topicId: uuidv7("topic_id") + .notNull() + .references(() => predictionTopicSchema.id), + ...timeFields(), + }, + (t) => [ + unique("verifier_topic_unique").on(t.verifierAgentId, t.topicId), + index("verifier_topic_agent_idx").on(t.verifierAgentId), + index("verifier_topic_topic_idx").on(t.topicId), + ], +); -// TODO: I don't think the JSON for the conclusion works -// Some of our queries are more complicated than this allows -// We shall see +// ==== Verdicts ==== /** * Stores verdicts for predictions @@ -307,6 +373,9 @@ export const verdictSchema = createTable( .references(() => parsedPredictionSchema.id), verdict: boolean("verdict").notNull(), // True if prediction came true, false otherwise context: jsonb("context").notNull().$type(), // Context explaining the verdict + acceptedClaimId: uuidv7("accepted_claim_id").references( + () => verificationClaimSchema.id, + ), ...timeFields(), }, (t) => [ @@ -429,6 +498,34 @@ export const parsedPredictionFeedbackSchema = createTable( ], ); +/** + * Stores per-verifier feedback on predictions. + * When a verifier submits feedback, that prediction is excluded from their claimable list. 
+ */ +export const verifierFeedbackSchema = createTable( + "verifier_feedback", + { + id: uuidv7("id").primaryKey(), + parsedPredictionId: uuidv7("parsed_prediction_id") + .notNull() + .references(() => parsedPredictionSchema.id, { onDelete: "cascade" }), + verifierAgentId: ss58Address("verifier_agent_id").notNull(), + verifierAgentSignature: text("verifier_agent_signature").notNull(), + failureCause: failureCauseEnum("failure_cause").notNull(), + reason: text("reason").notNull(), + ...timeFields(), + }, + (t) => [ + unique("verifier_feedback_unique").on( + t.parsedPredictionId, + t.verifierAgentId, + ), + index("verifier_feedback_prediction_idx").on(t.parsedPredictionId), + index("verifier_feedback_agent_idx").on(t.verifierAgentId), + index("verifier_feedback_failure_cause_idx").on(t.failureCause), + ], +); + // ==== Credit System ==== /** diff --git a/packages/db/src/schema/swarm-verifier.ts b/packages/db/src/schema/swarm-verifier.ts new file mode 100644 index 000000000..e755cfa63 --- /dev/null +++ b/packages/db/src/schema/swarm-verifier.ts @@ -0,0 +1,12 @@ +import { integer, text } from "drizzle-orm/pg-core"; +import { createTable, ss58Address, timeFields } from "./utils"; + +/** + * Tracks the last processed cursor for each verifier instance. + * Prevents reprocessing the same predictions on restart. + */ +export const verifierCursorStateSchema = createTable("verifier_cursor_state", { + verifierAgentId: ss58Address("verifier_agent_id").primaryKey(), + lastCursor: text("last_cursor").notNull(), + ...timeFields(), +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1d3fb681d..9745def22 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1464,6 +1464,9 @@ importers: typescript: specifier: 'catalog:' version: 5.9.2 + vitest: + specifier: 'catalog:' + version: 3.2.4(@types/debug@4.1.12)(@types/node@20.19.11)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.1)(tsx@4.20.5)(yaml@2.8.2) services/swarm-filter: dependencies: @@ -1556,6 +1559,73 @@ importers: specifier: 'catalog:' version: 3.2.4(@types/debug@4.1.12)(@types/node@20.19.11)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.1)(tsx@4.20.5)(yaml@2.8.2) + services/swarm-judge: + dependencies: + '@t3-oss/env-core': + specifier: ^0.11.1 + version: 0.11.1(typescript@5.9.2)(zod@3.25.76) + '@torus-network/sdk': + specifier: workspace:* + version: link:../../packages/torus-sdk-ts + '@torus-network/torus-utils': + specifier: workspace:* + version: link:../../packages/torus-utils + '@torus-ts/db': + specifier: workspace:* + version: link:../../packages/db + cors: + specifier: ^2.8.5 + version: 2.8.5 + express: + specifier: ^4.19.2 + version: 4.21.2 + node-fetch: + specifier: ^3.3.2 + version: 3.3.2 + superjson: + specifier: 'catalog:' + version: 2.2.1 + tsafe: + specifier: 'catalog:' + version: 1.8.5 + vitest: + specifier: 'catalog:' + version: 3.2.4(@types/debug@4.1.12)(@types/node@20.19.11)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.1)(tsx@4.20.5)(yaml@2.8.2) + zod: + specifier: 'catalog:' + version: 3.25.76 + devDependencies: + '@torus-ts/eslint-config': + specifier: workspace:* + version: link:../../tooling/eslint + '@torus-ts/prettier-config': + specifier: workspace:* + version: link:../../tooling/prettier + '@torus-ts/tsconfig': + specifier: workspace:* + version: link:../../tooling/typescript + '@types/cors': + specifier: ^2.8.17 + version: 2.8.19 + '@types/express': + specifier: ^4.17.21 + version: 4.17.23 + '@types/node': + specifier: 'catalog:' + version: 20.19.11 + dotenv-cli: + specifier: 'catalog:' + version: 7.4.4 + eslint: + 
specifier: 'catalog:' + version: 9.35.0(jiti@2.6.1) + prettier: + specifier: 'catalog:' + version: 3.6.2 + typescript: + specifier: 'catalog:' + version: 5.9.2 + services/swarm-services: dependencies: '@polkadot/api': @@ -1686,39 +1756,36 @@ importers: services/swarm-verifier: dependencies: - '@t3-oss/env-core': - specifier: ^0.11.1 - version: 0.11.1(typescript@5.9.2)(zod@3.25.76) - '@torus-network/sdk': - specifier: workspace:* - version: link:../../packages/torus-sdk-ts + '@polkadot/api': + specifier: 'catalog:' + version: 14.3.1(bufferutil@4.0.9)(utf-8-validate@5.0.10) + '@polkadot/util': + specifier: 'catalog:' + version: 13.4.3 + '@polkadot/util-crypto': + specifier: 'catalog:' + version: 13.5.6(@polkadot/util@13.4.3) '@torus-network/torus-utils': specifier: workspace:* version: link:../../packages/torus-utils '@torus-ts/db': specifier: workspace:* version: link:../../packages/db - cors: - specifier: ^2.8.5 - version: 2.8.5 - express: - specifier: ^4.19.2 - version: 4.21.2 - node-fetch: - specifier: ^3.3.2 - version: 3.3.2 - superjson: - specifier: 'catalog:' - version: 2.2.1 + canonicalize: + specifier: ^2.1.0 + version: 2.1.0 + openai: + specifier: ^4.73.1 + version: 4.104.0(ws@8.18.3(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.76) tsafe: specifier: 'catalog:' version: 1.8.5 - vitest: - specifier: 'catalog:' - version: 3.2.4(@types/debug@4.1.12)(@types/node@20.19.11)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.1)(tsx@4.20.5)(yaml@2.8.2) zod: specifier: 'catalog:' version: 3.25.76 + zod-to-json-schema: + specifier: ^3.24.6 + version: 3.24.6(zod@3.25.76) devDependencies: '@torus-ts/eslint-config': specifier: workspace:* @@ -1729,12 +1796,6 @@ importers: '@torus-ts/tsconfig': specifier: workspace:* version: link:../../tooling/typescript - '@types/cors': - specifier: ^2.8.17 - version: 2.8.19 - '@types/express': - specifier: ^4.17.21 - version: 4.17.23 '@types/node': specifier: 'catalog:' version: 20.19.11 @@ -1747,6 +1808,9 @@ importers: prettier: specifier: 'catalog:' version: 3.6.2 + tsx: + specifier: 'catalog:' + version: 4.20.5 typescript: specifier: 'catalog:' version: 5.9.2 @@ -9358,6 +9422,7 @@ packages: get-starknet-core@4.0.0: resolution: {integrity: sha512-6pLmidQZkC3wZsrHY99grQHoGpuuXqkbSP65F8ov1/JsEI8DDLkhsAuLCKFzNOK56cJp+f1bWWfTJ57e9r5eqQ==} + deprecated: Package no longer supported. 
Please use @starknet-io/get-starknet-core get-stream@5.2.0: resolution: {integrity: sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==} diff --git a/services/swarm-api/package.json b/services/swarm-api/package.json index 70273857f..04f546046 100644 --- a/services/swarm-api/package.json +++ b/services/swarm-api/package.json @@ -17,6 +17,7 @@ "lint-fix": "eslint --cache --cache-location .cache/.eslintcache --fix", "start": "node ./dist/index.js", "start:local": "pnpm with-env pnpm start", + "test": "vitest run", "typecheck": "tsc --noEmit", "with-env": "dotenv -e ../../.env --" }, @@ -49,7 +50,8 @@ "eslint": "catalog:", "prettier": "catalog:", "tsx": "catalog:", - "typescript": "catalog:" + "typescript": "catalog:", + "vitest": "catalog:" }, "prettier": "@torus-ts/prettier-config" } diff --git a/services/swarm-api/src/routes/claims.ts b/services/swarm-api/src/routes/claims.ts new file mode 100644 index 000000000..b9cd73351 --- /dev/null +++ b/services/swarm-api/src/routes/claims.ts @@ -0,0 +1,416 @@ +import { blake2AsHex, signatureVerify } from "@polkadot/util-crypto"; +import { and, eq, gt, inArray, isNull, notExists } from "@torus-ts/db"; +import { + parsedPredictionSchema, + predictionDuplicateRelationsSchema, + predictionTopicSchema, + scrapedTweetSchema, + twitterUsersSchema, + verdictSchema, + verificationClaimSchema, + verifierFeedbackSchema, + verifierTopicRegistrationSchema, +} from "@torus-ts/db/schema"; +import type { FailureCause } from "@torus-ts/db/schema"; +import canonicalize from "canonicalize"; +import { requirePermission } from "../middleware/auth"; +import type { ContextApp } from "../middleware/context"; +import { + claimableQuerySchema, + claimSubmissionSchema, + feedbackSubmissionSchema, + registerTopicSchema, +} from "../schemas/claims"; +import type { + ClaimSubmission, + FeedbackSubmission, + RegisterTopic, +} from "../schemas/claims"; +import { HttpError } from "../utils/errors"; + +export const claimsRouter = (app: ContextApp) => + app.use(requirePermission(["prediction.verify"])).group("/v1", (app) => + app + .get( + "/predictions/claimable", + async ({ query, db, userKey }) => { + const { after, limit, topics } = query; + + const noVerdict = notExists( + db + .select() + .from(verdictSchema) + .where( + eq(verdictSchema.parsedPredictionId, parsedPredictionSchema.id), + ), + ); + + const noFeedbackFromVerifier = notExists( + db + .select() + .from(verifierFeedbackSchema) + .where( + and( + eq( + verifierFeedbackSchema.parsedPredictionId, + parsedPredictionSchema.id, + ), + eq(verifierFeedbackSchema.verifierAgentId, userKey), + isNull(verifierFeedbackSchema.deletedAt), + ), + ), + ); + + const notDuplicate = notExists( + db + .select() + .from(predictionDuplicateRelationsSchema) + .where( + eq( + predictionDuplicateRelationsSchema.predictionId, + parsedPredictionSchema.id, + ), + ), + ); + + const noClaimFromVerifier = notExists( + db + .select() + .from(verificationClaimSchema) + .where( + and( + eq( + verificationClaimSchema.parsedPredictionId, + parsedPredictionSchema.id, + ), + eq(verificationClaimSchema.verifierAgentId, userKey), + isNull(verificationClaimSchema.deletedAt), + ), + ), + ); + + const conditions = [ + noVerdict, + noFeedbackFromVerifier, + notDuplicate, + noClaimFromVerifier, + ]; + + if (after) { + conditions.push(gt(parsedPredictionSchema.id, after)); + } + + if (topics && topics.length > 0) { + conditions.push(inArray(predictionTopicSchema.name, topics)); + } + + const predictions = await db + 
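+          // Cursor pagination: apply all eligibility conditions, order by id, and cap results at the requested limit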
.select({ + id: parsedPredictionSchema.id, + predictionId: parsedPredictionSchema.predictionId, + target: parsedPredictionSchema.target, + timeframe: parsedPredictionSchema.timeframe, + topicName: predictionTopicSchema.name, + createdAt: parsedPredictionSchema.createdAt, + }) + .from(parsedPredictionSchema) + .innerJoin( + predictionTopicSchema, + eq(predictionTopicSchema.id, parsedPredictionSchema.topicId), + ) + .where(and(...conditions)) + .orderBy(parsedPredictionSchema.id) + .limit(limit); + + const lastPrediction = predictions[predictions.length - 1]; + const nextCursor = lastPrediction?.id ?? null; + + return { + predictions: predictions.map((p) => ({ + id: p.id, + predictionId: p.predictionId, + target: p.target, + timeframe: p.timeframe, + topicName: p.topicName, + createdAt: p.createdAt.toISOString(), + })), + nextCursor, + hasMore: predictions.length === limit, + }; + }, + { query: claimableQuerySchema }, + ) + .post( + "/predictions/:id/claim", + async ({ params, body, db, userKey }) => { + const predictionId = params.id; + const input = body as ClaimSubmission; + + const sentAt = new Date(input.content.sentAt); + const now = new Date(); + const diffMs = Math.abs(now.getTime() - sentAt.getTime()); + const maxTimestampDiffMs = 300 * 1000; + + if (diffMs > maxTimestampDiffMs) { + throw new HttpError( + 400, + `Invalid timestamp: sentAt is ${Math.floor(diffMs / 1000)}s off (max ${maxTimestampDiffMs / 1000}s allowed)`, + ); + } + + const contentCanonical = canonicalize(input.content); + if (!contentCanonical) { + throw new HttpError(500, "Failed to canonicalize content"); + } + const contentHash = blake2AsHex(contentCanonical); + + const verification = signatureVerify( + contentHash, + input.metadata.signature, + userKey, + ); + + if (!verification.isValid) { + throw new HttpError( + 400, + "Invalid signature: signature does not match content or was not signed by authenticated agent", + ); + } + + const prediction = await db + .select({ id: parsedPredictionSchema.id }) + .from(parsedPredictionSchema) + .where(eq(parsedPredictionSchema.id, predictionId)) + .limit(1); + + if (prediction.length === 0) { + throw new HttpError(404, `Prediction ${predictionId} not found`); + } + + const existingVerdict = await db + .select({ id: verdictSchema.id }) + .from(verdictSchema) + .where(eq(verdictSchema.parsedPredictionId, predictionId)) + .limit(1); + + if (existingVerdict.length > 0) { + throw new HttpError( + 400, + "Cannot submit claim: prediction already has a verdict", + ); + } + + const [insertedClaim] = await db + .insert(verificationClaimSchema) + .values({ + parsedPredictionId: predictionId, + verifierAgentId: userKey, + verifierAgentSignature: input.metadata.signature, + claimOutcome: input.content.outcome, + confidence: input.content.confidence, + reasoning: input.content.reasoning, + sources: input.content.sources, + timeframeStartUtc: new Date(input.content.timeframe.startUtc), + timeframeEndUtc: new Date(input.content.timeframe.endUtc), + timeframePrecision: input.content.timeframe.precision, + }) + .onConflictDoNothing() + .returning({ id: verificationClaimSchema.id }); + + if (!insertedClaim) { + throw new HttpError( + 400, + "You have already submitted a claim for this prediction", + ); + } + + return { + claimId: insertedClaim.id, + parsedPredictionId: predictionId, + }; + }, + { body: claimSubmissionSchema }, + ) + .post( + "/verifiers/register-topic", + async ({ body, db, userKey }) => { + const { topicId } = body as RegisterTopic; + + const topic = await db + .select({ id: 
predictionTopicSchema.id }) + .from(predictionTopicSchema) + .where(eq(predictionTopicSchema.id, topicId)) + .limit(1); + + if (topic.length === 0) { + throw new HttpError(404, `Topic ${topicId} not found`); + } + + const [registration] = await db + .insert(verifierTopicRegistrationSchema) + .values({ + verifierAgentId: userKey, + topicId, + }) + .onConflictDoNothing() + .returning({ id: verifierTopicRegistrationSchema.id }); + + return { + registered: registration !== undefined, + topicId, + }; + }, + { body: registerTopicSchema }, + ) + .post( + "/predictions/:id/feedback", + async ({ params, body, db, userKey }) => { + const predictionId = params.id; + const input = body as FeedbackSubmission; + + const sentAt = new Date(input.content.sentAt); + const now = new Date(); + const diffMs = Math.abs(now.getTime() - sentAt.getTime()); + const maxTimestampDiffMs = 300 * 1000; + + if (diffMs > maxTimestampDiffMs) { + throw new HttpError( + 400, + `Invalid timestamp: sentAt is ${Math.floor(diffMs / 1000)}s off (max ${maxTimestampDiffMs / 1000}s allowed)`, + ); + } + + const contentCanonical = canonicalize(input.content); + if (!contentCanonical) { + throw new HttpError(500, "Failed to canonicalize content"); + } + const contentHash = blake2AsHex(contentCanonical); + + const verification = signatureVerify( + contentHash, + input.metadata.signature, + userKey, + ); + + if (!verification.isValid) { + throw new HttpError( + 400, + "Invalid signature: signature does not match content or was not signed by authenticated agent", + ); + } + + const prediction = await db + .select({ id: parsedPredictionSchema.id }) + .from(parsedPredictionSchema) + .where(eq(parsedPredictionSchema.id, predictionId)) + .limit(1); + + if (prediction.length === 0) { + throw new HttpError(404, `Prediction ${predictionId} not found`); + } + + const [inserted] = await db + .insert(verifierFeedbackSchema) + .values({ + parsedPredictionId: predictionId, + verifierAgentId: userKey, + verifierAgentSignature: input.metadata.signature, + failureCause: input.content.failureCause as FailureCause, + reason: input.content.reason, + }) + .onConflictDoUpdate({ + target: [ + verifierFeedbackSchema.parsedPredictionId, + verifierFeedbackSchema.verifierAgentId, + ], + set: { + verifierAgentSignature: input.metadata.signature, + failureCause: input.content.failureCause as FailureCause, + reason: input.content.reason, + updatedAt: new Date(), + }, + }) + .returning({ id: verifierFeedbackSchema.id }); + + return { + feedbackId: inserted?.id, + parsedPredictionId: predictionId, + }; + }, + { body: feedbackSubmissionSchema }, + ) + .get("/predictions/:id/context", async ({ params, db }) => { + const predictionId = params.id; + + const prediction = await db + .select({ + id: parsedPredictionSchema.id, + predictionId: parsedPredictionSchema.predictionId, + target: parsedPredictionSchema.target, + timeframe: parsedPredictionSchema.timeframe, + topicName: predictionTopicSchema.name, + }) + .from(parsedPredictionSchema) + .innerJoin( + predictionTopicSchema, + eq(predictionTopicSchema.id, parsedPredictionSchema.topicId), + ) + .where(eq(parsedPredictionSchema.id, predictionId)) + .limit(1); + + if (prediction.length === 0) { + throw new HttpError(404, `Prediction ${predictionId} not found`); + } + + const pred = prediction[0]; + if (!pred) { + throw new HttpError(404, `Prediction ${predictionId} not found`); + } + + const targetSlices = pred.target; + const timeframeSlices = pred.timeframe; + const allSlices = [...targetSlices, ...timeframeSlices]; + 
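+      // Collect the distinct tweet ids referenced by the target and timeframe slices so their full text can be fetched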
const tweetIds = [ + ...new Set(allSlices.map((s) => BigInt(s.source.tweet_id))), + ]; + + if (tweetIds.length === 0) { + return { + id: pred.id, + predictionId: pred.predictionId, + target: pred.target, + timeframe: pred.timeframe, + tweets: [], + topicName: pred.topicName, + }; + } + + const tweets = await db + .select({ + id: scrapedTweetSchema.id, + text: scrapedTweetSchema.text, + authorUsername: twitterUsersSchema.username, + date: scrapedTweetSchema.date, + }) + .from(scrapedTweetSchema) + .leftJoin( + twitterUsersSchema, + eq(scrapedTweetSchema.authorId, twitterUsersSchema.id), + ) + .where(inArray(scrapedTweetSchema.id, tweetIds)); + + return { + id: pred.id, + predictionId: pred.predictionId, + target: pred.target, + timeframe: pred.timeframe, + tweets: tweets.map((t) => ({ + id: t.id.toString(), + text: t.text, + authorUsername: t.authorUsername, + date: t.date.toISOString(), + })), + topicName: pred.topicName, + }; + }), + ); diff --git a/services/swarm-api/src/routes/predictions.ts b/services/swarm-api/src/routes/predictions.ts index 5cb82721c..32cdd9db6 100644 --- a/services/swarm-api/src/routes/predictions.ts +++ b/services/swarm-api/src/routes/predictions.ts @@ -1,7 +1,8 @@ import { blake2AsHex, signatureVerify } from "@polkadot/util-crypto"; -import { inArray, sql } from "@torus-ts/db"; +import { eq, inArray, sql } from "@torus-ts/db"; import { parsedPredictionSchema, + predictionDuplicateRelationsSchema, predictionSchema, predictionTopicSchema, scrapedTweetSchema, @@ -10,6 +11,8 @@ import canonicalize from "canonicalize"; import { authPlugin } from "../middleware/auth"; import type { ContextApp } from "../middleware/context"; import { storePredictionsInputSchema } from "../schemas/predictions"; +import { findCanonicalPrediction } from "../utils/dedup"; +import type { ParsedPredictionForDedup } from "../utils/dedup"; import { HttpError } from "../utils/errors"; export const predictionsRouter = (app: ContextApp) => @@ -243,6 +246,59 @@ export const predictionsRouter = (app: ContextApp) => (p) => p.id, ); + // Deduplication: find predictions associated with the same tweets + const relatedPredictions = await tx + .select({ + id: parsedPredictionSchema.id, + predictionId: parsedPredictionSchema.predictionId, + target: parsedPredictionSchema.target, + timeframe: parsedPredictionSchema.timeframe, + }) + .from(parsedPredictionSchema) + .innerJoin( + scrapedTweetSchema, + eq( + scrapedTweetSchema.predictionId, + parsedPredictionSchema.predictionId, + ), + ) + .where(inArray(scrapedTweetSchema.id, tweetIds)); + + const predictionsForDedup: ParsedPredictionForDedup[] = + relatedPredictions.map((p) => ({ + id: p.id, + predictionId: p.predictionId, + target: p.target, + timeframe: p.timeframe, + })); + + const duplicateRelations: { + predictionId: string; + canonicalId: string; + similarityScore: string; + }[] = []; + + for (const insertedId of parsedPredictionIds) { + const result = findCanonicalPrediction( + insertedId, + predictionsForDedup, + ); + if (result) { + duplicateRelations.push({ + predictionId: insertedId, + canonicalId: result.canonicalId, + similarityScore: result.similarityScore.toFixed(4), + }); + } + } + + if (duplicateRelations.length > 0) { + await tx + .insert(predictionDuplicateRelationsSchema) + .values(duplicateRelations) + .onConflictDoNothing(); + } + const receiptTimestamp = new Date().toISOString(); const receiptData = { parsedPredictionIds, diff --git a/services/swarm-api/src/schemas/claims.ts b/services/swarm-api/src/schemas/claims.ts new file mode 
100644 index 000000000..e4beab03f --- /dev/null +++ b/services/swarm-api/src/schemas/claims.ts @@ -0,0 +1,116 @@ +import { failureCauseValues } from "@torus-ts/db/schema"; +import { z } from "zod"; + +const decimalSchema = z.string().refine( + (val) => { + const num = parseFloat(val); + return !isNaN(num) && num >= 0 && num <= 1; + }, + { message: "Must be a decimal string between 0 and 1" }, +); + +export const claimSourceSchema = z.object({ + url: z.string().url(), + title: z.string().optional(), + snippet: z.string().optional(), + retrievedAt: z.string().datetime(), + archiveUrl: z.string().url().optional(), +}); + +export const claimTimeframeSchema = z.object({ + startUtc: z.string().datetime(), + endUtc: z.string().datetime(), + precision: z.string().min(1), +}); + +export const claimContentSchema = z.object({ + outcome: z.boolean(), + confidence: decimalSchema, + reasoning: z.string().min(1), + sources: z.array(claimSourceSchema), + timeframe: claimTimeframeSchema, + sentAt: z.string().datetime(), +}); + +export const claimMetadataSchema = z.object({ + signature: z.string().min(1), + version: z.literal(1), +}); + +export const claimSubmissionSchema = z.object({ + content: claimContentSchema, + metadata: claimMetadataSchema, +}); + +export const claimableQuerySchema = z.object({ + after: z.string().uuid().optional(), + limit: z.coerce.number().int().positive().max(100).default(50), + topics: z + .string() + .optional() + .transform((val) => + val + ? val + .split(",") + .map((t) => t.trim().toLowerCase()) + .filter((t) => t.length > 0) + : undefined, + ), +}); + +export const claimsQuerySchema = z.object({ + after: z.string().uuid().optional(), + limit: z.coerce.number().int().positive().max(100).default(50), +}); + +export const registerTopicSchema = z.object({ + topicId: z.string().uuid(), +}); + +export type ClaimSubmission = z.infer; +export type ClaimableQuery = z.infer; +export type ClaimsQuery = z.infer; +export type RegisterTopic = z.infer; + +export const feedbackContentSchema = z.object({ + failureCause: z.enum( + Object.keys(failureCauseValues) as [string, ...string[]], + ), + reason: z.string().min(1).max(2000), + sentAt: z.string().datetime(), +}); + +export const feedbackSubmissionSchema = z.object({ + content: feedbackContentSchema, + metadata: claimMetadataSchema, +}); + +export type FeedbackSubmission = z.infer; + +export const postSliceSchema = z.object({ + source: z.object({ + tweet_id: z.string(), + }), + start: z.number(), + end: z.number(), +}); + +export const predictionContextTweetSchema = z.object({ + id: z.string(), + text: z.string(), + authorUsername: z.string().nullable(), + date: z.string().datetime(), +}); + +export const predictionContextResponseSchema = z.object({ + id: z.string().uuid(), + predictionId: z.string().uuid(), + target: z.array(postSliceSchema), + timeframe: z.array(postSliceSchema), + tweets: z.array(predictionContextTweetSchema), + topicName: z.string(), +}); + +export type PredictionContextResponse = z.infer< + typeof predictionContextResponseSchema +>; diff --git a/services/swarm-api/src/server.ts b/services/swarm-api/src/server.ts index 631a053ac..eee76b63d 100644 --- a/services/swarm-api/src/server.ts +++ b/services/swarm-api/src/server.ts @@ -11,6 +11,7 @@ import { createAppContext } from "./context"; import { getEnv } from "./env"; import { requirePermission } from "./middleware/auth"; import { contextPlugin } from "./middleware/context"; +import { claimsRouter } from "./routes/claims"; import { creditsRouter } from 
"./routes/credits"; import { permissionRouter } from "./routes/permission"; import { predictionsRouter } from "./routes/predictions"; @@ -197,6 +198,7 @@ export async function createServer() { .get("/health", () => ({ status: "ok" })) .use(permissionRouter) .use(creditsRouter) + .use(claimsRouter) .use(requirePermission(["prediction.filter"])) .use(tweetsRouter) .use(predictionsRouter); diff --git a/services/swarm-verifier/src/__tests__/deduplication.test.ts b/services/swarm-api/src/utils/__tests__/dedup.test.ts similarity index 99% rename from services/swarm-verifier/src/__tests__/deduplication.test.ts rename to services/swarm-api/src/utils/__tests__/dedup.test.ts index 69f9c4225..dab737f51 100644 --- a/services/swarm-verifier/src/__tests__/deduplication.test.ts +++ b/services/swarm-api/src/utils/__tests__/dedup.test.ts @@ -1,7 +1,7 @@ import type { PostSlice } from "@torus-ts/db/schema"; import { describe, expect, it } from "vitest"; -import type { ParsedPredictionForDedup } from "../verifier.js"; -import { comparePredictions } from "../verifier.js"; +import type { ParsedPredictionForDedup } from "../dedup"; +import { comparePredictions } from "../dedup"; /** * Represents a tweet with its text content. diff --git a/services/swarm-api/src/utils/dedup.ts b/services/swarm-api/src/utils/dedup.ts new file mode 100644 index 000000000..6f0c92768 --- /dev/null +++ b/services/swarm-api/src/utils/dedup.ts @@ -0,0 +1,185 @@ +import type { PostSlice } from "@torus-ts/db/schema"; + +function groupSlicesByTweet(slices: PostSlice[]): Map { + const byTweet = new Map(); + for (const slice of slices) { + const group = byTweet.get(slice.source.tweet_id); + if (group) group.push(slice); + else byTweet.set(slice.source.tweet_id, [slice]); + } + return byTweet; +} + +interface Range { + start: number; + end: number; +} + +function mergeRanges(ranges: Range[]): Range[] { + if (ranges.length === 0) return []; + + const sorted = [...ranges].sort((a, b) => a.start - b.start); + const first = sorted[0]; + if (!first) return []; + + const merged: Range[] = [first]; + + for (let i = 1; i < sorted.length; i++) { + const current = sorted[i]; + const last = merged[merged.length - 1]; + if (!current || !last) continue; + + if (current.start <= last.end + 1) { + last.end = Math.max(last.end, current.end); + } else { + merged.push(current); + } + } + + return merged; +} + +function calculateCoverageAwareOverlap( + slices1: PostSlice[], + slices2: PostSlice[], +): number { + if (slices1.length === 0 || slices2.length === 0) { + return 0; + } + + const slicesByTweet1 = groupSlicesByTweet(slices1); + const slicesByTweet2 = groupSlicesByTweet(slices2); + + let totalCoveredLength = 0; + let totalLength1 = 0; + + for (const [tweetId, tweet1Slices] of slicesByTweet1) { + const tweet2Slices = slicesByTweet2.get(tweetId) ?? 
[]; + + const merged1 = mergeRanges( + tweet1Slices.map((s) => ({ start: s.start, end: s.end })), + ); + const merged2 = mergeRanges( + tweet2Slices.map((s) => ({ start: s.start, end: s.end })), + ); + + for (const r1 of merged1) { + const r1Length = r1.end - r1.start; + totalLength1 += r1Length; + + let coveredLength = 0; + for (const r2 of merged2) { + const overlapStart = Math.max(r1.start, r2.start); + const overlapEnd = Math.min(r1.end, r2.end); + coveredLength += Math.max(0, overlapEnd - overlapStart); + } + + totalCoveredLength += coveredLength; + } + } + + if (totalLength1 === 0) return 0; + + return totalCoveredLength / totalLength1; +} + +function calculateBidirectionalOverlap( + slices1: PostSlice[], + slices2: PostSlice[], +): number { + const overlap1to2 = calculateCoverageAwareOverlap(slices1, slices2); + const overlap2to1 = calculateCoverageAwareOverlap(slices2, slices1); + + return Math.min(overlap1to2, overlap2to1); +} + +export interface ParsedPredictionForDedup { + id: string; + predictionId: string; + target: PostSlice[]; + timeframe: PostSlice[]; +} + +export interface PredictionComparisonResult { + targetScore: number; + timeframeScore: number; + isDuplicate: boolean; +} + +export function comparePredictions( + pred1: ParsedPredictionForDedup, + pred2: ParsedPredictionForDedup, + targetThreshold = 0.96, + timeframeThreshold = 0.96, +): PredictionComparisonResult { + const targetScore = calculateBidirectionalOverlap(pred1.target, pred2.target); + const timeframeScore = calculateBidirectionalOverlap( + pred1.timeframe, + pred2.timeframe, + ); + + return { + targetScore, + timeframeScore, + isDuplicate: + targetScore >= targetThreshold && timeframeScore >= timeframeThreshold, + }; +} + +export function findCanonicalPrediction( + predictionId: string, + predictions: ParsedPredictionForDedup[], +): { canonicalId: string; similarityScore: number } | null { + if (predictions.length < 2) return null; + + const parent = new Map(); + for (const pred of predictions) { + parent.set(pred.id, pred.id); + } + + function find(id: string): string { + const p = parent.get(id); + if (p === undefined || p === id) return id; + const root = find(p); + parent.set(id, root); + return root; + } + + function union(id1: string, id2: string): void { + const root1 = find(id1); + const root2 = find(id2); + if (root1 === root2) return; + if (root1 < root2) { + parent.set(root2, root1); + } else { + parent.set(root1, root2); + } + } + + for (let i = 0; i < predictions.length; i++) { + for (let j = i + 1; j < predictions.length; j++) { + const pred1 = predictions[i]; + const pred2 = predictions[j]; + if (!pred1 || !pred2) continue; + + if (comparePredictions(pred1, pred2).isDuplicate) { + union(pred1.id, pred2.id); + } + } + } + + const root = find(predictionId); + if (root === predictionId) return null; + + const canonical = predictions.find((p) => p.id === root); + if (!canonical) return null; + + const currentPred = predictions.find((p) => p.id === predictionId); + if (!currentPred) return null; + + const result = comparePredictions(currentPred, canonical); + return { + canonicalId: root, + similarityScore: (result.targetScore + result.timeframeScore) / 2, + }; +} diff --git a/services/swarm-api/vitest.config.ts b/services/swarm-api/vitest.config.ts new file mode 100644 index 000000000..cbac66612 --- /dev/null +++ b/services/swarm-api/vitest.config.ts @@ -0,0 +1,15 @@ +import path from "node:path"; +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + resolve: { + 
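+    // Resolve the "@" alias to this service's src directory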
alias: { + "@": path.resolve(__dirname, "./src"), + }, + }, + test: { + include: ["src/**/__tests__/**/*.test.ts"], + environment: "node", + globals: true, + }, +}); diff --git a/services/swarm-judge/CLAIM_VALIDATION_PROMPT.md b/services/swarm-judge/CLAIM_VALIDATION_PROMPT.md new file mode 100644 index 000000000..439d3084b --- /dev/null +++ b/services/swarm-judge/CLAIM_VALIDATION_PROMPT.md @@ -0,0 +1,46 @@ +# Claim Validation Prompt + +You are evaluating a verification claim submitted by an external verifier. Your task is to assess whether the claim is logically consistent and well-supported by its sources. + +## Input Format + +You will receive a JSON object containing: + +- `claim_outcome`: boolean - what the verifier claims (true = prediction came true, false = prediction did not come true) +- `confidence`: string - the verifier's confidence (0.0 to 1.0) +- `reasoning`: string - the verifier's explanation of their verdict +- `sources`: array of source objects with url, title, snippet, etc. + +## Evaluation Criteria + +### Logical Consistency + +- Does the reasoning logically support the claim_outcome? +- Are there internal contradictions in the argument? +- Does the verifier correctly interpret the evidence? + +### Source Relevance + +- Are the sources relevant to the prediction being verified? +- Do the source snippets actually support the claims made in the reasoning? +- Are the sources from credible domains? + +## Output Format + +Return a JSON object with: + +- `is_logically_consistent`: boolean - whether the reasoning follows from the evidence +- `sources_relevant`: boolean - whether the sources support the conclusion +- `confidence`: number (0.0 to 1.0) - how confident you are in this assessment +- `reasoning`: string - brief explanation of your evaluation + +## Guidelines + +Be lenient with minor issues but strict on fundamental problems: + +- Accept claims where the reasoning has small gaps but the overall argument is sound +- Reject claims where the conclusion contradicts the evidence +- Reject claims where sources are clearly irrelevant or fabricated +- Give benefit of the doubt when sources could plausibly support the claim + +Focus on whether the verifier did honest research, not whether you agree with their interpretation. diff --git a/services/swarm-judge/FILTER_VALIDATION_PROMPT.md b/services/swarm-judge/FILTER_VALIDATION_PROMPT.md new file mode 100644 index 000000000..f0fe0fa64 --- /dev/null +++ b/services/swarm-judge/FILTER_VALIDATION_PROMPT.md @@ -0,0 +1,765 @@ +# Filter Extraction Validation Prompt + +## Goal + +Summarize the thread context and determine if the extracted slices represent a valid prediction. + +## Inputs + +You will receive: + +```json +{ + "current_date": "2025-01-20T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "123456789", + "author": "@username", + "date": "2025-01-15T14:30:00Z", + "text": "Full tweet text here" + } + ], + "target_slices": [ + { + "tweet_id": "123456789", + "start": 0, + "end": 20, + "text": "BTC will hit 100k" + } + ], + "timeframe_slices": [ + { + "tweet_id": "123456789", + "start": 21, + "end": 35, + "text": "by end of Q1" + } + ], + "timeframe_parsed": { + "start_utc": "2025-01-15T14:30:00Z", + "end_utc": "2025-03-31T23:59:59Z", + "precision": "quarter" + } +} +``` + +## Task + +Determine if this is a valid prediction that should be verified. + +**CRITICAL: First check slice extraction quality:** + +Before evaluating the prediction content, verify the filter didn't create broken extractions: + +1. 
**Word boundary violations**: Check if slices cut through the middle of words + - Compare the extracted text against the full tweet at those indices + - Example: Extracting "now" from "k**now**" is INVALID - it cuts through the word "know" + - If a slice doesn't align with word boundaries, mark as invalid with `failure_cause: "broken_extraction"` + +2. **Semantic validity of extracted text**: Does the extracted text make sense in isolation? + - Is the timeframe slice actually a temporal expression? (not just random word fragments) + - Is the target slice actually a predictive statement? (not just disconnected fragments) + - If extractions are nonsensical or fragments, mark as invalid with `failure_cause: "broken_extraction"` + +**Then check for disqualifying factors:** + +1. **Self-announcements** (NOT predictions): + - Author announcing their OWN actions, plans, decisions, or products + - Examples: Company announcing product release, artist announcing tour, athlete announcing retirement + - Includes both obvious ("I'm releasing X") and subtle ("And now I get to tour America??") announcements + - Check if the author is the same entity that controls the outcome + - Look for linguistic clues of ownership/control: + - First-person plural: "we", "our", "us" (e.g., "our new product", "we're launching") + - Phrases indicating control: "Get ready for X from [Company]", "X is coming to [Company]" + - Responding to questions about the company's own products/services + - Making statements about the company's own seasonal/recurring offerings (e.g., "McRib will come around again") + - Announcing company-specific promotions, products, or events (e.g., "Beyblades from McDonald's next month") + - If the tweet discusses when a product/service will be available from that same organization, it's a self-announcement + - The author must be predicting something OUTSIDE their control, not their own business decisions + +2. **Personal or local actions** (NOT publicly verifiable predictions): + - Actions directed at specific individuals, not general public events + - Examples: "you will receive an email", "we'll send you a refund", "I'll reply to your DM" + - Customer service responses about individual cases + - Cannot be objectively verified by third parties + - If it's a private interaction or personal action, it's not a prediction + +3. **Vague or unmeasurable targets**: + - Subjective outcomes: "will be wild", "will be grim", "will be good/bad" + - No clear success criteria: "will have consequences", "will impact X" + - Abstract philosophical statements: "the West in decline", "the East ascendant" + - Cannot objectively verify if it happened + - Note: Conditionals with specific, measurable outcomes are VALID. Only reject if the target itself is vague. + +4. 
**Present-state commentary** (NOT predictions): + - Describing current conditions: "we already face", "things are now" + - Ongoing analysis of existing situations + - **News reporting on already-decided/announced events (even if execution is future):** + - "The bill was signed today" - the signing IS done, execution later doesn't make it a prediction + - "Parliament passed a new law" - the law IS passed, enforcement later doesn't make it a prediction + - "Company announced" / "Company confirmed" - the announcement IS made + - "Player has joined" (past tense) - the signing IS complete + - These describe PRESENT STATE (decision exists now), not future uncertainty + - **Key distinction**: If the decision/commitment/announcement already happened, it's present state + - If it's about what IS (or what HAS BEEN decided) rather than what WILL BE, it's not a prediction + +5. **Questions** (NOT predictions): + - Rhetorical questions: "Could X be the next big Y?", "Will these records be broken?" + - Speculative questions: "Is this the end of Z?" + - Questions are inherently uncertain and non-committal - they're asking, not predicting + - Even if the answer is obviously yes/no, a question is not a statement of prediction + +6. **Quoting others** (NOT predictions): + - Author reporting someone else's prediction or claim + - Examples: "Expert claims X will happen", "Analyst says Y will occur", "Shoutcaster predicts Z" + - Look for attribution: "claims", "says", "predicts", "according to" + - The author is not making their own prediction, just reporting what someone else said + - Even if the quoted prediction comes true, the author wasn't making the prediction + +7. **Trivial/obvious outcomes**: + - Stating obvious consequences: "This list will change after a tournament", "Scores will be different tomorrow" + - No meaningful uncertainty - outcome is essentially guaranteed + - These are observations about obvious causality, not predictions + - Example: "The leaderboard will change during a $30M tournament" - obviously yes + +8. **Negation**: "I don't think", "won't", "unlikely" + +9. **Sarcasm**: "lol", "lmao", emojis, "yeah right" + +10. **Heavy hedging**: "maybe", "possibly", "could" + +## Output Format + +Return ONLY valid JSON (no markdown fences): + +```json +{ + "context": "Brief summary of what the thread is about and what the author was saying", + "is_valid": true | false, + "failure_cause": "BROKEN_EXTRACTION" | "SELF_ANNOUNCEMENT" | "PERSONAL_ACTION" | "VAGUE_TARGET" | "PRESENT_STATE" | "NEGATION" | "SARCASM" | "QUOTING_OTHERS" | "HEAVY_HEDGING" | "FUTURE_TIMEFRAME" | "OTHER" | null, + "confidence": 0.95, + "reasoning": "Explanation of why this is or isn't a valid prediction" +} +``` + +**Fields:** + +- `context`: Brief summary of the thread and what the author was saying +- `is_valid`: Boolean indicating if this is a valid prediction +- `failure_cause`: Category of failure (null if is_valid is true). 
Must be one of: + - `"BROKEN_EXTRACTION"`: Slices cut through word boundaries or extract nonsensical fragments + - `"SELF_ANNOUNCEMENT"`: Author announcing their own actions/products (not a prediction) + - `"PERSONAL_ACTION"`: Local/personal actions directed at individuals, not publicly verifiable + - `"VAGUE_TARGET"`: Target is subjective, unmeasurable, or has no clear success criteria + - `"PRESENT_STATE"`: Statement about current conditions, not a future prediction + - `"NEGATION"`: Prediction is negated ("I don't think", "won't", "unlikely") + - `"SARCASM"`: Sarcastic or joking tone ("lol", "lmao", emojis) + - `"QUOTING_OTHERS"`: Author is quoting someone else's view (not making their own prediction) + - `"HEAVY_HEDGING"`: Heavily hedged ("maybe", "possibly", "could") + - `"OTHER"`: Other disqualifying factors including questions, trivial/obvious outcomes, and patterns not covered above +- `confidence`: Confidence score from 0.0 to 1.0 indicating how certain the validation is +- `reasoning`: Human-readable explanation + +## Examples + +### Example 1: Valid + +**Input:** + +```json +{ + "current_date": "2025-04-15T00:00:00Z", + "thread_tweets": [ + { + "text": "I'm calling it now: BTC will hit 100k by end of Q1 2025. Screenshot this." + } + ], + "target_slices": [{ "text": "BTC will hit 100k" }], + "timeframe_slices": [{ "text": "by end of Q1 2025" }], + "timeframe_parsed": { + "start_utc": "2025-01-15T14:30:00Z", + "end_utc": "2025-03-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Author is making a confident price prediction for Bitcoin reaching 100k by end of Q1 2025.", + "is_valid": true, + "failure_cause": null, + "confidence": 0.98, + "reasoning": "Clear, unconditional prediction with specific target and deadline. No hedging, sarcasm, or conditions." +} +``` + +### Example 2: Invalid - Broken Extraction (Word Boundary Violation) + +**Input:** + +```json +{ + "current_date": "2025-04-15T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "123456789", + "text": "@vgr My critique is deeper than \"Metaverse Wikipedia will beat Metaverse Encyclopedia Britannica\". It's that we don't really know the definition of \"the metaverse\" yet, it's far too early to know what people actually want. So anything Facebook creates now will misfire." + } + ], + "target_slices": [ + { + "tweet_id": "123456789", + "start": 180, + "end": 183, + "text": "now" + } + ], + "timeframe_slices": [ + { + "tweet_id": "123456789", + "start": 175, + "end": 178, + "text": "now" + } + ], + "timeframe_parsed": { + "start_utc": "2020-03-18T10:00:00Z", + "end_utc": "2020-03-18T10:00:00Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Author is critiquing Facebook's metaverse strategy, saying it's too early to know what people want.", + "is_valid": false, + "failure_cause": "BROKEN_EXTRACTION", + "confidence": 0.99, + "reasoning": "The filter extracted 'now' from the middle of the word 'know' at position 175-178. Looking at the full tweet, this is part of the phrase 'we don't really know' - the filter cut through the word boundary. The extracted 'now' is not a temporal expression but a fragment of the word 'know'. This is a broken extraction that doesn't represent the actual tweet content." 
+} +``` + +### Example 3: Invalid - Negation + +**Input:** + +```json +{ + "current_date": "2025-04-15T00:00:00Z", + "thread_tweets": [ + { "text": "I don't think BTC will hit 100k by end of Q1 tbh" } + ], + "target_slices": [{ "text": "BTC will hit 100k" }], + "timeframe_slices": [{ "text": "by end of Q1" }], + "timeframe_parsed": { + "start_utc": "2025-01-15T00:00:00Z", + "end_utc": "2025-03-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Author is expressing doubt that Bitcoin will reach 100k by end of Q1. This is a negative prediction.", + "is_valid": false, + "failure_cause": "NEGATION", + "confidence": 0.99, + "reasoning": "Author explicitly stated 'I don't think' which negates the prediction. The filter removed the negation." +} +``` + +### Example 3: Invalid - Sarcasm + +**Input:** + +```json +{ + "current_date": "2025-01-20T00:00:00Z", + "thread_tweets": [ + { "text": "Yeah BTC will totally hit 100k by next week lmaooo 🤡" } + ], + "target_slices": [{ "text": "BTC will totally hit 100k" }], + "timeframe_slices": [{ "text": "by next week" }], + "timeframe_parsed": { + "start_utc": "2025-01-20T00:00:00Z", + "end_utc": "2025-01-27T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Author is sarcastically mocking the idea that Bitcoin could reach 100k in such a short timeframe.", + "is_valid": false, + "failure_cause": "SARCASM", + "confidence": 0.97, + "reasoning": "Clear sarcasm indicators: 'totally', 'lmaooo', clown emoji, and unrealistic timeframe. This is a joke, not a serious prediction." +} +``` + +### Example 4: Invalid - Vague Target + +**Input:** + +```json +{ + "current_date": "2025-04-15T00:00:00Z", + "thread_tweets": [{ "text": "VR will be wild by end of Q1 2025" }], + "target_slices": [{ "text": "VR will be wild" }], + "timeframe_slices": [{ "text": "by end of Q1 2025" }], + "timeframe_parsed": { + "start_utc": "2025-01-20T00:00:00Z", + "end_utc": "2025-03-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Author is making a vague prediction about VR technology becoming 'wild'.", + "is_valid": false, + "failure_cause": "VAGUE_TARGET", + "confidence": 0.95, + "reasoning": "'Wild' is subjective with no clear success criteria. How would we objectively verify if VR became 'wild'? There's no measurable outcome to check. This is an opinion statement, not a verifiable prediction." +} +``` + +### Example 5: Invalid - Present State Commentary + +**Input:** + +```json +{ + "current_date": "2025-04-15T00:00:00Z", + "thread_tweets": [ + { + "text": "We already face a combination of supply chain disruptions and soaring prices for masks" + } + ], + "target_slices": [{ "text": "supply chain disruptions" }], + "timeframe_slices": [{ "text": "already" }], + "timeframe_parsed": { + "start_utc": "2020-03-22T14:30:00Z", + "end_utc": "2020-03-22T14:30:00Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Author is describing current conditions in March 2020, not making a prediction about the future.", + "is_valid": false, + "failure_cause": "PRESENT_STATE", + "confidence": 0.99, + "reasoning": "'We already face' indicates this is commentary on existing conditions, not a prediction. The author is describing what IS happening, not what WILL happen. This is analysis of the present, not a forecast." 
+} +``` + +### Example 5b: Invalid - News Reporting (Already-Signed Law) + +**Input:** + +```json +{ + "current_date": "2025-01-31T00:00:00Z", + "thread_tweets": [ + { + "text": "Florida will ban anyone under 14 owning a social media account from January 2025, deleting existing accounts. The bill was signed today by Governor DeSantis" + } + ], + "target_slices": [ + { "text": "Florida will ban anyone under 14 owning a social media account" } + ], + "timeframe_slices": [{ "text": "from January 2025" }], + "timeframe_parsed": { + "start_utc": "2024-03-25T13:26:00Z", + "end_utc": "2025-01-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "News organization reporting on Florida's social media age restriction law that was just signed into law.", + "is_valid": false, + "failure_cause": "PRESENT_STATE", + "confidence": 0.98, + "reasoning": "The text explicitly states 'The bill was signed today by Governor DeSantis.' This is present state - the bill IS signed, the decision HAS BEEN made. While the enforcement happens in January 2025 (future), the commitment/decision exists NOW. This is news reporting on a present reality (signed law), not a prediction about whether a law will pass. A prediction would have been made BEFORE the bill was signed." +} +``` + +### Example 5c: Invalid - News Reporting (Past Tense Announcement) + +**Input:** + +```json +{ + "current_date": "2025-12-31T00:00:00Z", + "thread_tweets": [ + { + "text": "Legendary Counter Strike player S1mple has joined FaZe Clan on a 2-event loan for his highly anticipated return. The Ukrainian champion will compete at IEM Dallas and the BLAST Austin Major 2025" + } + ], + "target_slices": [ + { "text": "S1mple will compete at IEM Dallas and the BLAST Austin Major" } + ], + "timeframe_slices": [{ "text": "2025" }], + "timeframe_parsed": { + "start_utc": "2025-05-05T10:20:00Z", + "end_utc": "2025-12-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "News organization reporting on S1mple's roster move and which events he will attend.", + "is_valid": false, + "failure_cause": "PRESENT_STATE", + "confidence": 0.97, + "reasoning": "The text states 'S1mple has joined' (past tense) - the deal IS done, the commitment HAS BEEN made. The specific events (IEM Dallas, BLAST Austin Major) were part of the already-announced agreement. This is present state - the contract exists NOW, the events are already scheduled NOW. This is news reporting on a completed deal and its terms, not a prediction about whether S1mple would join or which events he'd attend." +} +``` + +### Example 6: Valid - Conditional with Specific Outcome + +**Input:** + +```json +{ + "current_date": "2025-01-20T00:00:00Z", + "thread_tweets": [ + { "text": "If BTC breaks $95k resistance, it will hit $100k within a week" } + ], + "target_slices": [{ "text": "it will hit $100k" }], + "timeframe_slices": [{ "text": "within a week" }], + "timeframe_parsed": { + "start_utc": "2025-01-20T00:00:00Z", + "end_utc": "2025-01-27T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Author is making a conditional price prediction: if BTC breaks $95k, it will reach $100k within a week.", + "is_valid": true, + "failure_cause": null, + "confidence": 0.92, + "reasoning": "While this is conditional, both the condition ($95k break) and outcome ($100k target) are specific and measurable. We can verify if the condition was met and then check if the outcome happened. This is a legitimate causal prediction, not a vague statement." 
+} +``` + +### Example 7: Invalid - Self-Announcement + +**Input:** + +```json +{ + "current_date": "2025-04-15T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "123456789", + "author": "@McDonalds", + "date": "2024-10-25T17:46:00Z", + "text": "@SoulsofMystery hey Billy! yes, surely, the Nether Sauce will be available starting April 1st, 2025." + } + ], + "target_slices": [{ "text": "the Nether Sauce will be available" }], + "timeframe_slices": [{ "text": "starting April 1st, 2025" }], + "timeframe_parsed": { + "start_utc": "2024-10-25T17:46:00Z", + "end_utc": "2025-04-01T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "McDonald's official account is responding to a customer inquiry about when a new sauce product will be available.", + "is_valid": false, + "failure_cause": "SELF_ANNOUNCEMENT", + "confidence": 0.99, + "reasoning": "This is McDonald's (@McDonalds) announcing their own product release date. The company controls when they release the Nether Sauce, so this is not a prediction about an uncertain future event - it's a company announcement of their own plans. Self-announcements are not predictions." +} +``` + +### Example 8: Invalid - Personal Action + +**Input:** + +```json +{ + "current_date": "2025-04-15T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "987654321", + "author": "@McDonalds", + "date": "2025-03-10T14:20:00Z", + "text": "@JohnDoe123 We're sorry to hear that! Our team will send you an email within 24 hours to resolve this issue." + } + ], + "target_slices": [{ "text": "Our team will send you an email" }], + "timeframe_slices": [{ "text": "within 24 hours" }], + "timeframe_parsed": { + "start_utc": "2025-03-10T14:20:00Z", + "end_utc": "2025-03-11T14:20:00Z" + } +} +``` + +**Output:** + +```json +{ + "context": "McDonald's customer service responding to a specific user's complaint, promising to send them an email.", + "is_valid": false, + "failure_cause": "PERSONAL_ACTION", + "confidence": 0.98, + "reasoning": "This is a customer service response about a personal action directed at a specific individual (@JohnDoe123). The statement 'will send you an email' is a private interaction between the company and one user. This is not a publicly verifiable prediction - no third party can verify whether this specific user received an email. Personal actions and individual customer service interactions are not predictions." +} +``` + +### Example 9: Invalid - Self-Announcement (Seasonal Product) + +**Input:** + +```json +{ + "current_date": "2021-12-01T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "1234567890", + "author": "@1062894451171790848", + "date": "2021-02-25T11:55:00Z", + "text": "@MikeWasBad Mike, the McRib is Seasonal & will come around again next Winter." + } + ], + "target_slices": [ + { "text": "the McRib is Seasonal & will come around again" } + ], + "timeframe_slices": [{ "text": "next Winter" }], + "timeframe_parsed": { + "start_utc": "2021-02-25T11:55:00Z", + "end_utc": "2021-12-21T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Company responding to a customer question about when the McRib will return, stating it's seasonal and will be back next winter.", + "is_valid": false, + "failure_cause": "SELF_ANNOUNCEMENT", + "confidence": 0.97, + "reasoning": "This is a company responding about their own seasonal product offering. The McRib is McDonald's product, and they control when it's available. 
Even though the author ID is numeric, the linguistic pattern is clear: they're responding to a customer question about their own product and announcing when it will return. The company controls this decision, making it a self-announcement rather than a prediction about an external event." +} +``` + +### Example 10: Invalid - Self-Announcement (Product Promotion) + +**Input:** + +```json +{ + "current_date": "2020-04-15T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "9876543210", + "author": "@1062894451171790848", + "date": "2020-03-24T00:23:00Z", + "text": "Yo who's ready for Beyblades from McDonalds next month? 😳" + } + ], + "target_slices": [{ "text": "Beyblades from McDonalds" }], + "timeframe_slices": [{ "text": "next month" }], + "timeframe_parsed": { + "start_utc": "2020-03-24T00:23:00Z", + "end_utc": "2020-04-30T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "Company account asking who's ready for Beyblades promotion coming to McDonald's next month.", + "is_valid": false, + "failure_cause": "SELF_ANNOUNCEMENT", + "confidence": 0.98, + "reasoning": "The key phrase 'Beyblades from McDonalds' indicates this is McDonald's announcing their own promotional item. The phrase 'from McDonalds' shows the author controls when this happens. This is a company hyping up their own upcoming promotion, not predicting an external event. Even phrased as a question ('who's ready'), this is clearly a promotional announcement of their own business decision." +} +``` + +### Example 11: Invalid - Question (Rhetorical) + +**Input:** + +```json +{ + "current_date": "2020-01-01T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "123456789", + "author": "@Dexerto", + "date": "2019-11-20T23:09:00Z", + "text": "Could @DisguisedToast be the next big Twitch streamer to leave the platform? 👀" + } + ], + "target_slices": [ + { + "text": "@DisguisedToast be the next big Twitch streamer to leave the platform" + } + ], + "timeframe_slices": [{ "text": "next" }], + "timeframe_parsed": { + "start_utc": "2019-11-20T23:09:00Z", + "end_utc": "2019-12-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "News organization asking a speculative question about DisguisedToast potentially leaving Twitch.", + "is_valid": false, + "failure_cause": "OTHER", + "confidence": 0.96, + "reasoning": "This is phrased as a question ('Could @DisguisedToast be...?'), not a statement or prediction. Questions are inherently speculative and non-committal. The author is asking readers to speculate, not making their own prediction. Even rhetorical questions do not constitute predictions." +} +``` + +### Example 12: Invalid - Question (Speculative) + +**Input:** + +```json +{ + "current_date": "2020-01-01T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "987654321", + "author": "@Dexerto", + "date": "2019-01-09T09:06:00Z", + "text": "Updated list of all Fortnite Battle Royale world records 🏅 Will these records be broken in 2019?" + } + ], + "target_slices": [{ "text": "these records be broken" }], + "timeframe_slices": [{ "text": "in 2019" }], + "timeframe_parsed": { + "start_utc": "2019-01-09T09:06:00Z", + "end_utc": "2019-12-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "News organization sharing Fortnite world records and asking if they'll be broken.", + "is_valid": false, + "failure_cause": "OTHER", + "confidence": 0.97, + "reasoning": "This ends with a question: 'Will these records be broken in 2019?' 
Questions are not predictions - they're inviting speculation, not making a statement. The author is not predicting that records will or won't be broken, they're asking the audience." +} +``` + +### Example 13: Invalid - Quoting Others + +**Input:** + +```json +{ + "current_date": "2019-12-31T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "111222333", + "author": "@Dexerto", + "date": "2019-08-28T05:40:00Z", + "text": "LoL shoutcaster claims that favorite team to win #Worlds2019 'will get exploited.'" + } + ], + "target_slices": [ + { "text": "favorite team to win #Worlds2019 will get exploited" } + ], + "timeframe_slices": [{ "text": "#Worlds2019" }], + "timeframe_parsed": { + "start_utc": "2019-08-28T05:40:00Z", + "end_utc": "2019-11-10T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "News organization reporting on a League of Legends shoutcaster's prediction about Worlds 2019.", + "is_valid": false, + "failure_cause": "QUOTING_OTHERS", + "confidence": 0.98, + "reasoning": "The text states 'LoL shoutcaster claims that...' - this is clearly attributing the prediction to someone else (the shoutcaster), not making their own prediction. Dexerto is reporting on what the shoutcaster said, not making their own prediction. Even if the shoutcaster's prediction comes true, Dexerto wasn't the one making the prediction." +} +``` + +### Example 14: Invalid - Trivial/Obvious Outcome + +**Input:** + +```json +{ + "current_date": "2019-07-28T00:00:00Z", + "thread_tweets": [ + { + "tweet_id": "444555666", + "author": "@Dexerto", + "date": "2019-07-25T09:43:00Z", + "text": "Top 20 highest earning #fortnite players ahead of the $30 million World Cup 🏆 This list will change a lot this weekend 🤑" + } + ], + "target_slices": [{ "text": "This list will change a lot" }], + "timeframe_slices": [{ "text": "this weekend" }], + "timeframe_parsed": { + "start_utc": "2019-07-25T09:43:00Z", + "end_utc": "2019-07-28T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "context": "News organization noting that the highest earning Fortnite players list will change during the World Cup tournament.", + "is_valid": false, + "failure_cause": "OTHER", + "confidence": 0.94, + "reasoning": "This is stating an obvious outcome: of course the earnings list will change during a $30 million tournament happening this weekend. There's no meaningful uncertainty here - this is virtually guaranteed. This is an observation about obvious causality ('big tournament = earnings change'), not a prediction requiring insight or analysis. Trivial outcomes are not meaningful predictions." +} +``` diff --git a/services/swarm-judge/TIMEFRAME_PROMPT.md b/services/swarm-judge/TIMEFRAME_PROMPT.md new file mode 100644 index 000000000..7809f9963 --- /dev/null +++ b/services/swarm-judge/TIMEFRAME_PROMPT.md @@ -0,0 +1,646 @@ +# Timeframe Extraction Prompt for Gemini + +## Goal + +Extract and normalize the prediction deadline into concrete UTC ISO-8601 timestamps. + +- Read the timeframe text and any relevant context from the thread. +- Normalize any timeline or deadline into UTC ISO-8601 timestamps. +- If no concrete timeline exists or it's too vague, mark as unverifiable. +- Do NOT assess the prediction's truth; only extract time bounds. 
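+
+As a minimal, illustrative sketch of what "normalize into concrete UTC ISO-8601 timestamps" means in practice (the helper name and the assumption of calendar quarters are for illustration only, not part of this prompt or the judge code):
+
+```typescript
+// Hypothetical helper: turns a "by end of Qn YYYY" deadline into the inclusive
+// end-of-quarter timestamp in UTC, e.g. Q1 2025 -> "2025-03-31T23:59:59Z".
+function endOfQuarterUtc(year: number, quarter: 1 | 2 | 3 | 4): string {
+  const lastMonth = quarter * 3; // Q1 -> March, Q2 -> June, Q3 -> September, Q4 -> December
+  // Day 0 of the following month resolves to the last day of `lastMonth`.
+  const end = new Date(Date.UTC(year, lastMonth, 0, 23, 59, 59));
+  return end.toISOString().replace(".000Z", "Z");
+}
+
+console.log(endOfQuarterUtc(2025, 1)); // "2025-03-31T23:59:59Z"
+```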
+ +## Inputs + +You will receive: + +```json +{ + "timeframe_text": "by end of Q1 2025", + "target_text": "BTC will hit 100k", + "tweet_timestamp": "2024-11-15T12:00:00Z", + "current_time": "2025-01-20T10:00:00Z", + "thread_context": "Optional: surrounding tweets if they clarify the timeframe" +} +``` + +## Rules + +### Temporal Anchoring + +- Anchor ALL relative phrases to `tweet_timestamp`, NOT `current_time`. +- Example: "in 3 months" posted on 2024-11-15 → deadline is 2025-02-15, regardless of when we verify. + +### Date Interpretation + +- **"by "**: Inclusive, deadline at 23:59:59 UTC of that date. + - "by June 2025" → 2025-06-30T23:59:59Z + - "by March 31" → YYYY-03-31T23:59:59Z (year from context) + +- **"before "**: Exclusive, deadline at 00:00:00 UTC of that date. + - "before June 2025" → 2025-06-01T00:00:00Z + +- **"within N days/weeks/months/years"**: Window from tweet_timestamp + N units. + - Deadline inclusive at 23:59:59 UTC. + - "within 6 months" posted 2025-01-15 → 2025-07-15T23:59:59Z + +- **"in N days/weeks/months"**: Same as "within N". + +### Quarters and Periods + +- **Quarters**: Assume calendar year unless explicitly stated as fiscal. + - Q1 = Jan 1 – Mar 31 (ends 03-31T23:59:59Z) + - Q2 = Apr 1 – Jun 30 (ends 06-30T23:59:59Z) + - Q3 = Jul 1 – Sep 30 (ends 09-30T23:59:59Z) + - Q4 = Oct 1 – Dec 31 (ends 12-31T23:59:59Z) + +- **Halves**: + - H1 = Jan 1 – Jun 30 (ends 06-30T23:59:59Z) + - H2 = Jul 1 – Dec 31 (ends 12-31T23:59:59Z) + +### Time Idioms + +- **EOD** (end of day): 23:59:59 UTC of the referenced day +- **EOW** (end of week): Sunday 23:59:59 UTC +- **EOM** (end of month): Last day of month 23:59:59 UTC +- **EOY** (end of year): December 31 23:59:59 UTC +- **"this week/month/year"**: Relative to tweet_timestamp +- **"next week/month/year"**: Following period from tweet_timestamp + +### Relative Day References + +When tweet says "by Friday" or "next Tuesday": + +- Calculate from tweet_timestamp day-of-week +- "this Friday" = upcoming Friday in same week +- "next Friday" = Friday of following week +- Always use 23:59:59 UTC for deadline + +### Timezone Handling + +- Default to UTC unless timezone is explicitly mentioned. +- If explicit timezone mentioned ("EST", "PST", "CET"), convert to UTC. +- Twitter market predictions often reference: + - NYSE/NASDAQ close: 21:00:00 UTC (4 PM EST) + - Crypto: 24/7, use 23:59:59 UTC for daily deadlines + +### Multiple Timeframes + +If multiple timelines appear in text: + +1. Prefer the most specific and binding deadline. +2. If conflicting, choose the EARLIEST (most conservative). +3. Note the conflict in `reasoning` field. + +### Invalid Timeframes (Mark as Missing) + +These are NOT valid timeframes and should be marked as `timeframe_status: "missing"`: + +**1. Vague Temporal Phrases:** + +- "soon", "eventually", "someday", "one day" +- "in the future", "in the near future", "in the coming years" +- "before too long", "at some point" +- "down the road", "down the line" +- "yet" (as in "don't write an obituary yet") + +**2. Non-Temporal Words:** + +- Single words that aren't time references: "will", "would", "should", "may" +- Meta-prediction phrases: "I'm predicting", "I predict", "I think", "I believe", "I'm calling" +- Verbs or auxiliaries mistakenly extracted as timeframes +- Random words from the sentence +- These are statements ABOUT making predictions, not temporal deadlines + +**3. 
Excessively Long Timeframes:** + +- Multi-decade predictions: "the 21st century", "this century" +- Any timeframe exceeding 10 years from tweet_timestamp +- Reason: Too long to be meaningfully verifiable + +**4. Conditional Phrases (NOT timeframes):** + +- "when X happens" - This is a condition, not a time +- "if/once/after X" - These are event triggers, not temporal deadlines +- "when checks hit", "once approved", "after the announcement" +- Mark these as `timeframe_status: "event_trigger"` if they reference a specific event +- Mark as `timeframe_status: "missing"` if too vague + +**When you see these patterns, return `timeframe_status: "missing"` with null timestamps.** + +### Event-Triggered Phrases + +Phrases tied to external events: + +- "when X releases", "after the halving", "once approved" +- "when the merger completes", "after the election" + +**Handling:** + +1. Check if thread_context provides clarification (e.g., "the halving in April 2024"). +2. If event has a known date, use it. +3. If event date is unknown, mark as `timeframe_status: "event_trigger"`. +4. Do NOT guess event dates. + +### Inference Bias + +- **Strong bias toward bounded windows**: When vague, infer a conservative deadline. +- **"this will happen"** (no explicit time) → infer 1 year from tweet_timestamp. +- **"X is going to Y"** (future tense, no time) → infer 6 months conservative. +- Only use `missing` when truly unbounded or event-triggered with unknown date. + +## Output Format + +Return ONLY valid JSON (no markdown fences, no extra text): + +```json +{ + "timeframe_status": "explicit" | "implicit" | "inferred" | "event_trigger" | "missing", + "start_utc": "2024-11-15T12:00:00Z", + "end_utc": "2025-06-30T23:59:59Z", + "precision": "hour" | "day" | "week" | "month" | "quarter" | "year", + "reasoning": "Brief explanation of how you arrived at this timeframe", + "assumptions": ["list", "of", "assumptions", "made"], + "confidence": 0.95 +} +``` + +### Field Definitions + +**timeframe_status:** + +- `explicit`: Clear date/range in text ("by June 2025", "in Q1 2026") +- `implicit`: Time anchored indirectly but reasonable ("within a year", "over the next 6 months") +- `inferred`: Derived via conservative reasoning when text is vague ("this will flip" → infer 1 year) +- `event_trigger`: Depends on external event with unknown date ("after the halving") +- `missing`: No bounded window can be inferred ("eventually", "someday") + +**start_utc:** + +- Usually the tweet_timestamp (when prediction was made) +- Can be null if prediction has no clear start + +**end_utc:** + +- The deadline for verification (when to check if prediction came true) +- Must be null only if timeframe_status is "event_trigger" or "missing" + +**precision:** + +- Indicates granularity of the timeframe +- Affects how we interpret boundary cases + +**confidence:** + +- 0.0 to 1.0 +- How confident you are in this interpretation +- Lower if ambiguous or inferred + +## Examples + +### Example 1: Explicit Date + +**Input:** + +```json +{ + "timeframe_text": "by end of Q1 2025", + "target_text": "BTC will hit 100k", + "tweet_timestamp": "2024-11-15T12:00:00Z", + "current_time": "2025-01-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "explicit", + "start_utc": "2024-11-15T12:00:00Z", + "end_utc": "2025-03-31T23:59:59Z", + "precision": "quarter", + "reasoning": "Q1 2025 ends March 31, using calendar quarters, end of day UTC", + "assumptions": ["calendar Q1 not fiscal", "UTC timezone"], + "confidence": 1.0 +} +``` + +--- + +### 
Example 2: Relative Time + +**Input:** + +```json +{ + "timeframe_text": "within 3 months", + "goal_text": "ETH will flip BTC", + "tweet_timestamp": "2025-01-15T09:30:00Z", + "current_time": "2025-02-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "implicit", + "start_utc": "2025-01-15T09:30:00Z", + "end_utc": "2025-04-15T23:59:59Z", + "precision": "day", + "reasoning": "3 months from tweet timestamp (2025-01-15) is 2025-04-15, end of day", + "assumptions": ["calendar months", "inclusive deadline at EOD"], + "confidence": 0.95 +} +``` + +--- + +### Example 3: Day Reference + +**Input:** + +```json +{ + "timeframe_text": "by Friday", + "goal_text": "SOL above $200", + "tweet_timestamp": "2025-01-20T14:00:00Z", + "current_time": "2025-01-21T10:00:00Z" +} +``` + +Tweet posted: Monday Jan 20, 2025 + +**Output:** + +```json +{ + "timeframe_status": "explicit", + "start_utc": "2025-01-20T14:00:00Z", + "end_utc": "2025-01-24T23:59:59Z", + "precision": "day", + "reasoning": "Tweet posted Monday Jan 20, 'by Friday' means Jan 24 end of day", + "assumptions": ["this week's Friday", "UTC EOD"], + "confidence": 0.9 +} +``` + +--- + +### Example 4: Vague (Unverifiable) + +**Input:** + +```json +{ + "timeframe_text": "soon", + "goal_text": "BTC will moon", + "tweet_timestamp": "2025-01-15T09:30:00Z", + "current_time": "2025-02-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "missing", + "start_utc": null, + "end_utc": null, + "precision": "unbounded", + "reasoning": "'soon' is too vague to establish a falsifiable deadline", + "assumptions": [], + "confidence": 1.0 +} +``` + +--- + +### Example 5: Inferred Conservative + +**Input:** + +```json +{ + "timeframe_text": "this is going to happen", + "goal_text": "TSLA will hit $500", + "tweet_timestamp": "2025-01-15T09:30:00Z", + "current_time": "2025-02-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "inferred", + "start_utc": "2025-01-15T09:30:00Z", + "end_utc": "2026-01-15T23:59:59Z", + "precision": "year", + "reasoning": "No explicit timeframe; inferred 1 year as conservative default for price predictions", + "assumptions": [ + "1 year conservative window", + "standard market prediction timeframe" + ], + "confidence": 0.5 +} +``` + +--- + +### Example 6: Event Trigger + +**Input:** + +```json +{ + "timeframe_text": "after the BTC halving", + "goal_text": "BTC will reach 150k", + "tweet_timestamp": "2023-11-15T12:00:00Z", + "current_time": "2024-01-20T10:00:00Z", + "thread_context": "No additional context about halving date" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "event_trigger", + "start_utc": null, + "end_utc": null, + "precision": "event", + "reasoning": "Tied to BTC halving event with no specific date provided in context", + "assumptions": ["BTC halving ~April 2024 historically"], + "confidence": 0.3 +} +``` + +**Note:** If thread_context mentioned "the halving in April 2024", would become: + +```json +{ + "timeframe_status": "explicit", + "start_utc": "2024-04-01T00:00:00Z", + "end_utc": "2024-04-30T23:59:59Z", + "precision": "month", + "reasoning": "Context clarified halving in April 2024", + "assumptions": ["full month of April as window"], + "confidence": 0.85 +} +``` + +--- + +### Example 7: Invalid - Non-Temporal Word + +**Input:** + +```json +{ + "timeframe_text": "Will", + "goal_text": "have enormous consequences", + "tweet_timestamp": "2020-03-15T10:00:00Z", + "current_time": "2025-01-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + 
"timeframe_status": "missing", + "start_utc": null, + "end_utc": null, + "precision": "unbounded", + "reasoning": "'Will' is a verb/auxiliary, not a timeframe. This appears to be a filter extraction error.", + "assumptions": [], + "confidence": 1.0 +} +``` + +--- + +### Example 8: Invalid - "Yet" is Not a Timeframe + +**Input:** + +```json +{ + "timeframe_text": "yet", + "goal_text": "Don't write an obituary for globalism", + "tweet_timestamp": "2020-03-20T12:00:00Z", + "current_time": "2025-01-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "missing", + "start_utc": null, + "end_utc": null, + "precision": "unbounded", + "reasoning": "'yet' means 'at this time' or 'so far', not a future deadline. No verifiable timeframe exists.", + "assumptions": [], + "confidence": 1.0 +} +``` + +--- + +### Example 9: Invalid - Excessively Long Timeframe + +**Input:** + +```json +{ + "timeframe_text": "The 21st century", + "goal_text": "The East will be in ascendance", + "tweet_timestamp": "2020-03-15T10:00:00Z", + "current_time": "2025-01-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "missing", + "start_utc": null, + "end_utc": null, + "precision": "unbounded", + "reasoning": "21st century spans 100 years (2001-2100). This is too long to be a meaningful verifiable prediction. Maximum reasonable timeframe is 10 years.", + "assumptions": [], + "confidence": 1.0 +} +``` + +--- + +### Example 10: Invalid - Conditional Phrase Not a Timeframe + +**Input:** + +```json +{ + "timeframe_text": "When checks hit", + "goal_text": "people will spend on consumer staples", + "tweet_timestamp": "2020-03-22T14:30:00Z", + "current_time": "2025-01-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "missing", + "start_utc": null, + "end_utc": null, + "precision": "unbounded", + "reasoning": "'When checks hit' is a conditional trigger, not a temporal deadline. This is an 'if X then Y' statement, not a time-bound prediction.", + "assumptions": [], + "confidence": 1.0 +} +``` + +--- + +### Example 11: Invalid - "In the Future" is Vague + +**Input:** + +```json +{ + "timeframe_text": "In the future", + "goal_text": "BTC will dominate in hashpower charts", + "tweet_timestamp": "2020-03-20T10:00:00Z", + "current_time": "2025-01-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "missing", + "start_utc": null, + "end_utc": null, + "precision": "unbounded", + "reasoning": "'In the future' is completely unbounded with no specific deadline. This could mean next week or in 50 years. There's no way to determine when to verify this prediction.", + "assumptions": [], + "confidence": 1.0 +} +``` + +--- + +### Example 12: Invalid - Meta-Prediction Phrase + +**Input:** + +```json +{ + "timeframe_text": "I'm predicting", + "goal_text": "the start of a California independence movement", + "tweet_timestamp": "2020-03-18T16:00:00Z", + "current_time": "2025-01-20T10:00:00Z" +} +``` + +**Output:** + +```json +{ + "timeframe_status": "missing", + "start_utc": null, + "end_utc": null, + "precision": "unbounded", + "reasoning": "'I'm predicting' is a meta-statement about making a prediction, not a timeframe. This describes the ACT of predicting, not WHEN the prediction will come true. 
No temporal deadline exists.",
+  "assumptions": [],
+  "confidence": 1.0
+}
+```
+
+---
+
+### Example 13: Thread Context Clarification
+
+**Input:**
+
+```json
+{
+  "timeframe_text": "by then",
+  "goal_text": "we'll see 10k ETH",
+  "tweet_timestamp": "2025-01-15T09:30:00Z",
+  "current_time": "2025-02-20T10:00:00Z",
+  "thread_context": "Previous tweet in thread: 'Q2 2025 will be wild'"
+}
+```
+
+**Output:**
+
+```json
+{
+  "timeframe_status": "implicit",
+  "start_utc": "2025-01-15T09:30:00Z",
+  "end_utc": "2025-06-30T23:59:59Z",
+  "precision": "quarter",
+  "reasoning": "'by then' refers to Q2 2025 mentioned in thread context",
+  "assumptions": ["'then' refers to Q2 from previous tweet", "end of Q2"],
+  "confidence": 0.75
+}
+```
+
+---
+
+## Edge Cases to Handle
+
+### Conflicting Times
+
+Text: "BTC to 100k by June or July 2025"
+→ Choose June (earliest/most conservative)
+
+### Past Deadlines
+
+If `end_utc` is already in the past (i.e. `end_utc` < `current_time`), that's fine. Just extract what was said.
+We'll handle maturity checking separately.
+
+### Year Ambiguity
+
+Text: "by March" (no year mentioned)
+→ If tweet is in 2024 and March hasn't passed, use 2024-03-31
+→ If tweet is in November 2024, "by March" likely means 2025-03-31
+
+### Implicit "This Year"
+
+Text: "before June" (tweet posted in January 2025)
+→ Assume 2025-06-01T00:00:00Z
+
+### Trading Hours
+
+Text: "by market close Friday"
+→ Assume NYSE close: Friday 21:00:00 UTC (4 PM EST)
+
+## Important Reminders
+
+1. **Always anchor to tweet_timestamp**, never to current_time
+2. **Be conservative**: When uncertain, choose shorter/earlier deadline
+3. **Use UTC**: Convert all timezones to UTC
+4. **End of day = 23:59:59**: For date-only deadlines
+5. **Return valid JSON only**: No markdown, no extra text
+6. **Use thread_context**: It may clarify vague references
+7. **Confidence matters**: Lower confidence for inferred/ambiguous cases
+8. **Precision guides interpretation**: "month" precision = less strict than "day"
+
+## Testing Your Output
+
+Before returning, verify:
+
+- [ ] `end_utc` is a valid ISO-8601 timestamp or null
+- [ ] `start_utc` is a valid ISO-8601 timestamp or null
+- [ ] If status is "explicit", "implicit", or "inferred", `end_utc` must NOT be null
+- [ ] `confidence` is between 0.0 and 1.0
+- [ ] `reasoning` explains your logic clearly
+- [ ] JSON is valid (no trailing commas, proper quotes)
diff --git a/services/swarm-judge/VERDICT_PROMPT.md b/services/swarm-judge/VERDICT_PROMPT.md
new file mode 100644
index 000000000..41beb0082
--- /dev/null
+++ b/services/swarm-judge/VERDICT_PROMPT.md
@@ -0,0 +1,446 @@
+# Verdict Generation Prompt
+
+## Goal
+
+Determine if a prediction came true by searching for evidence and checking if the target was achieved within the specified timeframe.
+
+**CRITICAL: This must be a PREDICTION, not NEWS reporting.** If the target was already true or was news BEFORE the prediction was made (before start_utc), the prediction is INVALID and must return verdict: false.
+
+## Inputs
+
+You will receive:
+
+```json
+{
+  "context": "Author is making a confident price prediction for Bitcoin reaching 100k by end of Q1 2025.",
+  "target_text": "BTC will hit 100k",
+  "timeframe_text": "by end of Q1 2025",
+  "timeframe_parsed": {
+    "start_utc": "2024-11-15T12:00:00Z",
+    "end_utc": "2025-03-31T23:59:59Z",
+    "precision": "quarter"
+  }
+}
+```
+
+## Task
+
+Search the web for evidence and determine if the target was achieved within the timeframe.
+
+**Critical Instructions:**
+
+1.
**Check if this is a SELF-ANNOUNCEMENT, not a prediction:** + - If the author is announcing their OWN actions, plans, or decisions, this is NOT a prediction + - Examples: Artist announcing album release, athlete announcing retirement, CEO announcing product launch + - The author must be predicting something OUTSIDE their control + - This includes both obvious announcements ("I'm releasing X") and subtle ones ("And now I get to tour America??") + - If this is a self-announcement, valid MUST be false + +2. **Check if this was NEWS, not a prediction:** + - **CRITICAL: Check if the event was ANNOUNCED/DECIDED before the tweet, even if it hasn't HAPPENED yet** + - Search for evidence that the target was ALREADY ANNOUNCED, SIGNED, PASSED, or DECIDED before start_utc + - News organizations reporting on already-announced future events are NOT making predictions + - **Common patterns that indicate NEWS, not predictions:** + - Laws/bills that were already SIGNED/PASSED (even if enforcement is future) + - Products/features already ANNOUNCED by companies (even if release is future) + - Events already CONFIRMED by organizers (even if the event date is future) + - Contracts/deals already SIGNED (even if execution is future) + - Company statements/press releases announcing future plans + - **How to verify:** + - Search for official announcements, press releases, or news reports BEFORE start_utc + - Check if the decision/commitment was already public knowledge + - If other sources reported the same thing BEFORE this tweet, it's news, not a prediction + - **Examples:** + - "Florida will ban social media for under-14s from Jan 2025" tweeted March 2024 → INVALID if bill was signed before March 2024 + - "Apple will use USB-C by 2024" tweeted Oct 2022 → INVALID if EU law was passed before Oct 2022 + - "Fortnite returns to iOS this year" → INVALID if Epic already announced it + - If the target was already decided/announced BEFORE the prediction was made, valid MUST be false + +3. **Only evaluate predictions that were actually predictive:** + - ONLY consider evidence from dates between start_utc and end_utc + - Ignore any evidence from after end_utc (too late) + - For price predictions: Target reached at ANY point during the window counts as true + - If you cannot find sufficient evidence, verdict is false + +4. **Search authoritative sources:** + - Use reliable sources for verification (news sites, official announcements, market data) + - Cite specific sources and dates in your reasoning + +## Output Format + +Return ONLY valid JSON (no markdown fences): + +```json +{ + "valid": true, + "verdict": true, + "confidence": 0.95, + "reasoning": "Brief explanation of why the prediction came true or false, citing specific sources and dates" +} +``` + +**Fields:** + +- `valid`: Boolean indicating if this was a legitimate prediction (true) or invalid/news (false) +- `verdict`: Boolean indicating if the prediction came true (true) or false (false). Only meaningful when valid=true. 
+- `confidence`: Confidence score from 0.0 to 1.0 indicating certainty in the verdict determination +- `reasoning`: Explanation with sources and dates + +**When to set valid=false:** + +- Author is announcing their own actions/plans (self-announcement, not a prediction) +- Target was already achieved/announced BEFORE start_utc (this was news, not a prediction) +- When valid=false, verdict should also be false + +## Examples + +### Example 1: True + +**Input:** + +```json +{ + "context": "Price prediction for Bitcoin.", + "target_text": "BTC will hit 100k", + "timeframe_text": "by end of Q1 2025", + "timeframe_parsed": { + "start_utc": "2024-11-15T12:00:00Z", + "end_utc": "2025-03-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": true, + "verdict": true, + "confidence": 0.98, + "reasoning": "According to CoinGecko, Bitcoin reached $102,450 on March 28, 2025, which is within the timeframe and exceeds the $100k target." +} +``` + +### Example 2: False + +**Input:** + +```json +{ + "context": "Price prediction for Bitcoin.", + "target_text": "BTC will hit 100k", + "timeframe_text": "by end of Q1 2025", + "timeframe_parsed": { + "start_utc": "2024-11-15T12:00:00Z", + "end_utc": "2025-03-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": true, + "verdict": false, + "confidence": 0.95, + "reasoning": "Bitcoin peaked at $87,300 on March 31, 2025 according to market data, falling short of the $100k target." +} +``` + +### Example 3: False - Happened After Deadline + +**Input:** + +```json +{ + "context": "Price prediction for Bitcoin.", + "target_text": "BTC will hit 100k", + "timeframe_text": "by end of Q1 2025", + "timeframe_parsed": { + "start_utc": "2024-11-15T12:00:00Z", + "end_utc": "2025-03-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": true, + "verdict": false, + "confidence": 0.97, + "reasoning": "Bitcoin reached $102,000 on April 3, 2025, which is after the deadline of March 31, 2025. The prediction failed." +} +``` + +### Example 4: False - No Evidence + +**Input:** + +```json +{ + "context": "Price prediction for an obscure token.", + "goal_text": "XYZ will hit $10", + "timeframe_text": "by Friday", + "timeframe_parsed": { + "start_utc": "2025-01-20T00:00:00Z", + "end_utc": "2025-01-24T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": true, + "verdict": false, + "confidence": 0.7, + "reasoning": "Unable to find reliable price data for XYZ token during the specified timeframe. Insufficient evidence to verify the prediction." +} +``` + +### Example 5: Invalid - This Was News, Not a Prediction + +**Input:** + +```json +{ + "context": "Author claiming Bitcoin will hit 100k by end of month.", + "target_text": "BTC will hit 100k", + "timeframe_text": "by end of January", + "timeframe_parsed": { + "start_utc": "2025-01-15T10:30:00Z", + "end_utc": "2025-01-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": false, + "verdict": false, + "confidence": 0.99, + "reasoning": "Bitcoin reached $100,000 on January 12, 2025 according to CoinGecko, which is BEFORE the prediction was made on January 15, 2025. This is not a prediction but rather news reporting or commentary on an event that already happened. Prediction is invalid." 
+} +``` + +### Example 6: Invalid - Event Already Announced Before Prediction + +**Input:** + +```json +{ + "context": "Author predicting a major company acquisition.", + "goal_text": "Microsoft will acquire OpenAI", + "timeframe_text": "by end of Q1", + "timeframe_parsed": { + "start_utc": "2025-02-10T14:00:00Z", + "end_utc": "2025-03-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": false, + "verdict": false, + "confidence": 0.98, + "reasoning": "Microsoft announced the acquisition of OpenAI on February 8, 2025 according to Reuters and Bloomberg. The tweet was made on February 10, 2025, which is AFTER the announcement. This is news commentary, not a prediction. Prediction is invalid." +} +``` + +### Example 7: Valid - Actual Prediction That Came True + +**Input:** + +```json +{ + "context": "Author predicting Ethereum price movement.", + "goal_text": "ETH will reach $5000", + "timeframe_text": "by March 2025", + "timeframe_parsed": { + "start_utc": "2024-12-01T08:00:00Z", + "end_utc": "2025-03-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": true, + "verdict": true, + "confidence": 0.96, + "reasoning": "Verified that ETH was trading at $3,200 when the prediction was made on December 1, 2024. ETH reached $5,100 on March 15, 2025 according to CoinGecko, which is within the timeframe. This was a genuine forward-looking prediction that came true." +} +``` + +### Example 8: Invalid - Self-Announcement (Obvious) + +**Input:** + +```json +{ + "context": "Artist announcing new album release date on their own account.", + "goal_text": "My new album drops July 17th", + "timeframe_text": "July 17th", + "timeframe_parsed": { + "start_utc": "2025-05-10T18:30:00Z", + "end_utc": "2025-07-17T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": false, + "verdict": false, + "confidence": 0.99, + "reasoning": "This is a self-announcement where the author is declaring their own plans for releasing their album. The author has full control over when they release their album, so this is not a prediction about an uncertain future event. This is an announcement of their own decision, not a prediction. Prediction is invalid." +} +``` + +### Example 9: Invalid - Self-Announcement (Subtle) + +**Input:** + +```json +{ + "context": "Artist making an excited statement about touring.", + "goal_text": "I get to tour America this year", + "timeframe_text": "this year", + "timeframe_parsed": { + "start_utc": "2025-03-15T20:00:00Z", + "end_utc": "2025-12-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": false, + "verdict": false, + "confidence": 0.97, + "reasoning": "Despite the casual phrasing ('And now I get to tour'), this is the author announcing their own tour plans. The author controls their tour schedule, making this a self-announcement rather than a prediction. Even though it's phrased as an excited statement rather than a formal announcement, it's still declaring the author's own planned actions. Prediction is invalid." 
+} +``` + +### Example 10: Invalid - News About Already-Passed Law + +**Input:** + +```json +{ + "context": "News organization reporting on Florida social media ban for minors.", + "target_text": "Florida will ban anyone under 14 owning a social media account", + "timeframe_text": "from January 2025", + "timeframe_parsed": { + "start_utc": "2024-03-25T13:26:00Z", + "end_utc": "2025-01-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": false, + "verdict": false, + "confidence": 0.98, + "reasoning": "The tweet was made on March 25, 2024, and explicitly states 'The bill was signed today by Governor DeSantis.' This means the law was already PASSED and SIGNED before the tweet was made. While the enforcement date is January 2025 (future), the decision to ban was already made and publicly announced. This is news reporting on an already-signed law, not a prediction. A prediction would have been made BEFORE the bill was signed. Prediction is invalid." +} +``` + +### Example 11: Invalid - News About Already-Announced Product Return + +**Input:** + +```json +{ + "context": "News organization reporting that Fortnite will return to iOS in Europe.", + "target_text": "Fortnite on iOS will return to Apple devices", + "timeframe_text": "this year in Europe", + "timeframe_parsed": { + "start_utc": "2024-01-25T10:00:00Z", + "end_utc": "2024-12-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": false, + "verdict": false, + "confidence": 0.96, + "reasoning": "Search reveals that Epic Games had already announced plans to return Fortnite to iOS in Europe through the Digital Markets Act before January 25, 2024. Multiple news sources reported on Epic's official announcement in early January 2024. This tweet is reporting on Epic's already-announced plans, not making an original prediction. News organizations reporting on company announcements are not making predictions. Prediction is invalid." +} +``` + +### Example 12: Invalid - News About Already-Passed EU Law + +**Input:** + +```json +{ + "context": "Reporting on Apple being forced to adopt USB-C due to EU law.", + "target_text": "Apple will be forced to get rid of their lightning port after the EU parliament passed a new law requiring the use of USB-C", + "timeframe_text": "by 2024", + "timeframe_parsed": { + "start_utc": "2022-10-04T12:00:00Z", + "end_utc": "2024-12-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": false, + "verdict": false, + "confidence": 0.99, + "reasoning": "The tweet was made on October 4, 2022, and states 'after the EU parliament passed a new law' - meaning the law was ALREADY PASSED on the same day or before. According to European Parliament records, the vote to enforce USB-C happened on October 4, 2022. This tweet is reporting on a law that was just passed, not predicting future legislation. While the enforcement deadline (2024) is future, the legislative decision was already made. This is news reporting, not prediction. Prediction is invalid." 
+} +``` + +### Example 13: Invalid - News About Already-Announced Player Transfer + +**Input:** + +```json +{ + "context": "News organization reporting on S1mple joining FaZe Clan for specific events.", + "target_text": "S1mple will compete at IEM Dallas and the BLAST Austin Major", + "timeframe_text": "2025", + "timeframe_parsed": { + "start_utc": "2025-05-05T10:20:00Z", + "end_utc": "2025-12-31T23:59:59Z" + } +} +``` + +**Output:** + +```json +{ + "valid": false, + "verdict": false, + "confidence": 0.97, + "reasoning": "The tweet states 'Legendary Counter Strike player S1mple has joined FaZe Clan on a 2-event loan' - note the past tense 'has joined.' This indicates the deal was already announced/finalized before the tweet. The tweet is reporting on an already-confirmed roster move and the specific events were already announced as part of that deal. This is news reporting on an already-announced agreement, not a prediction about whether S1mple would join or which events he would attend. Prediction is invalid." +} +``` diff --git a/services/swarm-verifier/VERIFIER.md b/services/swarm-judge/VERIFIER.md similarity index 100% rename from services/swarm-verifier/VERIFIER.md rename to services/swarm-judge/VERIFIER.md diff --git a/services/swarm-judge/eslint.config.js b/services/swarm-judge/eslint.config.js new file mode 100644 index 000000000..7a3e95f54 --- /dev/null +++ b/services/swarm-judge/eslint.config.js @@ -0,0 +1,9 @@ +import baseConfig from "@torus-ts/eslint-config/base"; + +/** @type {import('typescript-eslint').Config} */ +export default [ + { + ignores: [], + }, + ...baseConfig, +]; diff --git a/services/swarm-verifier/helmfile.yaml b/services/swarm-judge/helmfile.yaml similarity index 90% rename from services/swarm-verifier/helmfile.yaml rename to services/swarm-judge/helmfile.yaml index 7f99669d4..89d08ebf2 100644 --- a/services/swarm-verifier/helmfile.yaml +++ b/services/swarm-judge/helmfile.yaml @@ -5,7 +5,7 @@ environments: - values.yaml - namespace: torus-dev-web-apps - environmentName: dev - - releaseName: dev-swarm-verifier + - releaseName: dev-swarm-judge - wssApiEndpoint: wss://api.testnet.torus.network - chainEnv: testnet prod: @@ -14,7 +14,7 @@ environments: - values.yaml - namespace: torus-prod-web-apps - environmentName: prod - - releaseName: swarm-verifier + - releaseName: swarm-judge - wssApiEndpoint: wss://api.torus.network - chainEnv: mainnet --- diff --git a/services/swarm-judge/package.json b/services/swarm-judge/package.json new file mode 100644 index 000000000..31666c359 --- /dev/null +++ b/services/swarm-judge/package.json @@ -0,0 +1,50 @@ +{ + "name": "swarm-judge", + "version": "0.1.0", + "private": true, + "license": "MIT", + "type": "module", + "exports": { + ".": "./src/index.ts" + }, + "scripts": { + "build": "tsc", + "clean": "rm -rf .turbo node_modules", + "dev": "pnpm with-env tsx ./src/index.ts", + "format": "prettier --check . --ignore-path ../../.gitignore --cache --cache-location .cache/.prettiercache", + "format-fix": "prettier --write . 
--ignore-path ../../.gitignore --cache --cache-location .cache/.prettiercache", + "lint": "eslint --cache --cache-location .cache/.eslintcache", + "lint-fix": "eslint --cache --cache-location .cache/.eslintcache --fix", + "start": "node ./dist/index.js", + "start:local": "pnpm with-env pnpm start", + "typecheck": "tsc --noEmit", + "with-env": "dotenv -e ../../.env --" + }, + "prettier": "@torus-ts/prettier-config", + "dependencies": { + "@t3-oss/env-core": "^0.11.1", + "@torus-ts/db": "workspace:*", + "@torus-network/sdk": "workspace:*", + "@torus-network/torus-utils": "workspace:*", + "cors": "^2.8.5", + "express": "^4.19.2", + "node-fetch": "^3.3.2", + "superjson": "catalog:", + "zod": "catalog:", + "tsafe": "catalog:", + "vitest": "catalog:" + }, + "devDependencies": { + "@torus-ts/eslint-config": "workspace:*", + "@torus-ts/prettier-config": "workspace:*", + "@torus-ts/tsconfig": "workspace:*", + "@types/cors": "^2.8.17", + "@types/express": "^4.17.21", + "@types/node": "catalog:", + "dotenv-cli": "catalog:", + "eslint": "catalog:", + "prettier": "catalog:", + "typescript": "catalog:", + "vitest": "catalog:" + } +} diff --git a/services/swarm-verifier/src/env.ts b/services/swarm-judge/src/env.ts similarity index 100% rename from services/swarm-verifier/src/env.ts rename to services/swarm-judge/src/env.ts diff --git a/services/swarm-judge/src/index.ts b/services/swarm-judge/src/index.ts new file mode 100644 index 000000000..1ce23a5ca --- /dev/null +++ b/services/swarm-judge/src/index.ts @@ -0,0 +1,41 @@ +import { readFile } from "node:fs/promises"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { createDb } from "@torus-ts/db/client"; +import { PredictionJudge } from "./judge"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +if (!process.env.OPENROUTER_API_KEY) { + console.error("missing OPENROUTER_API_KEY env var"); + process.exit(1); +} + +const timeframePrompt = await readFile( + join(__dirname, "../TIMEFRAME_PROMPT.md"), + "utf-8", +); + +const filterValidationPrompt = await readFile( + join(__dirname, "../FILTER_VALIDATION_PROMPT.md"), + "utf-8", +); + +const verdictPrompt = await readFile( + join(__dirname, "../VERDICT_PROMPT.md"), + "utf-8", +); + +const judge = new PredictionJudge( + { + openrouterApiKey: process.env.OPENROUTER_API_KEY, + concurrency: 8, + timeframePrompt, + filterValidationPrompt, + verdictPrompt, + }, + createDb(), +); + +await judge.runJudge(() => false); diff --git a/services/swarm-verifier/src/verifier.ts b/services/swarm-judge/src/judge.ts similarity index 70% rename from services/swarm-verifier/src/verifier.ts rename to services/swarm-judge/src/judge.ts index 6622b3ccd..333f7fbc8 100644 --- a/services/swarm-verifier/src/verifier.ts +++ b/services/swarm-judge/src/judge.ts @@ -4,165 +4,25 @@ import { parsedPredictionDetailsSchema, parsedPredictionFeedbackSchema, parsedPredictionSchema, - predictionDuplicateRelationsSchema, scrapedTweetSchema, - twitterScrapingJobsSchema, twitterUsersSchema, verdictSchema, + verificationClaimSchema, + verifierTopicRegistrationSchema, } from "@torus-ts/db/schema"; import type { + ClaimSource, FailureCause, PostSlice, ScrapedTweet, VerdictContext, } from "@torus-ts/db/schema"; -import { and, asc, eq, lt, notExists, sql } from "drizzle-orm"; +import { and, asc, eq, lt, min, notExists, sql } from "drizzle-orm"; import { logger } from "./logger"; import { sleep } from "./utils"; const workerContext = new AsyncLocalStorage<{ workerId: 
number }>(); -function groupSlicesByTweet(slices: PostSlice[]): Map { - const byTweet = new Map(); - for (const slice of slices) { - const group = byTweet.get(slice.source.tweet_id); - if (group) group.push(slice); - else byTweet.set(slice.source.tweet_id, [slice]); - } - return byTweet; -} - -interface Range { - start: number; - end: number; -} - -/** Sorts ranges by start position and merges overlapping/adjacent ones into unified spans. */ -function mergeRanges(ranges: Range[]): Range[] { - if (ranges.length === 0) return []; - - const sorted = [...ranges].sort((a, b) => a.start - b.start); - const first = sorted[0]; - if (!first) return []; - - const merged: Range[] = [first]; - - for (let i = 1; i < sorted.length; i++) { - const current = sorted[i]; - const last = merged[merged.length - 1]; - if (!current || !last) continue; - - if (current.start <= last.end + 1) { - last.end = Math.max(last.end, current.end); - } else { - merged.push(current); - } - } - - return merged; -} - -/** - * Calculates what fraction of slices1's character span is covered by slices2. Groups - * slices by tweet, merges overlapping ranges within each group, then sums intersection - * lengths. Handles fragmented slices that collectively cover the same content. - */ -function calculateCoverageAwareOverlap( - slices1: PostSlice[], - slices2: PostSlice[], -): number { - if (slices1.length === 0 || slices2.length === 0) { - return 0; - } - - const slicesByTweet1 = groupSlicesByTweet(slices1); - const slicesByTweet2 = groupSlicesByTweet(slices2); - - let totalCoveredLength = 0; - let totalLength1 = 0; - - for (const [tweetId, tweet1Slices] of slicesByTweet1) { - const tweet2Slices = slicesByTweet2.get(tweetId) ?? []; - - const merged1 = mergeRanges( - tweet1Slices.map((s) => ({ start: s.start, end: s.end })), - ); - const merged2 = mergeRanges( - tweet2Slices.map((s) => ({ start: s.start, end: s.end })), - ); - - for (const r1 of merged1) { - const r1Length = r1.end - r1.start; - totalLength1 += r1Length; - - let coveredLength = 0; - for (const r2 of merged2) { - const overlapStart = Math.max(r1.start, r2.start); - const overlapEnd = Math.min(r1.end, r2.end); - coveredLength += Math.max(0, overlapEnd - overlapStart); - } - - totalCoveredLength += coveredLength; - } - } - - if (totalLength1 === 0) return 0; - - return totalCoveredLength / totalLength1; -} - -/** - * Computes coverage from both directions and returns the MINIMUM. This ensures - * that extra content in either prediction significantly reduces the similarity - * score, preventing supersets from being marked as duplicates. - */ -function calculateBidirectionalOverlap( - slices1: PostSlice[], - slices2: PostSlice[], -): number { - const overlap1to2 = calculateCoverageAwareOverlap(slices1, slices2); - const overlap2to1 = calculateCoverageAwareOverlap(slices2, slices1); - - return Math.min(overlap1to2, overlap2to1); -} - -export interface ParsedPredictionForDedup { - id: string; - predictionId: string; - target: PostSlice[]; - timeframe: PostSlice[]; -} - -export interface PredictionComparisonResult { - targetScore: number; - timeframeScore: number; - isDuplicate: boolean; -} - -/** - * Compares two predictions using bidirectional slice overlap. Both target and - * timeframe must exceed their thresholds (default 0.96) for a duplicate match. 
- */ -export function comparePredictions( - pred1: ParsedPredictionForDedup, - pred2: ParsedPredictionForDedup, - targetThreshold = 0.96, - timeframeThreshold = 0.96, -): PredictionComparisonResult { - const targetScore = calculateBidirectionalOverlap(pred1.target, pred2.target); - const timeframeScore = calculateBidirectionalOverlap( - pred1.timeframe, - pred2.timeframe, - ); - - return { - targetScore, - timeframeScore, - isDuplicate: - targetScore >= targetThreshold && timeframeScore >= timeframeThreshold, - }; -} - function logInfo(message: string, fields?: Record): void { const context = workerContext.getStore(); const prefix = context ? `[Worker ${context.workerId}]` : ""; @@ -416,7 +276,7 @@ const VERDICT_SCHEMA = { additionalProperties: false, } as const; -export interface PredictionVerifierConfig { +export interface PredictionJudgeConfig { concurrency?: number; debugMode?: boolean; openrouterApiKey: string; @@ -425,12 +285,12 @@ export interface PredictionVerifierConfig { verdictPrompt: string; } -export class PredictionVerifier { - private readonly config: Required; +export class PredictionJudge { + private readonly config: Required; private db: DB; - constructor(config: PredictionVerifierConfig, db: DB) { + constructor(config: PredictionJudgeConfig, db: DB) { this.config = { concurrency: config.concurrency ?? 3, debugMode: config.debugMode ?? false, @@ -443,22 +303,62 @@ export class PredictionVerifier { this.db = db; } - async getNextPredictionToVerify(tx: Transaction): Promise< + /** + * Fetches the next prediction that has claims ready for evaluation. A prediction + * is ready when its oldest claim is at least 1 hour old, it has no verdict yet, + * and no feedback has been recorded. + */ + async getNextPredictionWithClaims(tx: Transaction): Promise< | { - id: string; - predictionId: string; - sourceTweetId: bigint; - conversationId: bigint | null; - target: unknown; - timeframe: unknown; - llmConfidence: string; - vagueness: string | null; - topicId: string | null; - context: unknown; - filterAgentId: string | null; + prediction: { + id: string; + predictionId: string; + sourceTweetId: bigint; + conversationId: bigint | null; + target: PostSlice[]; + timeframe: PostSlice[]; + topicId: string | null; + }; + claims: { + id: string; + verifierAgentId: string; + claimOutcome: boolean; + confidence: string; + reasoning: string; + sources: ClaimSource[] | null; + timeframeStartUtc: Date | null; + timeframeEndUtc: Date | null; + timeframePrecision: string | null; + createdAt: Date; + }[]; } | undefined > { + // Find predictions where the oldest claim is at least 1 hour old + const predictionsWithMatureClaims = await tx + .select({ + parsedPredictionId: verificationClaimSchema.parsedPredictionId, + oldestClaimAt: min(verificationClaimSchema.createdAt), + }) + .from(verificationClaimSchema) + .groupBy(verificationClaimSchema.parsedPredictionId) + .having( + lt( + min(verificationClaimSchema.createdAt), + sql`NOW() - INTERVAL '1 hour'`, + ), + ); + + if (predictionsWithMatureClaims.length === 0) { + return undefined; + } + + // Get the prediction IDs that have mature claims + const matureClaimPredictionIds = predictionsWithMatureClaims.map( + (p) => p.parsedPredictionId, + ); + + // Find a prediction that has no verdict and no feedback const predictions = await tx .select({ id: parsedPredictionSchema.id, @@ -467,11 +367,7 @@ export class PredictionVerifier { conversationId: scrapedTweetSchema.conversationId, target: parsedPredictionSchema.target, timeframe: 
parsedPredictionSchema.timeframe, - llmConfidence: parsedPredictionSchema.llmConfidence, - vagueness: parsedPredictionSchema.vagueness, topicId: parsedPredictionSchema.topicId, - context: parsedPredictionSchema.context, - filterAgentId: parsedPredictionSchema.filterAgentId, }) .from(parsedPredictionSchema) .innerJoin( @@ -483,10 +379,7 @@ export class PredictionVerifier { ) .where( and( - lt( - parsedPredictionSchema.createdAt, - sql`NOW() - INTERVAL '5 minutes'`, - ), + sql`${parsedPredictionSchema.id} IN ${matureClaimPredictionIds}`, notExists( tx .select() @@ -506,35 +399,43 @@ export class PredictionVerifier { ), ), ), - notExists( - tx - .select() - .from(predictionDuplicateRelationsSchema) - .where( - eq( - predictionDuplicateRelationsSchema.predictionId, - parsedPredictionSchema.id, - ), - ), - ), - notExists( - tx - .select() - .from(twitterScrapingJobsSchema) - .where( - eq( - twitterScrapingJobsSchema.conversationId, - scrapedTweetSchema.conversationId, - ), - ), - ), ), ) .orderBy(asc(parsedPredictionSchema.createdAt)) .limit(1) .for("update", { skipLocked: true }); - return predictions[0]; + const prediction = predictions[0]; + if (!prediction) { + return undefined; + } + + // Fetch all claims for this prediction + const claims = await tx + .select({ + id: verificationClaimSchema.id, + verifierAgentId: verificationClaimSchema.verifierAgentId, + claimOutcome: verificationClaimSchema.claimOutcome, + confidence: verificationClaimSchema.confidence, + reasoning: verificationClaimSchema.reasoning, + sources: verificationClaimSchema.sources, + timeframeStartUtc: verificationClaimSchema.timeframeStartUtc, + timeframeEndUtc: verificationClaimSchema.timeframeEndUtc, + timeframePrecision: verificationClaimSchema.timeframePrecision, + createdAt: verificationClaimSchema.createdAt, + }) + .from(verificationClaimSchema) + .where(eq(verificationClaimSchema.parsedPredictionId, prediction.id)) + .orderBy(asc(verificationClaimSchema.createdAt)); + + return { + prediction: { + ...prediction, + target: prediction.target, + timeframe: prediction.timeframe, + }, + claims, + }; } async fetchSinglePredictionTweet( @@ -710,27 +611,6 @@ export class PredictionVerifier { }); } - private async storeVerdict( - tx: Transaction, - parsedPredictionId: string, - verdictResult: VerdictResult, - ): Promise { - const context: VerdictContext = { - feedback: verdictResult.reasoning, - }; - - await tx.insert(verdictSchema).values({ - parsedPredictionId: parsedPredictionId, - verdict: verdictResult.verdict, - context: context, - }); - - logInfo("Created verdict record", { - parsedPredictionId, - verdict: verdictResult.verdict, - }); - } - private async generateVerdict( context: string, targetText: string, @@ -1144,111 +1024,21 @@ export class PredictionVerifier { return replyChain.sort((a, b) => (a.date < b.date ? -1 : 1)); } - /** Fetches all parsed predictions associated with the given tweet IDs. 
*/ - private async fetchPredictionsForTweets( - tx: Transaction, - tweetIds: bigint[], - ): Promise { - if (tweetIds.length === 0) return []; - - const predictions = await tx - .select({ - id: parsedPredictionSchema.id, - predictionId: parsedPredictionSchema.predictionId, - target: parsedPredictionSchema.target, - timeframe: parsedPredictionSchema.timeframe, - }) - .from(parsedPredictionSchema) - .innerJoin( - scrapedTweetSchema, - eq( - scrapedTweetSchema.predictionId, - parsedPredictionSchema.predictionId, - ), - ) - .where(sql`${scrapedTweetSchema.id} IN ${tweetIds}`); - - return predictions as ParsedPredictionForDedup[]; - } - - /** - * Runs deduplication on predictions in the tweet tree using union-find clustering. - * Returns the canonical prediction ID for the given prediction, or null if it's - * already canonical (unique or the chosen representative of its cluster). - */ - private findCanonicalPrediction( - predictionId: string, - predictions: ParsedPredictionForDedup[], - ): { canonicalId: string; similarityScore: number } | null { - if (predictions.length < 2) return null; - - const parent = new Map(); - for (const pred of predictions) { - parent.set(pred.id, pred.id); - } - - function find(id: string): string { - const p = parent.get(id); - if (p === undefined || p === id) return id; - const root = find(p); - parent.set(id, root); - return root; - } - - function union(id1: string, id2: string): void { - const root1 = find(id1); - const root2 = find(id2); - if (root1 === root2) return; - if (root1 < root2) { - parent.set(root2, root1); - } else { - parent.set(root1, root2); - } - } - - for (let i = 0; i < predictions.length; i++) { - for (let j = i + 1; j < predictions.length; j++) { - const pred1 = predictions[i]; - const pred2 = predictions[j]; - if (!pred1 || !pred2) continue; - - if (comparePredictions(pred1, pred2).isDuplicate) { - union(pred1.id, pred2.id); - } - } - } - - const root = find(predictionId); - if (root === predictionId) return null; - - const canonical = predictions.find((p) => p.id === root); - if (!canonical) return null; - - const currentPred = predictions.find((p) => p.id === predictionId); - if (!currentPred) return null; - - const result = comparePredictions(currentPred, canonical); - return { - canonicalId: root, - similarityScore: (result.targetScore + result.timeframeScore) / 2, - }; - } - /** - * Main verification pipeline: validates slices, extracts timeframe via LLM, checks - * if prediction has matured, validates extraction quality, then generates verdict - * using web search. Returns false if no work available, true otherwise. + * Processes a prediction with mature claims by doing independent research and + * comparing against submitted claims. Returns true if work was done. 
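+   * Pipeline: validate the target and timeframe slices, extract the timeframe,
+   * run filter validation, then generate an independent verdict via web search.
+   * Each failed step records feedback; otherwise the verdict is stored together
+   * with the best agreeing claim, preferring topic-registered verifiers and
+   * higher confidence.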
*/ - async processNextPrediction(tx: Transaction): Promise { - const prediction = await this.getNextPredictionToVerify(tx); - if (!prediction) { - return false; - } - - logInfo("Found prediction to verify", { - id: prediction.id, - predictionId: prediction.predictionId, - sourceTweetId: prediction.sourceTweetId.toString(), + private async processClaimBasedVerdict( + tx: Transaction, + data: NonNullable< + Awaited> + >, + ): Promise { + const { prediction, claims } = data; + + logInfo("Processing claim-based verdict", { + predictionId: prediction.id, + claimCount: claims.length, }); const tweets = prediction.conversationId @@ -1264,79 +1054,16 @@ export class PredictionVerifier { return true; } - logInfo("Fetched tweets for prediction", { - count: tweets.length, - tweetIds: tweets.map((t) => t.id.toString()).join(","), - }); - const tweetMap = new Map(tweets.map((t) => [t.id, t])); - const target = prediction.target as PostSlice[]; - const timeframe = prediction.timeframe as PostSlice[]; - - const tweetIds = tweets.map((t) => t.id); - const predictionsInTree = await this.fetchPredictionsForTweets( - tx, - tweetIds, - ); - - const duplicateResult = this.findCanonicalPrediction( - prediction.id, - predictionsInTree, - ); - - if (duplicateResult) { - const canonical = predictionsInTree.find( - (p) => p.id === duplicateResult.canonicalId, - ); - - const currentTargetText = this.extractSliceText(target, tweetMap); - const currentTimeframeText = this.extractSliceText(timeframe, tweetMap); - const canonicalTargetText = canonical - ? this.extractSliceText(canonical.target, tweetMap) - : ""; - const canonicalTimeframeText = canonical - ? this.extractSliceText(canonical.timeframe, tweetMap) - : ""; - - logInfo("Prediction is a duplicate, skipping verification", { - predictionId: prediction.id, - canonicalId: duplicateResult.canonicalId, - similarityScore: duplicateResult.similarityScore.toFixed(4), - }); - logInfo("Duplicate comparison - Current target", { - text: currentTargetText, - }); - logInfo("Duplicate comparison - Canonical target", { - text: canonicalTargetText, - }); - logInfo("Duplicate comparison - Current timeframe", { - text: currentTimeframeText, - }); - logInfo("Duplicate comparison - Canonical timeframe", { - text: canonicalTimeframeText, - }); - - await tx - .insert(predictionDuplicateRelationsSchema) - .values({ - predictionId: prediction.id, - canonicalId: duplicateResult.canonicalId, - similarityScore: duplicateResult.similarityScore.toFixed(4), - }) - .onConflictDoNothing(); - - return true; - } + const { target, timeframe } = prediction; + // Validate slices const targetValidation = this.validatePostSlices( target, tweetMap, "Target", ); if (!targetValidation.valid) { - logInfo("Target slices validation failed", { - failureCause: targetValidation.failureCause, - }); await this.storeFeedback( tx, prediction.id, @@ -1353,9 +1080,6 @@ export class PredictionVerifier { "Timeframe", ); if (!timeframeValidation.valid) { - logInfo("Timeframe slices validation failed", { - failureCause: timeframeValidation.failureCause, - }); await this.storeFeedback( tx, prediction.id, @@ -1366,14 +1090,10 @@ export class PredictionVerifier { return true; } - logInfo("PostSlices validated successfully"); - const targetText = this.extractSliceText(target, tweetMap); const timeframeText = this.extractSliceText(timeframe, tweetMap); logInfo("Extracted slice text", { - targetLength: targetText.length, - timeframeLength: timeframeText.length, target: targetText, timeframe: timeframeText, }); @@ -1384,6 
+1104,7 @@ export class PredictionVerifier { return true; } + // Extract timeframe independently const timeframeResult = await this.extractTimeframe( targetText, timeframeText, @@ -1393,7 +1114,6 @@ export class PredictionVerifier { logInfo("Timeframe extracted", { status: timeframeResult.timeframe_status, - startUtc: timeframeResult.start_utc, endUtc: timeframeResult.end_utc, confidence: timeframeResult.confidence, }); @@ -1404,10 +1124,6 @@ export class PredictionVerifier { timeframeResult.timeframe_status === "missing" || timeframeResult.timeframe_status === "event_trigger" ) { - logInfo("Timeframe is unverifiable", { - status: timeframeResult.timeframe_status, - reasoning: timeframeResult.reasoning, - }); await this.storeFeedback( tx, prediction.id, @@ -1420,29 +1136,7 @@ export class PredictionVerifier { return true; } - if (timeframeResult.end_utc) { - const endDate = new Date(timeframeResult.end_utc); - const currentDate = new Date(); - const oneDayAfterEnd = new Date(endDate); - oneDayAfterEnd.setDate(oneDayAfterEnd.getDate() + 1); - - if (oneDayAfterEnd > currentDate) { - logInfo("Timeframe has not matured yet", { - endUtc: timeframeResult.end_utc, - currentDate: currentDate.toISOString(), - requiresMaturityUntil: oneDayAfterEnd.toISOString(), - }); - await this.storeFeedback( - tx, - prediction.id, - "timeframe_extraction", - `Prediction timeframe ends on ${timeframeResult.end_utc}. Predictions must be mature for at least one day before verification. Can be verified after ${oneDayAfterEnd.toISOString()}.`, - "FUTURE_TIMEFRAME", - ); - return true; - } - } - + // Validate filter extraction const validationResult = await this.validateFilterExtraction( targetText, timeframeText, @@ -1455,7 +1149,6 @@ export class PredictionVerifier { logInfo("Filter validation completed", { isValid: validationResult.is_valid, context: validationResult.context.substring(0, 100), - confidence: validationResult.confidence, }); await this.storeFilterValidationContext( @@ -1465,10 +1158,6 @@ export class PredictionVerifier { ); if (!validationResult.is_valid) { - logInfo("Prediction marked as invalid by filter validation", { - failureCause: validationResult.failure_cause, - reasoning: validationResult.reasoning, - }); await this.storeFeedback( tx, prediction.id, @@ -1479,6 +1168,7 @@ export class PredictionVerifier { return true; } + // Generate independent verdict const verdictResult = await this.generateVerdict( validationResult.context, targetText, @@ -1486,20 +1176,14 @@ export class PredictionVerifier { timeframeResult, ); - logInfo("Verdict generated", { - valid: verdictResult.valid, + logInfo("Independent verdict generated", { verdict: verdictResult.verdict, confidence: verdictResult.confidence, - sourcesCount: verdictResult.sources?.length ?? 
0, - reasoning: verdictResult.reasoning.substring(0, 150), }); await this.storeVerdictDetails(tx, prediction.id, verdictResult); if (!verdictResult.valid) { - logInfo("Prediction marked as invalid by verdict generation", { - reasoning: verdictResult.reasoning, - }); await this.storeFeedback( tx, prediction.id, @@ -1509,17 +1193,90 @@ export class PredictionVerifier { return true; } - await this.storeVerdict(tx, prediction.id, verdictResult); + // Calculate majority verdict from claims + const trueClaims = claims.filter((c) => c.claimOutcome === true); + const falseClaims = claims.filter((c) => c.claimOutcome === false); + const majorityVerdict = trueClaims.length >= falseClaims.length; - logInfo("Verdict stored successfully", { + // Find claims that agree with our independent verdict + const agreeingClaims = claims.filter( + (claim) => claim.claimOutcome === verdictResult.verdict, + ); + + logInfo("Claim comparison", { + ourVerdict: verdictResult.verdict, + majorityVerdict, + trueClaims: trueClaims.length, + falseClaims: falseClaims.length, + agreeingClaims: agreeingClaims.length, + }); + + // Fetch verifiers registered for this prediction's topic + const registeredVerifiers = new Set(); + if (prediction.topicId) { + const registrations = await tx + .select({ + verifierAgentId: verifierTopicRegistrationSchema.verifierAgentId, + }) + .from(verifierTopicRegistrationSchema) + .where(eq(verifierTopicRegistrationSchema.topicId, prediction.topicId)); + for (const reg of registrations) { + registeredVerifiers.add(reg.verifierAgentId); + } + } + + // Pick the best agreeing claim, ordered by: + // 1. Verifiers registered for the topic (first) + // 2. Claims matching the majority verdict + // 3. Highest confidence + let acceptedClaim: (typeof claims)[number] | undefined; + if (agreeingClaims.length > 0) { + acceptedClaim = agreeingClaims.reduce((best, claim) => { + const bestRegistered = registeredVerifiers.has(best.verifierAgentId); + const claimRegistered = registeredVerifiers.has(claim.verifierAgentId); + + // Prefer registered verifiers + if (claimRegistered && !bestRegistered) return claim; + if (bestRegistered && !claimRegistered) return best; + + // Both same registration status - prefer majority agreement + const bestMatchesMajority = best.claimOutcome === majorityVerdict; + const claimMatchesMajority = claim.claimOutcome === majorityVerdict; + if (claimMatchesMajority && !bestMatchesMajority) return claim; + if (bestMatchesMajority && !claimMatchesMajority) return best; + + // Both same majority status - prefer higher confidence + return parseFloat(claim.confidence) > parseFloat(best.confidence) + ? claim + : best; + }); + } + + // Store verdict with accepted claim reference + const context: VerdictContext = { + feedback: verdictResult.reasoning, + }; + + await tx.insert(verdictSchema).values({ parsedPredictionId: prediction.id, - predictionId: prediction.predictionId, verdict: verdictResult.verdict, + context, + acceptedClaimId: acceptedClaim?.id, + }); + + logInfo("Verdict stored with claim reference", { + predictionId: prediction.id, + verdict: verdictResult.verdict, + acceptedClaimId: acceptedClaim?.id ?? "none", }); return true; } + /** + * Main verification pipeline. Processes predictions that have mature claims + * by doing independent research and comparing against submitted claims. 
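+   * Each worker loops until the stop hook fires: it opens a transaction, pulls
+   * the next prediction whose claims have matured (row-locked with SKIP LOCKED),
+   * and evaluates it; when no such prediction exists the iteration returns false.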
+ */ private async runWorker( workerId: number, stopHook: () => boolean, @@ -1532,9 +1289,14 @@ export class PredictionVerifier { while (!stopHook()) { try { - const progress = await this.db.transaction( - async (tx) => await this.processNextPrediction(tx), - ); + const progress = await this.db.transaction(async (tx) => { + const predictionWithClaims = + await this.getNextPredictionWithClaims(tx); + if (!predictionWithClaims) { + return false; + } + return this.processClaimBasedVerdict(tx, predictionWithClaims); + }); consecutiveFailures = 0; @@ -1563,7 +1325,7 @@ export class PredictionVerifier { }); } - async runVerifier(stopHook: () => boolean): Promise { + async runJudge(stopHook: () => boolean): Promise { const workers = Array.from({ length: this.config.concurrency }, (_, i) => this.runWorker(i + 1, stopHook), ); diff --git a/services/swarm-verifier/src/load-test.ts b/services/swarm-judge/src/load-test.ts similarity index 100% rename from services/swarm-verifier/src/load-test.ts rename to services/swarm-judge/src/load-test.ts diff --git a/services/swarm-judge/src/logger.ts b/services/swarm-judge/src/logger.ts new file mode 100644 index 000000000..b33daf9d3 --- /dev/null +++ b/services/swarm-judge/src/logger.ts @@ -0,0 +1,3 @@ +import { BasicLogger } from "@torus-network/torus-utils/logger"; + +export const logger = BasicLogger.create({ name: "swarm-judge" }); diff --git a/services/swarm-verifier/src/utils.ts b/services/swarm-judge/src/utils.ts similarity index 100% rename from services/swarm-verifier/src/utils.ts rename to services/swarm-judge/src/utils.ts diff --git a/services/swarm-judge/tsconfig.json b/services/swarm-judge/tsconfig.json new file mode 100644 index 000000000..cd8017617 --- /dev/null +++ b/services/swarm-judge/tsconfig.json @@ -0,0 +1,3 @@ +{ + "extends": "@torus-ts/tsconfig/app-node.json" +} diff --git a/services/swarm-verifier/turbo.json b/services/swarm-judge/turbo.json similarity index 100% rename from services/swarm-verifier/turbo.json rename to services/swarm-judge/turbo.json diff --git a/services/swarm-verifier/values.yaml b/services/swarm-judge/values.yaml similarity index 88% rename from services/swarm-verifier/values.yaml rename to services/swarm-judge/values.yaml index e1c494b56..0fea261b6 100644 --- a/services/swarm-verifier/values.yaml +++ b/services/swarm-judge/values.yaml @@ -8,13 +8,13 @@ service: deployment: image: - repository: ghcr.io/renlabs-dev/swarm-verifier + repository: ghcr.io/renlabs-dev/swarm-judge additionalLabels: - app: swarm-verifier + app: swarm-judge podLabels: - app: swarm-verifier + app: swarm-judge resources: limits: diff --git a/services/swarm-verifier/DEDUPLICATION.md b/services/swarm-verifier/DEDUPLICATION.md deleted file mode 100644 index d3278b39e..000000000 --- a/services/swarm-verifier/DEDUPLICATION.md +++ /dev/null @@ -1,319 +0,0 @@ -# Prediction Deduplication System - -## Overview - -The Prediction Swarm filters predictions from Twitter threads, with multiple filters potentially processing the same source tweets. This creates a deduplication problem where the same prediction appears multiple times with slightly different slice boundaries or from different filters. - -This system detects and removes duplicate predictions based on slice overlap and content similarity. - -## The Problem - -Duplicates can arise from two scenarios: - -### 1. 
Different Filters Processing the Same Tweet - -Multiple filter agents independently process the same scraped tweet and create different parsed predictions: - -``` -Tweet: "BTC will hit $100k by Q1" - -Filter A extracts: - Target: ["BTC will hit $100k"] - Timeframe: ["Q1"] - -Filter B extracts: - Target: ["BTC will hit $100k"] - Timeframe: ["Q1"] - -Result: Duplicate predictions from different filters -``` - -### 2. Same Filter Processing Different Tweets in Same Thread - -A filter processes multiple tweets in a conversation thread where the prediction spans multiple tweets: - -``` -Tweet 100: "BTC is going to the moon" -Tweet 101: "I mean it will hit $100k" -Tweet 102: "by end of Q1" - -Filter processes tweet 100: - Creates prediction with slices from 100, 101, 102 - -Filter processes tweet 101: - Creates another prediction with overlapping slices from 101, 102 - -Result: Same prediction sourced from different tweets in the thread -``` - -## Solution Architecture - -### Database Schema - -Created a separate table for tracking duplicate relationships: - -```sql -CREATE TABLE prediction_duplicate_relations ( - prediction_id VARCHAR(256) NOT NULL, - canonical_id VARCHAR(256) NOT NULL, - similarity_score NUMERIC(5,4), - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - PRIMARY KEY (prediction_id, canonical_id) -); -``` - -**Key Design Decisions:** - -- **Additive pattern**: Uses `ON CONFLICT DO NOTHING` for race-safe concurrent inserts -- **Separate table**: Doesn't modify predictions directly, allowing multiple deduplication processes to run simultaneously -- **Transitive relationships**: CTE query resolves chains (A→B→C all collapse to canonical A) - -### Deduplication Algorithm - -The algorithm determines duplicates through bidirectional coverage analysis: - -#### Stage 1: Merge Adjacent Slices - -Slices within the same tweet that are adjacent or have a 1-character gap are merged into unified ranges: - -``` -Pred slices: [0-8], [9-18] from same tweet -Merged: [0-18] (1-char gap at position 8 is tolerated) - -Pred slices: [0-8], [10-18] from same tweet -Merged: [0-8], [10-18] (2-char gap keeps them separate) -``` - -#### Stage 2: Calculate Coverage - -For each tweet, calculate what portion of one prediction's ranges are covered by the other's: - -``` -Pred1: [0-18] (18 chars) -Pred2: [0-10], [15-18] (13 chars total) - -Coverage of Pred1 by Pred2: - [0-10] covers 10 chars of [0-18] - [15-18] covers 3 chars of [0-18] - Total: 13/18 = 0.722 -``` - -#### Stage 3: Bidirectional Minimum - -Calculate coverage in both directions and take the **minimum**: - -``` -Coverage A→B: How much of A is covered by B -Coverage B→A: How much of B is covered by A -Final score = MIN(Coverage A→B, Coverage B→A) -``` - -Using minimum ensures extra content in either prediction significantly reduces the score. A superset prediction won't be marked as duplicate. 
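The Stage 2/3 arithmetic can be sketched in a few lines. This is an illustrative model only: it assumes slices have already been merged per tweet into disjoint `{ tweetId, start, end }` ranges with an exclusive `end`, it is not the production `calculateBidirectionalOverlap` code, and the helper names are made up.

```typescript
interface Range {
  tweetId: string;
  start: number; // inclusive character offset
  end: number; // exclusive character offset
}

// Characters of `a` that fall inside some range of `b` (same tweet only).
function coveredChars(a: Range[], b: Range[]): number {
  let covered = 0;
  for (const ra of a) {
    for (const rb of b) {
      if (ra.tweetId !== rb.tweetId) continue;
      covered += Math.max(0, Math.min(ra.end, rb.end) - Math.max(ra.start, rb.start));
    }
  }
  return covered;
}

function totalChars(ranges: Range[]): number {
  return ranges.reduce((sum, r) => sum + (r.end - r.start), 0);
}

// Stage 3: take the MIN of both coverage directions.
function bidirectionalScore(a: Range[], b: Range[]): number {
  const lenA = totalChars(a);
  const lenB = totalChars(b);
  if (lenA === 0 || lenB === 0) return 0;
  return Math.min(coveredChars(a, b) / lenA, coveredChars(b, a) / lenB);
}

// Worked example from Stage 2:
const pred1: Range[] = [{ tweetId: "t1", start: 0, end: 18 }];
const pred2: Range[] = [
  { tweetId: "t1", start: 0, end: 10 },
  { tweetId: "t1", start: 15, end: 18 },
];
console.log(bidirectionalScore(pred1, pred2).toFixed(3)); // "0.722"
```

Running it on the Stage 2 example gives 13/18 ≈ 0.722, well below the 0.96 threshold checked in Stage 4.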
- -#### Stage 4: Threshold Check - -``` -Duplicate if: - target_score ≥ 0.96 AND timeframe_score ≥ 0.96 -``` - -## Algorithm Behavior Examples - -### ✅ Duplicate: Same Content, Different Slice Boundaries - -``` -Tweet: "BTC will hit $100k by EOY 2025" -Pred1 extracts: [0-25] as single slice -Pred2 extracts: [0-12], [12-18], [18-25] as three slices - -Both cover the same positions → score: 1.000 -Result: DUPLICATE ✅ -``` - -### ❌ Not Duplicate: Superset Prediction - -``` -Tweet: "BTC will hit $100k and ETH will reach $5k by Q1" -Pred1 extracts: [0-18] "BTC will hit $100k" -Pred2 extracts: [0-41] "BTC will hit $100k and ETH will reach $5k" - -Coverage Pred1→Pred2: 18/18 = 1.0 (Pred2 fully covers Pred1) -Coverage Pred2→Pred1: 18/41 = 0.44 (Pred1 only covers 44% of Pred2) -Bidirectional min: 0.44 -Result: NOT DUPLICATE ✅ -``` - -### ❌ Not Duplicate: Partial Overlap from Same Tweet - -``` -Tweet: "BTC will hit $100k ETH will hit $10k SOL will reach $200 by 2025" -Pred1 extracts: "BTC will hit $100k" + "ETH will hit $10k" -Pred2 extracts: "BTC will hit $100k" + "SOL will reach $200" - -Only the first part overlaps, second parts are different positions. -Target score: 0.486 -Result: NOT DUPLICATE ✅ -``` - -### ❌ Not Duplicate: Different Tweets - -``` -Tweet A: "BTC will hit $100k by 2025" -Tweet B: "BTC will hit $100k by 2025" (different tweet, same text) - -Pred1 extracts from Tweet A -Pred2 extracts from Tweet B - -Different tweet IDs → score: 0.000 -Result: NOT DUPLICATE ✅ -``` - -## Configuration - -### Thresholds - -```typescript -targetThreshold = 0.96; // 96% similarity required for target -timeframeThreshold = 0.96; // 96% similarity required for timeframe -``` - -Both thresholds must be met for predictions to be considered duplicates. - -### Tuning Guidelines - -- **Higher threshold (0.98+)**: More conservative, fewer false positives, but may miss legitimate duplicates with small gaps -- **Lower threshold (0.90-0.95)**: More aggressive, catches more duplicates, but may incorrectly merge predictions with partial overlap -- **Current (0.96)**: Balanced approach that correctly handles: - - Different slicing strategies (duplicate) - - Superset predictions (not duplicate) - - Partial overlaps (not duplicate) - -## Canonical Resolution - -Query uses recursive CTE to resolve duplicate chains: - -```sql -WITH RECURSIVE duplicate_graph AS ( - -- Start with all predictions - SELECT id, id as root_id, 0 as depth - FROM parsed_predictions - - UNION - - -- Follow duplicate chains - SELECT dr.prediction_id, dg.root_id, dg.depth + 1 - FROM duplicate_graph dg - JOIN prediction_duplicate_relations dr ON dr.canonical_id = dg.id - WHERE dg.depth < 10 -), -canonical_map AS ( - -- Pick lowest ID as canonical - SELECT id, MIN(root_id) as canonical_id - FROM duplicate_graph - GROUP BY id -) -SELECT * FROM parsed_predictions p -JOIN canonical_map cm ON cm.id = p.id -WHERE p.id = cm.canonical_id -``` - -This ensures: - -- Transitive relationships are resolved (A→B→C all map to A) -- Cycles are broken deterministically (MIN(id)) -- Depth limit prevents infinite loops - -## Testing - -Test suite with 18 cases covering position-based overlap scenarios. Tests use real tweet definitions as source of truth, ensuring slices reference valid positions. - -1. Identical extractions from same tweet -2. Same span, different slice boundaries -3. Superset extraction (extra content) -4. Different tweets entirely (zero overlap) -5. Same text from different tweets (not duplicates) -6. 
Cross-tweet thread (identical extractions) -7. Cross-tweet partial extraction -8. Large gap between slices (identical discontiguous) -9. Gap vs contiguous spans -10. Overlapping slices within prediction -11. Out-of-order slices -12. Asymmetric containment (subset vs full) -13. Adjacent slices vs single slice -14. 1-char gap merge tolerance -15. 2-char gap coverage penalty -16. Empty target arrays -17. Partial overlap (shared and different content) -18. Single vs multiple targets - -Run tests: - -```sh -pnpm --filter swarm-verifier dev --test -``` - -## Integration Points - -### When to Run Deduplication - -Options for triggering deduplication: - -1. **Just-in-Time** (recommended for initial implementation) - - Run deduplication when verifier queries for predictions - - Ensures duplicates are removed before verification - - Simple, no additional infrastructure - -2. **Post-Insert Hook** - - Deduplicate immediately after filter stores predictions - - Can be async (fire-and-forget) - - May slow down filter inserts if awaited - -3. **Periodic Background Job** - - Run deduplication every N minutes - - Decoupled from other operations - - Delay before duplicates are detected - -### Usage Example - -```typescript -import { comparePredictions, type ParsedPredictionForDedup } from "./verifier"; - -// Fetch predictions for a conversation -const predictions = await fetchPredictionsForConversation(conversationId); - -// Compare all pairs -const duplicateClusters = new Map(); - -for (let i = 0; i < predictions.length; i++) { - for (let j = i + 1; j < predictions.length; j++) { - const result = comparePredictions(predictions[i], predictions[j]); - - if (result.isDuplicate) { - // Store duplicate relationship - const canonicalId = predictions[i].id; - const duplicateId = predictions[j].id; - - if (!duplicateClusters.has(canonicalId)) { - duplicateClusters.set(canonicalId, []); - } - duplicateClusters.get(canonicalId)!.push(duplicateId); - } - } -} - -// Store relationships in database -await storeDuplicateRelationships(tx, duplicateClusters); -``` - -## Key Insights - -1. **Position-Based Comparison**: The algorithm compares slice positions within tweets, not text content. Same text from different tweets has zero overlap. - -2. **Merge Tolerance**: Adjacent slices and slices with 1-character gaps merge into unified ranges, handling different slicing granularities. - -3. **Bidirectional Minimum**: Taking MIN of both coverage directions prevents supersets from being marked as duplicates. Extra content in either prediction significantly reduces the score. - -4. **Race-Safe Design**: Additive pattern with separate relations table allows concurrent deduplication without conflicts. - -5. **High Threshold**: 96% coverage requirement ensures high precision while maintaining recall for legitimate duplicates. 
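To make the merge-tolerance insight concrete, here is a similarly minimal sketch of the Stage 1 merge. It is illustrative only and does not reproduce the removed implementation; the 1-character gap tolerance mirrors the behaviour described above.

```typescript
interface Slice {
  start: number; // inclusive
  end: number; // exclusive
}

// Stage 1: merge slices from one tweet, tolerating gaps of at most `gapTolerance` characters.
function mergeSlices(slices: Slice[], gapTolerance = 1): Slice[] {
  const sorted = [...slices].sort((a, b) => a.start - b.start);
  const merged: Slice[] = [];
  for (const slice of sorted) {
    const last = merged[merged.length - 1];
    if (last && slice.start - last.end <= gapTolerance) {
      last.end = Math.max(last.end, slice.end); // extend the current range
    } else {
      merged.push({ ...slice });
    }
  }
  return merged;
}

console.log(mergeSlices([{ start: 0, end: 8 }, { start: 9, end: 18 }]));
// [ { start: 0, end: 18 } ]          -- 1-char gap at position 8 is tolerated
console.log(mergeSlices([{ start: 0, end: 8 }, { start: 10, end: 18 }]));
// [ { start: 0, end: 8 }, { start: 10, end: 18 } ]  -- 2-char gap keeps them separate
```

These merged ranges are what the Stage 2 coverage calculation operates on.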
diff --git a/services/swarm-verifier/eslint.config.js b/services/swarm-verifier/eslint.config.js index 7a3e95f54..b91726d06 100644 --- a/services/swarm-verifier/eslint.config.js +++ b/services/swarm-verifier/eslint.config.js @@ -1,9 +1,3 @@ import baseConfig from "@torus-ts/eslint-config/base"; -/** @type {import('typescript-eslint').Config} */ -export default [ - { - ignores: [], - }, - ...baseConfig, -]; +export default [...baseConfig]; diff --git a/services/swarm-verifier/package.json b/services/swarm-verifier/package.json index aa82e398c..f6200b4b5 100644 --- a/services/swarm-verifier/package.json +++ b/services/swarm-verifier/package.json @@ -11,43 +11,37 @@ "build": "tsc", "clean": "rm -rf .turbo node_modules", "dev": "pnpm with-env tsx ./src/index.ts", - "dev:dedup-dry-run": "pnpm with-env tsx ./src/dedup-dry-run.ts", - "dev:dedup-retroactive": "pnpm with-env tsx ./src/dedup-retroactive.ts", "format": "prettier --check . --ignore-path ../../.gitignore --cache --cache-location .cache/.prettiercache", "format-fix": "prettier --write . --ignore-path ../../.gitignore --cache --cache-location .cache/.prettiercache", "lint": "eslint --cache --cache-location .cache/.eslintcache", "lint-fix": "eslint --cache --cache-location .cache/.eslintcache --fix", "start": "node ./dist/index.js", "start:local": "pnpm with-env pnpm start", - "test": "vitest run", "typecheck": "tsc --noEmit", "with-env": "dotenv -e ../../.env --" }, "prettier": "@torus-ts/prettier-config", "dependencies": { - "@t3-oss/env-core": "^0.11.1", - "@torus-ts/db": "workspace:*", - "@torus-network/sdk": "workspace:*", + "@polkadot/api": "catalog:", + "@polkadot/util": "catalog:", + "@polkadot/util-crypto": "catalog:", "@torus-network/torus-utils": "workspace:*", - "cors": "^2.8.5", - "express": "^4.19.2", - "node-fetch": "^3.3.2", - "superjson": "catalog:", - "zod": "catalog:", + "@torus-ts/db": "workspace:*", + "canonicalize": "^2.1.0", + "openai": "^4.73.1", "tsafe": "catalog:", - "vitest": "catalog:" + "zod": "catalog:", + "zod-to-json-schema": "^3.24.6" }, "devDependencies": { "@torus-ts/eslint-config": "workspace:*", "@torus-ts/prettier-config": "workspace:*", "@torus-ts/tsconfig": "workspace:*", - "@types/cors": "^2.8.17", - "@types/express": "^4.17.21", "@types/node": "catalog:", "dotenv-cli": "catalog:", "eslint": "catalog:", "prettier": "catalog:", - "typescript": "catalog:", - "vitest": "catalog:" + "tsx": "catalog:", + "typescript": "catalog:" } } diff --git a/services/swarm-verifier/src/ai/openrouter-client.ts b/services/swarm-verifier/src/ai/openrouter-client.ts new file mode 100644 index 000000000..b69553aa1 --- /dev/null +++ b/services/swarm-verifier/src/ai/openrouter-client.ts @@ -0,0 +1,195 @@ +import OpenAI from "openai"; +import type { z } from "zod"; +import { ZodError } from "zod"; +import { zodToJsonSchema } from "zod-to-json-schema"; + +export interface OpenRouterConfig { + apiKey: string; + baseURL?: string; + model?: string; + defaultTemperature?: number; + defaultMaxTokens?: number; +} + +export interface CompletionOptions { + temperature?: number; + maxTokens?: number; +} + +export class OpenRouterClient { + private client: OpenAI; + private model: string; + private defaultTemperature: number; + private defaultMaxTokens: number; + + constructor(config: OpenRouterConfig) { + this.client = new OpenAI({ + apiKey: config.apiKey, + baseURL: config.baseURL ?? "https://openrouter.ai/api/v1", + timeout: 120000, + maxRetries: 0, + }); + + this.model = config.model ?? 
"anthropic/claude-sonnet-4"; + this.defaultTemperature = config.defaultTemperature ?? 0.7; + this.defaultMaxTokens = config.defaultMaxTokens ?? 4096; + } + + async completeStructured( + systemPrompt: string, + userPrompt: string, + outputSchema: z.ZodType, + options?: CompletionOptions, + ): Promise { + const temperature = options?.temperature ?? this.defaultTemperature; + const maxTokens = options?.maxTokens ?? this.defaultMaxTokens; + + const maxRetries = 3; + let lastError: unknown; + + const rawSchema = zodToJsonSchema(outputSchema, { + $refStrategy: "none", + }) as Record; + + const jsonSchema = { + type: "object", + ...rawSchema, + additionalProperties: false, + }; + + for (let attempt = 0; attempt < maxRetries; attempt++) { + try { + const completion = await this.client.chat.completions.create({ + model: this.model, + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, + ], + temperature, + max_tokens: maxTokens, + response_format: { + type: "json_schema", + json_schema: { + name: "verification_response", + strict: true, + schema: jsonSchema, + }, + }, + }); + + const content = completion.choices[0]?.message.content; + if (!content) { + throw new Error("No content in completion response"); + } + + const jsonResponse: unknown = JSON.parse(content); + const validatedResponse = outputSchema.parse(jsonResponse); + + return validatedResponse; + } catch (error) { + lastError = error; + + if (error instanceof ZodError && attempt < maxRetries - 1) { + continue; + } + + break; + } + } + + if (lastError instanceof ZodError) { + throw new Error( + `OpenRouter completion failed (Zod validation): ${JSON.stringify(lastError.errors)}`, + ); + } + if (lastError instanceof Error) { + console.error(`[OpenRouter] Full error details:`, { + message: lastError.message, + cause: lastError.cause, + stack: lastError.stack, + }); + throw new Error(`OpenRouter completion failed: ${lastError.message}`); + } + throw lastError; + } + + async completeWithSearch( + systemPrompt: string, + userPrompt: string, + outputSchema: z.ZodType, + options?: CompletionOptions, + ): Promise { + const temperature = options?.temperature ?? this.defaultTemperature; + const maxTokens = options?.maxTokens ?? 
this.defaultMaxTokens; + + const maxRetries = 3; + let lastError: unknown; + + const rawSchema = zodToJsonSchema(outputSchema, { + $refStrategy: "none", + }) as Record; + + const jsonSchema = { + type: "object", + ...rawSchema, + additionalProperties: false, + }; + + for (let attempt = 0; attempt < maxRetries; attempt++) { + try { + const completion = await this.client.chat.completions.create({ + model: this.model, + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, + ], + temperature, + max_tokens: maxTokens, + response_format: { + type: "json_schema", + json_schema: { + name: "verification_response", + strict: true, + schema: jsonSchema, + }, + }, + // Enable web search via OpenRouter plugins + // @ts-expect-error - OpenRouter-specific parameter + plugins: [{ id: "web" }], + }); + + const content = completion.choices[0]?.message.content; + if (!content) { + throw new Error("No content in completion response"); + } + + const jsonResponse: unknown = JSON.parse(content); + const validatedResponse = outputSchema.parse(jsonResponse); + + return validatedResponse; + } catch (error) { + lastError = error; + + if (error instanceof ZodError && attempt < maxRetries - 1) { + continue; + } + + break; + } + } + + if (lastError instanceof ZodError) { + throw new Error( + `OpenRouter completion failed (Zod validation): ${JSON.stringify(lastError.errors)}`, + ); + } + if (lastError instanceof Error) { + throw new Error(`OpenRouter completion failed: ${lastError.message}`); + } + throw lastError; + } + + getModel(): string { + return this.model; + } +} diff --git a/services/swarm-verifier/src/ai/prompt-loader.ts b/services/swarm-verifier/src/ai/prompt-loader.ts new file mode 100644 index 000000000..a0eec5ea6 --- /dev/null +++ b/services/swarm-verifier/src/ai/prompt-loader.ts @@ -0,0 +1,51 @@ +import { readFileSync } from "node:fs"; +import { resolve } from "node:path"; + +/** + * Loads prompts from markdown files. + * Simpler than swarm-filter's TOML-based prompts since verification uses single-file prompts. + */ +export class PromptLoader { + private cache = new Map(); + private promptsDir: string; + + constructor(promptsDir?: string) { + this.promptsDir = promptsDir ?? 
resolve(process.cwd()); + } + + private loadPrompt(filename: string): string { + const cached = this.cache.get(filename); + if (cached !== undefined) { + return cached; + } + + const promptPath = resolve(this.promptsDir, filename); + + try { + const content = readFileSync(promptPath, "utf-8"); + this.cache.set(filename, content); + return content; + } catch (error) { + if (error instanceof Error) { + throw new Error(`Failed to load prompt ${filename}: ${error.message}`); + } + throw error; + } + } + + getTimeframePrompt(): string { + return this.loadPrompt("TIMEFRAME_PROMPT.md"); + } + + getFilterValidationPrompt(): string { + return this.loadPrompt("FILTER_VALIDATION_PROMPT.md"); + } + + getVerdictPrompt(): string { + return this.loadPrompt("VERDICT_PROMPT.md"); + } + + clearCache(): void { + this.cache.clear(); + } +} diff --git a/services/swarm-verifier/src/api-client.ts b/services/swarm-verifier/src/api-client.ts new file mode 100644 index 000000000..0553265ee --- /dev/null +++ b/services/swarm-verifier/src/api-client.ts @@ -0,0 +1,272 @@ +import { Keyring } from "@polkadot/api"; +import { u8aToHex } from "@polkadot/util"; +import { blake2AsHex, cryptoWaitReady } from "@polkadot/util-crypto"; +import { BasicLogger } from "@torus-network/torus-utils/logger"; +import canonicalize from "canonicalize"; + +const logger = BasicLogger.create({ name: "swarm-verifier:api-client" }); + +interface PostSlice { + source: { tweet_id: string }; + start: number; + end: number; +} + +interface ClaimablePrediction { + id: string; + predictionId: string; + target: PostSlice[]; + timeframe: PostSlice[]; + topicName: string; + createdAt: string; +} + +interface ClaimableResponse { + predictions: ClaimablePrediction[]; + nextCursor: string | null; + hasMore: boolean; +} + +interface PredictionTweet { + id: string; + text: string; + authorUsername: string | null; + date: string; +} + +interface PredictionContext { + id: string; + predictionId: string; + target: PostSlice[]; + timeframe: PostSlice[]; + tweets: PredictionTweet[]; + topicName: string; +} + +interface ClaimSource { + url: string; + title?: string; + snippet?: string; + retrievedAt: string; + archiveUrl?: string; +} + +interface ClaimContent { + outcome: boolean; + confidence: string; + reasoning: string; + sources: ClaimSource[]; + timeframe: { + startUtc: string; + endUtc: string; + precision: string; + }; + sentAt: string; +} + +interface FeedbackContent { + failureCause: string; + reason: string; + sentAt: string; +} + +/** + * Swarm API client for open verifiers. + * Uses per-request signature authentication. 
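+ * Each request canonicalizes an { address, timestamp } payload, hashes it with
+ * blake2, and signs the hash with the agent's sr25519 key; the result is sent in
+ * the x-agent-address, x-signature and x-timestamp headers. Claim and feedback
+ * bodies are signed the same way via signContent.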
+ */ +export class SwarmApiClient { + private apiUrl: string; + private keypair: ReturnType["addFromUri"]>; + private address: string; + + private constructor( + apiUrl: string, + keypair: ReturnType["addFromUri"]>, + ) { + this.apiUrl = apiUrl.replace(/\/$/, ""); + this.keypair = keypair; + this.address = keypair.address; + } + + static async create( + apiUrl: string, + mnemonic: string, + ): Promise { + await cryptoWaitReady(); + + const keyring = new Keyring({ type: "sr25519" }); + const keypair = keyring.addFromUri(mnemonic); + + logger.info(`Created API client for address: ${keypair.address}`); + logger.info(`API URL: ${apiUrl}`); + + return new SwarmApiClient(apiUrl, keypair); + } + + getAddress(): string { + return this.address; + } + + private generateAuthHeaders(): Record { + const timestamp = new Date().toISOString(); + + const payload = { + address: this.address, + timestamp, + }; + + const payloadCanonical = canonicalize(payload); + if (!payloadCanonical) { + throw new Error("Failed to canonicalize auth payload"); + } + + const payloadHash = blake2AsHex(payloadCanonical); + const signature = u8aToHex(this.keypair.sign(payloadHash)); + + return { + "x-agent-address": this.address, + "x-signature": signature, + "x-timestamp": timestamp, + "Content-Type": "application/json", + }; + } + + private signContent(content: T): string { + const canonical = canonicalize(content); + if (!canonical) { + throw new Error("Failed to canonicalize content"); + } + const hash = blake2AsHex(canonical); + const signature = u8aToHex(this.keypair.sign(hash)); + return signature; + } + + private async get( + path: string, + params?: Record, + ): Promise { + const url = new URL(`${this.apiUrl}${path}`); + + if (params) { + for (const [key, value] of Object.entries(params)) { + if (value !== undefined) { + url.searchParams.set(key, String(value)); + } + } + } + + const response = await fetch(url.toString(), { + method: "GET", + headers: this.generateAuthHeaders(), + }); + + if (!response.ok) { + const errorBody = await response.text(); + throw new Error( + `API request failed: ${response.status} ${response.statusText} - ${errorBody}`, + ); + } + + return response.json() as Promise; + } + + private async post(path: string, body: unknown): Promise { + const url = `${this.apiUrl}${path}`; + + const response = await fetch(url, { + method: "POST", + headers: this.generateAuthHeaders(), + body: JSON.stringify(body), + }); + + if (!response.ok) { + const errorBody = await response.text(); + throw new Error( + `API request failed: ${response.status} ${response.statusText} - ${errorBody}`, + ); + } + + return response.json() as Promise; + } + + async getClaimablePredictions( + after?: string, + limit = 50, + topics?: string[], + ): Promise { + return this.get("/v1/predictions/claimable", { + after, + limit, + topics: topics?.join(","), + }); + } + + async getPredictionContext(predictionId: string): Promise { + return this.get( + `/v1/predictions/${predictionId}/context`, + ); + } + + async submitClaim( + predictionId: string, + outcome: boolean, + confidence: number, + reasoning: string, + sources: ClaimSource[], + timeframe: { startUtc: string; endUtc: string; precision: string }, + ): Promise<{ claimId: string; parsedPredictionId: string }> { + const content: ClaimContent = { + outcome, + confidence: confidence.toFixed(2), + reasoning, + sources, + timeframe, + sentAt: new Date().toISOString(), + }; + + const signature = this.signContent(content); + + const body = { + content, + metadata: { + signature, + 
version: 1, + }, + }; + + return this.post(`/v1/predictions/${predictionId}/claim`, body); + } + + async submitFeedback( + predictionId: string, + failureCause: string, + reason: string, + ): Promise<{ feedbackId: string; parsedPredictionId: string }> { + const content: FeedbackContent = { + failureCause, + reason, + sentAt: new Date().toISOString(), + }; + + const signature = this.signContent(content); + + const body = { + content, + metadata: { + signature, + version: 1, + }, + }; + + return this.post(`/v1/predictions/${predictionId}/feedback`, body); + } +} + +export type { + ClaimablePrediction, + ClaimableResponse, + ClaimSource, + PostSlice, + PredictionContext, + PredictionTweet, +}; diff --git a/services/swarm-verifier/src/dedup-retroactive.ts b/services/swarm-verifier/src/dedup-retroactive.ts deleted file mode 100644 index 8affcf13e..000000000 --- a/services/swarm-verifier/src/dedup-retroactive.ts +++ /dev/null @@ -1,298 +0,0 @@ -/** - * Retroactive deduplication script. - * Usage: pnpm --filter swarm-verifier dev:dedup-retroactive - */ - -import { createDb } from "@torus-ts/db/client"; -import type { DB } from "@torus-ts/db/client"; -import { - deduplicationProcessedConversationsSchema, - parsedPredictionSchema, - predictionDuplicateRelationsSchema, - scrapedTweetSchema, -} from "@torus-ts/db/schema"; -import { and, eq, gt, isNotNull, isNull, max, or, sql } from "drizzle-orm"; -import type { ParsedPredictionForDedup } from "./verifier.js"; -import { comparePredictions } from "./verifier.js"; - -interface DuplicateRelation { - predictionId: string; - canonicalId: string; - similarityScore: number; -} - -let interrupted = false; -const globalStats = { - totalConversations: 0, - processedCount: 0, - skippedCount: 0, - totalPredictions: 0, - totalDuplicatesInserted: 0, - conversationsWithDuplicates: 0, - errors: 0, -}; - -function printSummary() { - console.log("\n" + "=".repeat(60)); - console.log("SUMMARY" + (interrupted ? " (interrupted)" : "")); - console.log("=".repeat(60)); - console.log(`Conversations to process: ${globalStats.totalConversations}`); - console.log(`Conversations processed: ${globalStats.processedCount}`); - console.log( - `Conversations skipped (already done): ${globalStats.skippedCount}`, - ); - console.log(`Total predictions analyzed: ${globalStats.totalPredictions}`); - console.log( - `Conversations with duplicates: ${globalStats.conversationsWithDuplicates}`, - ); - console.log( - `Total duplicate relations inserted: ${globalStats.totalDuplicatesInserted}`, - ); - if (globalStats.errors > 0) { - console.log(`Errors encountered: ${globalStats.errors}`); - } - console.log("\n=== " + (interrupted ? 
"INTERRUPTED" : "COMPLETE") + " ==="); -} - -process.on("SIGINT", () => { - console.log("\n\nInterrupted by user (Ctrl+C)"); - interrupted = true; - printSummary(); - process.exit(0); -}); - -function buildDuplicateClusters( - predictions: ParsedPredictionForDedup[], -): DuplicateRelation[] { - const predById = new Map(); - const parent = new Map(); - - for (const pred of predictions) { - predById.set(pred.id, pred); - parent.set(pred.id, pred.id); - } - - function find(id: string): string { - const p = parent.get(id); - if (p === undefined || p === id) return id; - const root = find(p); - parent.set(id, root); - return root; - } - - function union(id1: string, id2: string): void { - const root1 = find(id1); - const root2 = find(id2); - if (root1 === root2) return; - if (root1 < root2) { - parent.set(root2, root1); - } else { - parent.set(root1, root2); - } - } - - for (let i = 0; i < predictions.length; i++) { - for (let j = i + 1; j < predictions.length; j++) { - const pred1 = predictions[i]; - const pred2 = predictions[j]; - if (!pred1 || !pred2) continue; - - if (comparePredictions(pred1, pred2).isDuplicate) { - union(pred1.id, pred2.id); - } - } - } - - const relations: DuplicateRelation[] = []; - for (const pred of predictions) { - const root = find(pred.id); - if (root !== pred.id) { - const canonical = predById.get(root); - if (canonical) { - const result = comparePredictions(pred, canonical); - relations.push({ - predictionId: pred.id, - canonicalId: root, - similarityScore: (result.targetScore + result.timeframeScore) / 2, - }); - } - } - } - - return relations; -} - -async function processConversation( - db: DB, - conversationId: bigint, -): Promise<{ predictionsProcessed: number; duplicatesFound: number }> { - const predictions = await db - .select({ - id: parsedPredictionSchema.id, - predictionId: parsedPredictionSchema.predictionId, - target: parsedPredictionSchema.target, - timeframe: parsedPredictionSchema.timeframe, - }) - .from(parsedPredictionSchema) - .innerJoin( - scrapedTweetSchema, - eq(scrapedTweetSchema.predictionId, parsedPredictionSchema.predictionId), - ) - .where(eq(scrapedTweetSchema.conversationId, conversationId)); - - if (predictions.length < 2) { - return { predictionsProcessed: predictions.length, duplicatesFound: 0 }; - } - - const relations = buildDuplicateClusters( - predictions as ParsedPredictionForDedup[], - ); - - if (relations.length > 0) { - await db - .insert(predictionDuplicateRelationsSchema) - .values( - relations.map((r) => ({ - predictionId: r.predictionId, - canonicalId: r.canonicalId, - similarityScore: r.similarityScore.toFixed(4), - })), - ) - .onConflictDoNothing(); - } - - await db - .insert(deduplicationProcessedConversationsSchema) - .values({ - conversationId, - predictionsProcessed: predictions.length, - duplicatesFound: relations.length, - }) - .onConflictDoUpdate({ - target: deduplicationProcessedConversationsSchema.conversationId, - set: { - predictionsProcessed: predictions.length, - duplicatesFound: relations.length, - updatedAt: new Date(), - }, - }); - - return { - predictionsProcessed: predictions.length, - duplicatesFound: relations.length, - }; -} - -async function main() { - console.log("=== RETROACTIVE DEDUPLICATION ===\n"); - console.log("Connecting to database..."); - - const db = createDb(); - - // Get the global cutoff - max updatedAt from deduplication table. - // Conversations with all predictions before this were already processed. 
- const cutoffResult = await db - .select({ - maxUpdatedAt: max(deduplicationProcessedConversationsSchema.updatedAt), - }) - .from(deduplicationProcessedConversationsSchema); - const globalCutoff = cutoffResult[0]?.maxUpdatedAt; - - if (globalCutoff) { - console.log(`Global cutoff: ${globalCutoff.toISOString()}`); - } else { - console.log( - "No previous deduplication records found, processing all conversations", - ); - } - - console.log("Fetching conversations with new predictions...\n"); - - // Find conversations that have predictions newer than: - // - Their deduplication record's createdAt (if exists) - // - The global cutoff (if no dedup record exists) - const conversations = await db - .selectDistinct({ - conversationId: scrapedTweetSchema.conversationId, - }) - .from(scrapedTweetSchema) - .innerJoin( - parsedPredictionSchema, - eq(scrapedTweetSchema.predictionId, parsedPredictionSchema.predictionId), - ) - .leftJoin( - deduplicationProcessedConversationsSchema, - eq( - scrapedTweetSchema.conversationId, - deduplicationProcessedConversationsSchema.conversationId, - ), - ) - .where( - and( - isNotNull(scrapedTweetSchema.predictionId), - isNotNull(scrapedTweetSchema.conversationId), - or( - // Conversation has dedup record but has newer predictions - and( - isNotNull(deduplicationProcessedConversationsSchema.updatedAt), - gt( - parsedPredictionSchema.createdAt, - deduplicationProcessedConversationsSchema.updatedAt, - ), - ), - // No dedup record and (no cutoff OR prediction is newer than cutoff) - and( - isNull(deduplicationProcessedConversationsSchema.updatedAt), - globalCutoff - ? gt(parsedPredictionSchema.createdAt, globalCutoff) - : sql`true`, - ), - ), - ), - ); - - globalStats.totalConversations = conversations.length; - console.log( - `Found ${conversations.length} conversations with new predictions\n`, - ); - - if (conversations.length === 0) { - console.log("Nothing to process!"); - process.exit(0); - } - - for (const { conversationId } of conversations) { - if (interrupted) break; - if (conversationId === null) continue; - - globalStats.processedCount++; - if (globalStats.processedCount % 100 === 0) { - console.log( - `Progress: ${globalStats.processedCount}/${conversations.length} ` + - `(${((globalStats.processedCount / conversations.length) * 100).toFixed(1)}%) - ` + - `${globalStats.totalDuplicatesInserted} duplicates inserted`, - ); - } - - try { - const result = await processConversation(db, conversationId); - globalStats.totalPredictions += result.predictionsProcessed; - globalStats.totalDuplicatesInserted += result.duplicatesFound; - if (result.duplicatesFound > 0) { - globalStats.conversationsWithDuplicates++; - } - } catch (err) { - globalStats.errors++; - console.error(`Error processing conversation ${conversationId}:`, err); - } - } - - printSummary(); - process.exit(0); -} - -main().catch((err) => { - console.error("Fatal error:", err); - printSummary(); - process.exit(1); -}); diff --git a/services/swarm-verifier/src/index.ts b/services/swarm-verifier/src/index.ts index 705e95fda..eedce6b81 100644 --- a/services/swarm-verifier/src/index.ts +++ b/services/swarm-verifier/src/index.ts @@ -1,41 +1,135 @@ -import { readFile } from "node:fs/promises"; -import { dirname, join } from "node:path"; -import { fileURLToPath } from "node:url"; -import { createDb } from "@torus-ts/db/client"; -import { PredictionVerifier } from "./verifier"; +/** + * Swarm Verifier Service + * + * Polls claimable predictions via HTTP API, researches them, and submits claims. 
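+ * Pagination state is persisted per agent address (see seen-storage), so a
+ * restarted verifier resumes from its last stored cursor instead of re-scanning
+ * the whole claimable list.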
+ */ -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); +import { validateEnvOrExit } from "@torus-network/torus-utils/env"; +import { BasicLogger } from "@torus-network/torus-utils/logger"; +import { z } from "zod"; +import { OpenRouterClient } from "./ai/openrouter-client"; +import { PromptLoader } from "./ai/prompt-loader"; +import { SwarmApiClient } from "./api-client"; +import { getVerifierCursor, updateVerifierCursor } from "./seen-storage"; +import { PredictionVerifier } from "./services/prediction-verifier"; +import { withRetry } from "./utils/retry"; -if (!process.env.OPENROUTER_API_KEY) { - console.error("missing OPENROUTER_API_KEY env var"); - process.exit(1); +const logger = BasicLogger.create({ name: "swarm-verifier" }); + +const getEnv = validateEnvOrExit({ + SWARM_API_URL: z.string().url().default("https://api.predictionswarm.com"), + OPENROUTER_API_KEY: z.string().min(1, "OPENROUTER_API_KEY is required"), + AGENT_MNEMONIC: z.string().min(1, "AGENT_MNEMONIC is required"), + VERIFICATION_MODEL: z.string().default("anthropic/claude-sonnet-4"), + CONCURRENCY: z.coerce.number().int().positive().default(3), + POLL_INTERVAL_MS: z.coerce.number().int().positive().default(60000), + TOPICS: z + .string() + .optional() + .transform((val) => + val + ? val + .split(",") + .map((t) => t.trim().toLowerCase()) + .filter((t) => t.length > 0) + : undefined, + ), +}); + +async function main() { + logger.info("Swarm Verifier Open Service starting..."); + + const env = getEnv(process.env); + logger.info("Environment validated"); + logger.info(`Verification model: ${env.VERIFICATION_MODEL}`); + logger.info(`API URL: ${env.SWARM_API_URL}`); + if (env.TOPICS) { + logger.info(`Filtering topics: ${env.TOPICS.join(", ")}`); + } + + const apiClient = await SwarmApiClient.create( + env.SWARM_API_URL, + env.AGENT_MNEMONIC, + ); + + const llmClient = new OpenRouterClient({ + apiKey: env.OPENROUTER_API_KEY, + model: env.VERIFICATION_MODEL, + }); + + const promptLoader = new PromptLoader(); + + const verifier = new PredictionVerifier({ + llmClient, + promptLoader, + apiClient, + }); + + const verifierAddress = apiClient.getAddress(); + let cursor = await getVerifierCursor(verifierAddress); + + if (cursor) { + logger.info(`Resuming from cursor: ${cursor}`); + } + + logger.info(`Starting verification loop with concurrency=${env.CONCURRENCY}`); + + while (true) { + try { + const { predictions, nextCursor, hasMore } = await withRetry(() => + apiClient.getClaimablePredictions(cursor, env.CONCURRENCY, env.TOPICS), + ); + + if (predictions.length === 0) { + logger.info("No claimable predictions available, sleeping..."); + await new Promise((resolve) => + setTimeout(resolve, env.POLL_INTERVAL_MS), + ); + continue; + } + + logger.info(`Processing ${predictions.length} predictions...`); + + let batchErrors = 0; + const verificationPromises = predictions.map((prediction) => + verifier.verifyPrediction(prediction).catch((error) => { + logger.error(`Failed to verify prediction ${prediction.id}`, error); + batchErrors++; + return null; + }), + ); + const results = await Promise.all(verificationPromises); + + const claimsCount = results.filter((r) => r === "claim").length; + const feedbackCount = results.filter((r) => r === "feedback").length; + + logger.info( + `Batch: ${claimsCount} claims | ${feedbackCount} feedback | ${batchErrors} errors | ${predictions.length} total`, + ); + logger.info( + `Total: ${verifier.stats.claimsSubmitted} claims | ${verifier.stats.feedbackSubmitted} 
feedback | ${verifier.stats.errors} errors | ${verifier.stats.predictionsProcessed} processed`, + ); + + if (nextCursor) { + cursor = nextCursor; + await updateVerifierCursor(verifierAddress, cursor); + } + + if (!hasMore) { + logger.info("Reached end of claimable predictions, sleeping..."); + cursor = undefined; + await new Promise((resolve) => + setTimeout(resolve, env.POLL_INTERVAL_MS), + ); + } + } catch (error) { + logger.error("Error processing batch", error); + await new Promise((resolve) => setTimeout(resolve, env.POLL_INTERVAL_MS)); + } + } } -const timeframePrompt = await readFile( - join(__dirname, "../TIMEFRAME_PROMPT.md"), - "utf-8", -); - -const filterValidationPrompt = await readFile( - join(__dirname, "../FILTER_VALIDATION_PROMPT.md"), - "utf-8", -); - -const verdictPrompt = await readFile( - join(__dirname, "../VERDICT_PROMPT.md"), - "utf-8", -); - -const scraper = new PredictionVerifier( - { - openrouterApiKey: process.env.OPENROUTER_API_KEY, - concurrency: 8, - timeframePrompt, - filterValidationPrompt, - verdictPrompt, - }, - createDb(), -); - -await scraper.runVerifier(() => false); +main().catch((error) => { + logger.error("Fatal error", error); + process.exit(1); +}); diff --git a/services/swarm-verifier/src/logger.ts b/services/swarm-verifier/src/logger.ts deleted file mode 100644 index 868eb3e3c..000000000 --- a/services/swarm-verifier/src/logger.ts +++ /dev/null @@ -1,3 +0,0 @@ -import { BasicLogger } from "@torus-network/torus-utils/logger"; - -export const logger = BasicLogger.create({ name: "swarm-verifier" }); diff --git a/services/swarm-verifier/src/schemas/llm-response.ts b/services/swarm-verifier/src/schemas/llm-response.ts new file mode 100644 index 000000000..082b4c49a --- /dev/null +++ b/services/swarm-verifier/src/schemas/llm-response.ts @@ -0,0 +1,63 @@ +import { z } from "zod"; + +export const TimeframeExtractionSchema = z.object({ + timeframe_status: z.enum([ + "explicit", + "implicit", + "inferred", + "event_trigger", + "missing", + ]), + start_utc: z.string().nullable(), + end_utc: z.string().nullable(), + precision: z.enum([ + "hour", + "day", + "week", + "month", + "quarter", + "year", + "unbounded", + "event", + ]), + reasoning: z.string(), + assumptions: z.array(z.string()), + confidence: z.number().min(0).max(1), +}); + +export type TimeframeExtractionResult = z.infer< + typeof TimeframeExtractionSchema +>; + +export const FilterValidationSchema = z.object({ + context: z.string(), + is_valid: z.boolean(), + failure_cause: z + .enum([ + "BROKEN_EXTRACTION", + "VAGUE_TARGET", + "PRESENT_STATE", + "NEGATION", + "SARCASM", + "QUOTING_OTHERS", + "HEAVY_HEDGING", + "FUTURE_TIMEFRAME", + "SELF_ANNOUNCEMENT", + "PERSONAL_ACTION", + "OTHER", + ]) + .nullable(), + confidence: z.number().min(0).max(1), + reasoning: z.string(), +}); + +export type FilterValidationResult = z.infer; + +export const VerdictSchema = z.object({ + valid: z.boolean(), + verdict: z.boolean(), + confidence: z.number().min(0).max(1), + reasoning: z.string(), +}); + +export type VerdictResult = z.infer; diff --git a/services/swarm-verifier/src/seen-storage.ts b/services/swarm-verifier/src/seen-storage.ts new file mode 100644 index 000000000..779bd8be1 --- /dev/null +++ b/services/swarm-verifier/src/seen-storage.ts @@ -0,0 +1,39 @@ +import { createDb, eq } from "@torus-ts/db"; +import { verifierCursorStateSchema } from "@torus-ts/db/schema"; + +/** + * Get the current cursor for this verifier from the database. + * Returns undefined if no cursor exists (fresh start). 
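+ * The stored value is the nextCursor string returned by the claimable-predictions
+ * endpoint and is passed back unchanged as the `after` parameter on the next poll.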
+ */ +export async function getVerifierCursor( + verifierAgentId: string, +): Promise { + const db = createDb(); + + const [state] = await db + .select({ lastCursor: verifierCursorStateSchema.lastCursor }) + .from(verifierCursorStateSchema) + .where(eq(verifierCursorStateSchema.verifierAgentId, verifierAgentId)) + .limit(1); + + return state?.lastCursor; +} + +/** + * Update the cursor for this verifier in the database. + * Upserts to handle both new and existing verifiers. + */ +export async function updateVerifierCursor( + verifierAgentId: string, + cursor: string, +): Promise { + const db = createDb(); + + await db + .insert(verifierCursorStateSchema) + .values({ verifierAgentId, lastCursor: cursor }) + .onConflictDoUpdate({ + target: verifierCursorStateSchema.verifierAgentId, + set: { lastCursor: cursor }, + }); +} diff --git a/services/swarm-verifier/src/services/prediction-verifier.ts b/services/swarm-verifier/src/services/prediction-verifier.ts new file mode 100644 index 000000000..066990b15 --- /dev/null +++ b/services/swarm-verifier/src/services/prediction-verifier.ts @@ -0,0 +1,454 @@ +import { BasicLogger } from "@torus-network/torus-utils/logger"; +import type { OpenRouterClient } from "../ai/openrouter-client"; +import type { + ClaimablePrediction, + ClaimSource, + PostSlice, + PredictionContext, + PredictionTweet, + SwarmApiClient, +} from "../api-client"; +import { + FilterValidationSchema, + TimeframeExtractionSchema, + VerdictSchema, +} from "../schemas/llm-response"; +import type { + FilterValidationResult, + TimeframeExtractionResult, + VerdictResult, +} from "../schemas/llm-response"; + +const logger = BasicLogger.create({ name: "swarm-verifier:verifier" }); + +type SliceValidationFailureCause = + | "EMPTY_SLICES" + | "MISSING_TWEET" + | "NEGATIVE_INDICES" + | "INVALID_RANGE" + | "SLICE_TOO_SHORT" + | "OUT_OF_BOUNDS"; + +interface SliceValidationResult { + valid: boolean; + failureCause?: SliceValidationFailureCause; + message?: string; +} + +interface UrlCitation { + url: string; + title?: string; + content?: string; +} + +interface PromptLoader { + getTimeframePrompt(): string; + getFilterValidationPrompt(): string; + getVerdictPrompt(): string; +} + +export interface PredictionVerifierConfig { + llmClient: OpenRouterClient; + promptLoader: PromptLoader; + apiClient: SwarmApiClient; +} + +/** + * Processes predictions through LLM validation and generates claims/feedback. + * Matches swarm-filter's PredictionExtractor pattern. 
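+ * verifyPrediction fetches the prediction context from the API, validates the
+ * slices, extracts the timeframe, re-validates the filter extraction and generates
+ * a web-search verdict, submitting feedback whenever a step fails.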
+ */
+export class PredictionVerifier {
+  private llmClient: OpenRouterClient;
+  private promptLoader: PromptLoader;
+  private apiClient: SwarmApiClient;
+
+  public stats = {
+    predictionsProcessed: 0,
+    claimsSubmitted: 0,
+    feedbackSubmitted: 0,
+    errors: 0,
+  };
+
+  constructor(config: PredictionVerifierConfig) {
+    this.llmClient = config.llmClient;
+    this.promptLoader = config.promptLoader;
+    this.apiClient = config.apiClient;
+  }
+
+  private extractSliceText(
+    slices: PostSlice[],
+    tweetMap: Map<string, PredictionTweet>,
+  ): string {
+    return slices
+      .map((slice) => {
+        const tweet = tweetMap.get(slice.source.tweet_id);
+        if (!tweet) return "";
+        return tweet.text.substring(slice.start, slice.end);
+      })
+      .join(" ");
+  }
+
+  private validatePostSlices(
+    slices: PostSlice[],
+    tweetMap: Map<string, PredictionTweet>,
+    sliceType: string,
+  ): SliceValidationResult {
+    if (slices.length === 0) {
+      return {
+        valid: false,
+        failureCause: "EMPTY_SLICES",
+        message: `${sliceType} slices are empty`,
+      };
+    }
+
+    for (const slice of slices) {
+      const tweetId = slice.source.tweet_id;
+      const tweet = tweetMap.get(tweetId);
+
+      if (!tweet) {
+        return {
+          valid: false,
+          failureCause: "MISSING_TWEET",
+          message: `${sliceType} slice references missing tweet ${tweetId}`,
+        };
+      }
+
+      if (slice.start < 0 || slice.end < 0) {
+        return {
+          valid: false,
+          failureCause: "NEGATIVE_INDICES",
+          message: `${sliceType} slice has negative indices`,
+        };
+      }
+
+      if (slice.start >= slice.end) {
+        return {
+          valid: false,
+          failureCause: "INVALID_RANGE",
+          message: `${sliceType} slice has invalid range`,
+        };
+      }
+
+      if (slice.end - slice.start < 2) {
+        return {
+          valid: false,
+          failureCause: "SLICE_TOO_SHORT",
+          message: `${sliceType} slice too short`,
+        };
+      }
+
+      if (slice.end > tweet.text.length) {
+        return {
+          valid: false,
+          failureCause: "OUT_OF_BOUNDS",
+          message: `${sliceType} slice exceeds tweet length`,
+        };
+      }
+    }
+
+    return { valid: true };
+  }
+
+  private async extractTimeframe(
+    targetText: string,
+    timeframeText: string,
+    tweetTimestamp: Date,
+    tweets: PredictionTweet[],
+  ): Promise<TimeframeExtractionResult> {
+    const threadContext = tweets
+      .map(
+        (t) =>
+          `Tweet ID: ${t.id}\n` +
+          `Author: ${t.authorUsername ? `@${t.authorUsername}` : "unknown"}\n` +
+          `Date: ${t.date}\n` +
+          `Text: ${t.text}\n`,
+      )
+      .join("\n---\n\n");
+
+    const inputData = {
+      timeframe_text: timeframeText,
+      target_text: targetText,
+      tweet_timestamp: tweetTimestamp.toISOString(),
+      current_time: new Date().toISOString(),
+      thread_context: threadContext,
+    };
+
+    const userPrompt = `Extract the timeframe from this prediction:\n\n${JSON.stringify(inputData, null, 2)}`;
+
+    return this.llmClient.completeStructured(
+      this.promptLoader.getTimeframePrompt(),
+      userPrompt,
+      TimeframeExtractionSchema,
+      { temperature: 0.1 },
+    );
+  }
+
+  private async validateFilterExtraction(
+    targetText: string,
+    timeframeText: string,
+    targetSlices: PostSlice[],
+    timeframeSlices: PostSlice[],
+    tweets: PredictionTweet[],
+    timeframeResult: TimeframeExtractionResult,
+  ): Promise<FilterValidationResult> {
+    const threadTweets = tweets.map((t) => ({
+      tweet_id: t.id,
+      author: t.authorUsername ? `@${t.authorUsername}` : "unknown",
+      date: t.date,
+      text: t.text,
+    }));
+
+    const inputData = {
+      current_date: new Date().toISOString(),
+      thread_tweets: threadTweets,
+      target_slices: targetSlices.map((slice) => ({
+        tweet_id: slice.source.tweet_id,
+        start: slice.start,
+        end: slice.end,
+        text: targetText,
+      })),
+      timeframe_slices: timeframeSlices.map((slice) => ({
+        tweet_id: slice.source.tweet_id,
+        start: slice.start,
+        end: slice.end,
+        text: timeframeText,
+      })),
+      timeframe_parsed: {
+        start_utc: timeframeResult.start_utc,
+        end_utc: timeframeResult.end_utc,
+        precision: timeframeResult.precision,
+      },
+    };
+
+    const userPrompt = `Validate this prediction extraction:\n\n${JSON.stringify(inputData, null, 2)}`;
+
+    return this.llmClient.completeStructured(
+      this.promptLoader.getFilterValidationPrompt(),
+      userPrompt,
+      FilterValidationSchema,
+      { temperature: 0.1 },
+    );
+  }
+
+  private async generateVerdict(
+    context: string,
+    targetText: string,
+    timeframeText: string,
+    timeframeResult: TimeframeExtractionResult,
+  ): Promise<{ verdict: VerdictResult; sources?: UrlCitation[] }> {
+    const inputData = {
+      context,
+      target_text: targetText,
+      timeframe_text: timeframeText,
+      timeframe_parsed: {
+        start_utc: timeframeResult.start_utc,
+        end_utc: timeframeResult.end_utc,
+        precision: timeframeResult.precision,
+      },
+    };
+
+    const userPrompt = `Generate verdict for this prediction:\n\n${JSON.stringify(inputData, null, 2)}`;
+
+    const verdict = await this.llmClient.completeWithSearch(
+      this.promptLoader.getVerdictPrompt(),
+      userPrompt,
+      VerdictSchema,
+      { temperature: 0.1 },
+    );
+
+    return { verdict, sources: undefined };
+  }
+
+  async verifyPrediction(
+    prediction: ClaimablePrediction,
+  ): Promise<"claim" | "feedback" | null> {
+    logger.info(
+      `Processing prediction ${prediction.id} (topic: ${prediction.topicName})`,
+    );
+
+    let context: PredictionContext;
+    try {
+      context = await this.apiClient.getPredictionContext(prediction.id);
+    } catch (error) {
+      logger.error(`Failed to get prediction context: ${String(error)}`);
+      this.stats.errors++;
+      return null;
+    }
+
+    const tweets = context.tweets;
+
+    if (tweets.length === 0) {
+      logger.info("No tweets found for prediction, submitting feedback");
+      await this.apiClient.submitFeedback(
+        prediction.id,
+        "OTHER",
+        "No tweet data available",
+      );
+      this.stats.feedbackSubmitted++;
+      return "feedback";
+    }
+
+    const tweetMap = new Map<string, PredictionTweet>(
+      tweets.map((t) => [t.id, t]),
+    );
+
+    const targetValidation = this.validatePostSlices(
+      context.target,
+      tweetMap,
+      "Target",
+    );
+    if (!targetValidation.valid) {
+      logger.info(
+        `Target slices validation failed: ${targetValidation.failureCause}`,
+      );
+      await this.apiClient.submitFeedback(
+        prediction.id,
+        targetValidation.failureCause ?? "OTHER",
+        targetValidation.message ?? "Validation failed",
+      );
+      this.stats.feedbackSubmitted++;
+      return "feedback";
+    }
+
+    const timeframeValidation = this.validatePostSlices(
+      context.timeframe,
+      tweetMap,
+      "Timeframe",
+    );
+    if (!timeframeValidation.valid) {
+      logger.info(
+        `Timeframe slices validation failed: ${timeframeValidation.failureCause}`,
+      );
+      await this.apiClient.submitFeedback(
+        prediction.id,
+        timeframeValidation.failureCause ?? "OTHER",
+        timeframeValidation.message ??
"Validation failed", + ); + this.stats.feedbackSubmitted++; + return "feedback"; + } + + const targetText = this.extractSliceText(context.target, tweetMap); + const timeframeText = this.extractSliceText(context.timeframe, tweetMap); + + const sourceTweet = tweets[0]; + if (!sourceTweet) { + logger.info("No source tweet available"); + return null; + } + + const timeframeResult = await this.extractTimeframe( + targetText, + timeframeText, + new Date(sourceTweet.date), + tweets, + ); + + logger.info( + `Timeframe: status=${timeframeResult.timeframe_status}, start=${timeframeResult.start_utc}, end=${timeframeResult.end_utc}`, + ); + + if ( + timeframeResult.timeframe_status === "missing" || + timeframeResult.timeframe_status === "event_trigger" + ) { + const cause = + timeframeResult.timeframe_status === "missing" + ? "MISSING_TIMEFRAME" + : "EVENT_TRIGGER"; + await this.apiClient.submitFeedback( + prediction.id, + cause, + timeframeResult.reasoning, + ); + this.stats.feedbackSubmitted++; + return "feedback"; + } + + if (timeframeResult.end_utc) { + const endDate = new Date(timeframeResult.end_utc); + const oneDayAfterEnd = new Date(endDate); + oneDayAfterEnd.setDate(oneDayAfterEnd.getDate() + 1); + + if (oneDayAfterEnd > new Date()) { + logger.info( + `Timeframe not matured yet (ends ${timeframeResult.end_utc})`, + ); + await this.apiClient.submitFeedback( + prediction.id, + "FUTURE_TIMEFRAME", + `Prediction timeframe ends on ${timeframeResult.end_utc}. Can be verified after ${oneDayAfterEnd.toISOString()}.`, + ); + this.stats.feedbackSubmitted++; + return "feedback"; + } + } + + const validationResult = await this.validateFilterExtraction( + targetText, + timeframeText, + context.target, + context.timeframe, + tweets, + timeframeResult, + ); + + logger.info( + `Filter validation: valid=${validationResult.is_valid}, confidence=${validationResult.confidence}`, + ); + + if (!validationResult.is_valid) { + await this.apiClient.submitFeedback( + prediction.id, + validationResult.failure_cause ?? "OTHER", + validationResult.reasoning, + ); + this.stats.feedbackSubmitted++; + return "feedback"; + } + + const { verdict: verdictResult, sources } = await this.generateVerdict( + validationResult.context, + targetText, + timeframeText, + timeframeResult, + ); + + logger.info( + `Verdict: valid=${verdictResult.valid}, verdict=${verdictResult.verdict}, confidence=${verdictResult.confidence}`, + ); + + if (!verdictResult.valid) { + await this.apiClient.submitFeedback( + prediction.id, + "OTHER", + verdictResult.reasoning, + ); + this.stats.feedbackSubmitted++; + return "feedback"; + } + + const claimSources: ClaimSource[] = (sources ?? []).map((s) => ({ + url: s.url, + title: s.title, + snippet: s.content, + retrievedAt: new Date().toISOString(), + })); + + await this.apiClient.submitClaim( + prediction.id, + verdictResult.verdict, + verdictResult.confidence, + verdictResult.reasoning, + claimSources, + { + startUtc: timeframeResult.start_utc ?? new Date().toISOString(), + endUtc: timeframeResult.end_utc ?? 
new Date().toISOString(),
+        precision: timeframeResult.precision,
+      },
+    );
+
+    logger.info(`Claim submitted for prediction ${prediction.id}`);
+    this.stats.claimsSubmitted++;
+    this.stats.predictionsProcessed++;
+    return "claim";
+  }
+}
diff --git a/services/swarm-verifier/src/utils/retry.ts b/services/swarm-verifier/src/utils/retry.ts
new file mode 100644
index 000000000..5ad4f7421
--- /dev/null
+++ b/services/swarm-verifier/src/utils/retry.ts
@@ -0,0 +1,27 @@
+/**
+ * Retries an async operation with exponential backoff.
+ */
+export async function withRetry<T>(
+  operation: () => Promise<T>,
+  maxRetries = 3,
+  baseDelay = 1000,
+): Promise<T> {
+  let lastError: unknown;
+
+  for (let attempt = 0; attempt < maxRetries; attempt++) {
+    try {
+      return await operation();
+    } catch (error) {
+      lastError = error;
+      if (attempt < maxRetries - 1) {
+        const delay = baseDelay * Math.pow(2, attempt);
+        console.log(
+          `  Retry attempt ${attempt + 1}/${maxRetries - 1} after ${delay}ms...`,
+        );
+        await new Promise((resolve) => setTimeout(resolve, delay));
+      }
+    }
+  }
+
+  throw lastError;
+}
diff --git a/services/swarm-verifier/vitest.config.ts b/services/swarm-verifier/vitest.config.ts
deleted file mode 100644
index 995b909b6..000000000
--- a/services/swarm-verifier/vitest.config.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-import path from "node:path";
-import { defineConfig } from "vitest/config";
-
-export default defineConfig({
-  resolve: {
-    alias: {
-      "@": path.resolve(__dirname, "./src"),
-    },
-  },
-  test: {
-    projects: [
-      {
-        extends: true,
-        test: {
-          name: "twitterapi",
-          include: ["src/twitterapi-io/__tests__/**/*.test.ts"],
-          environment: "node",
-          globals: true,
-          testTimeout: 30_000,
-          hookTimeout: 30_000,
-          pool: "threads",
-          maxConcurrency: 5,
-          poolOptions: {
-            threads: {
-              singleThread: false,
-            },
-          },
-        },
-      },
-      {
-        extends: true,
-        test: {
-          name: "deduplication",
-          include: ["src/__tests__/**/*.test.ts"],
-          environment: "node",
-          globals: true,
-        },
-      },
-    ],
-  },
-});
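
Reviewer note, not part of the patch: a minimal sketch of how the pieces introduced above are intended to compose — the prompt-loader shape consumed by PredictionVerifier, the cursor helpers from seen-storage.ts, and withRetry. The OpenRouterClient/SwarmApiClient construction is omitted, the fetchClaimable callback is a hypothetical stand-in for the API call used in src/index.ts, and the import paths assume the sketch lives under src/.

// Illustrative sketch only; names marked hypothetical are not part of this diff.
import { readFile } from "node:fs/promises";
import { join } from "node:path";
import type { ClaimablePrediction } from "./api-client";
import { getVerifierCursor, updateVerifierCursor } from "./seen-storage";
import type {
  PredictionVerifier,
  PredictionVerifierConfig,
} from "./services/prediction-verifier";
import { withRetry } from "./utils/retry";

// Reads the same prompt files the old top-level index.ts code loaded directly.
async function loadPrompts(): Promise<PredictionVerifierConfig["promptLoader"]> {
  const read = (name: string) =>
    readFile(join(__dirname, "..", name), "utf-8");
  const [timeframe, filterValidation, verdict] = await Promise.all([
    read("TIMEFRAME_PROMPT.md"),
    read("FILTER_VALIDATION_PROMPT.md"),
    read("VERDICT_PROMPT.md"),
  ]);
  return {
    getTimeframePrompt: () => timeframe,
    getFilterValidationPrompt: () => filterValidation,
    getVerdictPrompt: () => verdict,
  };
}

// One polling step: resume from the stored cursor, verify each claimable
// prediction with retry/backoff, then persist the next cursor.
// `fetchClaimable` is hypothetical; the real verifier iterates via SwarmApiClient.
async function pollOnce(
  verifier: PredictionVerifier,
  verifierAddress: string,
  fetchClaimable: (
    cursor?: string,
  ) => Promise<{ predictions: ClaimablePrediction[]; nextCursor?: string }>,
): Promise<void> {
  const cursor = await getVerifierCursor(verifierAddress);
  const { predictions, nextCursor } = await fetchClaimable(cursor);
  for (const prediction of predictions) {
    await withRetry(() => verifier.verifyPrediction(prediction));
  }
  if (nextCursor) {
    await updateVerifierCursor(verifierAddress, nextCursor);
  }
}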