From 2725253ea646268f5909eea3e04ca88117aa9918 Mon Sep 17 00:00:00 2001 From: JMCP Bot Date: Wed, 25 Mar 2026 13:18:29 +0100 Subject: [PATCH] jmcp: Use structured output for tag enrichment --- apps/web/trigger/tasks.ts | 9 ++--- packages/ai/src/index.ts | 64 ++++++++++++++++++++++++++++++++- packages/contracts/src/index.ts | 37 +++++++++++++++++++ 3 files changed, 102 insertions(+), 8 deletions(-) diff --git a/apps/web/trigger/tasks.ts b/apps/web/trigger/tasks.ts index 1ccc61d..a552c11 100644 --- a/apps/web/trigger/tasks.ts +++ b/apps/web/trigger/tasks.ts @@ -6,14 +6,9 @@ const provider = new GrokProvider() export const enrichPaperMetadata = task({ id: "enrich-paper-metadata", run: async (payload: { title: string; abstract: string }) => { - const tags = await provider.complete( - "tag-extraction", - `Title: ${payload.title}\nAbstract: ${payload.abstract}`, - ) + const result = await provider.extractTags(payload.title, payload.abstract) - return { - tags, - } + return result }, }) diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts index d69e54d..6157f14 100644 --- a/packages/ai/src/index.ts +++ b/packages/ai/src/index.ts @@ -1,5 +1,12 @@ import { getPapersConfig } from "@papers/config" -import { type ProviderTaskKind, type SafeAiPayload, safeAiPayloadSchema } from "@papers/contracts" +import { + type ProviderTaskKind, + type SafeAiPayload, + safeAiPayloadSchema, + type TagExtractionResult, + tagExtractionJsonSchema, + tagExtractionResultSchema, +} from "@papers/contracts" export class GrokProvider { readonly #config = getPapersConfig() @@ -61,4 +68,59 @@ export class GrokProvider { const payload = (await response.json()) as { output_text?: string } return payload.output_text ?? "" } + + async extractTags(title: string, abstract: string): Promise { + const text = `Title: ${title}\nAbstract: ${abstract}` + + this.assertSafe({ + task: "tag-extraction", + isBlindContent: false, + containsPrivateDraft: false, + text, + }) + + if (!this.enabled || !this.#config.PAPERS_XAI_API_KEY) { + return { tags: [] } + } + + const response = await fetch(`${this.#config.PAPERS_XAI_BASE_URL}/responses`, { + method: "POST", + headers: { + authorization: `Bearer ${this.#config.PAPERS_XAI_API_KEY}`, + "content-type": "application/json", + }, + body: JSON.stringify({ + model: this.#config.PAPERS_XAI_MODEL, + input: [ + { + role: "system", + content: [ + { + type: "input_text", + text: "Extract up to 8 research topic tags from the paper. Each tag needs a human-readable label and a URL-safe slug. Never infer hidden identity.", + }, + ], + }, + { + role: "user", + content: [{ type: "input_text", text }], + }, + ], + text: { + format: { + type: "json_schema", + ...tagExtractionJsonSchema, + }, + }, + }), + }) + + if (!response.ok) { + throw new Error(`Grok tag extraction failed with ${response.status}`) + } + + const raw = (await response.json()) as { output_text?: string } + const parsed: unknown = JSON.parse(raw.output_text ?? "{}") + return tagExtractionResultSchema.parse(parsed) + } } diff --git a/packages/contracts/src/index.ts b/packages/contracts/src/index.ts index 13053d4..df03322 100644 --- a/packages/contracts/src/index.ts +++ b/packages/contracts/src/index.ts @@ -181,6 +181,43 @@ export const safeAiPayloadSchema = z.object({ }) export type SafeAiPayload = z.infer +export const tagExtractionResultSchema = z.object({ + tags: z + .array( + z.object({ + label: z.string(), + slug: z.string(), + }), + ) + .max(8), +}) +export type TagExtractionResult = z.infer + +/** JSON Schema sent to the Grok Responses API for structured output. */ +export const tagExtractionJsonSchema = { + name: "tag_extraction", + strict: true, + schema: { + type: "object", + properties: { + tags: { + type: "array", + items: { + type: "object", + properties: { + label: { type: "string" }, + slug: { type: "string" }, + }, + required: ["label", "slug"], + additionalProperties: false, + }, + }, + }, + required: ["tags"], + additionalProperties: false, + }, +} as const + export const opportunityIdeaSchema = z.object({ id: z.string(), label: z.string(),