2 changes: 2 additions & 0 deletions .env.test.example
@@ -0,0 +1,2 @@
# copy this file to `.env.test.local` and fill in variables
GOOGLE_GENERATIVE_AI_API_KEY=
1 change: 1 addition & 0 deletions .github/workflows/test.yml
@@ -16,4 +16,5 @@ jobs:
with:
github_hosted_runner: true
secrets:
DOT_ENV: ${{ secrets.DOT_ENV_TEST }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
14 changes: 14 additions & 0 deletions example/prompt_summary/judge.ts
@@ -0,0 +1,14 @@
import { llmJudgePreset } from '@exercode/problem-utils/presets/llm';
import { DecisionCode } from '@exercode/problem-utils';

await llmJudgePreset(import.meta.dirname, {
  test(context) {
    return {
      decisionCode:
        context.result.output.trim().length < (context.testCase.input?.trim().length ?? 0) &&
        context.result.output.includes(context.testCase.output?.trim() ?? '')
          ? DecisionCode.ACCEPTED
          : DecisionCode.WRONG_ANSWER,
    };
  },
});
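
For context, the judge above accepts a result only when the model's output is strictly shorter than the input and still contains the expected keyword. A standalone sketch of that check (the helper `isAcceptedSummary` and the sample values are illustrative, not part of this PR):

```ts
// Illustrative standalone version of the acceptance check in judge.ts above.
function isAcceptedSummary(output: string, input: string, expected: string): boolean {
  // Accept only if the output is shorter than the input and mentions the expected keyword.
  return output.trim().length < input.trim().length && output.includes(expected.trim());
}

// Values resembling test_cases/01_small_00:
const input = '富士山(ふじさん)は、静岡県と山梨県に跨る活火山である。';
console.log(isAcceptedSummary('富士山についての短い説明。', input, '富士山')); // true
console.log(isAcceptedSummary(input + input, input, '富士山')); // false (longer than the input)
```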
3 changes: 3 additions & 0 deletions example/prompt_summary/model_answers.test/wa/prompt.txt
@@ -0,0 +1,3 @@
Please repeat the following text.

{input}
3 changes: 3 additions & 0 deletions example/prompt_summary/model_answers/default/prompt.txt
@@ -0,0 +1,3 @@
Please summarize the following text in its original language.

{input}
24 changes: 24 additions & 0 deletions example/prompt_summary/problem.md
@@ -0,0 +1,24 @@
---
name: 'Summary'
---

## Problem Statement

You are given a passage of text.
Write a prompt that makes an LLM summarize the text.

---

## Input

`{input}` in the prompt is replaced with the input text.

### Sample Answer

The following prompt instructs the model to return the given text as-is.

```
Please repeat the following text.

{input}
```
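
The `{input}` placeholder is a plain textual substitution: the judge replaces every occurrence of `{input}` in the submitted prompt with the test case input before calling the model (see `src/presets/llm.ts` below). A minimal sketch of that substitution, using a hypothetical `buildPrompt` helper:

```ts
// Hypothetical helper mirroring the substitution performed in src/presets/llm.ts.
function buildPrompt(promptTemplate: string, input: string): string {
  // Every `{input}` occurrence is replaced with the test case input.
  return promptTemplate.replaceAll('{input}', input);
}

const template = 'Please summarize the following text in its original language.\n\n{input}';
console.log(buildPrompt(template, '富士山(ふじさん)は、静岡県と山梨県に跨る活火山である。'));
```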
2 changes: 2 additions & 0 deletions example/prompt_summary/test_cases/01_small_00.in
@@ -0,0 +1,2 @@
富士山(ふじさん)は、静岡県(富士宮市、富士市、裾野市、御殿場市、駿東郡小山町)と山梨県(富士吉田市、南都留郡鳴沢村)に跨る活火山である[注釈 3]。
標高3776.12 m、日本最高峰(剣ヶ峰)[注釈 4]の独立峰で、その優美な風貌は日本国外でも日本の象徴として広く知られている。
1 change: 1 addition & 0 deletions example/prompt_summary/test_cases/01_small_00.out
@@ -0,0 +1 @@
富士山
15 changes: 15 additions & 0 deletions example/prompt_summary/test_cases/02_large_00.in
@@ -0,0 +1,15 @@
富士山(ふじさん)は、静岡県(富士宮市、富士市、裾野市、御殿場市、駿東郡小山町)と山梨県(富士吉田市、南都留郡鳴沢村)に跨る活火山である[注釈 3]。
標高3776.12 m、日本最高峰(剣ヶ峰)[注釈 4]の独立峰で、その優美な風貌は日本国外でも日本の象徴として広く知られている。

数多くの芸術作品の題材とされ芸術面のみならず、気候や地層など地質学的にも社会に大きな影響を与えている。
懸垂曲線の山容を有した玄武岩質成層火山で構成され、その山体は駿河湾の海岸まで及ぶ。

古来霊峰とされ、特に山頂部は浅間大神が鎮座するとされたため、神聖視された。
噴火を沈静化するため律令国家により浅間神社が祭祀され、浅間信仰が確立された。
また、富士山修験道の開祖とされる富士上人により修験道の霊場としても認識されるようになり、登拝が行われるようになった。
これら富士信仰は時代により多様化し、村山修験や富士講といった一派を形成するに至る。
現在、富士山麓周辺には観光名所が多くある他、夏季シーズンには富士登山が盛んである。

日本三名山(三霊山)、日本百名山[2]、日本の地質百選に選定されている。
また、1936年(昭和11年)には富士箱根伊豆国立公園に指定されている[注釈 5]。
その後、1952年(昭和27年)に特別名勝、2011年(平成23年)に史跡、さらに2013年(平成25年)6月22日には関連する文化財群とともに「富士山-信仰の対象と芸術の源泉」の名で世界文化遺産に登録された[4]。
1 change: 1 addition & 0 deletions example/prompt_summary/test_cases/02_large_00.out
@@ -0,0 +1 @@
富士山
5 changes: 4 additions & 1 deletion package.json
@@ -43,12 +43,14 @@
"prepare": "husky || true",
"prettify": "prettier --cache --color --write \"**/{.*/,}*.{cjs,css,cts,htm,html,java,js,json,json5,jsonc,jsx,md,mjs,mts,scss,ts,tsx,vue,yaml,yml}\" \"!**/test{-,/}fixtures/**\" || true",
"start": "build-ts run src/index.ts",
"test": "rm -fr temp && vitest test",
"test": "rm -fr temp && dotenv -c test -- vitest test",
"test/ci-setup": "yarn build && bun install --cwd example",
"typecheck": "tsc --noEmit --Pretty"
},
"prettier": "@willbooster/prettier-config",
"dependencies": {
"@ai-sdk/google": "2.0.49",
"ai": "5.0.115",
"front-matter": "4.0.2",
"zod": "4.2.0"
},
@@ -60,6 +62,7 @@
"@willbooster/prettier-config": "10.2.4",
"build-ts": "17.0.9",
"conventional-changelog-conventionalcommits": "9.1.0",
"dotenv-cli": "11.0.0",
"eslint": "9.39.1",
"eslint-config-flat-gitignore": "2.1.0",
"eslint-config-prettier": "10.1.8",
93 changes: 93 additions & 0 deletions src/presets/llm.ts
@@ -0,0 +1,93 @@
import fs from 'node:fs';
import path from 'node:path';

import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
import { z } from 'zod';

import { parseArgs } from '../helpers/parseArgs.js';
import { printTestCaseResult } from '../helpers/printTestCaseResult.js';
import { readTestCases } from '../helpers/readTestCases.js';
import { DecisionCode } from '../types/decisionCode.js';
import type { TestCaseResult } from '../types/testCaseResult.js';

const PROMPT_FILENAME = 'prompt.txt';

const judgeParamsSchema = z.object({
  model: z.enum(['google/gemini-2.5-flash-lite']),
});

interface LlmJudgePresetOptions {
  test: (context: {
    testCase: { id: string; input?: string; output?: string };
    result: { output: string };
  }) => Partial<TestCaseResult> | Promise<Partial<TestCaseResult>>;
}

/**
 * A preset judge function for running a user prompt against an LLM and testing the result.
 *
 * @example
 * Create `judge.ts`:
 * ```ts
 * import { llmJudgePreset } from '@exercode/problem-utils/presets/llm';
 * import { DecisionCode } from '@exercode/problem-utils';
 *
 * await llmJudgePreset(import.meta.dirname, {
 *   test(context) {
 *     return { decisionCode: context.result.output ? DecisionCode.ACCEPTED : DecisionCode.WRONG_ANSWER };
 *   },
 * });
 * ```
 *
 * Run with the required parameters:
 * ```bash
 * bun judge.ts model_answers/default '{ "model": "google/gemini-2.5-flash-lite" }'
 * ```
 */
export async function llmJudgePreset(problemDir: string, options: LlmJudgePresetOptions): Promise<void> {
  const args = parseArgs(process.argv);
  const params = judgeParamsSchema.parse(args.params);

  const testCases = await readTestCases(path.join(problemDir, 'test_cases'));

  const prompt = await fs.promises.readFile(path.join(args.cwd, PROMPT_FILENAME), 'utf8');

  for (const testCase of testCases) {
    const startTimeMilliseconds = Date.now();
    try {
      // requires `GOOGLE_GENERATIVE_AI_API_KEY`
      const { text } = await generateText({
        model: google(params.model.slice('google/'.length)),
        prompt: prompt.replaceAll('{input}', testCase.input ?? ''),
      });

      const stopTimeMilliseconds = Date.now();

      const testCaseResult = {
        testCaseId: testCase.id,
        decisionCode: DecisionCode.ACCEPTED,
        stdin: testCase.input,
        stdout: text,
        timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,
        ...(await options.test({ testCase, result: { output: text } })),
      };

      printTestCaseResult(testCaseResult);

      if (testCaseResult.decisionCode !== DecisionCode.ACCEPTED) break;
    } catch (error) {
      const stopTimeMilliseconds = Date.now();

      printTestCaseResult({
        testCaseId: testCase.id,
        decisionCode: DecisionCode.RUNTIME_ERROR,
        stdin: testCase.input,
        stderr: error instanceof Error ? error.message : String(error),
        timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,
      });

      break;
    }
  }
}
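
The preset reads its working directory and parameters from `parseArgs` (defined in `src/helpers/parseArgs.ts`, not shown in this diff). Based on how `args.cwd` and `args.params` are used above, the returned value is assumed to have roughly the following shape; the actual definition may differ:

```ts
// Assumed shape of parseArgs(process.argv), inferred from its usage in llm.ts above;
// the real type lives in src/helpers/parseArgs.ts and is not part of this diff.
interface AssumedJudgeArgs {
  cwd: string; // directory containing prompt.txt, e.g. 'model_answers/default'
  params: unknown; // raw JSON parameters, validated here by judgeParamsSchema
}

// The JSDoc example implies an invocation such as:
//   GOOGLE_GENERATIVE_AI_API_KEY=... bun judge.ts model_answers/default '{ "model": "google/gemini-2.5-flash-lite" }'
```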
84 changes: 77 additions & 7 deletions test/e2e/debugAndJudge.test.ts
@@ -110,12 +110,16 @@ const acceptedTestCaseResultsForAPlusBFile = [
},
] as const satisfies readonly TestCaseResult[];

test.each<[string, string, string, Record<string, unknown>, readonly TestCaseResult[]]>([
test.each<
[string, string, string, Record<string, unknown>, Record<string, string | undefined>, readonly TestCaseResult[]]
>([
// stdioDebugPreset
[
'example/a_plus_b',
'debug.ts',
'model_answers/java',
{ stdin: '1 1' },
{},
[
{
testCaseId: 'debug',
@@ -129,14 +133,16 @@ test.each<[string, string, string, Record<string, unknown>, readonly TestCaseRes
],
],

['example/a_plus_b', 'judge.ts', 'model_answers/java', {}, acceptedTestCaseResultsForAPlusB],
['example/a_plus_b', 'judge.ts', 'model_answers/python', {}, acceptedTestCaseResultsForAPlusB],
['example/a_plus_b', 'judge.ts', 'model_answers.test/java_rename', {}, acceptedTestCaseResultsForAPlusB],
// stdioJudgePreset
['example/a_plus_b', 'judge.ts', 'model_answers/java', {}, {}, acceptedTestCaseResultsForAPlusB],
['example/a_plus_b', 'judge.ts', 'model_answers/python', {}, {}, acceptedTestCaseResultsForAPlusB],
['example/a_plus_b', 'judge.ts', 'model_answers.test/java_rename', {}, {}, acceptedTestCaseResultsForAPlusB],
[
'example/a_plus_b',
'judge.ts',
'model_answers.test/python_fpe',
{},
{},
[
{
testCaseId: '01_small_00',
@@ -158,6 +164,7 @@ test.each<[string, string, string, Record<string, unknown>, readonly TestCaseRes
'judge.ts',
'model_answers.test/python_rpe',
{},
{},
[
{
testCaseId: '01_small_00',
@@ -175,6 +182,7 @@ test.each<[string, string, string, Record<string, unknown>, readonly TestCaseRes
'judge.ts',
'model_answers.test/python_tle',
{},
{},
[
...acceptedTestCaseResultsForAPlusB.slice(0, 2),
{
@@ -192,6 +200,7 @@ test.each<[string, string, string, Record<string, unknown>, readonly TestCaseRes
'judge.ts',
'model_answers.test/python_wa',
{},
{},
[
{
testCaseId: '01_small_00',
@@ -223,12 +232,13 @@ test.each<[string, string, string, Record<string, unknown>, readonly TestCaseRes
],
],

['example/a_plus_b_file', 'judge.ts', 'model_answers/javascript', {}, acceptedTestCaseResultsForAPlusBFile],
['example/a_plus_b_file', 'judge.ts', 'model_answers/javascript', {}, {}, acceptedTestCaseResultsForAPlusBFile],
[
'example/a_plus_b_file',
'judge.ts',
'model_answers.test/javascript_mrofe',
{},
{},
[
{
testCaseId: '01_small_00',
Expand All @@ -245,6 +255,7 @@ test.each<[string, string, string, Record<string, unknown>, readonly TestCaseRes
'judge.ts',
'model_answers.test/javascript_wa',
{},
{},
[
...acceptedTestCaseResultsForAPlusBFile.slice(0, 1),
{
@@ -257,10 +268,68 @@ test.each<[string, string, string, Record<string, unknown>, readonly TestCaseRes
},
],
],

// llmJudgePreset
[
'example/prompt_summary',
'judge.ts',
'model_answers/default',
{ model: 'google/gemini-2.5-flash-lite' },
{ GOOGLE_GENERATIVE_AI_API_KEY: process.env.GOOGLE_GENERATIVE_AI_API_KEY },
[
{
testCaseId: '01_small_00',
decisionCode: 2000,
stdin: expect.any(String),
stdout: expect.any(String),
timeSeconds: expect.any(Number),
},
{
testCaseId: '02_large_00',
decisionCode: 2000,
stdin: expect.any(String),
stdout: expect.any(String),
timeSeconds: expect.any(Number),
},
],
],
[
'example/prompt_summary',
'judge.ts',
'model_answers.test/wa',
{ model: 'google/gemini-2.5-flash-lite' },
{ GOOGLE_GENERATIVE_AI_API_KEY: process.env.GOOGLE_GENERATIVE_AI_API_KEY },
[
{
testCaseId: '01_small_00',
decisionCode: 1000,
stdin: expect.any(String),
stdout: expect.any(String),
timeSeconds: expect.any(Number),
},
],
],
[
'example/prompt_summary',
'judge.ts',
'model_answers/default',
{ model: 'google/gemini-2.5-flash-lite' },
{ GOOGLE_GENERATIVE_AI_API_KEY: undefined },
[
{
testCaseId: '01_small_00',
decisionCode: 1001,
stdin: expect.any(String),
stderr:
"Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.",
timeSeconds: expect.any(Number),
},
],
],
])(
'%s %s %j',
'%s %s %s %j',
{ timeout: 20_000, concurrent: true },
async (cwd, scriptFilename, argsCwd, argsParams, expectedTestCaseResults) => {
async (cwd, scriptFilename, argsCwd, argsParams, env, expectedTestCaseResults) => {
// The target files may be changed during judging, so clone them before testing.
await fs.promises.mkdir('temp', { recursive: true });
const tempDir = await fs.promises.mkdtemp(path.join('temp', 'judge_'));
@@ -269,6 +338,7 @@ test.each<[string, string, string, Record<string, unknown>, readonly TestCaseRes
const spawnResult = child_process.spawnSync('bun', [scriptFilename, argsCwd, JSON.stringify(argsParams)], {
cwd: tempDir,
encoding: 'utf8',
env: { ...process.env, ...env },
});

if (spawnResult.stderr) console.error(spawnResult.stderr);