Skip to content

Commit d9e441e

Browse files
Add FileParserPlugin example for fetch API
- Add PDF fixtures (small, medium, large, xlarge) with verification codes - Create comprehensive file-parser example testing all PDF sizes - Uses mistral-ocr engine for PDF processing - Validates extraction of verification codes from PDFs
1 parent 1d667b1 commit d9e441e

File tree

9 files changed

+366
-0
lines changed

9 files changed

+366
-0
lines changed

fixtures/pdfs/README.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Test PDF Files with Verification Codes
2+
3+
Each PDF contains a unique verification code that can be used to confirm end-to-end processing by AI models.
4+
5+
## Verification Codes
6+
7+
| File | Size | Verification Code | Purpose |
8+
|------|------|-------------------|---------|
9+
| small.pdf | 33KB | **SMALL-7X9Q2** | Baseline small file test |
10+
| medium.pdf | 813KB | **MEDIUM-K4P8R** | Medium-sized file test |
11+
| large.pdf | 3.4MB | **LARGE-M9N3T** | Large file test |
12+
| xlarge.pdf | 11MB | **XLARGE-W6H5V** | Extra-large file test |
13+
14+
## Usage
15+
16+
When testing PDF upload to AI services, ask the AI to extract the verification code from the PDF. A successful response should include the exact code listed above for that file.
17+
18+
## Example Test Prompt
19+
20+
```
21+
What is the verification code shown in this PDF?
22+
```
23+
24+
**Expected responses:**
25+
- For small.pdf: "SMALL-7X9Q2"
26+
- For medium.pdf: "MEDIUM-K4P8R"
27+
- For large.pdf: "LARGE-M9N3T"
28+
- For xlarge.pdf: "XLARGE-W6H5V"
29+
30+
If the AI returns the correct code, this confirms:
31+
1. The PDF was successfully uploaded
32+
2. The PDF was processed/parsed by the AI
33+
3. The AI extracted and understood the text content
34+
35+
## PDF Details
36+
37+
All PDFs are structurally valid:
38+
- Validated with `qpdf --check`
39+
- PDF versions 1.3-1.4
40+
- Contain both text overlays and image content
41+
- Increasing file sizes for payload testing
42+
43+
## Verification Code Format
44+
45+
Codes follow the pattern: `{SIZE}-{5-CHAR-RANDOM}`
46+
- SIZE: SMALL, MEDIUM, LARGE, or XLARGE
47+
- Random component ensures uniqueness and prevents guessing

fixtures/pdfs/codes.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
small: SMALL-7X9Q2
2+
medium: MEDIUM-K4P8R
3+
large: LARGE-M9N3T
4+
xlarge: XLARGE-W6H5V

fixtures/pdfs/generate-pdfs.sh

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/bin/bash
# Generate the test PDF fixtures. Each PDF carries a unique verification code
# as rendered text, so end-to-end AI processing of the file can be confirmed by
# asking for that code.
#
# Requires ImageMagick 7 (`magick`); the final validation step uses qpdf.
# Output files are written to the current working directory.

set -euo pipefail

# Every step below uses the ImageMagick 7 entry point; fail early with a clear
# message instead of part-way through generation.
if ! command -v magick >/dev/null 2>&1; then
  echo "error: ImageMagick 7 ('magick') is required" >&2
  exit 1
fi

# Intermediate files are removed as soon as each PDF is assembled. The trap
# also clears them when a command fails part-way, since `set -e` would
# otherwise abort the script and leave them behind.
cleanup() {
  rm -f small_text.jpg medium_base.pdf large_base.pdf xlarge_base.pdf \
    med_fill*.jpg lg_fill*.jpg xl_fill*.jpg
}
trap cleanup EXIT

echo "Generating test PDFs with verification codes..."
echo

# Small PDF (33KB) - a single page: the text is rendered to a JPEG, which is
# then wrapped in a PDF.
echo "Creating small.pdf with code SMALL-7X9Q2..."
magick -size 800x600 xc:white \
  -pointsize 36 -gravity center \
  -annotate +0-150 'SMALL PDF TEST' \
  -annotate +0-80 'Verification Code:' \
  -fill red -pointsize 42 \
  -annotate +0-20 'SMALL-7X9Q2' \
  -fill black -pointsize 18 \
  -annotate +0+60 'If you can read this code, PDF processing works.' \
  small_text.jpg

magick small_text.jpg small.pdf
rm small_text.jpg

# Medium PDF (813KB) - Text + some image padding
echo "Creating medium.pdf with code MEDIUM-K4P8R..."
magick -size 1200x900 xc:white \
  -pointsize 48 -gravity center \
  -annotate +0-250 'MEDIUM PDF TEST' \
  -pointsize 32 \
  -annotate +0-150 'Verification Code:' \
  -fill blue -pointsize 38 \
  -annotate +0-80 'MEDIUM-K4P8R' \
  -fill black -pointsize 20 \
  -annotate +0+20 'This is a medium test document.' \
  -annotate +0+60 'Code confirms AI processed the PDF.' \
  medium_base.pdf

# Add filler images to increase size
magick -size 1000x1000 plasma:fractal med_fill1.jpg
magick -size 1000x1000 plasma:fractal med_fill2.jpg
magick medium_base.pdf med_fill1.jpg med_fill2.jpg medium.pdf
rm medium_base.pdf med_fill*.jpg

# Large PDF (3.4MB) - Text + more image padding
echo "Creating large.pdf with code LARGE-M9N3T..."
magick -size 1600x1200 xc:white \
  -pointsize 60 -gravity center \
  -annotate +0-350 'LARGE PDF TEST' \
  -pointsize 40 \
  -annotate +0-250 'Verification Code:' \
  -fill green -pointsize 46 \
  -annotate +0-170 'LARGE-M9N3T' \
  -fill black -pointsize 24 \
  -annotate +0-80 'Large test document for PDF uploads.' \
  -annotate +0-40 'Verification code confirms processing.' \
  large_base.pdf

# Add filler images to increase size
magick -size 1500x1500 plasma:fractal lg_fill1.jpg
magick -size 1500x1500 plasma:fractal lg_fill2.jpg
magick -size 1500x1500 plasma:fractal lg_fill3.jpg
magick -size 1500x1500 plasma:fractal lg_fill4.jpg
magick large_base.pdf lg_fill1.jpg lg_fill2.jpg lg_fill3.jpg lg_fill4.jpg large.pdf
rm large_base.pdf lg_fill*.jpg

# XLarge PDF (11MB) - Text + lots of image padding
echo "Creating xlarge.pdf with code XLARGE-W6H5V..."
magick -size 2000x1500 xc:white \
  -pointsize 72 -gravity center \
  -annotate +0-450 'XLARGE PDF TEST' \
  -pointsize 48 \
  -annotate +0-330 'Verification Code:' \
  -fill purple -pointsize 56 \
  -annotate +0-240 'XLARGE-W6H5V' \
  -fill black -pointsize 28 \
  -annotate +0-140 'Extra-large test document.' \
  -annotate +0-100 'Code confirms AI read the PDF.' \
  xlarge_base.pdf

# Add many filler images to increase size
for i in {1..8}; do
  magick -size 2000x2000 plasma:fractal "xl_fill${i}.jpg"
done
magick xlarge_base.pdf xl_fill*.jpg xlarge.pdf
rm xlarge_base.pdf xl_fill*.jpg

echo
echo "✓ PDF generation complete!"
echo
echo "Generated files:"
ls -lh *.pdf
echo
echo "Verification codes:"
echo " small.pdf -> SMALL-7X9Q2"
echo " medium.pdf -> MEDIUM-K4P8R"
echo " large.pdf -> LARGE-M9N3T"
echo " xlarge.pdf -> XLARGE-W6H5V"
echo
echo "Validating PDFs..."
# Validation is report-only: a failing check is printed but does not abort.
for pdf in small.pdf medium.pdf large.pdf xlarge.pdf; do
  if qpdf --check "$pdf" &>/dev/null; then
    echo "$pdf is valid"
  else
    echo "$pdf has issues"
  fi
done

fixtures/pdfs/large.pdf

3.33 MB
Binary file not shown.

fixtures/pdfs/medium.pdf

812 KB
Binary file not shown.

fixtures/pdfs/small.pdf

32.3 KB
Binary file not shown.

fixtures/pdfs/xlarge.pdf

10.3 MB
Binary file not shown.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# OpenRouter FileParserPlugin Examples (Fetch)
2+
3+
Examples demonstrating OpenRouter's FileParserPlugin with raw fetch API.
4+
5+
## Overview
6+
7+
The FileParserPlugin enables PDF processing for models that don't natively support file inputs. The plugin:
8+
9+
- Accepts PDFs via base64-encoded data URLs
10+
- Extracts text using configurable engines (pdf-text, mistral-ocr, native)
11+
- Returns parsed content to the model for processing
12+
13+
## Examples
14+
15+
- `file-parser-all-sizes.ts` - Tests PDF processing across multiple file sizes
16+
17+
## Running
18+
19+
```bash
20+
bun run typescript/fetch/src/plugin-file-parser/file-parser-all-sizes.ts
21+
```
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
/**
2+
* Example: OpenRouter FileParserPlugin - All PDF Sizes
3+
*
4+
* This example demonstrates using OpenRouter's FileParserPlugin with raw fetch
5+
* to process PDF documents of various sizes. The plugin automatically parses PDFs
6+
* and makes them consumable by LLMs, even for models that don't natively support
7+
* file inputs.
8+
*
9+
* Key Points:
10+
* - FileParserPlugin processes PDFs for models without native file support
11+
* - PDFs are sent via base64-encoded data URLs
12+
* - Plugin must be explicitly configured in the request body
13+
* - Tests multiple PDF sizes: small (33KB), medium (813KB), large (3.4MB), xlarge (11MB)
14+
*
15+
* To run: bun run typescript/fetch/src/plugin-file-parser/file-parser-all-sizes.ts
16+
*/
17+
18+
import type { ChatCompletionResponse } from '@openrouter-examples/shared/types';
19+
20+
// OpenRouter API endpoint
21+
const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions';
22+
23+
// Expected verification codes, keyed by fixture name (`<key>.pdf`).
// These must match the codes rendered into the PDFs (see
// fixtures/pdfs/codes.txt); a mismatch makes that size fail unconditionally.
const EXPECTED_CODES: Record<string, string> = {
  small: 'SMALL-7X9Q2',
  medium: 'MEDIUM-K4P8R',
  large: 'LARGE-M9N3T',
  xlarge: 'XLARGE-W6H5V',
};
30+
31+
/**
 * Read the file at `filePath` and return its bytes as a base64
 * `data:application/pdf` URL.
 */
async function readPdfAsDataUrl(filePath: string): Promise<string> {
  const bytes = await Bun.file(filePath).arrayBuffer();
  const encoded = Buffer.from(bytes).toString('base64');
  return 'data:application/pdf;base64,' + encoded;
}
40+
41+
/**
 * Find the first verification code in `text`.
 *
 * A code is an uppercase word, a hyphen, and exactly five uppercase
 * alphanumerics (e.g. `SMALL-7X9Q2`). The match is anchored on word
 * boundaries so a longer token such as `SMALL-7X9Q2Z` or `NON-EXISTENT` is
 * not truncated into something that merely looks like a code.
 *
 * @param text - Model reply to search.
 * @returns The matched code, or `null` when none is present.
 */
function extractCode(text: string): string | null {
  const match = /\b[A-Z]+-[A-Z0-9]{5}\b/.exec(text);
  return match ? match[0] : null;
}
48+
49+
/**
 * Format a byte count for display using 1024-based units.
 *
 * - below 1 KiB: whole bytes (`512 B`)
 * - below 1 MiB: whole kilobytes (`813 KB`)
 * - otherwise: megabytes with one decimal (`3.4 MB`)
 *
 * Values that would round up to `1024 KB` are shown as `1.0 MB` instead.
 *
 * @param bytes - Size in bytes.
 * @returns Human-readable size string.
 */
function formatSize(bytes: number): string {
  const kib = 1024;
  const mib = kib * kib;

  if (bytes < kib) return `${bytes} B`;

  // Round first, then decide the unit, so 1023.5+ KiB rolls over to MB.
  const wholeKib = Math.round(bytes / kib);
  if (wholeKib < kib) return `${wholeKib} KB`;

  return `${(bytes / mib).toFixed(1)} MB`;
}
56+
57+
/**
 * Send one fixture PDF to OpenRouter with the file-parser plugin enabled and
 * compare the verification code found in the reply with the expected one.
 *
 * @param size - Fixture name without extension; the file read is
 *   `./fixtures/pdfs/${size}.pdf`.
 * @param expectedCode - Code the reply must contain for the run to pass.
 * @returns Whether the extracted code equals `expectedCode`, the extracted
 *   code (or `null`), and the `usage` object from the response.
 * @throws Error when `OPENROUTER_API_KEY` is unset, when the HTTP response is
 *   not OK, or when the response carries no text completion.
 */
async function processPdf(
  size: string,
  expectedCode: string,
): Promise<{ success: boolean; extracted: string | null; usage?: unknown }> {
  const filePath = `./fixtures/pdfs/${size}.pdf`;
  const file = Bun.file(filePath);

  console.log(`\n=== ${size.toUpperCase()} PDF ===`);
  console.log(`Size: ${formatSize(file.size)}`);
  console.log(`Expected: ${expectedCode}`);

  // Check credentials before reading and base64-encoding the PDF, so a
  // misconfigured environment fails without loading the file.
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) {
    throw new Error('OPENROUTER_API_KEY environment variable is not set');
  }

  const dataUrl = await readPdfAsDataUrl(filePath);

  const response = await fetch(OPENROUTER_API_URL, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
      'HTTP-Referer': 'https://github.com/openrouter/examples',
      'X-Title': `FileParser - ${size} PDF`,
    },
    body: JSON.stringify({
      model: 'openai/gpt-4o-mini',
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'file',
              file: {
                filename: `${size}.pdf`,
                file_data: dataUrl,
              },
            },
            {
              type: 'text',
              text: 'Extract the verification code. Reply with ONLY the code.',
            },
          ],
        },
      ],
      // The plugin must be requested explicitly in the body.
      plugins: [
        {
          id: 'file-parser',
          pdf: {
            engine: 'mistral-ocr',
          },
        },
      ],
      max_tokens: 500,
    }),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`HTTP error! status: ${response.status}, body: ${errorText}`);
  }

  const data = (await response.json()) as ChatCompletionResponse;

  // A 2xx response is not guaranteed to contain a usable completion (empty
  // `choices`, null content, or an error payload); report that explicitly
  // rather than failing on a property access.
  const responseText = data.choices?.[0]?.message?.content;
  if (typeof responseText !== 'string') {
    throw new Error(
      `Response did not contain a text completion: ${JSON.stringify(data).slice(0, 500)}`,
    );
  }

  const extracted = extractCode(responseText);
  const success = extracted === expectedCode;

  console.log(`Extracted: ${extracted ?? '(none)'}`);
  console.log(`Status: ${success ? '✅ PASS' : '❌ FAIL'}`);
  console.log(`Tokens: ${data.usage?.total_tokens ?? 'n/a'}`);

  return { success, extracted, usage: data.usage };
}
132+
133+
/**
 * Entry point: runs processPdf for every entry in EXPECTED_CODES, prints a
 * pass/fail summary, and exits with status 0 only when every PDF passed.
 */
async function main() {
  const banner = [
    '╔════════════════════════════════════════════════════════════════════════════╗',
    '║ OpenRouter FileParserPlugin - All PDF Sizes ║',
    '╚════════════════════════════════════════════════════════════════════════════╝',
  ];
  for (const line of banner) {
    console.log(line);
  }
  console.log();
  console.log('Testing PDF processing with verification code extraction');
  console.log();

  const outcomes: boolean[] = [];

  try {
    for (const [size, expectedCode] of Object.entries(EXPECTED_CODES)) {
      // A failure for one PDF is recorded and reported; the remaining PDFs
      // are still processed.
      let ok = false;
      try {
        ok = (await processPdf(size, expectedCode)).success;
      } catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        console.log(`Status: ❌ FAIL`);
        console.log(`Error: ${detail}`);
      }
      outcomes.push(ok);
    }

    const total = outcomes.length;
    const passed = outcomes.filter(Boolean).length;
    const rule = '='.repeat(80);

    console.log('\n' + rule);
    console.log(`Results: ${passed}/${total} passed`);
    console.log(rule);

    const allPassed = passed === total;
    console.log(
      allPassed ? '\n✅ All PDF sizes processed successfully!' : '\n❌ Some PDF tests failed',
    );
    process.exit(allPassed ? 0 : 1);
  } catch (error) {
    console.error('\n❌ ERROR during testing:');

    if (error instanceof Error) {
      console.error('Error message:', error.message);
      console.error('Stack trace:', error.stack);
    } else {
      console.error('Unknown error:', error);
    }

    process.exit(1);
  }
}
185+
186+
// Run the example
187+
main();

0 commit comments

Comments
 (0)