Skip to content

Commit b858a9d

Browse files
Update generate-pdfs.sh to version with JSON metadata generation
- Replace the old script with the version from ai-sdk-provider
- The new version generates JSON metadata files containing the verification codes
- Supports the TODO about dynamically randomizing verification codes
1 parent c94a621 commit b858a9d

File tree

1 file changed

+87
-13
lines changed

1 file changed

+87
-13
lines changed

fixtures/pdfs/generate-pdfs.sh

Lines changed: 87 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,33 @@
11
#!/bin/bash
22
# Script to generate test PDFs with unique verification codes
33
# Each PDF contains embedded text that can be used to verify end-to-end AI processing
4+
#
5+
# TODO: Dynamically randomize verification codes at generation time so fixtures can't be
6+
# guessed in advance. This would involve:
7+
# 1. Generating a random code for each PDF at runtime
8+
# 2. Writing both PDF and JSON metadata with the randomized code
9+
# 3. Reading the code from the JSON in tests instead of hard-coding
10+
# This would ensure the "needle" in the PDF cannot be known before execution.
411

512
set -e
613

14+
# Verification codes
15+
SMALL_CODE="SMALL-7X9Q2"
16+
MEDIUM_CODE="MEDIUM-K4P8R"
17+
LARGE_CODE="LARGE-M9N3T"
18+
XLARGE_CODE="XLARGE-W6H5V"
19+
720
echo "Generating test PDFs with verification codes..."
821
echo
922

1023
# Small PDF (33KB) - Text only with minimal content
11-
echo "Creating small.pdf with code SMALL-7X9Q2..."
24+
echo "Creating small.pdf with code $SMALL_CODE..."
1225
convert -size 800x600 xc:white \
1326
-pointsize 36 -gravity center \
1427
-annotate +0-150 'SMALL PDF TEST' \
1528
-annotate +0-80 'Verification Code:' \
1629
-fill red -pointsize 42 \
17-
-annotate +0-20 'SMALL-7X9Q2' \
30+
-annotate +0-20 "$SMALL_CODE" \
1831
-fill black -pointsize 18 \
1932
-annotate +0+60 'If you can read this code, PDF processing works.' \
2033
small_text.jpg
@@ -23,14 +36,14 @@ convert small_text.jpg small.pdf
2336
rm small_text.jpg
2437

2538
# Medium PDF (813KB) - Text + some image padding
26-
echo "Creating medium.pdf with code MEDIUM-K4P8R..."
39+
echo "Creating medium.pdf with code $MEDIUM_CODE..."
2740
magick -size 1200x900 xc:white \
2841
-pointsize 48 -gravity center \
2942
-annotate +0-250 'MEDIUM PDF TEST' \
3043
-pointsize 32 \
3144
-annotate +0-150 'Verification Code:' \
3245
-fill blue -pointsize 38 \
33-
-annotate +0-80 'MEDIUM-K4P8R' \
46+
-annotate +0-80 "$MEDIUM_CODE" \
3447
-fill black -pointsize 20 \
3548
-annotate +0+20 'This is a medium test document.' \
3649
-annotate +0+60 'Code confirms AI processed the PDF.' \
@@ -43,14 +56,14 @@ magick medium_base.pdf med_fill1.jpg med_fill2.jpg medium.pdf
4356
rm medium_base.pdf med_fill*.jpg
4457

4558
# Large PDF (3.4MB) - Text + more image padding
46-
echo "Creating large.pdf with code LARGE-M9N3T..."
59+
echo "Creating large.pdf with code $LARGE_CODE..."
4760
magick -size 1600x1200 xc:white \
4861
-pointsize 60 -gravity center \
4962
-annotate +0-350 'LARGE PDF TEST' \
5063
-pointsize 40 \
5164
-annotate +0-250 'Verification Code:' \
5265
-fill green -pointsize 46 \
53-
-annotate +0-170 'LARGE-M9N3T' \
66+
-annotate +0-170 "$LARGE_CODE" \
5467
-fill black -pointsize 24 \
5568
-annotate +0-80 'Large test document for PDF uploads.' \
5669
-annotate +0-40 'Verification code confirms processing.' \
@@ -65,14 +78,14 @@ magick large_base.pdf lg_fill1.jpg lg_fill2.jpg lg_fill3.jpg lg_fill4.jpg large.
6578
rm large_base.pdf lg_fill*.jpg
6679

6780
# XLarge PDF (11MB) - Text + lots of image padding
68-
echo "Creating xlarge.pdf with code XLARGE-W6H5V..."
81+
echo "Creating xlarge.pdf with code $XLARGE_CODE..."
6982
magick -size 2000x1500 xc:white \
7083
-pointsize 72 -gravity center \
7184
-annotate +0-450 'XLARGE PDF TEST' \
7285
-pointsize 48 \
7386
-annotate +0-330 'Verification Code:' \
7487
-fill purple -pointsize 56 \
75-
-annotate +0-240 'XLARGE-W6H5V' \
88+
-annotate +0-240 "$XLARGE_CODE" \
7689
-fill black -pointsize 28 \
7790
-annotate +0-140 'Extra-large test document.' \
7891
-annotate +0-100 'Code confirms AI read the PDF.' \
@@ -87,15 +100,67 @@ rm xlarge_base.pdf xl_fill*.jpg
87100

88101
echo
89102
echo "✓ PDF generation complete!"
103+
echo
104+
echo "Generating JSON metadata files..."
105+
echo
106+
107+
# Create JSON metadata for each PDF with verification code
108+
# Format: { "verificationCode": "CODE", "description": "...", "size": "...", "type": "test_fixture", "purpose": "..." } (large.json also adds "regression")
109+
110+
cat > small.json << EOF
111+
{
112+
"verificationCode": "$SMALL_CODE",
113+
"description": "Small test PDF (33KB) with minimal content",
114+
"size": "small",
115+
"type": "test_fixture",
116+
"purpose": "Test basic PDF processing with FileParserPlugin"
117+
}
118+
EOF
119+
echo " ✓ small.json"
120+
121+
cat > medium.json << EOF
122+
{
123+
"verificationCode": "$MEDIUM_CODE",
124+
"description": "Medium test PDF (813KB) with text and image padding",
125+
"size": "medium",
126+
"type": "test_fixture",
127+
"purpose": "Test PDF processing with moderate file size"
128+
}
129+
EOF
130+
echo " ✓ medium.json"
131+
132+
cat > large.json << EOF
133+
{
134+
"verificationCode": "$LARGE_CODE",
135+
"description": "Large test PDF (3.4MB) for FileParserPlugin regression testing",
136+
"size": "large",
137+
"type": "test_fixture",
138+
"purpose": "Test large PDF handling and plugin activation",
139+
"regression": "Validates fix for FileParserPlugin large PDF issue"
140+
}
141+
EOF
142+
echo " ✓ large.json"
143+
144+
cat > xlarge.json << EOF
145+
{
146+
"verificationCode": "$XLARGE_CODE",
147+
"description": "Extra-large test PDF (11MB) with extensive content",
148+
"size": "xlarge",
149+
"type": "test_fixture",
150+
"purpose": "Test maximum file size handling with FileParserPlugin"
151+
}
152+
EOF
153+
echo " ✓ xlarge.json"
154+
90155
echo
91156
echo "Generated files:"
92-
ls -lh *.pdf
157+
ls -lh *.pdf *.json
93158
echo
94159
echo "Verification codes:"
95-
echo " small.pdf -> SMALL-7X9Q2"
96-
echo " medium.pdf -> MEDIUM-K4P8R"
97-
echo " large.pdf -> LARGE-M9N3T"
98-
echo " xlarge.pdf -> XLARGE-W6H5V"
160+
echo " small.pdf -> $SMALL_CODE (small.json)"
161+
echo " medium.pdf -> $MEDIUM_CODE (medium.json)"
162+
echo " large.pdf -> $LARGE_CODE (large.json)"
163+
echo " xlarge.pdf -> $XLARGE_CODE (xlarge.json)"
99164
echo
100165
echo "Validating PDFs..."
101166
for pdf in small.pdf medium.pdf large.pdf xlarge.pdf; do
@@ -105,3 +170,12 @@ for pdf in small.pdf medium.pdf large.pdf xlarge.pdf; do
105170
echo "$pdf has issues"
106171
fi
107172
done
173+
echo
174+
echo "Validating JSON metadata..."
175+
for json in small.json medium.json large.json xlarge.json; do
176+
if python3 -m json.tool "$json" > /dev/null 2>&1; then
177+
echo "$json is valid"
178+
else
179+
echo "$json has syntax errors"
180+
fi
181+
done

0 commit comments

Comments
 (0)