11#! /bin/bash
22# Script to generate test PDFs with unique verification codes
33# Each PDF contains embedded text that can be used to verify end-to-end AI processing
4+ #
5+ # TODO: Randomize the verification codes at generation time so the fixture values cannot
6+ # be guessed in advance. This would involve:
7+ # 1. Generating a random code for each PDF at runtime
8+ # 2. Writing both the PDF and its JSON metadata with the randomized code
9+ # 3. Reading the code from the JSON in tests instead of hard-coding it
10+ # This would ensure the "needle" in each PDF cannot be known before execution.
411
512set -e
613
14+ # Verification codes: each one is rendered into its PDF and written to the matching JSON metadata file
15+ SMALL_CODE=" SMALL-7X9Q2"
16+ MEDIUM_CODE=" MEDIUM-K4P8R"
17+ LARGE_CODE=" LARGE-M9N3T"
18+ XLARGE_CODE=" XLARGE-W6H5V"
19+
720echo " Generating test PDFs with verification codes..."
821echo
922
1023# Small PDF (33KB) - Text only with minimal content
11- echo " Creating small.pdf with code SMALL-7X9Q2 ..."
24+ echo " Creating small.pdf with code $SMALL_CODE ..."
1225convert -size 800x600 xc:white \
1326 -pointsize 36 -gravity center \
1427 -annotate +0-150 ' SMALL PDF TEST' \
1528 -annotate +0-80 ' Verification Code:' \
1629 -fill red -pointsize 42 \
17- -annotate +0-20 ' SMALL-7X9Q2 ' \
30+ -annotate +0-20 " $SMALL_CODE " \
1831 -fill black -pointsize 18 \
1932 -annotate +0+60 ' If you can read this code, PDF processing works.' \
2033 small_text.jpg
@@ -23,14 +36,14 @@ convert small_text.jpg small.pdf
2336rm small_text.jpg
2437
2538# Medium PDF (813KB) - Text + some image padding
26- echo " Creating medium.pdf with code MEDIUM-K4P8R ..."
39+ echo " Creating medium.pdf with code $MEDIUM_CODE ..."
2740magick -size 1200x900 xc:white \
2841 -pointsize 48 -gravity center \
2942 -annotate +0-250 ' MEDIUM PDF TEST' \
3043 -pointsize 32 \
3144 -annotate +0-150 ' Verification Code:' \
3245 -fill blue -pointsize 38 \
33- -annotate +0-80 ' MEDIUM-K4P8R ' \
46+ -annotate +0-80 " $MEDIUM_CODE " \
3447 -fill black -pointsize 20 \
3548 -annotate +0+20 ' This is a medium test document.' \
3649 -annotate +0+60 ' Code confirms AI processed the PDF.' \
@@ -43,14 +56,14 @@ magick medium_base.pdf med_fill1.jpg med_fill2.jpg medium.pdf
4356rm medium_base.pdf med_fill* .jpg
4457
4558# Large PDF (3.4MB) - Text + more image padding
46- echo " Creating large.pdf with code LARGE-M9N3T ..."
59+ echo " Creating large.pdf with code $LARGE_CODE ..."
4760magick -size 1600x1200 xc:white \
4861 -pointsize 60 -gravity center \
4962 -annotate +0-350 ' LARGE PDF TEST' \
5063 -pointsize 40 \
5164 -annotate +0-250 ' Verification Code:' \
5265 -fill green -pointsize 46 \
53- -annotate +0-170 ' LARGE-M9N3T ' \
66+ -annotate +0-170 " $LARGE_CODE " \
5467 -fill black -pointsize 24 \
5568 -annotate +0-80 ' Large test document for PDF uploads.' \
5669 -annotate +0-40 ' Verification code confirms processing.' \
@@ -65,14 +78,14 @@ magick large_base.pdf lg_fill1.jpg lg_fill2.jpg lg_fill3.jpg lg_fill4.jpg large.
6578rm large_base.pdf lg_fill* .jpg
6679
6780# XLarge PDF (11MB) - Text + lots of image padding
68- echo " Creating xlarge.pdf with code XLARGE-W6H5V ..."
81+ echo " Creating xlarge.pdf with code $XLARGE_CODE ..."
6982magick -size 2000x1500 xc:white \
7083 -pointsize 72 -gravity center \
7184 -annotate +0-450 ' XLARGE PDF TEST' \
7285 -pointsize 48 \
7386 -annotate +0-330 ' Verification Code:' \
7487 -fill purple -pointsize 56 \
75- -annotate +0-240 ' XLARGE-W6H5V ' \
88+ -annotate +0-240 " $XLARGE_CODE " \
7689 -fill black -pointsize 28 \
7790 -annotate +0-140 ' Extra-large test document.' \
7891 -annotate +0-100 ' Code confirms AI read the PDF.' \
@@ -87,15 +100,67 @@ rm xlarge_base.pdf xl_fill*.jpg
87100
88101echo
89102echo " ✓ PDF generation complete!"
103+ echo
104+ echo " Generating JSON metadata files..."
105+ echo
106+
107+ # Create a JSON metadata file for each PDF, recording its verification code
108+ # Format: { "verificationCode": "CODE", "description": "...", "size": "...", "type": "test_fixture", "purpose": "..." } (large.json additionally carries "regression": "...")
109+
110+ cat > small.json << EOF
111+ {
112+ "verificationCode": "$SMALL_CODE ",
113+ "description": "Small test PDF (33KB) with minimal content",
114+ "size": "small",
115+ "type": "test_fixture",
116+ "purpose": "Test basic PDF processing with FileParserPlugin"
117+ }
118+ EOF
119+ echo " ✓ small.json"
120+
121+ cat > medium.json << EOF
122+ {
123+ "verificationCode": "$MEDIUM_CODE ",
124+ "description": "Medium test PDF (813KB) with text and image padding",
125+ "size": "medium",
126+ "type": "test_fixture",
127+ "purpose": "Test PDF processing with moderate file size"
128+ }
129+ EOF
130+ echo " ✓ medium.json"
131+
132+ cat > large.json << EOF
133+ {
134+ "verificationCode": "$LARGE_CODE ",
135+ "description": "Large test PDF (3.4MB) for FileParserPlugin regression testing",
136+ "size": "large",
137+ "type": "test_fixture",
138+ "purpose": "Test large PDF handling and plugin activation",
139+ "regression": "Validates fix for FileParserPlugin large PDF issue"
140+ }
141+ EOF
142+ echo " ✓ large.json"
143+
144+ cat > xlarge.json << EOF
145+ {
146+ "verificationCode": "$XLARGE_CODE ",
147+ "description": "Extra-large test PDF (11MB) with extensive content",
148+ "size": "xlarge",
149+ "type": "test_fixture",
150+ "purpose": "Test maximum file size handling with FileParserPlugin"
151+ }
152+ EOF
153+ echo " ✓ xlarge.json"
154+
90155echo
91156echo " Generated files:"
92- ls -lh * .pdf
157+ ls -lh * .pdf * .json
93158echo
94159echo " Verification codes:"
95- echo " small.pdf -> SMALL-7X9Q2 "
96- echo " medium.pdf -> MEDIUM-K4P8R "
97- echo " large.pdf -> LARGE-M9N3T "
98- echo " xlarge.pdf -> XLARGE-W6H5V "
160+ echo " small.pdf -> $SMALL_CODE (small.json) "
161+ echo " medium.pdf -> $MEDIUM_CODE (medium.json) "
162+ echo " large.pdf -> $LARGE_CODE (large.json) "
163+ echo " xlarge.pdf -> $XLARGE_CODE (xlarge.json) "
99164echo
100165echo " Validating PDFs..."
101166for pdf in small.pdf medium.pdf large.pdf xlarge.pdf; do
@@ -105,3 +170,12 @@ for pdf in small.pdf medium.pdf large.pdf xlarge.pdf; do
105170 echo " ✗ $pdf has issues"
106171 fi
107172done
173+ echo
174+ echo " Validating JSON metadata..."
175+ for json in small.json medium.json large.json xlarge.json; do
176+ if python3 -m json.tool " $json " > /dev/null 2>&1 ; then
177+ echo " ✓ $json is valid"
178+ else
179+ echo " ✗ $json has syntax errors"
180+ fi
181+ done
0 commit comments