From ad0ae0e0bd041be7d368811ccbfc3a698f0c8325 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 27 Oct 2025 10:46:09 +0000
Subject: [PATCH] Add PDF support for token counting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Created /api/pdf route to handle PDF token counting using Anthropic's document content blocks
- Updated tokenComponents.tsx to add PDF upload UI with file validation (32MB limit)
- Added PDF-to-base64 conversion and API integration
- Updated TokenMetrics component to display PDF token information
- Enforces Anthropic's 32MB PDF size limit on both client and server; the 100-page limit is not checked by this change
- /api/pdf answers malformed JSON and non-string "pdf" values with 400 instead of 500

Fixes #3

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 app/api/pdf/route.ts           | 110 ++++++++++++++++++++++++
 components/tokenComponents.tsx | 150 ++++++++++++++++++++++++++++++---
 2 files changed, 249 insertions(+), 11 deletions(-)
 create mode 100644 app/api/pdf/route.ts

diff --git a/app/api/pdf/route.ts b/app/api/pdf/route.ts
new file mode 100644
index 0000000..73dd951
--- /dev/null
+++ b/app/api/pdf/route.ts
@@ -0,0 +1,110 @@
+import Anthropic from '@anthropic-ai/sdk';
+
+const MAX_PDF_BYTES = 32 * 1024 * 1024; // 32 MiB
+const ALLOWED_MEDIA_TYPE = 'application/pdf';
+
+/** JSON body accepted by POST /api/pdf. */
+interface PDFPayload {
+  /** Base64-encoded PDF; a leading `data:...;base64,` prefix is stripped. */
+  pdf: string;
+  media_type: string;
+  text?: string;
+}
+
+/** Document content block sent to the token-counting call. */
+type DocumentBlockParam = {
+  type: 'document';
+  source: {
+    type: 'base64';
+    media_type: 'application/pdf';
+    data: string;
+  };
+};
+
+/**
+ * Counts the input tokens of a base64-encoded PDF through the Anthropic SDK.
+ *
+ * Responds 415 for a non-JSON content type, 400 for a malformed or incomplete
+ * body, 413 when the decoded PDF exceeds MAX_PDF_BYTES, and 500 when anything
+ * else fails (including the upstream call). On success the SDK's count result
+ * is returned as JSON.
+ */
+export async function POST(req: Request) {
+  try {
+    if (!req.headers.get('content-type')?.includes('application/json')) {
+      return Response.json(
+        { error: 'Unsupported content-type. Use application/json.' },
+        { status: 415 }
+      );
+    }
+
+    // Malformed JSON is a client error; report it as 400 rather than 500.
+    let parsed: unknown;
+    try {
+      parsed = await req.json();
+    } catch {
+      return Response.json({ error: 'Request body is not valid JSON.' }, { status: 400 });
+    }
+    // A JSON `null` body must not crash the destructuring below.
+    const body = (parsed ?? {}) as Partial<PDFPayload>;
+    const { pdf, media_type } = body;
+
+    // JSON can carry any type here; only a non-empty string can hold base64.
+    if (typeof pdf !== 'string' || pdf.length === 0) {
+      return Response.json(
+        { error: 'Missing "pdf" property (base-64 string).' },
+        { status: 400 }
+      );
+    }
+
+    if (!media_type || media_type !== ALLOWED_MEDIA_TYPE) {
+      return Response.json(
+        {
+          error: `Unsupported or missing media_type. Must be: ${ALLOWED_MEDIA_TYPE}`,
+        },
+        { status: 400 }
+      );
+    }
+
+    const cleanedBase64 = pdf.replace(/^data:[^;]+;base64,/, '');
+
+    // Size the payload from the string itself instead of decoding up to
+    // 32 MiB into a Buffer just to read its length. The estimate can run
+    // slightly high for input with stray whitespace, which errs toward 413.
+    if (Buffer.byteLength(cleanedBase64, 'base64') > MAX_PDF_BYTES) {
+      return Response.json(
+        { error: `PDF exceeds the ${MAX_PDF_BYTES / 1024 / 1024} MiB limit.` },
+        { status: 413 }
+      );
+    }
+
+    const content: DocumentBlockParam[] = [
+      {
+        type: 'document',
+        source: {
+          type: 'base64',
+          media_type: 'application/pdf',
+          data: cleanedBase64,
+        },
+      },
+    ];
+
+    const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
+
+    const count = await anthropic.beta.messages.countTokens({
+      betas: ['token-counting-2024-11-01'],
+      model: 'claude-3-5-sonnet-20241022',
+      messages: [
+        {
+          role: 'user',
+          content,
+        },
+      ],
+    });
+
+    return Response.json(count);
+  } catch (error) {
+    console.error('PDF token-counting error:', error);
+    return Response.json({ error: 'Failed to count tokens for PDF.' }, { status: 500 });
+  }
+}
diff --git a/components/tokenComponents.tsx b/components/tokenComponents.tsx
index ba529f0..2988dea 100644
--- a/components/tokenComponents.tsx
+++ b/components/tokenComponents.tsx
@@ -1,7 +1,7 @@
 import { useState, useEffect, useCallback } from 'react';
 import { Textarea } from "@/components/ui/textarea";
 import { Button } from "@/components/ui/button";
-import { X, Upload, Image as ImageIcon } from 'lucide-react';
+import { X, Upload, Image as ImageIcon, FileText } from 'lucide-react';
 
 // Debounce utility function
 const debounce = <T extends (...args: any[]) => void>(func: T, delay: number) => {
@@ -15,12 +15,14 @@ const debounce = <T extends (...args: any[]) => void>(func: T, delay: number) =>
 interface InputData {
   text: string;
   image: File | null;
+  pdf: File | null;
 }
 
 export const TokenizerInput = () => {
   const [text, setText] = useState('');
   const [image, setImage] = useState<File | null>(null);
   const [imagePreview, setImagePreview] = useState<string | null>(null);
+  const [pdf, setPdf] = useState<File | null>(null);
   const [stats, setStats] = useState<{ tokens: number | null; chars: number }>({ tokens: null, chars: 0 });
   const [error, setError] = useState<string | null>(null);
   const [isLoading, setIsLoading] = useState(false);
@@ -39,11 +41,25 @@ export const TokenizerInput = () => {
     });
   };
 
+  const convertPDFToBase64 = (file: File): Promise<string> => {
+    return new Promise((resolve, reject) => {
+      const reader = new FileReader();
+      reader.onload = () => {
+        const result = reader.result as string;
+        // Remove the data URL prefix (e.g., "data:application/pdf;base64,")
+        const base64 = result.split(',')[1];
+        resolve(base64);
+      };
+      reader.onerror = reject;
+      reader.readAsDataURL(file);
+    });
+  };
+
   const handleAnalyze = async (inputData: InputData) => {
-    const { text, image } = inputData;
-
-    // If both text and image are empty, reset stats
-    if (!text.trim() && !image) {
+    const { text, image, pdf } = inputData;
+
+    // If text, image, and pdf are all empty, reset stats
+    if (!text.trim() && !image && !pdf) {
       setStats({ tokens: null, chars: 0 });
       setError(null);
       setIsLoading(false);
@@ -121,6 +137,46 @@ export const TokenizerInput = () => {
         console.log('Image tokens calculated:', imageTokens);
       }
 
+      // Handle PDF input - call /api/pdf
+      if (pdf) {
+        console.log('Processing PDF:', pdf.name, pdf.type, pdf.size);
+        if (pdf.type !== 'application/pdf') {
+          throw new Error(`Unsupported PDF type: ${pdf.type}`);
+        }
+
+        const pdfBase64 = await convertPDFToBase64(pdf);
+        console.log('PDF converted to base64, length:', pdfBase64.length);
+
+        const requestPayload = {
+          pdf: pdfBase64, // base64 string without data URL prefix
+          media_type: 'application/pdf'
+        };
+        console.log('Making PDF API call with payload:', {
+          ...requestPayload,
+          pdf: `[base64 data ${pdfBase64.length} chars]` // Don't log full base64
+        });
+
+        const pdfResponse = await fetch('/api/pdf', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify(requestPayload),
+        });
+
+        console.log('PDF API response status:', pdfResponse.status);
+
+        if (!pdfResponse.ok) {
+          const errorText = await pdfResponse.text();
+          console.error('PDF API error:', errorText);
+          throw new Error(`PDF API request failed with status ${pdfResponse.status}: ${errorText}`);
+        }
+
+        const pdfData = await pdfResponse.json();
+        console.log('PDF API response data:', pdfData);
+        const pdfTokens = pdfData.input_tokens > 7 ? pdfData.input_tokens - 7 : 0;
+        totalTokens += pdfTokens;
+        console.log('PDF tokens calculated:', pdfTokens);
+      }
+
       console.log('Total tokens:', totalTokens);
       setStats({
         tokens: totalTokens,
@@ -139,8 +195,8 @@ export const TokenizerInput = () => {
   const debouncedHandleAnalyze = useCallback(debounce(handleAnalyze, 500), []);
 
   useEffect(() => {
-    debouncedHandleAnalyze({ text, image });
-  }, [text, image, debouncedHandleAnalyze]);
+    debouncedHandleAnalyze({ text, image, pdf });
+  }, [text, image, pdf, debouncedHandleAnalyze]);
 
   const handleImageSelect = (event: React.ChangeEvent<HTMLInputElement>) => {
     const file = event.target.files?.[0];
@@ -178,6 +234,34 @@ export const TokenizerInput = () => {
     }
   };
 
+  const handlePDFSelect = (event: React.ChangeEvent<HTMLInputElement>) => {
+    const file = event.target.files?.[0];
+    if (file) {
+      if (file.type !== 'application/pdf') {
+        setError('Please select a valid PDF file.');
+        return;
+      }
+
+      const maxSize = 32 * 1024 * 1024; // 32MB
+      if (file.size > maxSize) {
+        setError('PDF file is too large. Please select a file smaller than 32MB.');
+        return;
+      }
+
+      setPdf(file);
+      setError(null);
+    }
+  };
+
+  const removePDF = () => {
+    setPdf(null);
+
+    const fileInput = document.getElementById('pdf-input') as HTMLInputElement;
+    if (fileInput) {
+      fileInput.value = '';
+    }
+  };
+
   return (
     <>
@@ -244,6 +328,47 @@ export const TokenizerInput = () => {
)} + + {/* PDF Input */} +
+ +
+
+ + +
+ + {pdf && ( +
+ + {pdf.name} + +
+ )} +
+
{error &&

{error}

} @@ -252,7 +377,7 @@ export const TokenizerInput = () => {

Analyzing content...

)} - + ); }; @@ -261,15 +386,18 @@ interface TokenMetricsProps { tokens: number; chars: number; hasImage: boolean; + hasPdf: boolean; } -export const TokenMetrics = ({ tokens, chars, hasImage }: TokenMetricsProps) => ( +export const TokenMetrics = ({ tokens, chars, hasImage, hasPdf }: TokenMetricsProps) => (

Tokens

{tokens}

- {hasImage && ( -

includes image tokens

+ {(hasImage || hasPdf) && ( +

+ includes {hasImage && hasPdf ? 'image and PDF' : hasImage ? 'image' : 'PDF'} tokens +

)}