// apps/rowboat/app/actions/uploaded-images.actions.ts
"use server";
import { S3Client, PutObjectCommand, GetObjectCommand } from '@aws-sdk/client-s3';
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
import crypto from 'crypto';
import { authCheck } from '@/app/actions/auth.actions';
import { USE_AUTH } from '@/app/lib/feature_flags';
import { GoogleGenerativeAI } from '@google/generative-ai';
import { UsageTracker } from '@/app/lib/billing';
import { logUsage } from '@/app/actions/billing.actions';

/** Gemini model used to describe uploaded images. */
const DESCRIPTION_MODEL = 'gemini-2.5-flash';

/** Instruction sent to Gemini alongside the image bytes. */
const DESCRIPTION_PROMPT = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';

/** MIME types that get a meaningful file extension; any other image type falls back to `.bin`. */
const EXTENSION_BY_MIME_TYPE = new Map<string, string>([
    ['image/jpeg', '.jpg'],
    ['image/webp', '.webp'],
    ['image/png', '.png'],
]);

/**
 * Shape of the ids minted by `getUploadUrlForImage`: a `crypto.randomUUID()` value
 * followed by one of the extensions this module assigns.
 */
const UPLOADED_IMAGE_ID = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.(jpg|webp|png|bin)$/i;

/** Reads the uploads bucket name from the environment, throwing when it is not configured. */
function getUploadsBucket(): string {
    const bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
    if (!bucket) {
        throw new Error('S3 bucket not configured');
    }
    return bucket;
}

/** Builds an S3 client; explicit credentials are passed only when both env vars are set. */
function createS3Client(): S3Client {
    const accessKeyId = process.env.AWS_ACCESS_KEY_ID;
    const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
    return new S3Client({
        region: process.env.RAG_UPLOADS_S3_REGION || 'us-east-1',
        credentials: accessKeyId && secretAccessKey ? { accessKeyId, secretAccessKey } : undefined,
    });
}

/**
 * Maps an image id (with extension) to its S3 key. Objects are sharded into two
 * directory levels named after the last two characters of the id without its extension.
 */
function keyForImageId(id: string): string {
    const lastDot = id.lastIndexOf('.');
    const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id;
    const last2 = idWithoutExt.slice(-2).padStart(2, '0');
    return `uploaded_images/${last2.charAt(0)}/${last2.charAt(1)}/${id}`;
}

/**
 * Creates a presigned S3 PUT URL (valid for 600 seconds) for a new image upload.
 *
 * @param mimeType MIME type of the image; must start with `image/`.
 * @returns The generated id (including extension), the S3 key, the presigned upload URL,
 *          the app URL the image is served from, and the MIME type echoed back.
 * @throws Error when `mimeType` is missing or not an image type, or when the bucket is not configured.
 */
export async function getUploadUrlForImage(mimeType: string): Promise<{ id: string; key: string; uploadUrl: string; url: string; mimeType: string }> {
    // Enforce auth in server action context (supports guest mode when auth disabled)
    if (USE_AUTH) {
        await authCheck();
    }

    if (!mimeType || typeof mimeType !== 'string') {
        throw new Error('mimeType is required');
    }
    // The stored Content-Type is echoed back by the serving route, so refuse anything
    // that is not an image (e.g. text/html) before handing out a presigned URL.
    if (!mimeType.startsWith('image/')) {
        throw new Error('Only image uploads are supported');
    }

    const bucket = getUploadsBucket();
    const ext = EXTENSION_BY_MIME_TYPE.get(mimeType) ?? '.bin';
    const idWithExt = `${crypto.randomUUID()}${ext}`;
    const key = keyForImageId(idWithExt);

    const command = new PutObjectCommand({ Bucket: bucket, Key: key, ContentType: mimeType });
    const uploadUrl = await getSignedUrl(createS3Client(), command, { expiresIn: 600 });

    return { id: idWithExt, key, uploadUrl, url: `/api/uploaded-images/${idWithExt}`, mimeType };
}

/**
 * Downloads a previously uploaded image from S3 and asks Gemini for a Markdown description.
 *
 * The description is best-effort: when no Gemini API key is configured or the Gemini call
 * fails, `description` is `null`. Token usage, when available, is reported through `logUsage`.
 *
 * @param id Image id including extension, as returned by `getUploadUrlForImage`.
 * @returns The id that was passed in and the generated description (or `null`).
 * @throws Error when `id` is missing or malformed, the bucket is not configured,
 *         or the object cannot be read from S3.
 */
export async function describeUploadedImage(id: string): Promise<{ id: string; description: string | null }> {
    if (USE_AUTH) {
        await authCheck();
    }

    if (!id || typeof id !== 'string') {
        throw new Error('id is required');
    }
    // `id` is caller-supplied and becomes part of the S3 key; accept only ids in the
    // shape this module generates so it cannot address other objects in the bucket.
    if (!UPLOADED_IMAGE_ID.test(id)) {
        throw new Error('Invalid image id');
    }

    const bucket = getUploadsBucket();

    // Fetch object bytes from S3
    const resp = await createS3Client().send(new GetObjectCommand({ Bucket: bucket, Key: keyForImageId(id) }));
    if (!resp.Body) {
        throw new Error('Uploaded image has no content');
    }
    const contentType = resp.ContentType || 'application/octet-stream';
    const buf = Buffer.from(await resp.Body.transformToByteArray());

    let descriptionMarkdown: string | null = null;
    const usageTracker = new UsageTracker();
    try {
        const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
        if (apiKey) {
            const model = new GoogleGenerativeAI(apiKey).getGenerativeModel({ model: DESCRIPTION_MODEL });
            const result = await model.generateContent([
                { inlineData: { data: buf.toString('base64'), mimeType: contentType } },
                DESCRIPTION_PROMPT,
            ]);
            const response = result.response;
            descriptionMarkdown = response.text() || null;

            // Track usage; a tracking failure must not discard the description.
            try {
                usageTracker.track({
                    type: 'LLM_USAGE',
                    modelName: DESCRIPTION_MODEL,
                    inputTokens: response.usageMetadata?.promptTokenCount ?? 0,
                    outputTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
                    context: 'uploaded_images.describe',
                });
            } catch (e) {
                console.warn('Usage tracking failed', e);
            }
        }
    } catch (e) {
        console.warn('Gemini description failed', e);
    }

    // Log usage to billing; failures are reported but never fail the request.
    try {
        const items = usageTracker.flush();
        if (items.length > 0) {
            await logUsage({ items });
        }
    } catch (e) {
        console.warn('Billing usage logging failed', e);
    }

    return { id, description: descriptionMarkdown };
}
// apps/rowboat/app/api/tmp-images/upload/route.ts
import { NextRequest, NextResponse } from 'next/server';
import { requireAuth } from '@/app/lib/auth';
import { tempBinaryCache } from '@/src/application/services/temp-binary-cache';
import { GoogleGenerativeAI } from '@google/generative-ai';
import { UsageTracker, getCustomerForUserId, logUsage as libLogUsage } from '@/app/lib/billing';
import { USE_AUTH, USE_BILLING } from '@/app/lib/feature_flags';

/** Gemini model used to describe uploaded images. */
const DESCRIPTION_MODEL = 'gemini-2.5-flash';

/** Instruction sent to Gemini alongside the image bytes. */
const DESCRIPTION_PROMPT = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';

/** How long an uploaded image stays in the temp cache. */
const TEMP_IMAGE_TTL_SEC = 10 * 60; // 10 minutes

/**
 * POST /api/tmp-images/upload
 *
 * Accepts an image file (multipart/form-data, field name: "file"), stores it in the
 * in-memory temp cache and returns a temporary URL. When a Gemini API key is configured
 * the image is also described; the description is best-effort and `null` on failure.
 *
 * Responses: 401 when auth is enabled and fails, 400 for a non-multipart request,
 * a missing file or a non-image file, 500 for unexpected errors.
 */
export async function POST(request: NextRequest): Promise<NextResponse> {
    try {
        // Require authentication if enabled
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the user type is declared outside this file
        let currentUser: any = null;
        if (USE_AUTH) {
            try {
                currentUser = await requireAuth();
            } catch {
                return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
            }
        }

        const contentType = request.headers.get('content-type') || '';
        if (!contentType.includes('multipart/form-data')) {
            return NextResponse.json({ error: 'Expected multipart/form-data' }, { status: 400 });
        }

        const form = await request.formData();
        const file = form.get('file');
        // A form field can also be a plain string; only a file entry is acceptable here.
        if (!file || typeof file === 'string') {
            return NextResponse.json({ error: 'Missing file' }, { status: 400 });
        }

        const mime = file.type || 'application/octet-stream';
        // The cached bytes are served back under this MIME type, so accept images only.
        if (!mime.startsWith('image/')) {
            return NextResponse.json({ error: 'Only image uploads are supported' }, { status: 400 });
        }
        const buf = Buffer.from(await file.arrayBuffer());

        // Optionally describe image with Gemini
        let descriptionMarkdown: string | null = null;
        const usageTracker = new UsageTracker();
        try {
            const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
            if (apiKey) {
                const model = new GoogleGenerativeAI(apiKey).getGenerativeModel({ model: DESCRIPTION_MODEL });
                const result = await model.generateContent([
                    { inlineData: { data: buf.toString('base64'), mimeType: mime } },
                    DESCRIPTION_PROMPT,
                ]);
                const response = result.response;
                descriptionMarkdown = response.text() || null;

                // Track usage similar to rag-worker; a tracking failure must not discard the description.
                try {
                    usageTracker.track({
                        type: 'LLM_USAGE',
                        modelName: DESCRIPTION_MODEL,
                        inputTokens: response.usageMetadata?.promptTokenCount ?? 0,
                        outputTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
                        context: 'tmp_images.upload_with_description',
                    });
                } catch (e) {
                    console.warn('Usage tracking failed', e);
                }
            }
        } catch (e) {
            console.warn('Gemini description failed', e);
        }

        // Store in temp cache and return temp URL
        const id = tempBinaryCache.put(buf, mime, TEMP_IMAGE_TTL_SEC * 1000);
        const url = `/api/tmp-images/${id}`;

        // Log usage to billing similar to rag-worker; failures are reported but never fail the upload.
        try {
            if (USE_BILLING && currentUser) {
                const customer = await getCustomerForUserId(currentUser.id);
                if (customer) {
                    const items = usageTracker.flush();
                    if (items.length > 0) {
                        await libLogUsage(customer.id, { items });
                    }
                }
            }
        } catch (e) {
            console.warn('Billing usage logging failed', e);
        }

        return NextResponse.json({ url, storage: 'temp', id, mimeType: mime, expiresInSec: TEMP_IMAGE_TTL_SEC, description: descriptionMarkdown });
    } catch (e) {
        console.error('tmp image upload error', e);
        return NextResponse.json({ error: 'Upload failed' }, { status: 500 });
    }
}
// apps/rowboat/app/api/uploaded-images/[id]/route.ts
import { NextRequest, NextResponse } from 'next/server';
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
import { Readable } from 'stream';
import { requireAuth } from '@/app/lib/auth';

/** Expected id shape: a UUID followed by one of the extensions assigned at upload time. */
const UPLOADED_IMAGE_ID = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.(jpg|webp|png|bin)$/i;

/**
 * Serves uploaded images from S3 at path: /api/uploaded-images/{uuid}.{ext}
 * Reconstructs the S3 key using the same sharding logic as image upload.
 *
 * Responses: 401 when authentication fails, 400 for a missing or malformed id,
 * 500 when the bucket is not configured, 404 when the object cannot be read.
 */
export async function GET(request: NextRequest, props: { params: Promise<{ id: string }> }) {
    // Require authentication (handles guest mode internally when USE_AUTH is disabled).
    // Answer with 401 instead of letting the rejection surface as an unhandled 500.
    try {
        await requireAuth();
    } catch {
        return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
    }

    const { id } = await props.params;
    if (!id) {
        return NextResponse.json({ error: 'Missing id' }, { status: 400 });
    }
    // `id` comes from the URL and ends up in both the S3 key and the Content-Disposition
    // header, so accept only the shape produced at upload time.
    if (!UPLOADED_IMAGE_ID.test(id)) {
        return NextResponse.json({ error: 'Invalid id' }, { status: 400 });
    }

    const bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
    if (!bucket) {
        return NextResponse.json({ error: 'S3 bucket not configured' }, { status: 500 });
    }

    const accessKeyId = process.env.AWS_ACCESS_KEY_ID;
    const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
    const s3 = new S3Client({
        region: process.env.RAG_UPLOADS_S3_REGION || 'us-east-1',
        credentials: accessKeyId && secretAccessKey ? { accessKeyId, secretAccessKey } : undefined,
    });

    // Expect id to include extension (e.g., ".png")
    const lastDot = id.lastIndexOf('.');
    const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id;

    // Reconstruct directory sharding from last two characters of UUID (without extension)
    const last2 = idWithoutExt.slice(-2).padStart(2, '0');
    const key = `uploaded_images/${last2.charAt(0)}/${last2.charAt(1)}/${id}`;

    try {
        const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the body type varies by SDK version/runtime
        const body = resp.Body as any;
        if (!body) {
            return NextResponse.json({ error: 'Not found' }, { status: 404 });
        }
        const contentType = resp.ContentType || 'application/octet-stream';
        // Prefer the SDK's own conversion; fall back to Node's Readable.toWeb, then the raw body.
        const webStream = body.transformToWebStream
            ? body.transformToWebStream()
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            : (Readable as any)?.toWeb
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
                ? (Readable as any).toWeb(body)
                : body;
        return new NextResponse(webStream, {
            status: 200,
            headers: {
                'Content-Type': contentType,
                // The route is auth-gated, so the response must not be stored by shared caches.
                'Cache-Control': 'private, max-age=31536000, immutable',
                'Content-Disposition': `inline; filename="${id}"`,
            },
        });
    } catch (e) {
        console.error('S3 get error', e);
        return NextResponse.json({ error: 'Not found' }, { status: 404 });
    }
}
+ const last = messages[messages.length - 1]; + let mergedContent = (typeof last?.content === 'string' ? last.content : '') || ''; + if (messages.length >= 2) { + const prev = messages[messages.length - 2] as any; + const isImageDebug = prev && prev.role === 'assistant' && prev.responseType === 'internal' && prev.agentName === 'Image Description' && typeof prev.content === 'string'; + if (isImageDebug) { + // Expect prev.content to have: "Image Description\n\nURL: \n\n" + // Extract URL and markdown blocks for a clean append + const content = prev.content as string; + let url: string | undefined; + let markdown: string | undefined; + const urlMatch = content.match(/URL:\s*(\S+)/i); + if (urlMatch) url = urlMatch[1]; + // markdown is whatever comes after the blank line following URL + const parts = content.split(/\n\n/); + if (parts.length >= 3) { + markdown = parts.slice(2).join('\n\n').trim(); + } + const appendSections: string[] = []; + if (url) appendSections.push(`The user uploaded an image. 
URL: ${url}`); + if (markdown) appendSections.push(`Image description (markdown):\n\n${markdown}`); + if (appendSections.length > 0) { + mergedContent = [mergedContent, appendSections.join('\n\n')] + .filter(Boolean) + .join('\n\n'); + } + } + } + + const messagesToSend: z.infer[] = [{ + role: 'user', + content: mergedContent, + } as any]; + const response = await createCachedTurn({ conversationId: conversationId.current, - messages: messages.slice(-1), // only send the last message + messages: messagesToSend, // send merged content only }); if (ignore) { return; @@ -500,4 +550,4 @@ export function Chat({ /> ); -} \ No newline at end of file +} diff --git a/apps/rowboat/components/common/compose-box-playground.tsx b/apps/rowboat/components/common/compose-box-playground.tsx index 2c43a4554..2d32d60f4 100644 --- a/apps/rowboat/components/common/compose-box-playground.tsx +++ b/apps/rowboat/components/common/compose-box-playground.tsx @@ -3,7 +3,7 @@ import { Textarea } from '@/components/ui/textarea'; import { Button, Spinner } from "@heroui/react"; interface ComposeBoxPlaygroundProps { - handleUserMessage: (message: string) => void; + handleUserMessage: (message: string, imageDebug?: { url: string; description?: string | null }) => void; messages: any[]; loading: boolean; disabled?: boolean; @@ -22,9 +22,12 @@ export function ComposeBoxPlayground({ onCancel, }: ComposeBoxPlaygroundProps) { const [input, setInput] = useState(''); + const [uploading, setUploading] = useState(false); + const [pendingImage, setPendingImage] = useState<{ url?: string; previewSrc?: string; mimeType?: string; description?: string | null } | null>(null); const [isFocused, setIsFocused] = useState(false); const textareaRef = useRef(null); const previousMessagesLength = useRef(messages.length); + const uploadAbortRef = useRef(null); // Handle auto-focus when new messages arrive useEffect(() => { @@ -35,12 +38,27 @@ export function ComposeBoxPlayground({ }, [messages.length, shouldAutoFocus]); 
/**
 * Sends the composed message. Does nothing while sending is blocked (disabled, loading,
 * upload in flight, or an uploaded image still waiting for its description) or when
 * there is neither text nor an uploaded image to send.
 */
function handleInput() {
    // Mirror send-button disable rules to block Enter submits
    if (disabled || loading || uploading) return;
    if (pendingImage?.url && pendingImage.description === undefined) return;

    // Only include the user's typed text; omit image URL/markdown from user message
    const prompt = input.trim();
    // Build optional debug payload to render as internal-only message in debug view
    const imageDebug = pendingImage?.url
        ? { url: pendingImage.url, description: pendingImage.description ?? null }
        : undefined;
    // A preview whose upload never produced a URL is not sendable content, so a message
    // needs typed text or an uploaded image.
    if (!prompt && !imageDebug) {
        return;
    }

    setInput('');
    if (pendingImage?.previewSrc) {
        try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {}
    }
    setPendingImage(null);
    handleUserMessage(prompt, imageDebug);
}

// (handleInputKeyDown and the focus handler are only partially visible in this view and are unchanged.)

/**
 * Uploads a picked image and stores the result in `pendingImage`.
 *
 * Flow: show a local preview, upload to S3 through a presigned URL, then ask the server
 * for a description. If the S3 path fails, fall back to the temp in-memory upload route.
 * Picking another image supersedes the upload in flight: only the upload that still
 * owns `uploadAbortRef` may write state.
 */
async function handleImagePicked(file: File) {
    if (!file) return;

    // Cancel any in-flight request and take ownership before touching state
    uploadAbortRef.current?.abort();
    const controller = new AbortController();
    uploadAbortRef.current = controller;
    const isCurrent = () => uploadAbortRef.current === controller;
    const isAbortError = (e: unknown) => (e as { name?: unknown } | null)?.name === 'AbortError';

    try {
        // Release the preview of the image being replaced, then show the new one immediately
        if (pendingImage?.previewSrc) {
            try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {}
        }
        const previewSrc = URL.createObjectURL(file);
        setPendingImage({ previewSrc });
        setUploading(true);

        try {
            const { getUploadUrlForImage, describeUploadedImage } = await import('@/app/actions/uploaded-images.actions');

            // 1) Request a presigned S3 upload URL via server action
            const urlData = await getUploadUrlForImage(file.type);
            const uploadUrl: string | undefined = urlData?.uploadUrl;
            const imageId: string | undefined = urlData?.id; // includes extension
            const imageUrl: string | undefined = urlData?.url; // points to /api/uploaded-images/
            if (!uploadUrl || !imageId || !imageUrl) throw new Error('Invalid upload URL response');

            // 2) Upload the file directly to S3
            const putRes = await fetch(uploadUrl, {
                method: 'PUT',
                headers: { 'Content-Type': file.type },
                body: file,
                signal: controller.signal,
            });
            if (!putRes.ok) throw new Error(`Failed to upload image: ${putRes.status}`);

            // 3) Update local state with URL (description pending)
            if (isCurrent()) {
                setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description: undefined });
            }

            // 4) Ask server to generate description from S3 image
            let description: string | null = null;
            try {
                const descData = await describeUploadedImage(imageId);
                description = descData?.description ?? null;
            } catch {
                // If description fails, still allow sending
            }
            if (isCurrent()) {
                setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description });
            }
        } catch (err: unknown) {
            if (isAbortError(err)) throw err;
            // Fallback to temp in-memory upload for local/dev without S3
            const form = new FormData();
            form.append('file', file);
            const res = await fetch('/api/tmp-images/upload', {
                method: 'POST',
                body: form,
                signal: controller.signal,
            });
            if (!res.ok) throw new Error(`Upload failed: ${res.status}`);
            const data = await res.json();
            const url: string | undefined = data?.url;
            if (!url) throw new Error('No URL returned');
            if (isCurrent()) {
                setPendingImage({ url, previewSrc, mimeType: data?.mimeType || file.type, description: data?.description ?? null });
            }
        }
    } catch (e: unknown) {
        if (isAbortError(e)) {
            // Swallow aborts
            console.log('Image upload/description aborted');
        } else {
            console.error('Image upload failed', e);
            alert('Image upload failed. Please try again.');
        }
    } finally {
        if (isCurrent()) {
            // Still the active upload: clear uploading and release the ref
            setUploading(false);
            uploadAbortRef.current = null;
        } else if (uploadAbortRef.current === null) {
            // Dismissed earlier; ensure uploading is false
            setUploading(false);
        }
        // Otherwise a newer upload owns the ref and the uploading flag; leave both alone.
    }
}
{/* Keyboard shortcut hint */} @@ -68,6 +175,33 @@ export function ComposeBoxPlayground({ bg-white dark:bg-[#1e2023] flex items-end gap-2"> {/* Textarea */}
+ {pendingImage && ( +
+ Uploaded image preview + +
+ )}