diff --git a/apps/backend/app.ts b/apps/backend/app.ts index 4ac40de..c97b5ee 100644 --- a/apps/backend/app.ts +++ b/apps/backend/app.ts @@ -7,6 +7,7 @@ import { dj_route } from './routes/djs.route.js'; import { flowsheet_route } from './routes/flowsheet.route.js'; import { library_route } from './routes/library.route.js'; +import { scanner_route } from './routes/scanner.route.js'; import { schedule_route } from './routes/schedule.route.js'; import { events_route } from './routes/events.route.js'; import { request_line_route } from './routes/requestLine.route.js'; @@ -36,6 +37,9 @@ const swaggerDoc = parse_yaml(swaggerContent); app.use('/api-docs', swaggerUi.serve, swaggerUi.setup(swaggerDoc)); // Business logic routes +// Scanner route must be registered before the general library route +// because /library/scan is a more specific prefix than /library +app.use('/library/scan', scanner_route); app.use('/library', library_route); app.use('/flowsheet', flowsheet_route); diff --git a/apps/backend/controllers/scanner.controller.ts b/apps/backend/controllers/scanner.controller.ts new file mode 100644 index 0000000..126c43c --- /dev/null +++ b/apps/backend/controllers/scanner.controller.ts @@ -0,0 +1,98 @@ +/** + * Scanner controller for vinyl record image scanning and UPC lookup. + */ + +import { RequestHandler } from 'express'; +import { processImages } from '../services/scanner/processor.js'; +import { ScanContext } from '../services/scanner/types.js'; +import { DiscogsService } from '../services/discogs/discogs.service.js'; + +/** + * POST /library/scan + * + * Accepts multipart form data with vinyl record images and optional context. + * Uses Gemini to extract metadata and attempts catalog matching. 
+ *
+ * Form fields:
+ * - images: up to 5 JPEG files (via multer)
+ * - photo_types: JSON string array or comma-separated list of photo type labels
+ * - catalog_item_id: optional known catalog item ID
+ * - sticker_text: optional text from library sticker
+ * - detected_upc: optional UPC from barcode scanner
+ * - artist_name: optional known artist name
+ * - album_title: optional known album title
+ *
+ * Responds 400 when no images are uploaded or when catalog_item_id is present
+ * but not numeric; otherwise responds 200 with the result of processImages.
+ * Any thrown error is logged and forwarded to the Express error handler.
+ */
+export const scanImages: RequestHandler = async (req, res, next) => {
+  try {
+    const files = req.files as Express.Multer.File[] | undefined;
+    if (!files || files.length === 0) {
+      res.status(400).json({ status: 400, message: 'No images provided' });
+      return;
+    }
+
+    // Form fields are untyped at runtime, so narrow each one before use.
+    const body: Record<string, unknown> = req.body ?? {};
+    const isString = (value: unknown): value is string => typeof value === 'string';
+    const nonEmptyString = (value: unknown): string | undefined =>
+      isString(value) && value !== '' ? value : undefined;
+
+    // photo_types may be a JSON array, a comma-separated string, or an array value.
+    // JSON that parses to something other than an array (e.g. "5") falls back to
+    // comma splitting so photoTypes is always a real string[].
+    let photoTypes: string[] = [];
+    const rawPhotoTypes = body.photo_types;
+    if (Array.isArray(rawPhotoTypes)) {
+      photoTypes = rawPhotoTypes.filter(isString);
+    } else if (isString(rawPhotoTypes) && rawPhotoTypes !== '') {
+      let parsed: unknown;
+      try {
+        parsed = JSON.parse(rawPhotoTypes);
+      } catch {
+        // Not JSON: handled below as a comma-separated list
+        parsed = undefined;
+      }
+      photoTypes = Array.isArray(parsed)
+        ? parsed.filter(isString)
+        : rawPhotoTypes.split(',').map((s) => s.trim());
+    }
+
+    // Build scan context from optional form fields
+    const context: ScanContext = {};
+
+    // Reject a non-numeric id up front instead of passing NaN down the pipeline
+    // (and before paying for a Gemini call).
+    const rawCatalogItemId = nonEmptyString(body.catalog_item_id);
+    if (rawCatalogItemId !== undefined) {
+      const catalogItemId = Number.parseInt(rawCatalogItemId, 10);
+      if (Number.isNaN(catalogItemId)) {
+        res.status(400).json({ status: 400, message: 'Invalid parameter: catalog_item_id' });
+        return;
+      }
+      context.catalogItemId = catalogItemId;
+    }
+
+    const stickerText = nonEmptyString(body.sticker_text);
+    if (stickerText !== undefined) context.stickerText = stickerText;
+
+    const detectedUPC = nonEmptyString(body.detected_upc);
+    if (detectedUPC !== undefined) context.detectedUPC = detectedUPC;
+
+    const artistName = nonEmptyString(body.artist_name);
+    if (artistName !== undefined) context.artistName = artistName;
+
+    const albumTitle = nonEmptyString(body.album_title);
+    if (albumTitle !== undefined) context.albumTitle = albumTitle;
+
+    const images = files.map((file) => file.buffer);
+    const result = await processImages(images, photoTypes, context);
+
+    res.status(200).json(result);
+  } catch (error) {
+    console.error('Error scanning images:', error);
+    next(error);
+  }
+};
+
+/**
+ * POST /library/scan/upc-lookup
+ *
+ * Looks up a UPC barcode on Discogs to find release information.
+ *
+ * Body: { upc: string }
+ *
+ * Responds 400 when upc is missing, not a string, or blank; otherwise responds
+ * 200 with whatever DiscogsService.searchByBarcode resolves to. Lookup errors
+ * are logged and forwarded to the Express error handler.
+ */
+export const upcLookup: RequestHandler = async (req, res, next) => {
+  // req.body can be undefined when no body parser handled the request, so read
+  // the field defensively instead of destructuring (which would throw).
+  const rawUpc: unknown = req.body?.upc;
+  // Trim so a whitespace-only value is rejected rather than sent to Discogs.
+  const upc = typeof rawUpc === 'string' ? rawUpc.trim() : '';
+  if (upc === '') {
+    res.status(400).json({ status: 400, message: 'Missing or invalid parameter: upc' });
+    return;
+  }
+
+  try {
+    const results = await DiscogsService.searchByBarcode(upc);
+    res.status(200).json(results);
+  } catch (error) {
+    console.error('Error looking up UPC:', error);
+    next(error);
+  }
+};
diff --git a/apps/backend/package.json b/apps/backend/package.json
index 67e2d14..7041728 100644
--- a/apps/backend/package.json
+++ b/apps/backend/package.json
@@ -15,6 +15,8 @@
   "author": "AyBruno",
   "license": "MIT",
   "dependencies": {
+    "@google/generative-ai": "^0.24.1",
+    "@types/multer": "^2.0.0",
     "@wxyc/authentication": "*",
     "@wxyc/database": "*",
     "async-mutex": "^0.5.0",
@@ -26,6 +28,7 @@
     "groq-sdk": "^0.5.0",
     "jose": "^6.1.3",
     "lru-cache": "^10.2.0",
+    "multer": "^2.1.0",
     "node-fetch": "^3.3.2",
     "node-ssh": "^13.2.1",
     "postgres": "^3.4.4",
diff --git a/apps/backend/routes/scanner.route.ts b/apps/backend/routes/scanner.route.ts
new file mode 100644
index 0000000..154e099
--- /dev/null
+++ b/apps/backend/routes/scanner.route.ts
@@ -0,0 +1,24 @@
+/**
+ * Scanner routes for vinyl record image scanning and UPC lookup.
+ */
+
+import { requirePermissions } from '@wxyc/authentication';
+import { Router } from 'express';
+import multer from 'multer';
+import * as scannerController from '../controllers/scanner.controller.js';
+
+export const scanner_route = Router();
+
+const upload = multer({
+  storage: multer.memoryStorage(),
+  limits: { fileSize: 10 * 1024 * 1024 },
+});
+
+scanner_route.post(
+  '/',
+  requirePermissions({ catalog: ['write'] }),
+  upload.array('images', 5),
+  scannerController.scanImages
+);
+
+scanner_route.post('/upc-lookup', requirePermissions({ catalog: ['read'] }), scannerController.upcLookup);
diff --git a/apps/backend/services/discogs/discogs.service.ts b/apps/backend/services/discogs/discogs.service.ts
index 72a33dc..d49fcb4 100644
--- a/apps/backend/services/discogs/discogs.service.ts
+++ b/apps/backend/services/discogs/discogs.service.ts
@@ -357,12 +357,20 @@ class DiscogsServiceClass {
     }
   }
 
+  /**
+   * Search Discogs for releases matching a UPC/EAN barcode.
+   * Resolves to whatever search() yields for a barcode-only release query.
+   */
+  async searchByBarcode(barcode: string): Promise<Awaited<ReturnType<DiscogsServiceClass['search']>>> {
+    return this.search({ barcode, type: 'release' });
+  }
+
   /**
    * Build search params using Discogs-specific fields.
    */
   private buildSearchParams(request: DiscogsSearchRequest, limit: number): Record {
     const params: Record = {
-      type: 'release',
+      type: request.type || 'release',
       per_page: limit,
     };
 
@@ -374,6 +382,9 @@ class DiscogsServiceClass {
     } else if (request.track) {
       params.release_title = request.track;
     }
+    if (request.barcode) {
+      params.barcode = request.barcode;
+    }
 
     return params;
   }
diff --git a/apps/backend/services/requestLine/types.ts b/apps/backend/services/requestLine/types.ts
index f0b219d..a802cc5 100644
--- a/apps/backend/services/requestLine/types.ts
+++ b/apps/backend/services/requestLine/types.ts
@@ -271,6 +271,8 @@ export interface DiscogsSearchRequest {
   artist?: string;
   album?: string;
   track?: string;
+  barcode?: string;
+  type?: string;
 }
 
 /**
diff --git a/apps/backend/services/scanner/gemini.service.ts b/apps/backend/services/scanner/gemini.service.ts
new file mode 100644
index 0000000..0ffcf65
--- /dev/null
+++ b/apps/backend/services/scanner/gemini.service.ts
@@ -0,0 +1,138 @@
+/**
+ * Gemini AI Service for vinyl record image extraction.
+ *
+ * Uses Google's Gemini Flash model to analyze photos of vinyl records
+ * and extract metadata (label, catalog number, UPC, DJ reviews).
+ *
+ * Follows the singleton pattern from parser.service.ts.
+ */
+
+import { GoogleGenerativeAI } from '@google/generative-ai';
+import { ScanContext, ScanExtraction, ExtractionField } from './types.js';
+import { SCANNER_SYSTEM_PROMPT, buildUserPrompt } from './prompts.js';
+
+/**
+ * Gemini client singleton.
+ *
+ * Stays null until getGeminiClient() first succeeds, and is set back to null
+ * by resetGeminiClient().
+ */
+let _geminiClient: GoogleGenerativeAI | null = null;
+
+/**
+ * Get or create the Gemini client.
+ *
+ * The client is built on the first call from the GEMINI_API_KEY environment
+ * variable and the same instance is returned on later calls.
+ *
+ * @returns The cached GoogleGenerativeAI client
+ * @throws Error if GEMINI_API_KEY is unset or empty
+ */
+function getGeminiClient(): GoogleGenerativeAI {
+  if (!_geminiClient) {
+    // The key is read on first use rather than at import time.
+    const apiKey = process.env.GEMINI_API_KEY;
+    if (!apiKey) {
+      throw new Error('GEMINI_API_KEY is not configured');
+    }
+    _geminiClient = new GoogleGenerativeAI(apiKey);
+  }
+  return _geminiClient;
+}
+
+/**
+ * Reset the Gemini client (useful for testing).
+ */
+export function resetGeminiClient(): void {
+  _geminiClient = null;
+}
+
+/**
+ * Raw response shape from Gemini extraction.
+ *
+ * Mirrors the JSON structure requested in SCANNER_SYSTEM_PROMPT. Every field is
+ * optional, and the declared field types are not trusted: each field is
+ * re-checked by parseField before it is used.
+ */
+interface RawExtractionResponse {
+  label_name?: { value: string; confidence: number };
+  catalog_number?: { value: string; confidence: number };
+  review_text?: { value: string; confidence: number };
+  upc?: { value: string; confidence: number };
+}
+
+/**
+ * Parse a raw field into an ExtractionField, validating structure.
+ *
+ * @param raw - One field of the model response, possibly absent or malformed
+ * @returns The field with its confidence clamped to [0, 1], or undefined when
+ *   the field is missing or does not have a string value and numeric confidence
+ */
+function parseField(raw: { value: string; confidence: number } | undefined): ExtractionField | undefined {
+  if (!raw || typeof raw.value !== 'string' || typeof raw.confidence !== 'number') {
+    return undefined;
+  }
+  return {
+    value: raw.value,
+    confidence: Math.max(0, Math.min(1, raw.confidence)),
+  };
+}
+
+/**
+ * Extract metadata from vinyl record images using Gemini Flash.
+ *
+ * Sends the system prompt, every image (as inline base64 JPEG data) and the
+ * user prompt in a single user turn, asks for a JSON reply, and keeps only the
+ * fields that pass parseField.
+ *
+ * @param images - JPEG image buffers to analyze
+ * @param photoTypes - Descriptive labels for each image (e.g., "front_cover", "center_label")
+ * @param context - Optional context about the known album
+ * @returns Extracted metadata fields with confidence scores
+ * @throws Error if GEMINI_API_KEY is not configured, if the Gemini call fails,
+ *   or if the reply is empty, is not valid JSON, or is not a JSON object
+ */
+export async function extractFromImages(
+  images: Buffer[],
+  photoTypes: string[],
+  context: ScanContext
+): Promise<ScanExtraction> {
+  const client = getGeminiClient();
+  const model = client.getGenerativeModel({ model: 'gemini-2.0-flash' });
+
+  console.log(`[Scanner] Extracting metadata from ${images.length} image(s)`);
+
+  const userPrompt = buildUserPrompt(photoTypes, context);
+
+  // Build multimodal content parts: images as inline base64 + text prompt
+  const imageParts = images.map((buffer) => ({
+    inlineData: {
+      mimeType: 'image/jpeg',
+      data: buffer.toString('base64'),
+    },
+  }));
+
+  try {
+    const result = await model.generateContent({
+      contents: [
+        {
+          role: 'user',
+          parts: [{ text: SCANNER_SYSTEM_PROMPT }, ...imageParts, { text: userPrompt }],
+        },
+      ],
+      generationConfig: {
+        responseMimeType: 'application/json',
+        temperature: 0.1,
+      },
+    });
+
+    const content = result.response.text();
+    if (!content) {
+      throw new Error('Empty response from Gemini');
+    }
+
+    // JSON.parse accepts any JSON value (null, numbers, arrays, ...). Only a
+    // plain object can carry the expected fields, so reject everything else
+    // here instead of failing later with a TypeError on property access.
+    const decoded: unknown = JSON.parse(content);
+    if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
+      throw new Error('Invalid JSON response from Gemini: expected a JSON object');
+    }
+    // Checked to be an object above; individual fields are validated by parseField.
+    const parsed = decoded as RawExtractionResponse;
+    console.log(`[Scanner] Raw extraction response:`, JSON.stringify(parsed));
+
+    const extraction: ScanExtraction = {};
+
+    const labelName = parseField(parsed.label_name);
+    if (labelName) extraction.labelName = labelName;
+
+    const catalogNumber = parseField(parsed.catalog_number);
+    if (catalogNumber) extraction.catalogNumber = catalogNumber;
+
+    const reviewText = parseField(parsed.review_text);
+    if (reviewText) extraction.reviewText = reviewText;
+
+    const upc = parseField(parsed.upc);
+    if (upc) extraction.upc = upc;
+
+    return extraction;
+  } catch (error) {
+    // JSON.parse reports malformed text as SyntaxError; rewrap it with context.
+    if (error instanceof SyntaxError) {
+      console.error(`[Scanner] Failed to parse JSON response:`, error);
+      throw new Error(`Invalid JSON response from Gemini: ${error.message}`);
+    }
+    console.error(`[Scanner] Error extracting from images:`, error);
+    throw error;
+  }
+}
diff --git a/apps/backend/services/scanner/processor.ts b/apps/backend/services/scanner/processor.ts
new file mode 100644
index 0000000..37efc3d
--- /dev/null
+++ b/apps/backend/services/scanner/processor.ts
@@ -0,0 +1,72 @@
+/**
+ * Scanner image processor.
+ *
+ * Orchestrates Gemini extraction and catalog matching for scanned
+ * vinyl record images.
+ */
+
+import { ScanContext, ScanExtraction, ScanResult } from './types.js';
+import { extractFromImages } from './gemini.service.js';
+import * as libraryService from '../library.service.js';
+
+/**
+ * Process scanned images through the extraction and matching pipeline.
+ *
+ * 1. Sends images to Gemini for metadata extraction
+ * 2. If a catalogItemId is provided in context, uses it directly as the match
+ * 3. 
Otherwise, attempts to match against the library catalog
+ *    using the artist name and album title supplied in the context
+ *
+ * @param images - JPEG image buffers to analyze
+ * @param photoTypes - Descriptive labels for each image
+ * @param context - Optional context about the known album
+ * @returns Extraction results and optional matched album ID
+ * @throws Whatever extractFromImages throws; catalog-matching failures are
+ *   swallowed and reported as "no match"
+ */
+export async function processImages(images: Buffer[], photoTypes: string[], context: ScanContext): Promise<ScanResult> {
+  const extraction: ScanExtraction = await extractFromImages(images, photoTypes, context);
+
+  // If context already includes a known catalog item, use it directly
+  if (context.catalogItemId) {
+    return {
+      extraction,
+      matchedAlbumId: context.catalogItemId,
+    };
+  }
+
+  // Attempt catalog matching using available metadata
+  const matchedAlbumId = await tryMatchCatalog(extraction, context);
+
+  return {
+    extraction,
+    matchedAlbumId,
+  };
+}
+
+/**
+ * Attempt to match the scan against the library catalog.
+ *
+ * Runs a fuzzy library search with the artist name and album title supplied
+ * in `context` and returns the id of the top hit. The extraction is accepted
+ * for signature stability but not consulted: it carries label, catalog
+ * number, review and UPC fields only, and a record label name is not an
+ * artist name, so using it as one can produce a confident but wrong match.
+ *
+ * @param _extraction - Gemini extraction results (currently unused)
+ * @param context - Client-supplied context; artistName/albumTitle drive the search
+ * @returns The id of the best match, or undefined when there is nothing to
+ *   search with, nothing matches, the hit has no numeric id, or the search fails
+ */
+async function tryMatchCatalog(_extraction: ScanExtraction, context: ScanContext): Promise<number | undefined> {
+  // Normalize an empty artist string to undefined, as the original `||` chain did.
+  const artistName = context.artistName || undefined;
+  const albumTitle = context.albumTitle;
+
+  if (!artistName && !albumTitle) {
+    return undefined;
+  }
+
+  try {
+    const results: unknown = await libraryService.fuzzySearchLibrary(artistName, albumTitle, 1);
+    if (!Array.isArray(results) || results.length === 0) {
+      return undefined;
+    }
+
+    // Narrow instead of asserting: only a numeric id is reported as a match.
+    const topResult: unknown = results[0];
+    if (typeof topResult === 'object' && topResult !== null && 'id' in topResult && typeof topResult.id === 'number') {
+      return topResult.id;
+    }
+
+    return undefined;
+  } catch (error) {
+    // Best effort: a failed lookup must not fail the whole scan.
+    console.error('[Scanner] Catalog matching failed:', error);
+    return undefined;
+  }
+}
diff --git a/apps/backend/services/scanner/prompts.ts b/apps/backend/services/scanner/prompts.ts
new file mode 100644
index 0000000..7cf8872
--- /dev/null
+++ b/apps/backend/services/scanner/prompts.ts
@@ -0,0 +1,76 @@
+/**
+ * Prompts for the Gemini-powered vinyl record scanner.
+ *
+ * Instructs the model to extract metadata from photos of physical
+ * vinyl records, including label text, catalog numbers, UPC barcodes,
+ * and handwritten DJ review notes.
+ */
+
+import { ScanContext } from './types.js';
+
+/**
+ * System prompt for Gemini image extraction.
+ */
+export const SCANNER_SYSTEM_PROMPT = `You are a metadata extraction system for a college radio station's vinyl record library.
+
+Your task is to examine photos of vinyl records and extract the following fields:
+
+1. **label_name**: The record label printed on the center label or sleeve (e.g., "Sub Pop", "Merge Records", "4AD").
+2. **catalog_number**: The catalog/release number assigned by the label (e.g., "SP 1234", "MRG-567"). This is NOT the library code.
+3. **review_text**: Any handwritten DJ notes or reviews found on the record, sleeve, or sticker. These are typically brief opinions about the music written by station DJs (e.g., "Great opener, side B is stronger", "Play track 3!").
+4. **upc**: The UPC/EAN barcode number, if visible (a 12- or 13-digit number).
+ +For each field you extract, provide a confidence score between 0 and 1: +- 1.0: Text is clearly legible and unambiguous +- 0.7-0.9: Mostly legible with minor uncertainty +- 0.4-0.6: Partially legible or inferred from context +- 0.1-0.3: Very uncertain, mostly guessing +- Omit the field entirely if nothing is detected + +Important notes: +- DJ reviews are often handwritten in marker or pen directly on the record sleeve or on stickers attached to the sleeve. They may be informal, abbreviated, or hard to read. +- Catalog numbers appear on center labels, spines, and back covers. Do not confuse them with the station's own library classification codes. +- If multiple labels or catalog numbers are visible (e.g., front and back), prefer the one on the center label. +- UPC barcodes are typically on the back cover or shrink wrap. + +Respond with valid JSON only, no markdown formatting. Use this exact structure: +{ + "label_name": { "value": "string", "confidence": number }, + "catalog_number": { "value": "string", "confidence": number }, + "review_text": { "value": "string", "confidence": number }, + "upc": { "value": "string", "confidence": number } +} + +Omit any field that is not detected in the images.`; + +/** + * Build the user prompt with optional context about the album. 
+ *
+ * The prompt is a single line of space-separated sentences: how many photos
+ * are attached, their type labels (if any), known artist/album, sticker text,
+ * the scanned UPC, and a closing instruction.
+ *
+ * @param photoTypes - Descriptive labels for the images; may be empty
+ * @param context - Optional context about the known album
+ * @param imageCount - Number of images actually attached. Defaults to
+ *   photoTypes.length; when the resulting count is 0 the number is left out
+ *   rather than telling the model that 0 photos were sent.
+ * @returns The assembled user prompt
+ */
+export function buildUserPrompt(
+  photoTypes: string[],
+  context: ScanContext,
+  imageCount: number = photoTypes.length
+): string {
+  const parts: string[] = [];
+
+  // photo_types is optional upstream, so the label count can be 0 even though
+  // images are attached; never state a count we do not actually know.
+  parts.push(
+    imageCount > 0
+      ? `I am sending ${imageCount} photo(s) of a vinyl record.`
+      : 'I am sending photo(s) of a vinyl record.'
+  );
+
+  if (photoTypes.length > 0) {
+    parts.push(`Photo types: ${photoTypes.join(', ')}.`);
+  }
+
+  if (context.artistName || context.albumTitle) {
+    const contextParts: string[] = [];
+    if (context.artistName) contextParts.push(`Artist: ${context.artistName}`);
+    if (context.albumTitle) contextParts.push(`Album: ${context.albumTitle}`);
+    parts.push(`Known catalog information: ${contextParts.join(', ')}.`);
+  }
+
+  if (context.stickerText) {
+    parts.push(`Text detected on library sticker: "${context.stickerText}".`);
+  }
+
+  if (context.detectedUPC) {
+    parts.push(`UPC detected by barcode scanner: ${context.detectedUPC}.`);
+  }
+
+  parts.push('Please extract all visible metadata from these images.');
+
+  return parts.join(' ');
+}
diff --git a/apps/backend/services/scanner/types.ts b/apps/backend/services/scanner/types.ts
new file mode 100644
index 0000000..a1d1074
--- /dev/null
+++ b/apps/backend/services/scanner/types.ts
@@ -0,0 +1,45 @@
+/**
+ * Type definitions for the vinyl record scanner service.
+ *
+ * Used by the Gemini-powered image extraction pipeline to process
+ * photos of vinyl records and extract catalog metadata.
+ */
+
+/**
+ * Context provided by the client to assist with extraction.
+ * May include known catalog information or text detected on-device.
+ */
+export interface ScanContext {
+  catalogItemId?: number;
+  stickerText?: string;
+  detectedUPC?: string;
+  artistName?: string;
+  albumTitle?: string;
+}
+
+/**
+ * A single extracted field with its confidence score.
+ */
+export interface ExtractionField {
+  value: string;
+  confidence: number;
+}
+
+/**
+ * Structured extraction results from Gemini image analysis.
+ */ +export interface ScanExtraction { + labelName?: ExtractionField; + catalogNumber?: ExtractionField; + reviewText?: ExtractionField; + upc?: ExtractionField; +} + +/** + * Final result from the scan pipeline, including extraction + * and optional catalog match. + */ +export interface ScanResult { + extraction: ScanExtraction; + matchedAlbumId?: number; +} diff --git a/package-lock.json b/package-lock.json index 9ef6de8..d27b915 100644 --- a/package-lock.json +++ b/package-lock.json @@ -71,6 +71,8 @@ "version": "1.0.0", "license": "MIT", "dependencies": { + "@google/generative-ai": "^0.24.1", + "@types/multer": "^2.0.0", "@wxyc/authentication": "*", "@wxyc/database": "*", "async-mutex": "^0.5.0", @@ -82,6 +84,7 @@ "groq-sdk": "^0.5.0", "jose": "^6.1.3", "lru-cache": "^10.2.0", + "multer": "^2.1.0", "node-fetch": "^3.3.2", "node-ssh": "^13.2.1", "postgres": "^3.4.4", @@ -2435,6 +2438,15 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, + "node_modules/@google/generative-ai": { + "version": "0.24.1", + "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz", + "integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@hapi/address": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/@hapi/address/-/address-5.1.1.tgz", @@ -4106,7 +4118,6 @@ "version": "1.19.6", "resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.6.tgz", "integrity": "sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==", - "dev": true, "license": "MIT", "dependencies": { "@types/connect": "*", @@ -4117,7 +4128,6 @@ "version": "3.4.38", "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.38.tgz", "integrity": "sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==", - "dev": true, "license": 
"MIT", "dependencies": { "@types/node": "*" @@ -4144,7 +4154,6 @@ "version": "5.0.6", "resolved": "https://registry.npmjs.org/@types/express/-/express-5.0.6.tgz", "integrity": "sha512-sKYVuV7Sv9fbPIt/442koC7+IIwK5olP1KWeD88e/idgoJqDm3JV/YUiPwkoKK92ylff2MGxSz1CSjsXelx0YA==", - "dev": true, "license": "MIT", "dependencies": { "@types/body-parser": "*", @@ -4156,7 +4165,6 @@ "version": "5.1.0", "resolved": "https://registry.npmjs.org/@types/express-serve-static-core/-/express-serve-static-core-5.1.0.tgz", "integrity": "sha512-jnHMsrd0Mwa9Cf4IdOzbz543y4XJepXrbia2T4b6+spXC2We3t1y6K44D3mR8XMFSXMCf3/l7rCgddfx7UNVBA==", - "dev": true, "license": "MIT", "dependencies": { "@types/node": "*", @@ -4169,7 +4177,6 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/@types/http-errors/-/http-errors-2.0.5.tgz", "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==", - "dev": true, "license": "MIT" }, "node_modules/@types/istanbul-lib-coverage": { @@ -4413,6 +4420,15 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/multer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@types/multer/-/multer-2.0.0.tgz", + "integrity": "sha512-C3Z9v9Evij2yST3RSBktxP9STm6OdMc5uR1xF1SGr98uv8dUlAL2hqwrZ3GVB3uyMyiegnscEK6PGtYvNrjTjw==", + "license": "MIT", + "dependencies": { + "@types/express": "*" + } + }, "node_modules/@types/node": { "version": "24.10.4", "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.4.tgz", @@ -4448,21 +4464,18 @@ "version": "6.14.0", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.14.0.tgz", "integrity": "sha512-eOunJqu0K1923aExK6y8p6fsihYEn/BYuQ4g0CxAAgFc4b/ZLN4CrsRZ55srTdqoiLzU2B2evC+apEIxprEzkQ==", - "dev": true, "license": "MIT" }, "node_modules/@types/range-parser": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/@types/range-parser/-/range-parser-1.2.7.tgz", "integrity": 
"sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==", - "dev": true, "license": "MIT" }, "node_modules/@types/send": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/@types/send/-/send-1.2.1.tgz", "integrity": "sha512-arsCikDvlU99zl1g69TcAB3mzZPpxgw0UQnaHeC1Nwb015xp8bknZv5rIfri9xTOcMuaVgvabfIRA7PSZVuZIQ==", - "dev": true, "license": "MIT", "dependencies": { "@types/node": "*" @@ -4472,7 +4485,6 @@ "version": "2.2.0", "resolved": "https://registry.npmjs.org/@types/serve-static/-/serve-static-2.2.0.tgz", "integrity": "sha512-8mam4H1NHLtu7nmtalF7eyBH14QyOASmcxHhSfEoRyr0nP/YdoesEtU+uSRvMe96TW/HPTtkoKqQLl53N7UXMQ==", - "dev": true, "license": "MIT", "dependencies": { "@types/http-errors": "*", @@ -5270,6 +5282,12 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/append-field": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", + "integrity": "sha512-klpgFSWLW1ZEs8svjfb7g4qWY0YS5imI82dTg+QahUvJ8YqAY0P10Uk8tTyh9ZGuYEZEMaeJYCF5BFuX552hsw==", + "license": "MIT" + }, "node_modules/arg": { "version": "4.1.3", "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", @@ -5730,7 +5748,6 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", - "devOptional": true, "license": "MIT" }, "node_modules/buildcheck": { @@ -5758,6 +5775,17 @@ "esbuild": ">=0.18" } }, + "node_modules/busboy": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", + "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", + "dependencies": { + "streamsearch": "^1.1.0" + }, + "engines": { + "node": ">=10.16.0" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": 
"https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -6090,6 +6118,21 @@ "dev": true, "license": "MIT" }, + "node_modules/concat-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-2.0.0.tgz", + "integrity": "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==", + "engines": [ + "node >= 6.0" + ], + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.0.2", + "typedarray": "^0.0.6" + } + }, "node_modules/concurrently": { "version": "9.2.1", "resolved": "https://registry.npmjs.org/concurrently/-/concurrently-9.2.1.tgz", @@ -9365,6 +9408,47 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/multer": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/multer/-/multer-2.1.0.tgz", + "integrity": "sha512-TBm6j41rxNohqawsxlsWsNNh/VdV4QFXcBvRcPhXaA05EZ79z0qJ2bQFpync6JBoHTeNY5Q1JpG7AlTjdlfAEA==", + "license": "MIT", + "dependencies": { + "append-field": "^1.0.0", + "busboy": "^1.6.0", + "concat-stream": "^2.0.0", + "type-is": "^1.6.18" + }, + "engines": { + "node": ">= 10.16.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/multer/node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/multer/node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "license": "MIT", + "dependencies": { + 
"media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/mz": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", @@ -10264,6 +10348,20 @@ "dev": true, "license": "MIT" }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/readdirp": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", @@ -10417,6 +10515,26 @@ "tslib": "^2.1.0" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/safe-regex": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/safe-regex/-/safe-regex-2.1.1.tgz", @@ -10767,6 +10885,23 @@ "node": ">= 0.8" } }, + "node_modules/streamsearch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", + "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": 
"sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/string-length": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/string-length/-/string-length-4.0.2.tgz", @@ -12551,6 +12686,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA==", + "license": "MIT" + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", @@ -12718,6 +12859,12 @@ "punycode": "^2.1.0" } }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", diff --git a/tests/unit/services/scanner/gemini.service.test.ts b/tests/unit/services/scanner/gemini.service.test.ts new file mode 100644 index 0000000..6d05102 --- /dev/null +++ b/tests/unit/services/scanner/gemini.service.test.ts @@ -0,0 +1,229 @@ +/** + * Unit tests for the Gemini scanner service. 
+ */
+
+// Mock @google/generative-ai before importing the service, so extractFromImages talks to these jest.fn() stubs instead of the real SDK
+const mockGenerateContent = jest.fn();
+const mockGetGenerativeModel = jest.fn().mockReturnValue({
+  generateContent: mockGenerateContent,
+});
+const MockGoogleGenerativeAI = jest.fn().mockImplementation(() => ({
+  getGenerativeModel: mockGetGenerativeModel,
+}));
+
+jest.mock('@google/generative-ai', () => ({
+  GoogleGenerativeAI: MockGoogleGenerativeAI,
+}));
+
+import { extractFromImages, resetGeminiClient } from '../../../../apps/backend/services/scanner/gemini.service';
+import { ScanContext } from '../../../../apps/backend/services/scanner/types';
+
+describe('gemini.service', () => {
+  const originalEnv = process.env;
+
+  beforeEach(() => {
+    jest.clearAllMocks(); // wipe recorded calls so per-test call assertions start from zero
+    resetGeminiClient(); // presumably drops the cached client so every test starts uninitialized
+    process.env = { ...originalEnv, GEMINI_API_KEY: 'test-api-key' }; // copy, so the real env object is never mutated
+  });
+
+  afterEach(() => {
+    process.env = originalEnv;
+  });
+
+  describe('extractFromImages', () => {
+    const mockImages = [Buffer.from('fake-image-data')];
+    const mockPhotoTypes = ['center_label'];
+    const mockContext: ScanContext = {};
+
+    it('initializes the Gemini client with the API key', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () =>
+            JSON.stringify({
+              label_name: { value: 'Sub Pop', confidence: 0.95 },
+            }),
+        },
+      });
+
+      await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+
+      expect(MockGoogleGenerativeAI).toHaveBeenCalledWith('test-api-key');
+    });
+
+    it('calls Gemini with the correct model', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () => JSON.stringify({}),
+        },
+      });
+
+      await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+
+      expect(mockGetGenerativeModel).toHaveBeenCalledWith({ model: 'gemini-2.0-flash' });
+    });
+
+    it('sends images as base64 inline data', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () => JSON.stringify({}),
+        },
+      });
+
+      await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+
+      const callArgs = mockGenerateContent.mock.calls[0][0];
+      const parts = callArgs.contents[0].parts;
+
+      // Should have: system prompt text, image(s), user prompt text; only the image part carries inlineData
+      const imagePart = parts.find((p: Record<string, unknown>) => p.inlineData);
+      expect(imagePart).toBeDefined();
+      expect(imagePart.inlineData.mimeType).toBe('image/jpeg');
+      expect(imagePart.inlineData.data).toBe(Buffer.from('fake-image-data').toString('base64'));
+    });
+
+    it('requests JSON response format', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () => JSON.stringify({}),
+        },
+      });
+
+      await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+
+      const callArgs = mockGenerateContent.mock.calls[0][0];
+      expect(callArgs.generationConfig.responseMimeType).toBe('application/json');
+    });
+
+    it('returns parsed ScanExtraction from the Gemini response', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () =>
+            JSON.stringify({
+              label_name: { value: 'Merge Records', confidence: 0.92 },
+              catalog_number: { value: 'MRG-567', confidence: 0.85 },
+              review_text: { value: 'Great album, play track 3!', confidence: 0.7 },
+              upc: { value: '036172091928', confidence: 0.99 },
+            }),
+        },
+      });
+
+      const result = await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+
+      expect(result.labelName).toEqual({ value: 'Merge Records', confidence: 0.92 });
+      expect(result.catalogNumber).toEqual({ value: 'MRG-567', confidence: 0.85 });
+      expect(result.reviewText).toEqual({ value: 'Great album, play track 3!', confidence: 0.7 });
+      expect(result.upc).toEqual({ value: '036172091928', confidence: 0.99 });
+    });
+
+    it('omits fields not present in the response', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () =>
+            JSON.stringify({
+              label_name: { value: 'Sub Pop', confidence: 0.9 },
+            }),
+        },
+      });
+
+      const result = await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+
+      expect(result.labelName).toEqual({ value: 'Sub Pop', confidence: 0.9 });
+      expect(result.catalogNumber).toBeUndefined();
+      expect(result.reviewText).toBeUndefined();
+      expect(result.upc).toBeUndefined();
+    });
+
+    it('clamps confidence scores to [0, 1]', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () =>
+            JSON.stringify({
+              label_name: { value: 'Test', confidence: 1.5 },
+              catalog_number: { value: 'X', confidence: -0.3 },
+            }),
+        },
+      });
+
+      const result = await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+
+      expect(result.labelName?.confidence).toBe(1);
+      expect(result.catalogNumber?.confidence).toBe(0);
+    });
+
+    it('includes context in the prompt when provided', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () => JSON.stringify({}),
+        },
+      });
+
+      const contextWithInfo: ScanContext = {
+        artistName: 'Superchunk',
+        albumTitle: 'Foolish',
+        stickerText: 'RO 5/3',
+        detectedUPC: '036172091928',
+      };
+
+      await extractFromImages(mockImages, mockPhotoTypes, contextWithInfo);
+
+      const callArgs = mockGenerateContent.mock.calls[0][0];
+      const textParts = callArgs.contents[0].parts
+        .filter((p: Record<string, unknown>) => p.text)
+        .map((p: { text: string }) => p.text);
+
+      const userPrompt = textParts[textParts.length - 1]; // the user prompt is expected to be the last text part
+      expect(userPrompt).toContain('Superchunk');
+      expect(userPrompt).toContain('Foolish');
+      expect(userPrompt).toContain('RO 5/3');
+      expect(userPrompt).toContain('036172091928');
+    });
+
+    it('throws when the API key is missing', async () => {
+      resetGeminiClient();
+      delete process.env.GEMINI_API_KEY;
+
+      await expect(extractFromImages(mockImages, mockPhotoTypes, mockContext)).rejects.toThrow(
+        'GEMINI_API_KEY is not configured'
+      );
+    });
+
+    it('throws on empty response from Gemini', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () => '',
+        },
+      });
+
+      await expect(extractFromImages(mockImages, mockPhotoTypes, mockContext)).rejects.toThrow(
+        'Empty response from Gemini'
+      );
+    });
+
+    it('throws on invalid JSON from Gemini', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () => 'not valid json {{{',
+        },
+      });
+
+      await expect(extractFromImages(mockImages, mockPhotoTypes, mockContext)).rejects.toThrow(
+        'Invalid JSON response from Gemini'
+      );
+    });
+
+    it('reuses the client on subsequent calls', async () => {
+      mockGenerateContent.mockResolvedValue({
+        response: {
+          text: () => JSON.stringify({}),
+        },
+      });
+
+      await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+      await extractFromImages(mockImages, mockPhotoTypes, mockContext);
+
+      // Client should only be constructed once, even though extractFromImages ran twice
+      expect(MockGoogleGenerativeAI).toHaveBeenCalledTimes(1);
+    });
+  });
+});
diff --git a/tests/unit/services/scanner/processor.test.ts b/tests/unit/services/scanner/processor.test.ts
new file mode 100644
index 0000000..9523276
--- /dev/null
+++ b/tests/unit/services/scanner/processor.test.ts
@@ -0,0 +1,141 @@
+/**
+ * Unit tests for the scanner image processor.
+ */
+
+// Mock the gemini service so processImages receives a canned extraction instead of calling Gemini
+jest.mock('../../../../apps/backend/services/scanner/gemini.service', () => ({
+  extractFromImages: jest.fn(),
+}));
+
+// Mock the library service so the catalog lookup (fuzzySearchLibrary) is controlled per test
+jest.mock('../../../../apps/backend/services/library.service', () => ({
+  fuzzySearchLibrary: jest.fn(),
+}));
+
+import { processImages } from '../../../../apps/backend/services/scanner/processor';
+import { extractFromImages } from '../../../../apps/backend/services/scanner/gemini.service';
+import { fuzzySearchLibrary } from '../../../../apps/backend/services/library.service';
+import { ScanContext, ScanExtraction } from '../../../../apps/backend/services/scanner/types';
+
+const mockExtractFromImages = extractFromImages as jest.MockedFunction<typeof extractFromImages>; // typed handle to the mocked import
+const mockFuzzySearchLibrary = fuzzySearchLibrary as jest.MockedFunction<typeof fuzzySearchLibrary>; // typed handle to the mocked import
+
+describe('processor', () => {
+  const mockImages = [Buffer.from('fake-image')];
+  const mockPhotoTypes = ['center_label'];
+
+  beforeEach(() => {
+    jest.clearAllMocks(); // wipe recorded calls so not.toHaveBeenCalled() checks are per-test
+  });
+
+  describe('processImages', () => {
+    it('returns extraction results from gemini service', async () => {
+      const mockExtraction: ScanExtraction = {
+        labelName: { value: 'Sub Pop', confidence: 0.9 },
+        catalogNumber: { value: 'SP 1234', confidence: 0.85 },
+      };
+
+      mockExtractFromImages.mockResolvedValue(mockExtraction);
+      mockFuzzySearchLibrary.mockResolvedValue([]);
+
+      const result = await processImages(mockImages, mockPhotoTypes, {});
+
+      expect(result.extraction).toEqual(mockExtraction);
+      expect(mockExtractFromImages).toHaveBeenCalledWith(mockImages, mockPhotoTypes, {});
+    });
+
+    it('uses catalogItemId from context when provided', async () => {
+      const mockExtraction: ScanExtraction = {
+        labelName: { value: 'Merge', confidence: 0.9 },
+      };
+
+      mockExtractFromImages.mockResolvedValue(mockExtraction);
+
+      const context: ScanContext = { catalogItemId: 42 };
+      const result = await processImages(mockImages, mockPhotoTypes, context);
+
+      expect(result.matchedAlbumId).toBe(42);
+      // Should not attempt catalog matching when ID is already known
+      expect(mockFuzzySearchLibrary).not.toHaveBeenCalled();
+    });
+
+    it('attempts catalog matching when no catalogItemId is provided', async () => {
+      const mockExtraction: ScanExtraction = {
+        labelName: { value: 'Sub Pop', confidence: 0.9 },
+      };
+
+      mockExtractFromImages.mockResolvedValue(mockExtraction);
+      mockFuzzySearchLibrary.mockResolvedValue([{ id: 101, artist_name: 'Nirvana', album_title: 'Bleach' }]);
+
+      const context: ScanContext = {
+        artistName: 'Nirvana',
+        albumTitle: 'Bleach',
+      };
+      const result = await processImages(mockImages, mockPhotoTypes, context);
+
+      expect(mockFuzzySearchLibrary).toHaveBeenCalledWith('Nirvana', 'Bleach', 1);
+      expect(result.matchedAlbumId).toBe(101);
+    });
+
+    it('uses label name from extraction when no artist in context', async () => {
+      const mockExtraction: ScanExtraction = {
+        labelName: { value: 'Merge Records', confidence: 0.9 },
+      };
+
+      mockExtractFromImages.mockResolvedValue(mockExtraction);
+      mockFuzzySearchLibrary.mockResolvedValue([]);
+
+      const context: ScanContext = { albumTitle: 'Foolish' };
+      const result = await processImages(mockImages, mockPhotoTypes, context);
+
+      expect(mockFuzzySearchLibrary).toHaveBeenCalledWith('Merge Records', 'Foolish', 1);
+      expect(result.matchedAlbumId).toBeUndefined();
+    });
+
+    it('returns undefined matchedAlbumId when no context for matching', async () => {
+      const mockExtraction: ScanExtraction = {};
+
+      mockExtractFromImages.mockResolvedValue(mockExtraction);
+
+      const result = await processImages(mockImages, mockPhotoTypes, {});
+
+      expect(mockFuzzySearchLibrary).not.toHaveBeenCalled();
+      expect(result.matchedAlbumId).toBeUndefined();
+    });
+
+    it('returns undefined matchedAlbumId when catalog search fails', async () => {
+      const mockExtraction: ScanExtraction = {
+        labelName: { value: 'Unknown Label', confidence: 0.5 },
+      };
+
+      mockExtractFromImages.mockResolvedValue(mockExtraction);
+      mockFuzzySearchLibrary.mockRejectedValue(new Error('DB error')); // a failed lookup must not fail the whole scan
+
+      const context: ScanContext = { artistName: 'Test Artist' };
+      const result = await processImages(mockImages, mockPhotoTypes, context);
+
+      expect(result.matchedAlbumId).toBeUndefined();
+      expect(result.extraction).toEqual(mockExtraction);
+    });
+
+    it('returns undefined matchedAlbumId when catalog search returns empty', async () => {
+      const mockExtraction: ScanExtraction = {
+        labelName: { value: 'Rare Label', confidence: 0.8 },
+      };
+
+      mockExtractFromImages.mockResolvedValue(mockExtraction);
+      mockFuzzySearchLibrary.mockResolvedValue([]);
+
+      const context: ScanContext = { artistName: 'Unknown Band' };
+      const result = await processImages(mockImages, mockPhotoTypes, context);
+
+      expect(result.matchedAlbumId).toBeUndefined();
+    });
+
+    it('propagates errors from gemini service', async () => {
+      mockExtractFromImages.mockRejectedValue(new Error('Gemini API failed'));
+
+      await expect(processImages(mockImages, mockPhotoTypes, {})).rejects.toThrow('Gemini API failed');
+    });
+  });
+});