diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7bc53939..ea9b5db9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,7 +16,6 @@ env: IMAGE_PREFIX: ${{ github.repository_owner }}/edit-mind jobs: - build-and-push: runs-on: ubuntu-latest permissions: @@ -25,7 +24,7 @@ jobs: packages: write artifact-metadata: write attestations: write - + strategy: matrix: service: @@ -36,23 +35,26 @@ jobs: dockerfile: docker/Dockerfile.web context: . platform: + - linux/amd64 - linux/arm64 steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: "node-cleanup" + - name: Maximize build space run: | sudo rm -rf /usr/share/dotnet sudo rm -rf /usr/local/lib/android sudo rm -rf /opt/ghc sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /usr/local/.ghcup sudo rm -rf /usr/share/swift sudo rm -rf /usr/local/share/boost + sudo apt-get autoremove -y sudo apt-get clean - docker system prune -af --volumes + sudo docker system prune -a -f df -h + + - name: Checkout code + uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -73,11 +75,10 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Generate platform tag - id: platform-tag + - name: Generate platform suffix + id: platform run: | - PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g') - echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT + echo "suffix=$(echo ${{ matrix.platform }} | sed 's/\//-/g')" >> $GITHUB_OUTPUT - name: Extract metadata (tags, labels) id: meta @@ -85,30 +86,45 @@ jobs: with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-${{ matrix.service.name }} tags: | - type=raw,value=latest-${{ steps.platform-tag.outputs.tag }},enable={{is_default_branch}} - type=semver,pattern={{version}}-${{ steps.platform-tag.outputs.tag }} - type=semver,pattern={{major}}.{{minor}}-${{ steps.platform-tag.outputs.tag }} - type=sha,prefix={{branch}}-${{ steps.platform-tag.outputs.tag }}- - - 
name: Build and push Docker image + type=ref,event=branch,suffix=-${{ steps.platform.outputs.suffix }} + type=semver,pattern={{version}},suffix=-${{ steps.platform.outputs.suffix }} + type=semver,pattern={{major}}.{{minor}},suffix=-${{ steps.platform.outputs.suffix }} + + - name: Set up env files + run: | + cp .env.example .env + cp .env.system.example .env.system + + - name: Start ChromaDB (testing) + run: docker compose up -d chroma + + - name: Build and Test uses: docker/build-push-action@v6 + with: + context: ${{ matrix.service.context }} + file: ${{ matrix.service.dockerfile }} + target: testing + platforms: ${{ matrix.platform }} + push: false + cache-from: type=gha,scope=${{ matrix.service.name }}-${{ matrix.platform }} + cache-to: type=gha,mode=max,scope=${{ matrix.service.name }}-${{ matrix.platform }} + + - name: Build and push Docker image id: push - if: github.event_name != 'pull_request' + uses: docker/build-push-action@v6 with: context: ${{ matrix.service.context }} file: ${{ matrix.service.dockerfile }} target: production - platforms: linux/arm64 + platforms: ${{ matrix.platform }} build-args: | NODE_VERSION=22.20.0 PNPM_VERSION=10.20.0 push: ${{ github.event_name != 'pull_request' }} - load: false tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: | - type=gha,scope=${{ matrix.service.name }}-${{ matrix.platform }} - type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-${{ matrix.service.name }}:buildcache-${{ matrix.platform }} - cache-to: type=gha,mode=max,scope=${{ matrix.service.name }}-${{ matrix.platform }} + cache-from: type=gha,scope=${{ matrix.service.name }}-${{ matrix.platform }} + cache-to: type=gha,mode=max,scope=${{ matrix.service.name }}-${{ matrix.platform }} sbom: false - name: Generate artifact attestation @@ -119,12 +135,42 @@ jobs: subject-digest: ${{ steps.push.outputs.digest }} push-to-registry: true - create-release: + create-manifest: needs: build-and-push runs-on: ubuntu-latest + if: 
github.event_name != 'pull_request' permissions: - contents: write + packages: write + strategy: + matrix: + service: + - name: background-jobs + - name: web + steps: + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create and push manifest + run: | + TAG="${GITHUB_REF_NAME#v}" + if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then + TAG="main" + fi + + docker buildx imagetools create -t ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-${{ matrix.service.name }}:${TAG} \ + ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-${{ matrix.service.name }}:${TAG}-linux-amd64 \ + ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-${{ matrix.service.name }}:${TAG}-linux-arm64 + + create-release: + needs: create-manifest + runs-on: ubuntu-latest if: startsWith(github.ref, 'refs/tags/') + permissions: + contents: write steps: - name: Checkout code uses: actions/checkout@v4 @@ -135,7 +181,6 @@ jobs: tag_name: ${{ github.ref_name }} name: Release ${{ github.ref_name }} draft: true - prerelease: false files: | docker-compose.yml .env.example diff --git a/.gitignore b/.gitignore index 6623f7d8..65fecb9b 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ test-results/* .env.dev docker/.env.system .env.system +!python/.faces +*.pkl diff --git a/apps/background-jobs/package.json b/apps/background-jobs/package.json index 13395212..b697e7cc 100644 --- a/apps/background-jobs/package.json +++ b/apps/background-jobs/package.json @@ -1,6 +1,6 @@ { "name": "background-jobs", - "version": "0.1.1", + "version": "0.1.2", "private": true, "type": "module", "scripts": { diff --git a/apps/background-jobs/src/index.ts b/apps/background-jobs/src/index.ts index 169b53fd..bb8de765 100644 --- a/apps/background-jobs/src/index.ts +++ b/apps/background-jobs/src/index.ts @@ -5,12 +5,13 @@ import { BullMQAdapter } from '@bull-board/api/bullMQAdapter' import { ExpressAdapter } from 
'@bull-board/express' import foldersRoute from './routes/folders' import stitcherRoute from './routes/stitcher' +import faceRoute from './routes/face' import { config } from './config' -import { faceMatcherQueue, immichImporterQueue, videoQueue, videoStitcherQueue } from './queue' +import { immichImporterQueue, videoQueue, videoStitcherQueue } from './queue' import './jobs/videoIndexer' -import './jobs/faceMatcher' import './jobs/ImmichImporter' import './jobs/videoStitcher' +import './jobs/faceLabelling' import { pythonService } from '@shared/services/pythonService' import { initializeWatchers } from './watcher' @@ -28,7 +29,6 @@ if (process.env.NODE_ENV === 'development') { createBullBoard({ queues: [ new BullMQAdapter(videoQueue), - new BullMQAdapter(faceMatcherQueue), new BullMQAdapter(immichImporterQueue), new BullMQAdapter(videoStitcherQueue), ], @@ -40,6 +40,7 @@ if (process.env.NODE_ENV === 'development') { app.use('/folders', foldersRoute) app.use('/stitcher', stitcherRoute) +app.use('/face', faceRoute) app.get('/health', (_req, res) => res.json({ status: 'ok' })) diff --git a/apps/background-jobs/src/jobs/ImmichImporter.ts b/apps/background-jobs/src/jobs/ImmichImporter.ts index 0364f286..c6808483 100644 --- a/apps/background-jobs/src/jobs/ImmichImporter.ts +++ b/apps/background-jobs/src/jobs/ImmichImporter.ts @@ -1,10 +1,8 @@ import { decryptApiKey } from '@shared/services/encryption' import { Worker, Job } from 'bullmq' -import { connection } from '../queue' +import { connection } from '../services/redis' import { ImmichImporterJobData } from '@shared/types/immich' import { getAllImmichFaces } from '@shared/services/immich' -import { reindexFaces } from '@shared/utils/faces' -import { pythonService } from '@shared/services/pythonService' import { prisma } from 'src/services/db' async function processImmichImporterJob(job: Job) { @@ -14,9 +12,7 @@ async function processImmichImporterJob(job: Job) { }) if (!integration) throw new Error('Integration not 
found') const apiKey = decryptApiKey(integration.immichApiKey) - const facesFiles = await getAllImmichFaces({ baseUrl: integration.immichBaseUrl, apiKey }) - if (!pythonService.isServiceRunning) await pythonService.start() - if (job.id) await reindexFaces(facesFiles, job.id) + await getAllImmichFaces({ baseUrl: integration.immichBaseUrl, apiKey }) } catch (error) { console.error(error) } diff --git a/apps/background-jobs/src/jobs/faceLabelling.ts b/apps/background-jobs/src/jobs/faceLabelling.ts new file mode 100644 index 00000000..3a502d62 --- /dev/null +++ b/apps/background-jobs/src/jobs/faceLabelling.ts @@ -0,0 +1,130 @@ +import { Worker, Job } from 'bullmq' +import { connection } from '../services/redis' +import { logger } from '@shared/services/logger' +import { FaceLabellingJobData } from '@shared/types/face' +import type { FaceDetectionData } from '@shared/types/unknownFace' +import { promises as fs } from 'fs' +import { existsSync } from 'fs' +import path from 'path' +import type { Scene } from '@shared/types/scene' +import { FACES_DIR, PROCESSED_VIDEOS_DIR, UNKNOWN_FACES_DIR } from '@shared/constants' +import { getByVideoSource, updateMetadata } from '@shared/services/vectorDb' + +async function processFaceLabellingJob(job: Job) { + const { faces, name } = job.data + logger.info({ jobId: job.id }, 'Starting Face labelling job') + + const personDir = path.join(FACES_DIR, name) + if (!existsSync(personDir)) { + await fs.mkdir(personDir, { recursive: true }) + } + + for (const face of faces) { + try { + const jsonPath = path.join(UNKNOWN_FACES_DIR, face.jsonFile) + + if (existsSync(jsonPath)) { + let faceData: FaceDetectionData + try { + faceData = JSON.parse(await fs.readFile(jsonPath, 'utf8')) + } catch { + continue + } + + const imageFile = faceData.image_file + const srcImagePath = path.join(UNKNOWN_FACES_DIR, imageFile) + const destImagePath = path.join(personDir, imageFile) + + const scenes = await getByVideoSource(faceData.video_path) + const 
sortedAppearances = faceData.all_appearances?.sort((a, b) => a.frame_index - b.frame_index) + + if (sortedAppearances && scenes && scenes.length > 0) { + const firstAppearance = sortedAppearances[0] + const lastAppearance = sortedAppearances[sortedAppearances.length - 1] + + for (const scene of scenes) { + // Check if the face appears at any point during the scene + const overlapsScene = + firstAppearance.timestamp_seconds <= scene.endTime && lastAppearance.timestamp_seconds >= scene.startTime + + if (!overlapsScene) continue + + if (scene.faces.includes(face.faceId)) { + scene.faces = scene.faces.map((f) => (f === face.faceId ? name : f)) + } + + if (scene.facesData) { + scene.facesData = scene.facesData.map((f) => + f.name === face.faceId ? { ...f, name, confidence: 100 } : f + ) + } + + await updateMetadata(scene) + } + + const videoDir = path.join(PROCESSED_VIDEOS_DIR, path.basename(faceData.video_path)) + const scenesJsonPath = path.join(videoDir, 'scenes.json') + + if (existsSync(scenesJsonPath)) { + const fileScenes: Scene[] = JSON.parse(await fs.readFile(scenesJsonPath, 'utf8')) + let modified = false + + for (const scene of fileScenes) { + const inRange = + scene.startTime <= faceData.last_appearance?.timestamp_seconds && + scene.endTime >= faceData.first_appearance?.timestamp_seconds + + if (!inRange) continue + + let sceneModified = false + + if (scene.faces.includes(face.faceId)) { + scene.faces = scene.faces.map((f) => (f === face.faceId ? name : f)) + sceneModified = true + } + + if (scene.facesData) { + const hadFace = scene.facesData.some((f) => f.name === face.faceId) + scene.facesData = scene.facesData.map((f) => + f.name === face.faceId ? 
{ ...f, name, confidence: 100 } : f + ) + if (hadFace) sceneModified = true + } + + if (sceneModified) modified = true + } + + if (modified) { + await fs.writeFile(scenesJsonPath, JSON.stringify(fileScenes, null, 2), 'utf8') + } + } + } + if (existsSync(srcImagePath)) { + await fs.copyFile(srcImagePath, destImagePath) + await fs.unlink(srcImagePath) + } + try { + await fs.unlink(jsonPath) + } catch (error) { + logger.warn(error) + } + } else { + try { + const imageFile = face.jsonFile.replace('.json', '.jpg') + const srcImagePath = path.join(UNKNOWN_FACES_DIR, imageFile) + await fs.unlink(srcImagePath) + } catch (error) { + logger.warn(error) + } + } + } catch (err) { + logger.error('Label error: ' + err) + } + } + logger.info({ jobId: job.id }, 'Face labelling job completed') +} + +export const faceLabellingWorker = new Worker('face-labelling', processFaceLabellingJob, { + connection, + concurrency: 3, +}) diff --git a/apps/background-jobs/src/jobs/faceMatcher.ts b/apps/background-jobs/src/jobs/faceMatcher.ts deleted file mode 100644 index 6fbc4eed..00000000 --- a/apps/background-jobs/src/jobs/faceMatcher.ts +++ /dev/null @@ -1,150 +0,0 @@ -import { Worker, Job } from 'bullmq' -import { promises as fs } from 'fs' -import path from 'path' -import { getByVideoSource, updateMetadata } from '@shared/services/vectorDb' -import type { Scene } from '@shared/types/scene' -import { pythonService } from '@shared/services/pythonService' -import { connection } from '../queue' -import { findMatchingFaces, reindexFaces } from '@shared/utils/faces' -import { FaceLabelingJobData, MatchResult } from '@shared/types/face' -import { FACES_DIR, PROCESSED_VIDEOS_DIR } from '@shared/constants' -import { logger } from '@shared/services/logger' -import { safeUpdateKnownFaces, safeUpdateScenesFile } from 'src/utils/locker' - -async function processFaceMatcherJob(job: Job) { - const { personName, referenceImages, unknownFacesDir } = job.data - - logger.info( - { jobId: job.id, personName, 
referenceImagesCount: referenceImages.length, unknownFacesDir }, - 'Starting face matcher job' - ) - - if (!pythonService.isServiceRunning()) { - logger.info('Python service not running, starting it') - await pythonService.start() - logger.info('Python service started successfully') - } - - await job.updateProgress(1) - const processedFaces: Array<{ name: string; image_path: string }> = [] - - const result = await findMatchingFaces(personName, referenceImages, unknownFacesDir, async (progress) => { - const { match } = progress - if (match) { - const imagePath = await processMatch(match, personName, unknownFacesDir) - - if (imagePath) { - processedFaces.push({ - name: personName, - image_path: path.basename(imagePath.toString()), - }) - } - } - const progressPercent = progress.progress || 0 - await job.updateProgress(progressPercent) - }) - - logger.info({ jobId: job.id, matchesFound: result.matches.length }, 'Face matching completed') - - try { - logger.info({ jobId: job.id }, 'All matches processed successfully') - - if (job.id) await reindexFaces(processedFaces, job.id) - logger.info({ jobId: job.id }, 'Face reindexing completed') - } catch (error) { - logger.error({ jobId: job.id, error: error instanceof Error ? error.message : error }, 'Error processing matches') - throw error - } -} - -async function processMatch(match: MatchResult, personName: string, unknownFacesDir: string) { - const { json_file, image_file, face_id, face_data } = match - - try { - const scenes: Scene[] = await getByVideoSource(face_data.video_path) - - const faceTimestamp = parseFloat(face_data.timestamp_seconds.toString()) - - const modifiedScenes = scenes - .map((scene) => { - if (scene.startTime <= faceTimestamp && scene.endTime >= faceTimestamp) { - scene.faces = scene.faces.map((face) => (face === face_id ? personName : face)) - if (scene.facesData) { - scene.facesData = scene.facesData?.map((f) => (f.name === face_id ? 
{ ...f, name: personName } : f)) - } - return scene - } - return undefined - }) - .filter((scene): scene is Scene => scene !== undefined) - - const videoDir = path.join(PROCESSED_VIDEOS_DIR, path.basename(face_data.video_path)) - - const scenesPath = path.join(videoDir, 'scenes.json') - - // Update vector DB - for (const scene of modifiedScenes) { - await updateMetadata(scene) - } - - // Update scenes.json file with lock - await safeUpdateScenesFile(scenesPath, face_id, personName, faceTimestamp) - - logger.info({ modifiedScenesCount: modifiedScenes.length, personName }, 'Scene metadata updated successfully') - const personDir = path.join(FACES_DIR, personName) - - const dirExists = await fs - .access(personDir) - .then(() => true) - .catch(() => false) - - if (!dirExists) { - await fs.mkdir(personDir, { recursive: true }) - } - - const oldImagePath = path.join(unknownFacesDir, image_file) - const newImagePath = path.join(personDir, image_file) - - await fs.rename(oldImagePath, newImagePath) - - await safeUpdateKnownFaces(personName, newImagePath) - - const jsonPath = path.join(unknownFacesDir, json_file) - await fs.unlink(jsonPath) - - logger.info({ face_id, personName, imageFile: image_file }, 'Match processed successfully') - return newImagePath - } catch (error) { - logger.error( - { - imageFile: image_file, - faceId: face_id, - personName, - error: error instanceof Error ? error.message : error, - stack: error instanceof Error ? 
error.stack : undefined, - }, - 'Error processing match' - ) - throw error - } -} - -export const faceMatcherWorker = new Worker('face-matcher', processFaceMatcherJob, { - connection, - concurrency: 1, -}) - -faceMatcherWorker.on('completed', (job) => { - logger.info({ jobId: job.id }, 'Face labeling job completed') -}) - -faceMatcherWorker.on('failed', (job: Job | undefined, err: Error) => { - logger.error( - { - jobId: job?.id, - error: err.message, - stack: err.stack, - }, - 'Face labeling job failed' - ) -}) diff --git a/apps/background-jobs/src/jobs/videoIndexer.ts b/apps/background-jobs/src/jobs/videoIndexer.ts index 191893de..9a0a8aca 100644 --- a/apps/background-jobs/src/jobs/videoIndexer.ts +++ b/apps/background-jobs/src/jobs/videoIndexer.ts @@ -1,6 +1,6 @@ import { Worker, Job } from 'bullmq' import { prisma } from '../services/db' -import { connection } from '../queue' +import { connection } from '../services/redis' import path from 'path' import { existsSync, mkdirSync, promises as fs } from 'fs' import { PROCESSED_VIDEOS_DIR, THUMBNAILS_DIR } from '@shared/constants' diff --git a/apps/background-jobs/src/jobs/videoStitcher.ts b/apps/background-jobs/src/jobs/videoStitcher.ts index 00456313..c83c8cb0 100644 --- a/apps/background-jobs/src/jobs/videoStitcher.ts +++ b/apps/background-jobs/src/jobs/videoStitcher.ts @@ -1,6 +1,6 @@ import { generateCompilationResponse } from '@shared/services/modelRouter' import { Worker, Job } from 'bullmq' -import { connection } from '../queue' +import { connection } from '../services/redis' import { stitchVideos } from '@shared/utils/sticher' import { getVideoWithScenesBySceneIds } from '@shared/services/vectorDb' import { prisma } from '../services/db' diff --git a/apps/background-jobs/src/queue.ts b/apps/background-jobs/src/queue.ts index f46c7f88..a6f5c8cb 100644 --- a/apps/background-jobs/src/queue.ts +++ b/apps/background-jobs/src/queue.ts @@ -1,12 +1,5 @@ import { Queue } from 'bullmq' -import { config } from 
'./config' -import IORedis from 'ioredis' - -export const connection = new IORedis({ - host: config.redisHost, - port: config.redisPort, - maxRetriesPerRequest: null, -}) +import { connection } from './services/redis' export const videoQueue = new Queue('video-indexing', { connection, @@ -19,7 +12,7 @@ export const videoQueue = new Queue('video-indexing', { }, }) -export const faceMatcherQueue = new Queue('face-matcher', { +export const immichImporterQueue = new Queue('immich-importer', { connection, defaultJobOptions: { attempts: 3, @@ -30,7 +23,7 @@ export const faceMatcherQueue = new Queue('face-matcher', { }, }) -export const immichImporterQueue = new Queue('immich-importer', { +export const videoStitcherQueue = new Queue('video-stitcher', { connection, defaultJobOptions: { attempts: 3, @@ -41,7 +34,7 @@ export const immichImporterQueue = new Queue('immich-importer', { }, }) -export const videoStitcherQueue = new Queue('video-stitcher', { +export const faceLabellingQueue = new Queue('face-labelling', { connection, defaultJobOptions: { attempts: 3, @@ -51,4 +44,3 @@ export const videoStitcherQueue = new Queue('video-stitcher', { }, }, }) - diff --git a/apps/background-jobs/src/routes/face.ts b/apps/background-jobs/src/routes/face.ts new file mode 100644 index 00000000..d1f8d502 --- /dev/null +++ b/apps/background-jobs/src/routes/face.ts @@ -0,0 +1,39 @@ +import express from 'express' +import { faceLabellingQueue } from '../queue' +import { FaceLabellingJobData } from '@shared/types/face' + +const router = express.Router() + +router.post('/label', async (req, res) => { + const { faces, name } = req.body as FaceLabellingJobData + + if (!faces) { + return res.status(400).json({ error: 'faces is required' }) + } + + if (!name) { + return res.status(400).json({ error: 'name is required' }) + } + + try { + await faceLabellingQueue.add( + 'face-labelling', + { + faces, + name, + }, + { + removeOnComplete: true, + } + ) + + res.json({ + message: 'Face labelling job 
queued', + }) + } catch (error) { + console.error(error) + res.status(500).json({ error: 'Failed to queue face labelling job' }) + } +}) + +export default router diff --git a/apps/background-jobs/src/services/faces.ts b/apps/background-jobs/src/services/faces.ts index d3034263..cbd4ba8f 100644 --- a/apps/background-jobs/src/services/faces.ts +++ b/apps/background-jobs/src/services/faces.ts @@ -1,9 +1,7 @@ import { promises as fs } from 'fs' import path from 'path' import { existsSync } from 'fs' -import { faceMatcherQueue } from 'src/queue' -import { AddFaceLabelingJobParams } from '@shared/types/face' -import { FACES_DIR, KNOWN_FACES_FILE, UNKNOWN_FACES_DIR } from '@shared/constants' +import { FACES_DIR, UNKNOWN_FACES_DIR } from '@shared/constants'; const FACES_PER_PAGE = 40 @@ -50,32 +48,31 @@ export const getAllUnknownFaces = async (page = 1, limit = FACES_PER_PAGE) => { } export const getAllKnownFaces = async () => { - if (!existsSync(KNOWN_FACES_FILE)) { - fs.writeFile(KNOWN_FACES_FILE, JSON.stringify({}), 'utf8') + if (!existsSync(FACES_DIR)) { + await fs.mkdir(FACES_DIR, { recursive: true }) + return null } - if (existsSync(KNOWN_FACES_FILE)) { - const facesData = await fs.readFile(KNOWN_FACES_FILE, 'utf-8') - const faces = JSON.parse(facesData) + const peopleFolders = await fs.readdir(FACES_DIR, { withFileTypes: true }) - const cleanedFaces: Record = {} - for (const [person, paths] of Object.entries(faces)) { - cleanedFaces[person] = (paths as string[]).map((path) => path.replace(FACES_DIR, '')) - } + const result: Record = {} - return cleanedFaces - } + for (const entry of peopleFolders) { + if (!entry.isDirectory()) continue - return null -} + const personName = entry.name + const personFolder = path.join(FACES_DIR, personName) -export const addFaceLabelingJob = async (params: AddFaceLabelingJobParams) => { - const job = await faceMatcherQueue.add('face-matcher', params, { - attempts: 3, - backoff: { - type: 'exponential', - delay: 5000, - }, - }) + const 
files = await fs.readdir(personFolder) + const jpgFiles = files.filter((f) => f.toLowerCase().endsWith('.jpg')) - return job -} + if (jpgFiles.length === 0) continue + + jpgFiles.sort() + + const lastImage = jpgFiles[jpgFiles.length - 1] + + result[personName] = path.join(personName, lastImage) + } + + return result +} \ No newline at end of file diff --git a/apps/background-jobs/src/services/redis.ts b/apps/background-jobs/src/services/redis.ts new file mode 100644 index 00000000..1afced19 --- /dev/null +++ b/apps/background-jobs/src/services/redis.ts @@ -0,0 +1,8 @@ +import { config } from '../config' +import IORedis from 'ioredis' + +export const connection = new IORedis({ + host: config.redisHost, + port: config.redisPort, + maxRetriesPerRequest: null, +}) diff --git a/apps/background-jobs/src/utils/locker.ts b/apps/background-jobs/src/utils/locker.ts index 218a68da..d0c447f6 100644 --- a/apps/background-jobs/src/utils/locker.ts +++ b/apps/background-jobs/src/utils/locker.ts @@ -1,11 +1,8 @@ -import { KNOWN_FACES_FILE } from '@shared/constants' import { Scene } from '@shared/schemas' import { logger } from '@shared/services/logger' import { existsSync } from 'fs' import { readFile, open, unlink, writeFile } from 'fs/promises' -const LOCK_FILE = KNOWN_FACES_FILE + '.lock' - export async function acquireLock(lockPath: string) { while (true) { try { @@ -27,31 +24,6 @@ async function releaseLock(lockPath: string) { } } -export async function safeUpdateKnownFaces(personName: string, newImagePath: string) { - await acquireLock(LOCK_FILE) - - try { - let faces: Record = {} - - try { - const content = await readFile(KNOWN_FACES_FILE, 'utf-8') - faces = JSON.parse(content) - } catch { - faces = {} - } - - if (!faces[personName]) { - faces[personName] = [] - } - - faces[personName].push(newImagePath) - - await writeFile(KNOWN_FACES_FILE, JSON.stringify(faces, null, 2)) - } finally { - await releaseLock(LOCK_FILE) - } -} - export async function safeUpdateScenesFile( 
scenesPath: string, faceId: string, @@ -69,7 +41,6 @@ export async function safeUpdateScenesFile( const allScenes: Scene[] = JSON.parse(await readFile(scenesPath, 'utf-8')) - // Update the scenes in the array let modified = false for (const scene of allScenes) { if (scene.startTime <= faceTimestamp && scene.endTime >= faceTimestamp) { diff --git a/apps/desktop/lib/conveyor/handlers/app-handler.ts b/apps/desktop/lib/conveyor/handlers/app-handler.ts index b4c7c436..9992ef52 100644 --- a/apps/desktop/lib/conveyor/handlers/app-handler.ts +++ b/apps/desktop/lib/conveyor/handlers/app-handler.ts @@ -22,7 +22,6 @@ import { convertTimeToWords } from '@shared/utils/time' import { getLocationName } from '@shared/utils/location' import { FACES_DIR, PROCESSED_VIDEOS_DIR, THUMBNAILS_DIR } from '@shared/constants' import { handle, sender } from '@/lib/main/shared' -import { reindexFaces } from '@shared/utils/faces' export const registerAppHandlers = (app: App, webContents: WebContents) => { const send = sender(webContents) @@ -249,7 +248,6 @@ export const registerAppHandlers = (app: App, webContents: WebContents) => { try { const jsonPath = path.join(unknownFacesDir, jsonFile) - const jobId = '' try { await fs.unlink(jsonPath) @@ -257,7 +255,6 @@ export const registerAppHandlers = (app: App, webContents: WebContents) => { console.error(error) } - return await reindexFaces([], jobId) return { success: true } } catch (error) { diff --git a/apps/desktop/package.json b/apps/desktop/package.json index 4dd30930..cc504602 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -1,6 +1,6 @@ { "name": "desktop", - "version": "0.1.1", + "version": "0.1.2", "description": "AI-Powered Video Indexing and Search", "main": "./out/main/main.js", "license": "MIT", diff --git a/apps/web/app/features/training/components/KnownFacesGrid.tsx b/apps/web/app/features/training/components/KnownFacesGrid.tsx index b31c0a03..b583e090 100644 --- 
a/apps/web/app/features/training/components/KnownFacesGrid.tsx +++ b/apps/web/app/features/training/components/KnownFacesGrid.tsx @@ -11,12 +11,8 @@ export const KnownFacesGrid: React.FC = ({ knownFaces }) => {knownFaces.map((face) => (
- {face.images.length > 0 ? ( - {face.name} + {face.image ? ( + {face.name} ) : (
@@ -35,9 +31,6 @@ export const KnownFacesGrid: React.FC = ({ knownFaces }) =>

{face.name}

-

- {face.images.length} sample{face.images.length !== 1 ? 's' : ''} -

))} diff --git a/apps/web/app/features/training/components/LabelingForm.tsx b/apps/web/app/features/training/components/LabelingForm.tsx index a7aad594..e610e014 100644 --- a/apps/web/app/features/training/components/LabelingForm.tsx +++ b/apps/web/app/features/training/components/LabelingForm.tsx @@ -96,7 +96,7 @@ export const LabelingForm: React.FC = ({ {knownFaces.map((face) => ( ))} diff --git a/apps/web/app/features/training/components/StatusNotifications.tsx b/apps/web/app/features/training/components/StatusNotifications.tsx deleted file mode 100644 index 21b61968..00000000 --- a/apps/web/app/features/training/components/StatusNotifications.tsx +++ /dev/null @@ -1,104 +0,0 @@ -import React from 'react' -import { X, CheckCircle, AlertCircle, Loader2 } from 'lucide-react' - -interface MatchingStatus { - isMatching: boolean - progress: number - matchesFound: number - currentPerson: string - error: string | null -} - -interface StatusNotificationsProps { - matchingStatus: MatchingStatus - successMessage: string | null - onDismissSuccess: () => void - onDismissError: () => void -} - -export const StatusNotifications: React.FC = ({ - matchingStatus, - successMessage, - onDismissSuccess, - onDismissError, -}) => { - return ( -
- {successMessage && ( -
-
-
- -
-
-

Success

-

{successMessage}

-
- -
-
- )} - - {matchingStatus.error && ( -
-
-
- -
-
-

Error

-

{matchingStatus.error}

-
- -
-
- )} - - {matchingStatus.isMatching && ( -
-
-
- -
-
-

- Finding Similar Faces -

-

- Searching for faces matching "{matchingStatus.currentPerson}" -

- -
-
-
- -
- - {Math.round(matchingStatus.progress)}% complete - - {matchingStatus.matchesFound > 0 && ( - - {matchingStatus.matchesFound} matches found - - )} -
-
-
-
- )} -
- ) -} \ No newline at end of file diff --git a/apps/web/app/features/training/hooks/useTraining.ts b/apps/web/app/features/training/hooks/useTraining.ts index 76cd423a..a54161ad 100644 --- a/apps/web/app/features/training/hooks/useTraining.ts +++ b/apps/web/app/features/training/hooks/useTraining.ts @@ -1,13 +1,6 @@ import { useState, useEffect, useCallback } from 'react' import type { UnknownFace, KnownFace } from '@shared/types/face' -interface MatchingStatus { - isMatching: boolean - progress: number - matchesFound: number - currentPerson: string - error: string | null -} interface PaginationData { total: number @@ -26,16 +19,8 @@ export const useTraining = () => { const [newFaceName, setNewFaceName] = useState('') const [isLabeling, setIsLabeling] = useState(false) const [activeTab, setActiveTab] = useState('unknown') - const [matchingStatus, setMatchingStatus] = useState({ - isMatching: false, - progress: 0, - matchesFound: 0, - currentPerson: '', - error: null, - }) const [successMessage, setSuccessMessage] = useState(null) - // Pagination states const [unknownPagination, setUnknownPagination] = useState({ total: 0, page: 1, @@ -64,10 +49,6 @@ export const useTraining = () => { }) } catch (error) { console.error('Error fetching unknown faces:', error) - setMatchingStatus((prev) => ({ - ...prev, - error: 'Failed to load unknown faces. 
Please refresh the page.', - })) } finally { setLoading(false) } @@ -80,14 +61,12 @@ export const useTraining = () => { if (!response.ok) throw new Error('Failed to fetch known faces') const data = await response.json() - // If data.faces is already in array format from pagination if (Array.isArray(data.faces)) { setKnownFaces(data.faces) } else { - // Convert object format to array if needed - const knownFacesArray = Object.entries(data.faces).map(([name, images]) => ({ + const knownFacesArray = Object.entries(data.faces).map(([name, image]) => ({ name, - images: images as string[], + image: image as string, })) setKnownFaces(knownFacesArray) } @@ -100,10 +79,6 @@ export const useTraining = () => { }) } catch (error) { console.error('Error fetching known faces:', error) - setMatchingStatus((prev) => ({ - ...prev, - error: 'Failed to load known faces. Please refresh the page.', - })) } finally { setLoading(false) } @@ -152,106 +127,22 @@ export const useTraining = () => { } }, [selectedFaces.size, unknownFaces]) - const dismissSuccess = useCallback(() => { - setSuccessMessage(null) - }, []) - - const dismissError = useCallback(() => { - setMatchingStatus((prev) => ({ ...prev, error: null })) - }, []) - - const pollMatchingStatus = useCallback( - async (personName: string) => { - const pollInterval = 2000 - const maxPolls = 60 - - let pollCount = 0 - - const poll = async () => { - if (pollCount >= maxPolls) { - setMatchingStatus((prev) => ({ - ...prev, - isMatching: false, - })) - return - } - - try { - const response = await fetch(`/api/faces/matching-status?person=${encodeURIComponent(personName)}`) - - if (!response.ok) { - throw new Error('Failed to fetch matching status') - } - - const status = await response.json() - - setMatchingStatus((prev) => ({ - ...prev, - progress: status.progress || prev.progress, - matchesFound: status.matchesFound || 0, - isMatching: status.isActive, - })) - - if (status.isActive) { - pollCount++ - setTimeout(poll, pollInterval) - } 
else { - if (status.matchesFound > 0) { - setSuccessMessage( - `Automatic matching complete! Found and labeled ${status.matchesFound} additional matching face(s).` - ) - await fetchData() - } - setMatchingStatus((prev) => ({ - ...prev, - isMatching: false, - })) - } - } catch (error) { - console.error('Error polling matching status:', error) - setMatchingStatus((prev) => ({ - ...prev, - isMatching: false, - error: 'Failed to get automatic matching status.', - })) - } - } - - poll() - }, - [fetchData] - ) const handleLabelFaces = useCallback(async () => { if (selectedFaces.size === 0) { - setMatchingStatus((prev) => ({ - ...prev, - error: 'Please select at least one face to label.', - })) return } const targetName = labelMode === 'existing' ? selectedKnownFace : newFaceName.trim() if (!targetName) { - setMatchingStatus((prev) => ({ - ...prev, - error: 'Please select an existing face or enter a new name.', - })) return } setIsLabeling(true) - setMatchingStatus({ - isMatching: true, - progress: 0, - matchesFound: 0, - currentPerson: targetName, - error: null, - }) + setSuccessMessage(null) try { - // Prepare all faces data const selectedFacesArray = Array.from(selectedFaces) const facesToLabel = selectedFacesArray .map((image_hash) => { @@ -266,7 +157,6 @@ export const useTraining = () => { }) .filter((face): face is { jsonFile: string; faceId: string } => face !== null) - // Single API call with all faces const response = await fetch('/api/faces/label', { method: 'POST', headers: { @@ -284,7 +174,7 @@ export const useTraining = () => { throw new Error(result.error || 'Failed to label faces') } - const { labeledCount, failedCount } = result + const { labeledCount } = result if (labeledCount > 0) { setSuccessMessage( @@ -293,33 +183,17 @@ export const useTraining = () => { ) } - if (failedCount > 0) { - setMatchingStatus((prev) => ({ - ...prev, - error: `${failedCount} face(s) failed to label. 
Please try again.`, - })) - } - setSelectedFaces(new Set()) setNewFaceName('') setSelectedKnownFace('') await fetchData() - - // Start polling for matching status - if (labeledCount > 0) { - pollMatchingStatus(targetName) - } } catch (error) { console.error('Error labeling faces:', error) - setMatchingStatus((prev) => ({ - ...prev, - error: 'An unexpected error occurred while labeling faces. Please try again.', - })) } finally { setIsLabeling(false) } - }, [selectedFaces, labelMode, selectedKnownFace, newFaceName, unknownFaces, fetchData, pollMatchingStatus]) + }, [selectedFaces, labelMode, selectedKnownFace, newFaceName, unknownFaces, fetchData]) const handleDeleteUnknownFace = useCallback(async (face: UnknownFace) => { try { @@ -350,10 +224,6 @@ export const useTraining = () => { setSuccessMessage('Face deleted successfully.') } catch (error) { console.error('Error deleting face:', error) - setMatchingStatus((prev) => ({ - ...prev, - error: 'Failed to delete face. Please try again.', - })) } }, []) @@ -367,7 +237,6 @@ export const useTraining = () => { newFaceName, isLabeling, activeTab, - matchingStatus, successMessage, unknownPagination, knownPagination, @@ -381,7 +250,5 @@ export const useTraining = () => { handleDeleteUnknownFace, handleUnknownPageChange, handleKnownPageChange, - dismissSuccess, - dismissError, } } diff --git a/apps/web/app/routes/api.faces.label.ts b/apps/web/app/routes/api.faces.label.ts index 351ee484..b8413b3d 100644 --- a/apps/web/app/routes/api.faces.label.ts +++ b/apps/web/app/routes/api.faces.label.ts @@ -1,150 +1,45 @@ -import { promises as fs } from 'fs' -import path from 'path' +import { FACES_DIR } from '@shared/constants' +import { logger } from '@shared/services/logger' import { existsSync } from 'fs' -import type { Scene } from '@shared/types/scene' +import { mkdir } from 'fs/promises' +import path from 'path' import type { ActionFunctionArgs } from 'react-router' -import { getByVideoSource, updateMetadata } from 
'@shared/services/vectorDb' -import { addFaceLabelingJob } from '../../../background-jobs/src/services/faces' -import { FACES_DIR, KNOWN_FACES_FILE, PROCESSED_VIDEOS_DIR, UNKNOWN_FACES_DIR } from '@shared/constants' - -interface LabelFaceRequest { - jsonFile: string - faceId: string -} export async function action({ request }: ActionFunctionArgs) { if (request.method !== 'POST') { return { success: false, error: 'Method not allowed' } } - const { faces, name } = (await request.json()) as { faces: LabelFaceRequest[]; name: string } - const unknownFacesDir = UNKNOWN_FACES_DIR - - if (!faces || faces.length === 0) { - return { success: false, error: 'No faces provided' } - } - - if (!name || !name.trim()) { - return { success: false, error: 'Name is required' } - } - - const personDir = path.join(FACES_DIR, name) - if (!existsSync(personDir)) { - await fs.mkdir(personDir, { recursive: true }) - } + const { faces, name } = await request.json() - let labeledCount = 0 - let failedCount = 0 - const errors: string[] = [] + if (!faces?.length) return { success: false, error: 'No faces provided' } + if (!name?.trim()) return { success: false, error: 'Name is required' } try { - // Load existing known faces - let knownFaces: Record = {} - if (existsSync(KNOWN_FACES_FILE)) { - knownFaces = JSON.parse(await fs.readFile(KNOWN_FACES_FILE, 'utf-8')) - } - - if (!knownFaces[name]) { - knownFaces[name] = [] - } - - // Process each face - for (const face of faces) { - try { - const jsonPath = path.join(unknownFacesDir, face.jsonFile) - const faceData = JSON.parse(await fs.readFile(jsonPath, 'utf-8')) - const imageFile = faceData.image_file - const imagePath = path.join(unknownFacesDir, imageFile) - const videoDir = path.join(PROCESSED_VIDEOS_DIR, path.basename(faceData.video_path)) - - const scenesPath = path.join(videoDir, 'scenes.json') - - // Update scenes in vector DB - const scenes: Scene[] = await getByVideoSource(faceData.video_path) - const modifiedScenes = scenes - 
.map((scene) => { - if ( - scene.startTime >= faceData.frame_start_time_ms / 1000 && - scene.endTime <= faceData.frame_end_time_ms / 1000 - ) { - if (scene.faces.includes(face.faceId)) { - scene.faces = scene.faces.map((f) => (f === face.faceId ? name : f)) - if (scene.facesData) { - scene.facesData = scene.facesData?.map((f) => (f.name === face.faceId ? { ...f, name } : f)) - } - return scene - } - } - return undefined - }) - .filter((scene): scene is Scene => scene !== undefined) - - for (const scene of modifiedScenes) { - await updateMetadata(scene) - } - - if (existsSync(scenesPath)) { - const allScenes: Scene[] = JSON.parse(await fs.readFile(scenesPath, 'utf-8')) - - // Update the scenes in the array - for (const scene of allScenes) { - if ( - scene.startTime >= faceData.frame_start_time_ms / 1000 && - scene.endTime <= faceData.frame_end_time_ms / 1000 - ) { - if (scene.faces.includes(face.faceId)) { - scene.faces = scene.faces.map((f) => (f === face.faceId ? name : f)) - if (scene.facesData) { - scene.facesData = scene.facesData?.map((f) => (f.name === face.faceId ? 
{ ...f, name } : f)) - } - return scene - } - } - } - - // Write back to scenes.json - await fs.writeFile(scenesPath, JSON.stringify(allScenes, null, 2)) - } - // Move image to person directory - const newImagePath = path.join(personDir, imageFile) - await fs.copyFile(imagePath, newImagePath) - await fs.unlink(imagePath) - - // Add to known faces - knownFaces[name].push(newImagePath) - - // // Delete JSON file - await fs.unlink(jsonPath) - - labeledCount++ - } catch (error) { - console.error(`Error labeling face ${face.faceId}:`, error) - failedCount++ - errors.push(`Failed to label face ${face.faceId}`) - } + const personDir = path.join(FACES_DIR, name) + if (!existsSync(personDir)) { + await mkdir(personDir, { recursive: true }) } - - // Save updated known faces - await fs.writeFile(KNOWN_FACES_FILE, JSON.stringify(knownFaces, null, 2)) - - // Trigger matching job once with all reference images - if (labeledCount > 0) { - const referenceImages = knownFaces[name] - await addFaceLabelingJob({ - personName: name, - referenceImages: referenceImages.slice(0, 10), - unknownFacesDir, - }) + const backgroundJobsUrl = process.env.BACKGROUND_JOBS_URL + + const res = await fetch(`${backgroundJobsUrl}/face/label`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ faces, name }), + }) + + const data = await res.json() + if (data.error) { + throw new Error(data.error) } - + return { success: true } + } catch (error) { + logger.error(error) return { - success: true, - labeledCount, - failedCount, - errors: errors.length > 0 ? 
errors : undefined, + success: false, + error: 'Error adding face labeling job', } - } catch (error) { - console.error('Error labeling faces:', error) - return { success: false, error: 'Failed to label faces' } } } diff --git a/apps/web/app/routes/api.faces.matching-status.ts b/apps/web/app/routes/api.faces.matching-status.ts deleted file mode 100644 index 1314b9eb..00000000 --- a/apps/web/app/routes/api.faces.matching-status.ts +++ /dev/null @@ -1,50 +0,0 @@ -import type { LoaderFunctionArgs } from 'react-router' -import { faceMatcherQueue } from '../../../background-jobs/src/queue' - -export async function loader({ request }: LoaderFunctionArgs) { - const url = new URL(request.url) - const personName = url.searchParams.get('person') - - if (!personName) { - return Response.json({ error: 'Person name required' }, { status: 400 }) - } - - try { - const jobs = await faceMatcherQueue.getJobs(['active', 'waiting', 'delayed']) - const activeJob = jobs.find((job) => job.data.personName === personName) - - if (!activeJob) { - const completedJobs = await faceMatcherQueue.getJobs(['completed'], 0, 1) - const recentJob = completedJobs.find((job) => job.data.personName === personName) - - if (recentJob) { - return Response.json({ - isActive: false, - progress: 100, - matchesFound: recentJob.returnvalue?.matchesFound || 0, - status: 'completed', - }) - } - - return Response.json({ - isActive: false, - progress: 0, - matchesFound: 0, - status: 'not_found', - }) - } - - const progress = activeJob.progress - const state = await activeJob.getState() - - return Response.json({ - isActive: state === 'active' || state === 'waiting', - progress: typeof progress === 'number' ? 
progress : 0, - matchesFound: 0, - status: state, - }) - } catch (error) { - console.error('Error fetching matching status:', error) - return Response.json({ error: 'Failed to fetch status' }, { status: 500 }) - } -} diff --git a/apps/web/app/routes/app.search.tsx b/apps/web/app/routes/app.search.tsx index 1c989b5b..eae113ae 100644 --- a/apps/web/app/routes/app.search.tsx +++ b/apps/web/app/routes/app.search.tsx @@ -11,9 +11,9 @@ import { hybridSearch } from '@shared/services/vectorDb' import { motion, AnimatePresence } from 'framer-motion' import { getUser } from '~/services/user.sever' import { SearchInput } from '~/features/search/components/SearchInput' -import type { VideoSearchParams } from '@shared/types/search'; +import type { VideoSearchParams } from '@shared/types/search' import { generateActionFromPrompt } from '@shared/services/modelRouter' -import { getSearchStats } from '@shared/utils/search'; +import { getSearchStats } from '@shared/utils/search' import { buildSearchQueryFromSuggestions } from '@shared/services/suggestion' import { logger } from '@shared/services/logger' @@ -42,7 +42,7 @@ export async function action({ request }: { request: Request }) { searchQuery = buildSearchQueryFromSuggestions(suggestions) console.debug('Using suggestions for search:', searchQuery) } else { - searchQuery = await (await generateActionFromPrompt(query)).data + searchQuery = (await generateActionFromPrompt(query)).data console.debug('Generated search query from AI:', searchQuery) } @@ -182,7 +182,9 @@ export default function SearchPage() { )} - {!hasResults && !isSearching && } + {!hasResults && !isSearching && ( + + )} ) diff --git a/apps/web/app/routes/app.training.tsx b/apps/web/app/routes/app.training.tsx index 46e004d9..73d814a1 100644 --- a/apps/web/app/routes/app.training.tsx +++ b/apps/web/app/routes/app.training.tsx @@ -3,7 +3,6 @@ import { Loader2 } from 'lucide-react' import { LabelingForm } from '~/features/training/components/LabelingForm' import { 
UnknownFacesGrid } from '~/features/training/components/UnknownFacesGrid' import { KnownFacesGrid } from '~/features/training/components/KnownFacesGrid' -import { StatusNotifications } from '~/features/training/components/StatusNotifications' import { useTraining } from '~/features/training/hooks/useTraining' import type { MetaFunction } from 'react-router' import { DashboardLayout } from '~/layouts/DashboardLayout' @@ -28,8 +27,6 @@ const Training: React.FC = () => { newFaceName, isLabeling, activeTab, - matchingStatus, - successMessage, setLabelMode, setSelectedKnownFace, setNewFaceName, @@ -38,8 +35,6 @@ const Training: React.FC = () => { handleSelectAll, handleLabelFaces, handleDeleteUnknownFace, - dismissSuccess, - dismissError, unknownPagination, handleUnknownPageChange, } = useTraining() @@ -57,13 +52,6 @@ const Training: React.FC = () => { return ( }> - -
diff --git a/apps/web/package.json b/apps/web/package.json index feaafc66..6653d805 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -1,6 +1,6 @@ { "name": "web", - "version": "0.1.1", + "version": "0.1.2", "private": true, "type": "module", "scripts": { diff --git a/docker/Dockerfile.background-jobs b/docker/Dockerfile.background-jobs index 5e292b52..8b56d868 100644 --- a/docker/Dockerfile.background-jobs +++ b/docker/Dockerfile.background-jobs @@ -22,87 +22,74 @@ COPY apps/background-jobs/package.json ./apps/background-jobs/ COPY apps/web/package.json ./apps/web/ COPY packages/shared/package.json ./packages/shared/ COPY packages/prisma ./packages/prisma/ +COPY .npmrc ./ +COPY prisma.config.ts ./ RUN --mount=type=cache,id=pnpm,target=/pnpm/store \ pnpm install --frozen-lockfile -FROM base AS python-deps +FROM --platform=$BUILDPLATFORM python:3.11-slim-bookworm AS python-deps + +ARG TARGETARCH + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv RUN apt-get update && apt-get install -y --no-install-recommends \ - python3=3.11.* \ - python3-venv \ - python3-pip \ - ffmpeg \ - cmake \ - build-essential \ - libboost-all-dev \ - libopenblas-dev \ - liblapack-dev \ - && ln -sf /usr/bin/python3 /usr/bin/python \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get clean - -RUN python3 -m venv /app/.venv -ENV PATH="/app/.venv/bin:$PATH" \ - VIRTUAL_ENV="/app/.venv" + build-essential cmake libopenblas-dev && rm -rf /var/lib/apt/lists/* +WORKDIR /app COPY python/requirements.txt ./python/ -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install --upgrade pip setuptools wheel && \ - pip install dlib-bin && \ - pip install face_recognition && \ - pip install fer && \ - pip install -r python/requirements.txt +RUN uv venv /app/.venv && \ + VIRTUAL_ENV=/app/.venv uv pip install --no-cache -r python/requirements.txt + + FROM node-deps AS builder -COPY --from=node-deps ./app/node_modules /app/node_modules -COPY apps ./apps -COPY packages ./packages -COPY 
python ./python -COPY tsconfig.json ./ +COPY . . -RUN pnpm rebuild @tailwindcss/oxide rollup sharp -RUN pnpm --filter shared build -RUN pnpm --filter background-jobs build +RUN --mount=type=cache,id=pnpm,target=/pnpm/store \ + pnpm install --frozen-lockfile + +RUN pnpm --filter prisma generate + +RUN pnpm --filter shared build && \ + pnpm --filter background-jobs build + +FROM base AS prod-deps + +COPY pnpm-workspace.yaml pnpm-lock.yaml package.json prisma.config.ts ./ +COPY apps/background-jobs/package.json ./apps/background-jobs/ +COPY apps/web/package.json ./apps/web/ +COPY packages/shared/package.json ./packages/shared/ +COPY packages/prisma ./packages/prisma/ +COPY .npmrc ./ +COPY prisma.config.ts ./ + +RUN --mount=type=cache,id=pnpm,target=/pnpm/store \ + pnpm install --prod --frozen-lockfile FROM base AS production RUN apt-get update && apt-get install -y --no-install-recommends \ - python3=3.11.* \ - python3-venv \ - libopenblas0 \ - liblapack3 \ - libx11-6 \ - libgtk-3-0 \ - libboost-python1.74.0 \ - libboost-thread1.74.0 \ - libboost-filesystem1.74.0 \ - ffmpeg \ - libsm6 \ - libxext6 \ - libxrender1 \ - libgomp1 \ - && ln -sf /usr/bin/python3 /usr/bin/python \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get clean + python3 ffmpeg libopenblas0 liblapack3 libsm6 libxext6 libgomp1 \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app COPY --from=python-deps /app/.venv /app/.venv -COPY --from=builder /app/python /app/python -COPY --from=builder /app/node_modules /app/node_modules -COPY --from=builder /app/apps /app/apps -COPY --from=builder /app/packages /app/packages -COPY --from=builder /app/pnpm-workspace.yaml /app/pnpm-lock.yaml /app/package.json /app/prisma.config.ts /app/tsconfig.json ./ +COPY --from=prod-deps /app/node_modules /app/node_modules +COPY --from=builder /app/apps/background-jobs/dist ./apps/background-jobs/dist +COPY --from=builder /app/packages/shared/dist ./packages/shared/dist +COPY --from=builder /app/packages/prisma ./packages/prisma 
+COPY --from=builder /app/package.json /app/pnpm-workspace.yaml /app/prisma.config.ts ./ -ENV NODE_ENV=production \ - PATH="/app/.venv/bin:$PATH" \ - VIRTUAL_ENV="/app/.venv" +ENV NODE_ENV=production VIRTUAL_ENV="/app/.venv" PATH="/app/.venv/bin:$PATH" -WORKDIR /app/ EXPOSE 4000 8765 @@ -118,30 +105,22 @@ FROM builder AS development COPY --from=python-deps /app/.venv /app/.venv COPY --from=python-deps /app/python /app/python +COPY python/requirements-dev.txt ./python/ RUN apt-get update && apt-get install -y --no-install-recommends \ - python3=3.11.* \ - python3-venv \ libopenblas0 \ liblapack3 \ - libx11-6 \ - libgtk-3-0 \ - libboost-python1.74.0 \ - libboost-thread1.74.0 \ - libboost-filesystem1.74.0 \ ffmpeg \ - libsm6 \ - libxext6 \ - libxrender1 \ - libgomp1 \ - && ln -sf /usr/bin/python3 /usr/bin/python \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get clean + libx11-6 libgtk-3-0 libsm6 libxext6 libxrender1 libgomp1 \ + && rm -rf /var/lib/apt/lists/* ENV NODE_ENV=development \ PATH="/app/.venv/bin:$PATH" \ VIRTUAL_ENV="/app/.venv" +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install -r python/requirements-dev.txt + RUN pnpm --filter background-jobs build WORKDIR /app/ @@ -153,4 +132,27 @@ CMD ["sh", "-c", "\ pnpm --filter prisma generate && \ pnpm --filter prisma seed && \ pnpm --filter background-jobs dev \ -"] \ No newline at end of file +"] + + + +FROM base AS testing + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 ffmpeg libopenblas0 liblapack3 libsm6 libxext6 libgomp1 \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv + +COPY python ./python/ + +RUN uv venv /app/.venv && \ + VIRTUAL_ENV=/app/.venv uv pip install --no-cache -r python/requirements.txt && \ + uv pip install --no-cache -r python/requirements-dev.txt + +ENV NODE_ENV=testing VIRTUAL_ENV="/app/.venv" PATH="/app/.venv/bin:$PATH" + +WORKDIR /app/python +RUN pytest tests \ No newline at end of file 
diff --git a/docker/Dockerfile.web b/docker/Dockerfile.web index da11fe43..5345131f 100644 --- a/docker/Dockerfile.web +++ b/docker/Dockerfile.web @@ -17,6 +17,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ openssl \ ca-certificates \ curl \ + python3 \ && rm -rf /var/lib/apt/lists/* COPY pnpm-workspace.yaml pnpm-lock.yaml package.json prisma.config.ts .npmrc ./ @@ -53,6 +54,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ openssl \ ca-certificates \ curl \ + python3 \ && rm -rf /var/lib/apt/lists/* WORKDIR /app @@ -61,6 +63,7 @@ COPY --from=builder /app/pnpm-workspace.yaml ./ COPY --from=builder /app/pnpm-lock.yaml ./ COPY --from=builder /app/package.json ./ COPY --from=builder /app/prisma.config.ts ./ +COPY --from=builder /app/tsconfig.json ./ COPY --from=builder /app/.npmrc ./ COPY --from=builder /app/apps/web/package.json ./apps/web/ @@ -95,6 +98,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ openssl \ ca-certificates \ curl \ + python3 \ && rm -rf /var/lib/apt/lists/ WORKDIR /app @@ -120,3 +124,40 @@ CMD ["sh", "-c", "\ pnpm --filter prisma seed && \ pnpm --filter web dev --host 0.0.0.0 --port 3745 \ "] + + + +FROM base AS testing + +RUN apt-get update && apt-get install -y --no-install-recommends \ + openssl \ + ca-certificates \ + curl \ + python3 \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY --from=builder /app/pnpm-workspace.yaml ./ +COPY --from=builder /app/pnpm-lock.yaml ./ +COPY --from=builder /app/package.json ./ +COPY --from=builder /app/prisma.config.ts ./ +COPY --from=builder /app/tsconfig.json ./ +COPY --from=builder /app/.npmrc ./ + +COPY --from=builder /app/apps/web/package.json ./apps/web/ +COPY --from=builder /app/apps/background-jobs/package.json ./apps/background-jobs/ +COPY --from=builder /app/packages/shared/package.json ./packages/shared/ +COPY --from=builder /app/packages/prisma/package.json ./packages/prisma/ +COPY --from=builder 
/app/packages/prisma/schema.prisma ./packages/prisma + +RUN --mount=type=cache,id=pnpm,target=/pnpm/store \ + pnpm install --frozen-lockfile --prod=false + +COPY --from=builder /app/packages/prisma ./packages/prisma +COPY --from=builder /app/packages/shared ./packages/shared +COPY --from=builder /app/apps/web ./apps/web + + +ENV NODE_ENV=testing +RUN pnpm --filter shared test tests/services/vectorDb.test.ts tests/services/modelRouter.test.ts \ No newline at end of file diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 0a35288e..b09dd415 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -35,7 +35,7 @@ services: - ${HOST_MEDIA_PATH:-./media}:/media/videos - whisper-models:/app/models - yolo-models:/app/models/yolo - - background_jobs_node_modules:/app/node_modules + - background_jobs_node_modules-dev:/app/node_modules web: container_name: edit-mind-web-dev build: @@ -61,21 +61,19 @@ services: environment: NODE_ENV: development DATABASE_URL: postgresql://user:password@postgres:5432/app - CHROMA_URL: http://chroma:8000 - REDIS_URL: redis://redis:6379 volumes: - ../apps/web:/app/apps/web - ../packages:/app/packages - ../data:/app/data - ../tsconfig.json:/app/tsconfig.json - ${HOST_MEDIA_PATH:-./media}:/media/videos - - web_node_modules:/app/node_modules + - web_node_modules-dev:/app/node_modules networks: - app-network chroma: image: chromadb/chroma:1.3.5 - container_name: edit-mind-chroma + container_name: edit-mind-chroma-dev restart: unless-stopped environment: CHROMA_CORS_ALLOW_ORIGINS: '["http://localhost:3745", "http://localhost:4000", "http://web:3745", "http://background-jobs:4000"]' @@ -96,7 +94,7 @@ services: image: redis:7-alpine container_name: edit-mind-redis-dev ports: - - "${REDIS_PORT:-6379}:6379" + - "${REDIS_PORT:-6380}:6380" volumes: - redis_data_dev:/data networks: @@ -105,7 +103,7 @@ services: image: postgres:16-alpine container_name: edit-mind-postgres-dev ports: - - 
"${POSTGRES_PORT:-5433}:5432" + - "${POSTGRES_PORT:-5434}:5432" environment: POSTGRES_USER: ${POSTGRES_USER:-user} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password} @@ -131,9 +129,9 @@ volumes: driver: local yolo-models: driver: local - web_node_modules: + web_node_modules-dev: driver: local - background_jobs_node_modules: + background_jobs_node_modules-dev: driver: local networks: diff --git a/package.json b/package.json index f9ceabf8..4e2cabce 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "edit-mind", - "version": "0.1.1", + "version": "0.1.2", "description": "AI-Powered Video Indexing and Search", "main": "./out/main/main.js", "license": "MIT", @@ -12,9 +12,7 @@ "type": "git", "url": "https://github.com/iliashad/edit-mind" }, - "scripts": { - "postinstall": "pnpm prisma generate" - }, + "scripts": {}, "dependencies": { "@prisma/client": "6.19.0", "bcryptjs": "^2.4.3" diff --git a/packages/prisma/package.json b/packages/prisma/package.json index c5fec216..36fd6cd2 100644 --- a/packages/prisma/package.json +++ b/packages/prisma/package.json @@ -1,6 +1,6 @@ { "name": "prisma", - "version": "0.1.1", + "version": "0.1.2", "private": true, "license": "MIT", "author": { diff --git a/packages/shared/constants/index.ts b/packages/shared/constants/index.ts index 503e5472..a1932dad 100644 --- a/packages/shared/constants/index.ts +++ b/packages/shared/constants/index.ts @@ -12,11 +12,10 @@ export const IS_WIN = process.platform === 'win32' // Directories export const THUMBNAILS_DIR = process.env.THUMBNAILS_PATH || '.thumbnails' -export const FACES_DIR = process.env.FACES_DIR || '.faces' +export const FACES_DIR = process.env.FACES_DIR || '.faces_db' export const PROCESSED_VIDEOS_DIR = process.env.PROCESSED_VIDEOS_DIR || path.resolve('.results') export const DATA_DIR = path.resolve('data') export const UNKNOWN_FACES_DIR = process.env.UNKNOWN_FACES_DIR || '.unknown_faces' -export const KNOWN_FACES_FILE = process.env.KNOWN_FACES_FILE || '.faces.json' 
export const BACKGROUND_JOBS_DIR = process.env.BACKGROUND_JOBS_DIR || '/apps/background-jobs' export const CACHE_FILE = '.locations.json' diff --git a/packages/shared/package.json b/packages/shared/package.json index 04ea40e3..9f5e6160 100644 --- a/packages/shared/package.json +++ b/packages/shared/package.json @@ -1,6 +1,6 @@ { "name": "shared", - "version": "0.1.1", + "version": "0.1.2", "description": "AI-Powered Video Indexing and Search", "type": "module", "main": "dist/index.js", diff --git a/packages/shared/services/gemini.ts b/packages/shared/services/gemini.ts index d1ffe5bb..dbe05006 100644 --- a/packages/shared/services/gemini.ts +++ b/packages/shared/services/gemini.ts @@ -1,4 +1,4 @@ -import { GenerateContentResult, GoogleGenerativeAI, HarmCategory, HarmBlockThreshold } from '@google/generative-ai' +import { GenerateContentResult, GoogleGenerativeAI, HarmCategory, HarmBlockThreshold, GenerativeModel } from '@google/generative-ai' import { GEMINI_API_KEY, GEMINI_MODEL_NAME } from '../constants' import { GENERAL_RESPONSE_PROMPT, @@ -20,13 +20,12 @@ import { VideoSearchParamsSchema } from '@shared/schemas/search' const CONTEXT_WINDOW_LIMIT = 20000 // based on gemini-2.5-pro -if (!GEMINI_API_KEY) { - throw new Error('Gemini API key missing') +let model: GenerativeModel; +if (GEMINI_API_KEY) { + const genAI = new GoogleGenerativeAI(GEMINI_API_KEY) + model = genAI.getGenerativeModel({ model: GEMINI_MODEL_NAME }) } -const genAI = new GoogleGenerativeAI(GEMINI_API_KEY) -const model = genAI.getGenerativeModel({ model: GEMINI_MODEL_NAME }) - const formatHistory = (chatHistory?: ChatMessage[]) => { return chatHistory?.length ? 
`Recent conversation:\n${chatHistory diff --git a/packages/shared/services/immich.ts b/packages/shared/services/immich.ts index e42f4382..0d217ad0 100644 --- a/packages/shared/services/immich.ts +++ b/packages/shared/services/immich.ts @@ -1,7 +1,7 @@ import * as fs from 'fs/promises' import * as path from 'path' import fetch from 'node-fetch' -import { FACES_DIR } from '../constants' +import { FACES_DIR } from '../constants'; import { AssetsBucketResponse, Face, ImmichConfig, PeopleResponse, Person, TimeBucket } from '../types/immich' import * as Jimp from 'jimp' import { logger } from './logger' @@ -163,7 +163,7 @@ async function processAssetForPerson( const imageBuffer = await client.getAssetImage(assetId) const faceFiles = await Promise.all( - matchedFaces.map((face) => extractAndSaveFace(imageBuffer, face, assetId, personDir)) + matchedFaces.map((face) => extractAndSaveFace(imageBuffer, face, personDir)) ) return faceFiles.filter((file): file is string => file !== null) @@ -172,7 +172,6 @@ async function processAssetForPerson( async function extractAndSaveFace( imageBuffer: Buffer, face: Face, - assetId: string, personDir: string ): Promise { try { @@ -195,6 +194,7 @@ async function extractAndSaveFace( } } + async function savePersonThumbnail(client: ImmichClient, personId: string, personDir: string) { try { const thumbnail = await client.getPersonThumbnail(personId) @@ -206,5 +206,5 @@ async function savePersonThumbnail(client: ImmichClient, personId: string, perso } function sanitizeName(name: string): string { - return name.replace(/[^a-zA-Z0-9-_]/g, '_') + return name.replace(/[^a-zA-Z0-9-_]/g, '_'); } diff --git a/packages/shared/services/pythonService.ts b/packages/shared/services/pythonService.ts index a980b8af..356dc7ec 100644 --- a/packages/shared/services/pythonService.ts +++ b/packages/shared/services/pythonService.ts @@ -4,10 +4,16 @@ import { spawn, ChildProcess } from 'child_process' import WebSocket from 'ws' import { Analysis, AnalysisProgress 
} from '../types/analysis' import { IS_WIN, MAX_RESTARTS, PYTHON_PORT, PYTHON_SCRIPT, RESTART_BACKOFF_MS, VENV_PATH } from '../constants' -import { FaceIndexingProgress, FaceMatchingProgress, FindMatchingFacesResponse } from '../types/face' import { TranscriptionProgress } from '../types/transcription' import { logger } from './logger' -import { CallbackMap, PythonMessageType } from '../types/python' +import { PythonMessageType } from '../types/python' + +interface JobCallbacks { + onProgress?: (data: any) => void + onResult?: (data: any) => void + onError?: (error: Error) => void + onComplete?: () => void +} class PythonService { private static instance: PythonService @@ -16,7 +22,7 @@ class PythonService { private serviceUrl: string private isRunning = false private restartCount = 0 - private messageCallbacks: CallbackMap = {} + private jobCallbacks: Map = new Map() private port: string private startPromise: Promise | null = null @@ -38,7 +44,6 @@ class PythonService { public async start(): Promise { if (this.startPromise) { - logger.debug('Python service already starting, waiting...') return this.startPromise } @@ -79,8 +84,6 @@ class PythonService { const servicePath = PYTHON_SCRIPT - logger.debug(`Spawning Python service: ${pythonExecutable} ${servicePath}`) - this.serviceProcess = spawn(pythonExecutable, [servicePath, '--port', this.port, '--host', '0.0.0.0'], { stdio: ['ignore', 'pipe', 'pipe'], detached: false, @@ -90,16 +93,6 @@ class PythonService { throw new Error('Failed to spawn Python service process') } - logger.debug(`Python service spawned with PID: ${this.serviceProcess.pid}`) - - this.serviceProcess.stdout?.on('data', (data) => { - logger.debug(`[PythonService STDOUT]: ${data.toString().trim()}`) - }) - - this.serviceProcess.stderr?.on('data', (data) => { - logger.debug(`[PythonService STDERR]: ${data.toString().trim()}`) - }) - this.serviceProcess.on('error', (error) => { logger.error('Python service process error: ' + error) }) @@ -111,7 +104,6 
@@ class PythonService { this.handleCrash() }) - logger.debug('Waiting for Python service to be ready...') const maxAttempts = 15 const delayMs = 1000 @@ -130,7 +122,6 @@ class PythonService { logger.error(error) throw new Error(`Python service failed to start within ${maxAttempts * delayMs}ms`) } - logger.debug(`Attempt ${attempt} failed, retrying in ${delayMs}ms...`) await new Promise((resolve) => setTimeout(resolve, delayMs)) } } @@ -149,6 +140,7 @@ class PythonService { this.serviceProcess.kill('SIGTERM') this.serviceProcess = null } + this.jobCallbacks.clear() this.isRunning = false } @@ -159,115 +151,62 @@ class PythonService { onResult: (result: Analysis) => void, onError: (error: Error) => void ): void { - if (!this.isRunning || !this.client) { - onError(new Error('Python service is not running.')) - return - } - - if (this.client.readyState !== WebSocket.OPEN) { - onError(new Error(`WebSocket not open. State: ${this.client.readyState}`)) + if (this.client?.readyState !== WebSocket.OPEN) { + onError(new Error(`WebSocket not open. 
State: ${this.client?.readyState}`)) return } - this.messageCallbacks['analysis_progress'] = onProgress - this.messageCallbacks['analysis_completed'] = onResult - this.messageCallbacks['analysis_error'] = onError + this.jobCallbacks.set(job_id, { + onProgress, + onResult, + onError, + }) const message = { type: 'analyze', - payload: { video_path: videoPath, job_id }, + payload: { video_path: encodeURI(videoPath), job_id }, } - this.client.send(JSON.stringify(message)) + try { + this.client.send(JSON.stringify(message)) + } catch (error) { + this.jobCallbacks.delete(job_id) + onError(new Error(`Failed to send message: ${error}`)) + } } - public async transcribe( + public transcribe( videoPath: string, jsonFilePath: string, job_id: string, onProgress: (progress: TranscriptionProgress) => void, onComplete: (result: void) => void, onError: (error: Error) => void - ): Promise { - if (!this.isRunning || !this.client) { - onError(new Error('Python service is not running.')) - return - } - - if (this.client.readyState !== WebSocket.OPEN) { - onError(new Error(`WebSocket not open. State: ${this.client.readyState}`)) + ): void { + if (this.client?.readyState !== WebSocket.OPEN) { + onError(new Error(`WebSocket not open. 
State: ${this.client?.readyState}`)) return } - this.messageCallbacks['transcription_progress'] = onProgress - this.messageCallbacks['transcription_completed'] = onComplete - this.messageCallbacks['transcription_error'] = onError + this.jobCallbacks.set(job_id, { + onProgress, + onComplete, + onError, + }) const message = { type: 'transcribe', - payload: { video_path: videoPath, json_file_path: jsonFilePath, job_id }, + payload: { video_path: encodeURI(videoPath), json_file_path: jsonFilePath, job_id }, } - this.client.send(JSON.stringify(message)) - } - - public reindexFaces( - specificFaces: { name: string; image_path: string }[], - jobId: string, - onProgress: (progress: FaceIndexingProgress) => void, - onComplete: (result: void) => void, - onError: (error: Error) => void - ): void { - if (!this.isRunning || !this.client) { - onError(new Error('Python service is not running.')) - return - } - - this.messageCallbacks['reindex_progress'] = onProgress - this.messageCallbacks['reindex_complete'] = onComplete - this.messageCallbacks['reindex_error'] = onError - - const message = { - type: 'reindex_faces', - specific_faces: specificFaces, - job_id: jobId, + try { + this.client?.send(JSON.stringify(message)) + } catch (error) { + this.jobCallbacks.delete(job_id) + onError(new Error(`Failed to send message: ${error}`)) } - - this.client.send(JSON.stringify(message)) } - public findMatchingFaces( - personName: string, - referenceImages: string[], - unknownFacesDir: string, - onProgress: (progress: FaceMatchingProgress) => void, - onComplete: (result: FindMatchingFacesResponse) => void, - onError: (error: Error) => void - ): void { - if (!this.isRunning || !this.client) { - onError(new Error('Python service is not running.')) - return - } - if (this.client.readyState !== WebSocket.OPEN) { - onError(new Error(`WebSocket not open. 
State: ${this.client.readyState}`)) - return - } - - this.messageCallbacks['face_matching_progress'] = onProgress - this.messageCallbacks['face_matching_complete'] = onComplete - this.messageCallbacks['face_matching_error'] = onError - const message = { - type: 'find_matching_faces', - payload: { - person_name: personName, - reference_images: referenceImages, - unknown_faces_dir: unknownFacesDir, - tolerance: 0.4, - }, - } - - this.client.send(JSON.stringify(message)) - } public getServiceUrl(): string { return this.serviceUrl } @@ -286,7 +225,9 @@ class PythonService { reject(new Error('WebSocket connection timeout')) }, 3000) - this.client = new WebSocket(this.serviceUrl) + this.client = new WebSocket(this.serviceUrl, { + maxPayload: 1024 * 1024 * 1024, // 1GB + }) this.client.on('open', () => { clearTimeout(timeout) @@ -297,14 +238,52 @@ class PythonService { this.client.on('message', (data) => { try { const message = JSON.parse(data.toString()) - const { type, payload, job_id } = message + const { type, payload } = message + const job_id = payload?.job_id + + if (!job_id) { + logger.warn(`โš ๏ธ Received message without job_id: ${type}`) + return + } - const callback = this.messageCallbacks[type as PythonMessageType] + const callbacks = this.jobCallbacks.get(job_id) + + if (!callbacks) { + logger.warn(`โš ๏ธ No callbacks registered for job_id: ${job_id}`) + return + } - if (callback) { - callback({ ...payload, job_id }) - } else { - logger.warn(`โš ๏ธ No callback registered for message type: ${type}`) + switch (type as PythonMessageType) { + case 'analysis_progress': + callbacks.onProgress?.(payload) + break + + case 'analysis_completed': + callbacks.onResult?.(payload) + this.jobCallbacks.delete(job_id) + break + + case 'analysis_error': + callbacks.onError?.(new Error(payload.message || 'Analysis failed')) + this.jobCallbacks.delete(job_id) + break + + case 'transcription_progress': + callbacks.onProgress?.(payload) + break + + case 
'transcription_completed': + callbacks.onComplete?.() + this.jobCallbacks.delete(job_id) + break + + case 'transcription_error': + callbacks.onError?.(new Error(payload.message || 'Transcription failed')) + this.jobCallbacks.delete(job_id) + break + + default: + logger.warn(`โš ๏ธ Unknown message type: ${type}`) } } catch (error) { logger.error('โŒ Error processing message: ' + error) diff --git a/packages/shared/tests/services/vectorDb.test.ts b/packages/shared/tests/services/vectorDb.test.ts index f027e4f4..3b25d61d 100644 --- a/packages/shared/tests/services/vectorDb.test.ts +++ b/packages/shared/tests/services/vectorDb.test.ts @@ -23,7 +23,7 @@ import { EmbeddingInput } from '@shared/types/vector' import { Scene } from '@shared/types/scene' import { VideoSearchParams } from '@shared/types/search' -const TEST_TIMEOUT = 40000 +const TEST_TIMEOUT = 40000 * 2 const mockScene = (overrides?: Partial): Scene => ({ id: `scene-${Date.now()}-${Math.random()}`, diff --git a/packages/shared/types/analysis.ts b/packages/shared/types/analysis.ts index 271605bb..9805279b 100644 --- a/packages/shared/types/analysis.ts +++ b/packages/shared/types/analysis.ts @@ -1,7 +1,10 @@ export interface Face { name: string location: [number, number, number, number] - emotion?: Record + emotion: { + label: string, + confidence: number + } bbox: BBox confidence: number } diff --git a/packages/shared/types/face.ts b/packages/shared/types/face.ts index 1869eb51..9532ee34 100644 --- a/packages/shared/types/face.ts +++ b/packages/shared/types/face.ts @@ -9,12 +9,7 @@ export interface FaceIndexingProgress { } export interface KnownFace { name: string - images: string[] -} -export interface AddFaceLabelingJobParams { - personName: string - referenceImages: string[] - unknownFacesDir: string + image: string } export type FaceIndexProgress = { @@ -47,10 +42,6 @@ export interface FaceMatchingProgress { match?: MatchResult } -interface FaceData { - video_path: string - timestamp_seconds: number -} 
export interface FindMatchingFacesResponse { success: boolean @@ -58,4 +49,11 @@ export interface FindMatchingFacesResponse { matches_found: number matches: MatchResult[] reference_images_used: number +} +export interface FaceLabellingJobData { + faces: { + jsonFile: string + faceId: string + }[] + name: string } \ No newline at end of file diff --git a/packages/shared/types/unknownFace.ts b/packages/shared/types/unknownFace.ts new file mode 100644 index 00000000..5c4a4571 --- /dev/null +++ b/packages/shared/types/unknownFace.ts @@ -0,0 +1,53 @@ +interface BoundingBox { + top: number; + right: number; + bottom: number; + left: number; + width: number; + height: number; +} + +interface FrameDimensions { + width: number; + height: number; +} + +interface Label { + name: string | null; + labeled_by: string | null; + labeled_at: string | null; + confidence: number | null; + notes: string | null; +} + +interface Appearance { + frame_index: number; + timestamp_ms: number; + timestamp_seconds: number; + formatted_timestamp: string; + bounding_box: BoundingBox; + padded_bounding_box: BoundingBox; +} + +export interface FaceDetectionData { + image_file: string; + json_file: string; + image_hash: string; + created_at: string; + video_path: string; + video_name: string; + frame_index: number; + timestamp_ms: number; + timestamp_seconds: number; + formatted_timestamp: string; + frame_dimensions: FrameDimensions; + face_id: string; + bounding_box: BoundingBox; + padded_bounding_box: BoundingBox; + first_appearance: Appearance; + all_appearances: Appearance[]; + total_appearances: number; + label: Label; + last_updated: string; + last_appearance: Appearance; +} \ No newline at end of file diff --git a/packages/shared/utils/faces.ts b/packages/shared/utils/faces.ts deleted file mode 100644 index 909829cf..00000000 --- a/packages/shared/utils/faces.ts +++ /dev/null @@ -1,58 +0,0 @@ -import { logger } from '../services/logger' -import { pythonService } from 
'../services/pythonService' -import { FaceIndexProgress, FaceMatchingProgress, FindMatchingFacesResponse } from '../types/face' - -type ProgressCallback = (progress: FaceIndexProgress) => Promise - -export function reindexFaces( - specificFaces: { name: string; image_path: string }[], - jobId: string, - onProgress?: ProgressCallback -): Promise { - return new Promise((resolve, reject) => { - pythonService.reindexFaces( - specificFaces, - jobId, - async (progress) => { - if (onProgress) { - try { - await onProgress(progress) - } catch (error) { - logger.error('โŒ Error in progress callback:' + error) - } - } - }, - (result) => { - resolve(result) - }, - (error) => { - reject(error) - } - ) - }) -} -export function findMatchingFaces( - personName: string, - referenceImages: string[], - unknownFacesDir: string, - onProgress?: (progress: FaceMatchingProgress) => void -): Promise { - return new Promise((resolve, reject) => { - pythonService.findMatchingFaces( - personName, - referenceImages, - unknownFacesDir, - (progress) => { - if (onProgress) { - onProgress(progress) - } - }, - (result) => { - resolve(result) - }, - (error) => { - reject(error) - } - ) - }) -} diff --git a/packages/shared/utils/scenes.ts b/packages/shared/utils/scenes.ts index beb8ecad..f3bdc151 100644 --- a/packages/shared/utils/scenes.ts +++ b/packages/shared/utils/scenes.ts @@ -87,7 +87,7 @@ export const createScenes = async ( transcription: getTranscriptionForTimeRange(startTime, endTime), description: generateSceneDescription(frame.objects, frame.faces), shot_type: frame.shot_type, - emotions: [], + emotions: frame.faces?.map((face) => ({ name: face.name, emotion: face.emotion.label })), source: videoPath, camera: '', createdAt: 0, diff --git a/packages/ui/package.json b/packages/ui/package.json index 94ca18a0..e8916a7d 100644 --- a/packages/ui/package.json +++ b/packages/ui/package.json @@ -1,6 +1,6 @@ { "name": "ui", - "version": "0.1.1", + "version": "0.1.2", "private": true, "main": 
"./dist/index.js", "module": "./dist/index.mjs", diff --git a/python/.faces/Aiony Haust/image.jpg b/python/.faces/Aiony Haust/image.jpg new file mode 100644 index 00000000..6b52f443 Binary files /dev/null and b/python/.faces/Aiony Haust/image.jpg differ diff --git a/python/.faces/Ilias/image.jpg b/python/.faces/Ilias/image.jpg new file mode 100644 index 00000000..5cba9b47 Binary files /dev/null and b/python/.faces/Ilias/image.jpg differ diff --git a/python/add_face.py b/python/add_face.py deleted file mode 100644 index fbd79e59..00000000 --- a/python/add_face.py +++ /dev/null @@ -1,82 +0,0 @@ -import sys -import os -import json -import face_recognition - -def add_face(name: str, image_path: str, known_faces_file: str) -> None: - """ - Extracts a face encoding from an image and adds it to the known faces file. - """ - if not os.path.exists(image_path): - print(json.dumps({"status": "error", "message": "Image file not found."}), file=sys.stderr) - sys.exit(1) - - try: - image = face_recognition.load_image_file(image_path) - face_encodings = face_recognition.face_encodings(image) - - if len(face_encodings) == 0: - print(json.dumps({"status": "warning", "message": "No face detected in image."}), file=sys.stderr) - sys.exit(0) # Not an error, just no face found - - if len(face_encodings) > 1: - print(json.dumps({"status": "warning", "message": "Multiple faces found. 
Using the first one."}), file=sys.stderr) - - new_encoding = face_encodings[0].tolist() - - # Load existing known faces - known_faces = [] - if os.path.exists(known_faces_file): - try: - with open(known_faces_file, 'r') as f: - content = f.read().strip() - if content: # Only parse if file is not empty - known_faces = json.load(open(known_faces_file, 'r')) - else: - known_faces = [] - except json.JSONDecodeError as e: - print(json.dumps({ - "status": "error", - "message": f"Corrupted known_faces.json: {e.msg} at line {e.lineno}" - }), file=sys.stderr) - sys.exit(1) - - # Add new face encoding in the format expected by FaceRecognizer - # Format: [{"name": "Person Name", "encoding": [...]}, ...] - known_faces.append({ - "name": name, - "encoding": new_encoding - }) - - # Write back to file with proper formatting - with open(known_faces_file, 'w') as f: - json.dump(known_faces, f, indent=2) - - print(json.dumps({ - "status": "success", - "message": f"Face for {name} added successfully.", - "total_encodings": len(known_faces) - })) - sys.exit(0) - - except Exception as e: - print(json.dumps({ - "status": "error", - "message": f"Error processing image: {str(e)}" - }), file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - sys.exit(1) - -if __name__ == "__main__": - if len(sys.argv) != 4: - print(json.dumps({ - "status": "error", - "message": "Usage: python add_face.py " - }), file=sys.stderr) - sys.exit(1) - - name = sys.argv[1] - image_path = sys.argv[2] - known_faces_file = sys.argv[3] - add_face(name, image_path, known_faces_file) \ No newline at end of file diff --git a/python/analysis_service.py b/python/analysis_service.py index a4dbdf95..9a3636a0 100644 --- a/python/analysis_service.py +++ b/python/analysis_service.py @@ -8,16 +8,13 @@ from dataclasses import dataclass, field from enum import Enum from datetime import datetime -from concurrent.futures import Future import websockets from websockets.legacy.server import WebSocketServerProtocol 
from websockets.exceptions import ConnectionClosed, ConnectionClosedOK, ConnectionClosedError from transcribe import TranscriptionService from analyze import AnalysisConfig, OutputManager, VideoAnalysisResult, VideoAnalyzer -from batch_add_faces import batch_add_faces_from_folder -from face_matcher import FaceMatchingResult -import os +import urllib.parse from dotenv import load_dotenv load_dotenv() @@ -54,7 +51,6 @@ class MessageType(Enum): # Client requests ANALYZE = "analyze" TRANSCRIBE = "transcribe" - REINDEX_FACES = "reindex_faces" HEALTH = "health" # Server responses @@ -66,13 +62,6 @@ class MessageType(Enum): TRANSCRIPTION_PROGRESS = "transcription_progress" TRANSCRIPTION_COMPLETED = "transcription_completed" TRANSCRIPTION_ERROR = "transcription_error" - REINDEX_PROGRESS = "reindex_progress" - REINDEX_COMPLETE = "reindex_complete" - REINDEX_ERROR = "reindex_error" - FIND_MATCHING_FACES = "find_matching_faces" - FACE_MATCHING_PROGRESS = "face_matching_progress" - FACE_MATCHING_COMPLETE = "face_matching_complete" - FACE_MATCHING_ERROR = "face_matching_error" @dataclass class ServiceMetrics: @@ -262,7 +251,6 @@ def __init__( max_concurrent_analyses ) self.transcription_service: "TranscriptionService" = TranscriptionService() - self.reindex_lock: asyncio.Lock = asyncio.Lock() # Track active callback guards for cleanup self.active_guards: Set["CallbackGuard"] = set() @@ -273,9 +261,7 @@ def __init__( ] = { MessageType.ANALYZE.value: self._handle_analyze, MessageType.TRANSCRIBE.value: self._handle_transcribe, - MessageType.REINDEX_FACES.value: self._handle_reindex_faces, MessageType.HEALTH.value: self._handle_health, - MessageType.FIND_MATCHING_FACES.value: self._handle_find_matching_faces, } def cleanup_guards(self, websocket: "WebSocketServerProtocol") -> None: @@ -368,7 +354,10 @@ async def _handle_analyze( self, websocket: "WebSocketServerProtocol", payload: JsonDict ) -> None: """Handle video analysis request with concurrency control.""" - video_path_str = 
payload.get("video_path") + + encoded_path = payload['video_path'] + video_path_str = urllib.parse.unquote(encoded_path) + if not isinstance(video_path_str, str): logger.error("Missing or invalid 'video_path' in payload") await self._send_message( @@ -541,10 +530,10 @@ async def _handle_transcribe( self, websocket: "WebSocketServerProtocol", payload: JsonDict ) -> None: """Handle transcription request with live progress updates and debug logging.""" - video_path = payload.get("video_path") + encoded_path = payload.get("video_path") json_file_path = payload.get("json_file_path") - if not isinstance(video_path, str) or not isinstance(json_file_path, str): + if not isinstance(encoded_path, str) or not isinstance(json_file_path, str): await self._send_message( websocket, MessageType.TRANSCRIPTION_ERROR, @@ -558,7 +547,7 @@ async def _handle_transcribe( logger.error("Missing or invalid 'job_id' in payload") job_id = None # Continue but log the issue - video_path_normalized = str(Path(video_path).resolve()) + video_path_normalized = urllib.parse.unquote(encoded_path) self.state.start_transcription(video_path_normalized) logger.info(f"Started transcription for: {video_path_normalized}") @@ -580,7 +569,7 @@ def progress_callback(progress: int, elapsed: str) -> None: result = await loop.run_in_executor( None, lambda: self.transcription_service.transcribe( - video_path, + video_path_normalized, json_file_path, progress_callback ) @@ -607,209 +596,6 @@ def progress_callback(progress: int, elapsed: str) -> None: ) finally: self.state.finish_transcription(video_path_normalized) - - async def _handle_find_matching_faces( - self, websocket: "WebSocketServerProtocol", payload: JsonDict - ) -> None: - """Handle face matching request.""" - person_name = payload.get("person_name") - reference_images = payload.get("reference_images") - unknown_faces_dir = payload.get("unknown_faces_dir", "analysis_results/unknown_faces") - tolerance = payload.get("tolerance", 0.6) - - if not 
isinstance(person_name, str) or not person_name: - await self._send_message( - websocket, - MessageType.FACE_MATCHING_ERROR, - {"message": "Missing or invalid 'person_name'"} - ) - return - - if not isinstance(reference_images, list) or not reference_images: - await self._send_message( - websocket, - MessageType.FACE_MATCHING_ERROR, - {"message": "Missing or invalid 'reference_images'"} - ) - return - - job_id = payload.get("job_id") - if not isinstance(job_id, str): - logger.error("Missing or invalid 'job_id' in payload") - job_id = None - - logger.info(f"Starting face matching for {person_name} with {len(reference_images)} references") - - guard = CallbackGuard(websocket, self.connection_manager) - self.active_guards.add(guard) - - loop = asyncio.get_running_loop() - - def progress_callback(data: Dict[str, str]) -> None: - """Thread-safe synchronous progress callback for face matching.""" - progress_data: JsonDict = { - "person_name": person_name, - **data - } - - asyncio.run_coroutine_threadsafe( - self._send_message(websocket, MessageType.FACE_MATCHING_PROGRESS, progress_data, job_id=job_id), - loop - ) - logger.debug(f"Sent face matching progress: {progress_data}") - - - try: - from face_matcher import find_and_label_matching_faces - - logger.info(f"Running face matching for {person_name}") - result: FaceMatchingResult = await find_and_label_matching_faces( - person_name=person_name, - reference_image_paths=reference_images, - unknown_faces_dir=unknown_faces_dir, - tolerance=tolerance, - progress_callback=progress_callback - ) - - if result["success"]: - logger.info(f"Face matching complete: {result['matches_found']} matches found for {person_name}") - complete_data: JsonDict = { - "person_name": person_name, - "matches_found": result["matches_found"], - "matches": result["matches"], - "reference_images_used": result["reference_images_used"] - } - try: - await self._send_message( - websocket, - MessageType.FACE_MATCHING_COMPLETE, - complete_data, - 
job_id=job_id - ) - except Exception as e: - logger.warning(f"Failed to send face matching complete event: {e}") - else: - await self._send_message( - websocket, - MessageType.FACE_MATCHING_ERROR, - {"message": result.get("error", "Unknown error")}, - job_id=job_id - ) - - except Exception as e: - logger.exception(f"Face matching failed for {person_name}") - await self._send_message( - websocket, - MessageType.FACE_MATCHING_ERROR, - {"message": f"Face matching failed: {str(e)}"}, - job_id=job_id - ) - finally: - guard.cancel() - self.active_guards.discard(guard) - - - async def _handle_reindex_faces( - self, websocket: "WebSocketServerProtocol", payload: JsonDict - ) -> None: - """Handle face reindexing request with exclusive locking.""" - if self.reindex_lock.locked(): - logger.warning("Reindex request received while another reindex is active.") - await self._send_message( - websocket, - MessageType.REINDEX_ERROR, - {"message": "Face reindexing is already in progress. Please wait."} - ) - return - - job_id = payload.get("job_id") - specific_faces = payload.get("specific_faces") - if not isinstance(job_id, str): - logger.error("Missing or invalid 'job_id' in payload") - job_id = None - - async with self.reindex_lock: - logger.info("Starting face reindexing...") - - faces_dir = os.getenv("FACES_DIR", ".faces") - known_faces_f = os.getenv("KNOWN_FACES_FILE_LOADED", ".known_faces.json") - - if not isinstance(faces_dir, str): - faces_dir = ".faces" - if not isinstance(known_faces_f, str): - known_faces_f = "known_faces.json" - - guard = CallbackGuard(websocket, self.connection_manager) - self.active_guards.add(guard) - - loop = asyncio.get_running_loop() - - def reindex_progress_callback(data: Dict[str, Union[str, int]]) -> None: - """Thread-safe synchronous progress callback for reindexing.""" - if not guard.is_active(): - return - - elapsed = data.get("elapsed", "") - progress = data.get("progress", 0) - - if not isinstance(elapsed, str): - elapsed = str(elapsed) - 
if not isinstance(progress, int): - progress = 0 - - progress_data: JsonDict = { - "elapsed": elapsed, - "progress": progress - } - - asyncio.run_coroutine_threadsafe( - self._send_message(websocket, MessageType.REINDEX_PROGRESS, progress_data, job_id=job_id), - loop - ) - - - try: - logger.info("Running face reindexing") - success = await batch_add_faces_from_folder( - progress_callback=reindex_progress_callback, - specific_faces=specific_faces - ) - - if success: - logger.info("Face reindexing completed successfully.") - complete_data: JsonDict = { - "status": "done", - "message": "Face reindexing completed successfully." - } - try: - await self._send_message( - websocket, - MessageType.REINDEX_COMPLETE, - complete_data, - job_id=job_id - ) - except Exception as e: - logger.warning(f"Failed to send reindex complete event: {e}") - else: - logger.error("Face reindexing failed. Check logs for details.") - await self._send_message( - websocket, - MessageType.REINDEX_ERROR, - {"message": "Face reindexing failed. 
Check service logs."}, - job_id=job_id - ) - - except Exception as e: - logger.exception("Face reindexing exception") - await self._send_message( - websocket, - MessageType.REINDEX_ERROR, - {"message": f"Reindexing failed: {str(e)}"}, - job_id=job_id - ) - finally: - guard.cancel() - self.active_guards.discard(guard) class WebSocketHandler: """Coordinates WebSocket connections and message processing.""" diff --git a/python/analyze.py b/python/analyze.py index 9305b034..0993f9e4 100644 --- a/python/analyze.py +++ b/python/analyze.py @@ -385,8 +385,6 @@ def load_plugins(config: AnalysisConfig) -> List[AnalyzerPlugin]: ("ShotTypePlugin", "shot_type"), ("DominantColorPlugin", "dominant_color"), ("EnvironmentPlugin", "environment"), - ("ActivityPlugin", "activity"), - ("EmotionDetectionPlugin", "emotion_detection"), ("TextDetectionPlugin", "text_detection"), ] diff --git a/python/batch_add_faces.py b/python/batch_add_faces.py deleted file mode 100644 index c3c4e7cc..00000000 --- a/python/batch_add_faces.py +++ /dev/null @@ -1,183 +0,0 @@ -import sys -import os -import json -import asyncio -from pathlib import Path -from typing import Optional, Callable, List, Dict, Union -import time -import os -from dotenv import load_dotenv - -load_dotenv() - -async def create_faces_data_from_folder_async(): - """ - Asynchronously scans the specified faces directory, expecting subfolders named after individuals, - collects image paths, and optionally saves this data to a JSON file. 
- """ - faces_data = {} - faces_directory= os.getenv("FACES_DIR", ".faces") - - output_json_path = os.getenv("KNOWN_FACES_FILE", ".known_faces.json") - def _sync_scan(): - if not os.path.exists(faces_directory): - print(f"ERROR(create_faces): .faces directory not found at {faces_directory}", file=sys.stderr) - return {} - - found_people_count = 0 - for person_name in os.listdir(faces_directory): - person_folder_path = os.path.join(faces_directory, person_name) - if os.path.isdir(person_folder_path): - found_people_count += 1 - image_paths = [] - for filename in os.listdir(person_folder_path): - if filename.lower().endswith(('.png', '.jpg', '.jpeg')): - relative_path = os.path.join(faces_directory, person_name, filename) - image_paths.append(relative_path.replace("\\", "/")) - if image_paths: - faces_data[person_name] = image_paths - return faces_data - - faces_data = await asyncio.to_thread(_sync_scan) - - if faces_data: - try: - output_dir = os.path.dirname(output_json_path) - if output_dir and not os.path.exists(output_dir): - print(f"DEBUG(create_faces): Creating output directory: {output_dir}", file=sys.stderr) - os.makedirs(output_dir) - - def _sync_dump(): - with open(output_json_path, 'w') as f: - json.dump(faces_data, f, indent=2) - await asyncio.to_thread(_sync_dump) - print(f"Successfully generated and saved faces data to {output_json_path}", file=sys.stderr) - except IOError as e: - print(f"Error saving faces data to {output_json_path}: {e}", file=sys.stderr) - else: - print(f"No faces data found in '{faces_directory}' to save to '{output_json_path}'.", file=sys.stderr) - - return faces_data - -async def batch_add_faces_from_folder( - progress_callback: Optional[Callable[[Dict[str, Union[int, float]]], None]] = None, - specific_faces: Optional[List[Dict[str, str]]] = None -): - """ - If specific_faces is provided, only processes those specific images. - specific_faces format: - [{"name": "John", "image_path": "photo1.jpg"}, ...] 
# just filename - """ - - faces_directory = os.getenv("FACES_DIR", ".faces") - known_faces_file = os.getenv("KNOWN_FACES_FILE_LOADED", ".known_faces.json") - - start_time = time.monotonic() - - async def report_progress(message: str, current: int = 0, total: int = 0): - elapsed = time.monotonic() - start_time - progress_percent = (current / total * 100) if total > 0 else 0 - - hours, remainder = divmod(int(elapsed), 3600) - minutes, seconds = divmod(remainder, 60) - elapsed_str = f"{hours:02}:{minutes:02}:{seconds:02}" - - if progress_callback: - try: - progress_callback({ - "current_item": current, - "total_items": total, - "progress": round(progress_percent, 1), - "elapsed": int(elapsed) - }) - except Exception as e: - print(f"Failed to send progress via callback: {e}", file=sys.stderr) - - print(f"[{elapsed_str} | {progress_percent:.1f}%] {message}", file=sys.stderr) - - if specific_faces is not None: - await report_progress("Using provided face list...", current=0, total=1) - - images_to_process = [] - for face_info in specific_faces: - name = face_info['name'] - image_path = face_info['image_path'] - - image_path = os.path.join(faces_directory, name, image_path) - - images_to_process.append({ - 'name': name, - 'image_path': image_path - }) - - script_dir = Path(__file__).parent - add_face_script = script_dir / "add_face.py" - - project_root = Path(os.getcwd()) - - total_images = len(images_to_process) - processed_images = 0 - - if total_images == 0: - await report_progress("No images found to process.", current=0, total=0) - return True - - for image_info in images_to_process: - name = image_info['name'] - image_path_relative = image_info['image_path'] - - processed_images += 1 - - message_prefix = f"Processing image: {image_path_relative} (person: {name})" - await report_progress(message_prefix, processed_images, total_images) - - # Handle both relative and absolute paths - if os.path.isabs(image_path_relative): - absolute_image_path = 
Path(image_path_relative) - else: - absolute_image_path = project_root / image_path_relative - - if not absolute_image_path.exists(): - warning_msg = f"Image not found at {absolute_image_path}. Skipping." - await report_progress(warning_msg, processed_images, total_images) - continue - - command = [ - sys.executable, - str(add_face_script), - name, - str(absolute_image_path), - str(project_root / known_faces_file) - ] - process = await asyncio.create_subprocess_exec( - *command, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE - ) - - stdout_data = await process.communicate() - return_code = process.returncode - - stdout_str = stdout_data.decode().strip() - if return_code == 0: - status_message = "Success" - if stdout_str: - try: - print(stdout_str) - json_output = json.loads(stdout_str) - if json_output.get("status") == "warning": - status_message = f"Warning: {json_output.get('message', 'Multiple faces found.')}" - elif json_output.get("status") == "error": - status_message = f"Error (despite code 0): {json_output.get('message', 'Unknown error')}" - except json.JSONDecodeError: - status_message = f"Success (non-JSON output: {stdout_str[:50]}...)" - - await report_progress(status_message, processed_images, total_images) - else: - error_msg = f"add_face.py failed for {image_path_relative}. 
Return code: {process.returncode}" - print(error_msg, file=sys.stderr) - await report_progress(error_msg, processed_images, total_images) - return False - - await report_progress("All faces processed.", total_images, total_images) - return True \ No newline at end of file diff --git a/python/face_matcher.py b/python/face_matcher.py deleted file mode 100644 index bb46dbb2..00000000 --- a/python/face_matcher.py +++ /dev/null @@ -1,259 +0,0 @@ -import os -import sys -import json -import face_recognition -import numpy as np -from typing import List, Dict, Optional, Callable, TypedDict, Union -import time -from dataclasses import dataclass -import logging - -logger = logging.getLogger(__name__) - -class FaceMatchingResult(TypedDict): - success: bool - error: Optional[str] - matches: List[Dict[str, str]] - person_name: str - matches_found: int - reference_images_used: int - -@dataclass -class MatchResult: - """Result of a face match operation.""" - json_file: str - image_file: str - confidence: float - face_data: dict - -class FaceMatcher: - """Handles face matching and labeling operations.""" - - def __init__(self, tolerance: float = 0.6): - """ - Initialize the face matcher. - - Args: - tolerance: Lower is more strict. Default 0.6 is a good balance. - """ - self.tolerance = tolerance - - def load_reference_encodings(self, image_paths: List[str]) -> List[np.ndarray]: - """ - Load face encodings from reference images. 
- - Args: - image_paths: List of paths to reference images - - Returns: - List of face encodings - """ - encodings = [] - - for image_path in image_paths: - try: - image = face_recognition.load_image_file(image_path) - face_encodings = face_recognition.face_encodings(image) - - if face_encodings: - encodings.append(face_encodings[0]) - print(f"Loaded encoding from: {image_path}", file=sys.stderr) - else: - print(f"Warning: No face found in {image_path}", file=sys.stderr) - except Exception as e: - print(f"Error loading {image_path}: {e}", file=sys.stderr) - - return encodings - - def remove_json_file(self, json_path: str, reason: str): - """ - Remove a JSON file and log the reason. - - Args: - json_path: Path to the JSON file to remove - reason: Reason for removal - """ - try: - os.remove(json_path) - print(f"Removed JSON file: {json_path} - Reason: {reason}", file=sys.stderr) - logger.info(f"Removed JSON file: {json_path} - Reason: {reason}") - except Exception as e: - print(f"Error removing {json_path}: {e}", file=sys.stderr) - logger.error(f"Error removing {json_path}: {e}") - - def find_matches( - self, - reference_encodings: List[np.ndarray], - unknown_faces_dir: str, - progress_callback: Optional[Callable[[Dict[str, Union[int, float]]], None]] = None, - ) -> List[MatchResult]: - """ - Find all matching faces in the unknown faces directory. 
- - Args: - reference_encodings: List of face encodings to match against - unknown_faces_dir: Directory containing unknown face JSON files - progress_callback: Optional callback for progress updates - - Returns: - List of matching face results - """ - matches = [] - json_files = [f for f in os.listdir(unknown_faces_dir) if f.endswith('.json')] - total_files = len(json_files) - start_time = time.monotonic() - - print(f"Scanning {total_files} unknown faces for matches...", file=sys.stderr) - - for idx, json_file in enumerate(json_files, 1): - json_path = os.path.join(unknown_faces_dir, json_file) - - try: - # Load face data - with open(json_path, 'r') as f: - face_data = json.load(f) - - image_file = face_data.get('image_file') - if not image_file: - self.remove_json_file(json_path, "No image_file field in JSON") - continue - - image_path = os.path.join(unknown_faces_dir, image_file) - - # Check if image file exists - if not os.path.exists(image_path): - print(f"Image not found: {image_path}", file=sys.stderr) - self.remove_json_file(json_path, f"Image file not found: {image_file}") - continue - - # Load and encode the unknown face - unknown_image = face_recognition.load_image_file(image_path) - unknown_encodings = face_recognition.face_encodings(unknown_image) - - # Check if face was detected - if not unknown_encodings: - print(f"No face found in {image_file}", file=sys.stderr) - self.remove_json_file(json_path, f"No face detected in image: {image_file}") - continue - - unknown_encoding = unknown_encodings[0] - - # Compare with reference encodings - distances = face_recognition.face_distance(reference_encodings, unknown_encoding) - min_distance = np.min(distances) - - if min_distance <= self.tolerance: - confidence = 1.0 - min_distance - matches.append(MatchResult( - json_file=json_file, - image_file=image_file, - confidence=float(confidence), - face_data=face_data - )) - logger.info(f"Face match found: {image_file} (confidence: {confidence:.2%})") - face_id = 
self.get_face_id_from_json(json_file, unknown_faces_dir) - elapsed = time.monotonic() - start_time - progress_percent = (idx / total_files) * 100 - if progress_callback: - progress_callback({ - "current": idx, - "total": total_files, - "progress": round(progress_percent, 1), - "elapsed": int(elapsed), - "match": { - "json_file": json_file, - "image_file": image_file, - "confidence": float(confidence), - "face_id": face_id, - "face_data": face_data - } - }) - logger.info(f"Face match found and progress callback sent") - - except Exception as e: - print(f"Error processing {json_file}: {e}", file=sys.stderr) - logger.error(f"Error processing {json_file}: {e}") - - return matches - - def get_face_id_from_json(self, json_file: str, unknown_faces_dir: str) -> Optional[str]: - """Extract face_id from JSON file.""" - json_path = os.path.join(unknown_faces_dir, json_file) - try: - with open(json_path, 'r') as f: - data = json.load(f) - return data.get('image_hash') - except Exception as e: - print(f"Error reading face_id from {json_file}: {e}", file=sys.stderr) - return None - - -async def find_and_label_matching_faces( - person_name: str, - reference_image_paths: List[str], - unknown_faces_dir: str = "analysis_results/unknown_faces", - tolerance: float = 0.6, - progress_callback: Optional[Callable[[dict], None]] = None -) -> FaceMatchingResult: - """ - Find and return all faces matching the reference images. 
- - Args: - person_name: Name of the person to label - reference_image_paths: List of paths to reference images (the ones just labeled) - unknown_faces_dir: Directory containing unknown faces - tolerance: Face recognition tolerance (lower = more strict) - progress_callback: Optional callback for progress updates - - Returns: - Dictionary with matching faces information - """ - import asyncio - - matcher = FaceMatcher(tolerance=tolerance) - - # Load reference encodings - print(f"Loading {len(reference_image_paths)} reference images for {person_name}...", file=sys.stderr) - reference_encodings = await asyncio.to_thread( - matcher.load_reference_encodings, - reference_image_paths - ) - - if not reference_encodings: - return { - "success": False, - "error": "No valid face encodings found in reference images", - "matches": [], - "person_name": person_name, - "matches_found": 0, - "reference_images_used": 0, - } - - print(f"Found {len(reference_encodings)} reference encodings", file=sys.stderr) - - matches = await asyncio.to_thread( - matcher.find_matches, - reference_encodings, - unknown_faces_dir, - progress_callback - ) - - match_data = [] - for match in matches: - face_id = matcher.get_face_id_from_json(match.json_file, unknown_faces_dir) - match_data.append({ - "json_file": match.json_file, - "image_file": match.image_file, - "confidence": match.confidence, - "face_id": face_id, - "face_data": match.face_data - }) - - return { - "success": True, - "person_name": person_name, - "matches_found": len(match_data), - "matches": match_data, - "reference_images_used": len(reference_encodings), - "error": None, - } \ No newline at end of file diff --git a/python/face_recognizer.py b/python/face_recognizer.py index 3accffa4..d4161cbb 100644 --- a/python/face_recognizer.py +++ b/python/face_recognizer.py @@ -1,245 +1,183 @@ -import face_recognition +from deepface import DeepFace import numpy as np import json from collections import defaultdict -import os from typing import 
List, Dict +import os from dotenv import load_dotenv +import cv2 load_dotenv() + class FaceRecognizer: - def __init__(self, known_faces_file: str ='.faces.json', tolerance: float =0.40, model: str ='cnn'): + def __init__(self, known_faces_folder: str = '.faces', tolerance: float = 0.40, model: str = 'VGG-Face'): """ - Initialize the face recognizer. + Initialize the face recognizer using DeepFace. Args: - known_faces_file: Path to JSON file storing known faces - tolerance: Lower is more strict (default 0.6, range 0.0-1.0) - model: 'cnn' for accuracy or 'hog' for speed + known_faces_folder: Path to folder where we have faces labeled + tolerance: Distance threshold for face matching (lower = stricter) + model: DeepFace model ('VGG-Face', 'Facenet', 'Facenet512', 'OpenFace', 'DeepFace', 'DeepID', 'ArcFace', 'Dlib', 'SFace') """ - self.known_faces_file = known_faces_file + self.known_faces_folder = known_faces_folder self.tolerance = tolerance - self.model = model - self.known_face_encodings: List[np.ndarray] = [] - self.known_face_names: List[str] = [] + self.model = model + self.detector_backend = 'opencv' self.unknown_face_encodings: Dict[str, List[np.ndarray]] = defaultdict(list) self.unknown_face_counter = 0 - self.load_known_faces() - - - - def load_known_faces(self): - """Load known faces from JSON ensuring float arrays.""" - self.known_face_encodings = [] - self.known_face_names = [] - - try: - with open(self.known_faces_file, 'r', encoding='utf-8') as f: - data = json.load(f) - except FileNotFoundError: - # File doesn't exist yet, start with empty lists - return - except json.JSONDecodeError as e: - raise ValueError(f"Invalid JSON in {self.known_faces_file}: {e}") - - if isinstance(data, dict): - for name, enc_list in data.items(): - # Skip if enc_list is not a list - if not isinstance(enc_list, list): - continue - - for enc in enc_list: - # Skip if enc is not a list/array (e.g., if it's a string path) - if not isinstance(enc, (list, np.ndarray)): - continue - 
- # Skip if enc is empty or doesn't look like an encoding (should be 128 floats) - if not enc or len(enc) != 128: - continue - - try: - # Ensure numeric array - encoding_array = np.array(enc, dtype=np.float64) - self.known_face_encodings.append(encoding_array) - self.known_face_names.append(name) - except (ValueError, TypeError) as e: - # Skip invalid encodings - print(f"Warning: Skipping invalid encoding for {name}: {e}") - continue - - elif isinstance(data, list): - for entry in data: - if not isinstance(entry, dict): - continue - - name = entry.get("name") - enc = entry.get("encoding") or entry.get("encodings") - if not name or not enc: - continue - - # Handle multiple encodings per person - if isinstance(enc[0], list): - for e in enc: - if len(e) != 128: - continue - try: - self.known_face_encodings.append(np.array(e, dtype=np.float64)) - self.known_face_names.append(name) - except (ValueError, TypeError): - continue - else: - if len(enc) == 128: - try: - self.known_face_encodings.append(np.array(enc, dtype=np.float64)) - self.known_face_names.append(name) - except (ValueError, TypeError): - continue - - - def recognize_faces(self, frame: np.ndarray, upsample: int =1) -> List[Dict[str, str]]: + def recognize_faces(self, frame: np.ndarray, upsample: int = 1) -> List[Dict[str, str]]: """ - Recognize faces in a frame with improved accuracy. + Recognize faces in a frame using DeepFace. 
Args: - frame: Image array (RGB format recommended) - upsample: Number of times to upsample image for detection (higher = more accurate but slower) + frame: Image array (BGR format from OpenCV) + upsample: Not used with DeepFace but kept for compatibility """ - # Convert to RGB if needed (face_recognition uses RGB) - if len(frame.shape) == 2: # Grayscale - frame = np.stack([frame] * 3, axis=-1) - - # Detect faces with upsampling for better detection - face_locations = face_recognition.face_locations( - frame, - number_of_times_to_upsample=upsample, - model=self.model - ) - - # Get 128-dimensional face encodings - face_encodings = face_recognition.face_encodings( - frame, - face_locations, - num_jitters=10 # More jitters = more accurate but slower - ) - def distance_to_confidence(face_distance: float, tolerance: float =self.tolerance) -> float: - """Convert face distance to confidence (0-1 scale).""" - if face_distance > tolerance: - return 0.0 - # Smooth mapping: closer distance -> higher confidence - confidence = 1 / (1 + np.exp(15 * (face_distance - tolerance))) - return confidence + # Convert BGR to RGB if needed + if len(frame.shape) == 3 and frame.shape[2] == 3: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + else: + frame_rgb = frame recognized_faces = [] - for face_location, face_encoding in zip(face_locations, face_encodings): - name = "Unknown" - confidence = 0.0 + + try: - if len(self.known_face_encodings) > 0: - # Calculate distances to all known faces - face_distances = face_recognition.face_distance( - self.known_face_encodings, - face_encoding, - ) + analysis_results = DeepFace.analyze( + img_path=frame_rgb, + actions=['emotion'], + enforce_detection=False, + detector_backend=self.detector_backend, + ) + if not isinstance(analysis_results, list): + analysis_results = [analysis_results] + + face_objs = DeepFace.extract_faces( + img_path=frame_rgb, + detector_backend=self.detector_backend, + enforce_detection=False, + align=True + ) + + + for i, 
(face_obj, analysis) in enumerate(zip(face_objs, analysis_results)): + if face_obj['confidence'] == 0: + continue + + facial_area = face_obj['facial_area'] - # Find best match - best_match_index = np.argmin(face_distances) - best_distance = face_distances[best_match_index] + top = facial_area['y'] + left = facial_area['x'] + bottom = facial_area['y'] + facial_area['h'] + right = facial_area['x'] + facial_area['w'] + face_location = (top, right, bottom, left) + + emotion_probs = analysis.get('emotion', {}) + emotion_data = None - # Check if match is within tolerance - if best_distance <= self.tolerance: - name = self.known_face_names[best_match_index] - # Convert distance to confidence score (0-1) - confidence = distance_to_confidence(best_distance, self.tolerance) - + if emotion_probs: + dominant_emotion = max(emotion_probs.items(), key=lambda x: x[1]) + emotion_data = { + 'emotion': dominant_emotion[0], + 'confidence': dominant_emotion[1] / 100.0 + } - if name == "Unknown": - # Check if this unknown face has been seen before - found_existing_unknown = False - for unknown_name, encodings in self.unknown_face_encodings.items(): - if encodings: - distances = face_recognition.face_distance(encodings, face_encoding) - if np.min(distances) <= self.tolerance: - name = unknown_name - found_existing_unknown = True - confidence = distance_to_confidence(np.min(distances), self.tolerance) - break - if not found_existing_unknown: - name = f"Unknown_{self.unknown_face_counter:03d}" - self.unknown_face_counter += 1 - confidence = 0.0 - self.unknown_face_encodings[name].append(face_encoding) - - recognized_faces.append({ - "name": name, - "confidence": confidence, - "encoding": face_encoding.tolist(), - "location": face_location - }) + name = "Unknown" + confidence = 0.0 + + try: + os.makedirs(self.known_faces_folder, exist_ok=True) + face_analysis_results = DeepFace.find( + img_path=frame_rgb, + enforce_detection=False, + detector_backend=self.detector_backend, + silent=True, + 
db_path=self.known_faces_folder, + ) + + if len(face_analysis_results) > 0 and len(face_analysis_results[0]) > 0: + best_match_path = face_analysis_results[0].iloc[0] + name = os.path.basename(os.path.dirname(best_match_path["identity"])) + confidence = best_match_path["confidence"] + except Exception as e: + pass + + face_encoding = None + try: + face_img = face_obj['face'] + embedding_objs = DeepFace.represent( + img_path=face_img, + model_name=self.model, + enforce_detection=False + ) + + if not embedding_objs: + continue + + face_encoding = np.array(embedding_objs[0]['embedding']) + + except Exception as e: + print(f"Error getting embedding: {e}") + continue + + if name == "Unknown": + found_existing_unknown = False + for unknown_name, encodings in self.unknown_face_encodings.items(): + if encodings: + distances = [self._cosine_distance(enc, face_encoding) for enc in encodings] + if np.min(distances) <= self.tolerance: + name = unknown_name + found_existing_unknown = True + confidence = self._distance_to_confidence(np.min(distances), self.tolerance) + break + + if not found_existing_unknown: + name = f"Unknown_{self.unknown_face_counter:03d}" + self.unknown_face_counter += 1 + confidence = 0.0 + + self.unknown_face_encodings[name].append(face_encoding) + + recognized_faces.append({ + "name": name, + "confidence": confidence, + "encoding": face_encoding.tolist(), + "location": face_location, + "emotion_label": emotion_data['emotion'] if emotion_data else None, + "emotion_confidence": emotion_data['confidence'] if emotion_data else None + }) + + except Exception as e: + print(f"Error in face recognition: {e}") return recognized_faces - def add_known_face(self, name: str, encoding: np.ndarray) -> None: - """Add a known face encoding.""" - self.known_face_encodings.append(np.array(encoding)) - self.known_face_names.append(name) - - def save_known_faces(self) -> None: - """Save known faces to JSON file.""" - known_faces_data: Dict[str, List[List[float]]] = {} - for 
name, encoding in zip(self.known_face_names, self.known_face_encodings): - if name not in known_faces_data: - known_faces_data[name] = [] - known_faces_data[name].append(encoding.tolist()) + @staticmethod + def _cosine_distance(encoding1: np.ndarray, encoding2: np.ndarray) -> float: + """Calculate cosine distance between two embeddings.""" + dot_product = np.dot(encoding1, encoding2) + norm1 = np.linalg.norm(encoding1) + norm2 = np.linalg.norm(encoding2) - with open(self.known_faces_file, 'w') as f: - json.dump(known_faces_data, f, indent=2) - - def get_all_faces(self) -> List[Dict[str, str]]: - """ - Returns a list of all known and unknown faces with their representative encodings and counts. - """ - all_faces: Dict[str, List[np.ndarray]] = defaultdict(list) - for name, encoding in zip(self.known_face_names, self.known_face_encodings): - all_faces[name].append(encoding) + if norm1 == 0 or norm2 == 0: + return 1.0 - for name, encodings in self.unknown_face_encodings.items(): - all_faces[name].extend(encodings) + cosine_similarity = dot_product / (norm1 * norm2) + return 1 - cosine_similarity - result = [] - for name, encodings in all_faces.items(): - # For simplicity, use the first encoding as representative and count all occurrences - if encodings: - result.append({ - "name": name, - "encoding": encodings[0].tolist(), - "count": len(encodings) - }) - return result + def _distance_to_confidence(self, face_distance: float, tolerance: float) -> float: + """Convert face distance to confidence (0-1 scale).""" + if face_distance > tolerance: + return 0.0 + confidence = 1 / (1 + np.exp(15 * (face_distance - tolerance))) + return confidence - def label_face(self, old_name: str, new_name: str) -> None: - """ - Labels an existing face (known or unknown) with a new name. - If old_name was unknown, it becomes known. 
- """ - # Move encodings from unknown to known if applicable - if old_name.startswith("Unknown_") and old_name in self.unknown_face_encodings: - for encoding in self.unknown_face_encodings[old_name]: - self.add_known_face(new_name, encoding) - del self.unknown_face_encodings[old_name] - else: - # Update existing known face names - for i, name in enumerate(self.known_face_names): - if name == old_name: - self.known_face_names[i] = new_name - self.save_known_faces() def merge_faces(self, names_to_merge: List[str], new_name: str) -> None: - """ - Merges multiple faces (known or unknown) under a single new name. - """ + """Merges multiple faces under a single new name.""" merged_encodings: List[np.ndarray] = [] for name in names_to_merge: if name.startswith("Unknown_") and name in self.unknown_face_encodings: @@ -247,20 +185,16 @@ def merge_faces(self, names_to_merge: List[str], new_name: str) -> None: merged_encodings.append(encoding) del self.unknown_face_encodings[name] else: - # Collect encodings of known faces to merge for i, known_name in enumerate(self.known_face_names): if known_name == name: merged_encodings.append(self.known_face_encodings[i]) - # Remove old entry to avoid duplicates - self.known_face_encodings[i] = None # Mark for deletion - self.known_face_names[i] = None # Mark for deletion + self.known_face_encodings[i] = None + self.known_face_names[i] = None - # Clean up marked for deletion self.known_face_encodings = [e for e in self.known_face_encodings if e is not None] self.known_face_names = [n for n in self.known_face_names if n is not None] - # Add merged encodings under the new name for encoding in merged_encodings: self.add_known_face(new_name, encoding) - self.save_known_faces() + self.save_known_faces() \ No newline at end of file diff --git a/python/plugins/dominant_color.py b/python/plugins/dominant_color.py index 380b475b..15b069ae 100644 --- a/python/plugins/dominant_color.py +++ b/python/plugins/dominant_color.py @@ -30,7 +30,6 @@ def 
to_json_dict(self) -> Dict[str, Union[str, float, bool]]: class SceneColorAnalysis: """Scene-level color analysis""" dominant_color: Optional[Dict[str, Union[str, float, bool]]] - color_palette: List[Dict[str, Union[str, float, bool]]] overall_brightness: float overall_saturation: float overall_warmth: float @@ -168,7 +167,7 @@ def _extract_dominant_colors(self, frame: np.ndarray, num_colors: int) -> List[C random_state=42, n_init=5, max_iter=100, - tol=0.01 + tol=0.01, ) kmeans.fit(pixels) @@ -322,7 +321,6 @@ def get_results(self) -> Optional[SceneColorAnalysis]: return SceneColorAnalysis( dominant_color=dominant_color_json, - color_palette=json_color_palette, overall_brightness=round(overall_brightness, 2), overall_saturation=round(overall_saturation, 2), overall_warmth=round(overall_warmth, 2), diff --git a/python/plugins/emotion_detection.py b/python/plugins/emotion_detection.py deleted file mode 100644 index 2a0d10a9..00000000 --- a/python/plugins/emotion_detection.py +++ /dev/null @@ -1,162 +0,0 @@ -import warnings -from typing import Dict, Optional, Union, List -import numpy as np -import cv2 - -from plugins.base import AnalyzerPlugin, FrameAnalysis, PluginResult - -try: - from fer import FER - FER_AVAILABLE = True -except ImportError: - FER_AVAILABLE = False - print("Warning: FER (Facial Emotion Recognition) package not available") - - -class EmotionDetectionPlugin(AnalyzerPlugin): - """A plugin for detecting emotions in faces.""" - - def __init__(self, config: Dict[str, Union[str, bool, int, float]]): - super().__init__(config) - self.emotion_detector: Optional[FER] = None - self.emotion_scale = float(config.get('emotion_scale', 0.5)) - self.use_mtcnn = bool(config.get('use_mtcnn', False)) - self.min_face_size = int(config.get('min_face_size', 30)) - self.iou_threshold = float(config.get('emotion_iou_threshold', 0.3)) - self.enabled = FER_AVAILABLE - - def setup(self) -> None: - """Initialize the FER emotion detector.""" - if not FER_AVAILABLE: - print(" 
โœ— Emotion Detection: FER package not available, skipping") - self.enabled = False - return - - try: - self.emotion_detector = FER(mtcnn=self.use_mtcnn) - print(f" โœ“ Emotion Detection: FER initialized (MTCNN: {self.use_mtcnn}, Scale: {self.emotion_scale})") - except Exception as e: - print(f" โœ— Emotion Detection: Failed to initialize FER: {e}") - self.enabled = False - - def analyze_frame(self, frame: np.ndarray, frame_analysis: FrameAnalysis, video_path: str) -> FrameAnalysis: - """Analyze a single frame for emotions.""" - if not self.enabled or self.emotion_detector is None: - return frame_analysis - - if 'faces' in frame_analysis and frame_analysis['faces']: - self._add_emotions(frame, frame_analysis) - return frame_analysis - - def _add_emotions(self, frame: np.ndarray, frame_analysis: FrameAnalysis) -> None: - """Add emotion data to recognized faces.""" - if not self.enabled or self.emotion_detector is None: - return - - faces = frame_analysis.get('faces', []) - if not faces or not isinstance(faces, list): - return - - valid_faces: List[Dict[str, Union[str, List[int], Optional[Dict[str, float]], float, List[float], Dict[str, float], Dict[str, int]]]] = [] - for face in faces: - if not isinstance(face, dict) or 'location' not in face: - if isinstance(face, dict): - face['emotion'] = None - continue - - location = face['location'] - if not isinstance(location, list) or len(location) != 4: - face['emotion'] = None - continue - - ft, fr, fb, fl = location - face_width = fr - fl - face_height = fb - ft - - if face_width < self.min_face_size or face_height < self.min_face_size: - face['emotion'] = None - continue - - valid_faces.append(face) - - - if not valid_faces: - return - - # Resize frame for faster emotion detection - original_height, original_width = frame.shape[:2] - - if self.emotion_scale != 1.0: - small_frame = cv2.resize( - frame, - (0, 0), - fx=self.emotion_scale, - fy=self.emotion_scale, - interpolation=cv2.INTER_LINEAR - ) - else: - small_frame 
= frame - - # Detect emotions on smaller frame - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - try: - emotions_results = self.emotion_detector.detect_emotions(small_frame) - except Exception as e: - # Silently handle errors and continue - for face in faces: - face['emotion'] = None - return - - if not emotions_results: - for face in faces: - face['emotion'] = None - return - - # Match emotions to faces using IoU - scale_inverse = 1.0 / self.emotion_scale - - for face in valid_faces: - ft, fr, fb, fl = face['location'] - face_area = (fr - fl) * (fb - ft) - - best_match_emotion = None - max_iou = 0.0 - - for emotion_res in emotions_results: - # Scale emotion box back to original frame coordinates - el, et, ew, eh = emotion_res['box'] - - if self.emotion_scale != 1.0: - el = int(el * scale_inverse) - et = int(et * scale_inverse) - ew = int(ew * scale_inverse) - eh = int(eh * scale_inverse) - - er = el + ew - eb = et + eh - emotion_area = ew * eh - - # Calculate IoU - x_overlap = max(0, min(fr, er) - max(fl, el)) - y_overlap = max(0, min(fb, eb) - max(ft, et)) - intersection_area = x_overlap * y_overlap - - union_area = face_area + emotion_area - intersection_area - iou = intersection_area / union_area if union_area > 0 else 0 - - if iou > max_iou: - max_iou = iou - best_match_emotion = emotion_res['emotions'] - - # Assign emotion if IoU is above threshold - if max_iou > self.iou_threshold: - face['emotion'] = best_match_emotion - else: - face['emotion'] = None - - def get_results(self) -> PluginResult: - return None - - def get_summary(self) -> PluginResult: - return None \ No newline at end of file diff --git a/python/plugins/face_recognition.py b/python/plugins/face_recognition.py index 003c592e..fddf097b 100644 --- a/python/plugins/face_recognition.py +++ b/python/plugins/face_recognition.py @@ -27,101 +27,27 @@ def __init__(self, config: Dict[str, Union[str, bool, int, float]]): self.face_recognizer: Optional[FaceRecognizer] = None 
self.all_faces: List[Dict[str, Union[float, str, int]]] = [] self.unknown_faces_output_path: Optional[Path] = None - self.known_faces_file = os.getenv("KNOWN_FACES_FILE_LOADED", ".known_faces.json") - - self.detection_model = str(config.get('detection_model', 'hog')) + self.known_faces_folder = os.getenv("FACES_DIR") or '.faces' + self.detection_model = str(config.get('detection_model', 'VGG-Face')) self.face_scale = float(config.get('face_scale', 0.5)) self.save_unknown_faces = bool(config.get('save_unknown_faces', True)) - self.unknown_save_interval = int(config.get('unknown_save_interval', 5)) + + self.saved_unknown_faces: Dict[str, Dict[str, Union[str, List]]] = {} def setup(self) -> None: - """Initialize face recognizer and load known faces.""" - logger.info("=" * 70) - logger.info("FACE RECOGNITION PLUGIN SETUP") - logger.info("=" * 70) - - logger.info(f"Detection model: {self.detection_model} (HOG=fast, CNN=accurate)") - logger.info(f"Processing scale: {self.face_scale*100}%") - logger.info(f"Loading known faces from: {self.known_faces_file}") - - if not os.path.exists(self.known_faces_file): - with open(self.known_faces_file, 'w', encoding='utf-8') as f: - json.dump([], f) - else: - self._log_file_metadata() - + """Initialize face recognizer and load known faces.""" self.face_recognizer = FaceRecognizer( - known_faces_file=self.known_faces_file, + known_faces_folder=self.known_faces_folder, model=self.detection_model ) - - self._log_loaded_faces() + if self.known_faces_folder: + os.makedirs(self.known_faces_folder, exist_ok=True) if self.save_unknown_faces: path_str = os.getenv("UNKNOWN_FACES_DIR", str(self.config.get('unknown_faces_dir', 'unknown_faces'))) self.unknown_faces_output_path = Path(path_str) self.unknown_faces_output_path.mkdir(parents=True, exist_ok=True) - logger.info(f"Unknown faces directory: {self.unknown_faces_output_path}") - logger.info(f"Saving every {self.unknown_save_interval}th unknown face detection") - - logger.info("=" * 70 + 
"\n") - - def _log_file_metadata(self) -> None: - """Log known faces file metadata.""" - file_stat = os.stat(self.known_faces_file) - import time - modified_str = time.strftime( - '%Y-%m-%d %H:%M:%S', - time.localtime(file_stat.st_mtime) - ) - - logger.info(f"File size: {file_stat.st_size} bytes") - logger.info(f"Last modified: {modified_str}") - - try: - with open(self.known_faces_file, 'r') as f: - known_faces_data = json.load(f) - logger.info(f"JSON structure: {len(known_faces_data)} entries") - except Exception as e: - logger.error(f"Error reading known faces file: {e}") - - def _log_loaded_faces(self) -> None: - """Log detailed information about loaded known faces.""" - if not hasattr(self.face_recognizer, 'known_face_encodings') or \ - not hasattr(self.face_recognizer, 'known_face_names'): - logger.warning("Face recognizer not properly initialized") - return - - known_encodings = self.face_recognizer.known_face_encodings - known_names = self.face_recognizer.known_face_names - - if not known_encodings or not known_names: - logger.warning("No known faces loaded!") - return - - face_counts: Dict[str, int] = {} - for name in known_names: - face_counts[name] = face_counts.get(name, 0) + 1 - - total_encodings = len(known_encodings) - total_people = len(face_counts) - - logger.info( - f"โœ“ Loaded {total_encodings} encoding(s) for {total_people} person/people" - ) - logger.info("") - logger.info("Loaded faces breakdown:") - logger.info("-" * 50) - - sorted_faces = sorted(face_counts.items(), key=lambda x: x[1], reverse=True) - - for name, count in sorted_faces: - plural = "image" if count == 1 else "images" - logger.info(f" โ€ข {name}: {count} {plural}") - - logger.info("-" * 50) - logger.info(f"Total: {total_people} unique face(s), {total_encodings} encoding(s)") - logger.info("") + logger.info(f"Unknown faces directory: {self.unknown_faces_output_path}") def analyze_frame( self, @@ -149,8 +75,8 @@ def analyze_frame( frame_scale_inverse = 1.0 / self.face_scale 
ui_scale = float(frame_analysis.get('scale_factor', 1.0)) + output_faces = [] - for face in recognized_faces: top, right, bottom, left = face['location'] @@ -173,17 +99,11 @@ def analyze_frame( ui_y = abs_y * ui_scale ui_w = abs_w * ui_scale ui_h = abs_h * ui_scale - + output_face: Dict[str, Union[str, List[int], Optional[Dict[str, float]], float, List[float], Dict[str, float], Dict[str, int]]] = { "name": face['name'], "location": [top, right, bottom, left], - "emotion": face.get('emotion'), "confidence": face.get("confidence"), - "encoding": ( - face['encoding'].tolist() - if isinstance(face['encoding'], np.ndarray) - else face['encoding'] - ), "bbox": { "x": ui_x, "y": ui_y, @@ -193,6 +113,10 @@ def analyze_frame( "frame_dimensions": { "width": original_width, "height": original_height + }, + "emotion":{ + "label": face.get('emotion_label'), + "confidence": face.get('emotion_confidence') } } output_faces.append(output_face) @@ -204,36 +128,34 @@ def analyze_frame( }) if face['name'].startswith("Unknown_") and self.save_unknown_faces: - unknown_id = face['name'].split('_')[1] if '_' in face['name'] else '0' - unknown_num = int(unknown_id) if unknown_id.isdigit() else 0 + face_original = face.copy() + face_original['location'] = [top, right, bottom, left] - if unknown_num % self.unknown_save_interval == 0: - face_original = face.copy() - face_original['location'] = [top, right, bottom, left] - - self._save_unknown_face( - frame, - int(frame_analysis['start_time_ms']), - int(frame_analysis.get('frame_idx', 0)), - face_original, - frame_analysis, - video_path - ) - + self._track_unknown_face( + frame, + int(frame_analysis['start_time_ms']), + int(frame_analysis.get('frame_idx', 0)), + face_original, + video_path, + original_width, + original_height + ) frame_analysis['faces'] = output_faces return frame_analysis - def _save_unknown_face( + def _track_unknown_face( self, frame: np.ndarray, timestamp_ms: int, frame_idx: int, face: Dict[str, Union[str, List[int], 
np.ndarray, List[float]]], - frame_analysis: FrameAnalysis, - video_path: str + video_path: str, + frame_width: int, + frame_height: int ) -> None: - """Crop and save unknown face with metadata.""" - h, w = frame.shape[:2] + """Track unknown face appearances and save only the first occurrence.""" + face_id = face['name'] + location = face['location'] if not isinstance(location, list) or len(location) != 4: return @@ -242,8 +164,8 @@ def _save_unknown_face( left = max(0, int(left)) top = max(0, int(top)) - right = min(w, int(right)) - bottom = min(h, int(bottom)) + right = min(frame_width, int(right)) + bottom = min(frame_height, int(bottom)) original_bbox = { 'top': top, @@ -259,8 +181,8 @@ def _save_unknown_face( padded_left = max(0, left - padding_w) padded_top = max(0, top - padding_h) - padded_right = min(w, right + padding_w) - padded_bottom = min(h, bottom + padding_h) + padded_right = min(frame_width, right + padding_w) + padded_bottom = min(frame_height, bottom + padding_h) padded_bbox = { 'top': padded_top, @@ -271,12 +193,68 @@ def _save_unknown_face( 'height': padded_bottom - padded_top } + appearance_data = { + "frame_index": frame_idx, + "timestamp_ms": timestamp_ms, + "timestamp_seconds": timestamp_ms / 1000, + "formatted_timestamp": self._format_timestamp(timestamp_ms), + "bounding_box": original_bbox, + "padded_bounding_box": padded_bbox + } + + # Check if this unknown face has been saved before + if face_id not in self.saved_unknown_faces: + # First time seeing this face - save the image + self._save_unknown_face_first_occurrence( + frame, + timestamp_ms, + frame_idx, + face, + video_path, + frame_width, + frame_height, + appearance_data + ) + else: + # Face already saved - just update the JSON with new appearance + self._update_unknown_face_appearances(face_id, appearance_data) + + + def _save_unknown_face_first_occurrence( + self, + frame: np.ndarray, + timestamp_ms: int, + frame_idx: int, + face: Dict[str, Union[str, List[int], np.ndarray, 
List[float]]], + video_path: str, + frame_width: int, + frame_height: int, + appearance_data: Dict + ) -> None: + """Save the first occurrence of an unknown face.""" + face_id = face['name'] + location = face['location'] + top, right, bottom, left = location + + left = max(0, int(left)) + top = max(0, int(top)) + right = min(frame_width, int(right)) + bottom = min(frame_height, int(bottom)) + + padding_w = int((right - left) * 0.1) + padding_h = int((bottom - top) * 0.1) + + padded_left = max(0, left - padding_w) + padded_top = max(0, top - padding_h) + padded_right = min(frame_width, right + padding_w) + padded_bottom = min(frame_height, bottom + padding_h) + face_image = frame[padded_top:padded_bottom, padded_left:padded_right] if face_image.size == 0: return - base_filename = f"{face['name']}_{timestamp_ms}ms_frame{frame_idx}" + base_filename = f"{face_id}_first_{timestamp_ms}ms" image_filename = f"{base_filename}.jpg" json_filename = f"{base_filename}.json" @@ -288,16 +266,37 @@ def _save_unknown_face( try: self.unknown_faces_output_path.mkdir(parents=True, exist_ok=True) - + h, w = frame.shape[:2] cv2.imwrite( str(image_filepath), face_image, [cv2.IMWRITE_JPEG_QUALITY, 85] ) + original_bbox = { + 'top': top, + 'right': right, + 'bottom': bottom, + 'left': left, + 'width': right - left, + 'height': bottom - top + } + padding_w = int((right - left) * 0.1) + padding_h = int((bottom - top) * 0.1) - encoding = face['encoding'] - encoding_list = encoding.tolist() if isinstance(encoding, np.ndarray) else encoding - + padded_left = max(0, left - padding_w) + padded_top = max(0, top - padding_h) + padded_right = min(w, right + padding_w) + padded_bottom = min(h, bottom + padding_h) + + padded_bbox = { + 'top': padded_top, + 'right': padded_right, + 'bottom': padded_bottom, + 'left': padded_left, + 'width': padded_right - padded_left, + 'height': padded_bottom - padded_top + } + metadata = { "image_file": image_filename, "json_file": json_filename, @@ -313,7 +312,9 @@ 
def _save_unknown_face( "face_id": face['name'], "bounding_box": original_bbox, "padded_bounding_box": padded_bbox, - "face_encoding": encoding_list, + "first_appearance": appearance_data, + "all_appearances": [appearance_data], + "total_appearances": 1, "label": { "name": None, "labeled_by": None, @@ -326,11 +327,45 @@ def _save_unknown_face( with open(json_filepath, 'w', encoding='utf-8') as json_file: json.dump(metadata, json_file, indent=2, ensure_ascii=False) - logger.debug(f"Saved unknown face: {image_filename}") + self.saved_unknown_faces[face_id] = { + "json_path": str(json_filepath), + "image_path": str(image_filepath), + "appearances": [appearance_data] + } + except Exception as e: logger.error(f"Error saving unknown face {image_filepath}: {e}") + + def _update_unknown_face_appearances( + self, + face_id: str, + appearance_data: Dict + ) -> None: + """Update the JSON file with a new appearance of an already-saved unknown face.""" + if face_id not in self.saved_unknown_faces: + return + + json_path = self.saved_unknown_faces[face_id]["json_path"] + + try: + with open(json_path, 'r', encoding='utf-8') as f: + metadata = json.load(f) + metadata["all_appearances"].append(appearance_data) + metadata["total_appearances"] = len(metadata["all_appearances"]) + metadata["last_updated"] = datetime.now().isoformat() + metadata["last_appearance"] = appearance_data + + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(metadata, f, indent=2, ensure_ascii=False) + + self.saved_unknown_faces[face_id]["appearances"].append(appearance_data) + + + except Exception as e: + logger.error(f"Error updating unknown face {face_id}: {e}") + @staticmethod def _format_timestamp(timestamp_ms: int) -> str: """Format timestamp in HH:MM:SS.mmm format.""" @@ -376,35 +411,24 @@ def get_summary(self) -> PluginResult: 'last_seen': max(timestamps) if timestamps else 0.0 } - logger.info("\n" + "=" * 70) - logger.info("FACE RECOGNITION SUMMARY") - logger.info("=" * 70) - 
logger.info(f"Total faces detected: {len(self.all_faces)}") logger.info(f"Known people identified: {len(known_people)}") + logger.info(f"Unique unknown faces: {unique_unknown}") - if known_people: - logger.info("") - logger.info("Known people appearances:") - for person in sorted(known_people): - info = known_appearances[person] - logger.info(f" โ€ข {person}:") - logger.info(f" Appearances: {info['count']}") - logger.info(f" First seen: {info['first_seen']:.1f}s") - logger.info(f" Last seen: {info['last_seen']:.1f}s") - - logger.info("") - logger.info(f"Unknown faces: {unknown_count} ({unique_unknown} unique)") - - if unknown_count > 0 and self.save_unknown_faces: - logger.info(f"Unknown faces saved to: {self.unknown_faces_output_path}") - - logger.info("=" * 70 + "\n") return { "known_people_identified": known_people, "known_appearances": known_appearances, "unknown_faces_detected": unknown_count, "unique_unknown_faces": unique_unknown, + "unknown_faces_saved": len(self.saved_unknown_faces), "total_faces_detected": len(self.all_faces), - "unknown_faces_directory": str(self.unknown_faces_output_path) if self.save_unknown_faces else None + "unknown_faces_directory": str(self.unknown_faces_output_path) if self.save_unknown_faces else None, + "unknown_faces_details": { + face_id: { + "image_path": data["image_path"], + "json_path": data["json_path"], + "total_appearances": len(data["appearances"]) + } + for face_id, data in self.saved_unknown_faces.items() + } } \ No newline at end of file diff --git a/python/requirements-dev.txt b/python/requirements-dev.txt new file mode 100644 index 00000000..2177852b --- /dev/null +++ b/python/requirements-dev.txt @@ -0,0 +1,3 @@ +-r requirements.txt +pytest +coverage>=7.11.0 diff --git a/python/requirements.txt b/python/requirements.txt index 9062e353..04846eff 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,11 +1,7 @@ # Core dependencies -opencv-python>=4.8.0 numpy>=1.24.0,<2.0.0 tqdm>=4.66.0 -# Emotion 
detection (optional - can be heavy) -fer==22.5.1 -tensorflow>=2.13.0 # Audio transcription faster-whisper>=1.2.1 @@ -20,24 +16,26 @@ Pillow>=9.3.0 # Monitoring psutil>=5.9.0 -# Video processing with audio support -ffmpeg-python>=0.2.0 -moviepy>=1.0.3 - -# Websockets +# Websockets websockets>=12.0 python-multipart>=0.0.6 -# Testing -pytest>=7.4.0 -coverage>=7.11.0 - +# Environment python-dotenv>=1.2.1 # ML/AI frameworks (CPU ONLY) --extra-index-url https://download.pytorch.org/whl/cpu torch>=2.6.0 torchvision>=0.16.0 + +# If x86 (AMD64), use the tiny CPU-only version +tensorflow-cpu==2.15.0; platform_machine == "x86_64" +tensorflow==2.15.0; platform_machine == "aarch64" + +# Face recognition (PyTorch backend) +deepface>=0.0.96 +tf-keras + ultralytics>=8.0.0 transformers>=4.35.0 --index-url https://pypi.org/simple \ No newline at end of file diff --git a/python/tests/test_analyze.py b/python/tests/test_analyze.py new file mode 100644 index 00000000..96379bb0 --- /dev/null +++ b/python/tests/test_analyze.py @@ -0,0 +1,103 @@ +import unittest +import sys +import os +import json +import tempfile +import shutil +from unittest.mock import patch + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from analyze import VideoAnalyzer, AnalysisConfig + +class TestVideoAnalysis(unittest.TestCase): + + def setUp(self): + self.video_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'test_video.mp4')) + + if not os.path.exists(self.video_path): + with open(self.video_path, 'w') as f: + f.write("dummy video content") + self.dummy_video_created = True + else: + self.dummy_video_created = False + + self.tmp_dir = tempfile.TemporaryDirectory() + self.output_json_path = os.path.join(self.tmp_dir.name, 'analysis_output.json') + self.unknown_faces_dir = os.path.join(os.path.dirname(__file__), 'unknown_faces') + + self.config = AnalysisConfig( + sample_interval_seconds=1.0, + enable_performance_report=False, + lazy_plugin_init=False, + 
unknown_faces_dir=self.unknown_faces_dir + ) + + reference_json_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'test_video_analysis.json')) + with open(reference_json_path, 'r') as f: + self.reference_analysis = json.load(f) + + def tearDown(self): + if self.dummy_video_created and os.path.exists(self.video_path): + os.remove(self.video_path) + self.tmp_dir.cleanup() + if os.path.exists(self.unknown_faces_dir): + shutil.rmtree(self.unknown_faces_dir) + + @patch('analyze.load_plugins') + def test_full_analysis_pipeline(self, mock_load_plugins): + """Test full analysis pipeline with mocked plugins""" + + mock_plugins = [] + + class MockPlugin: + def __init__(self, config): + self.config = config + + def setup(self): + pass + + def analyze_frame(self, frame, frame_analysis, video_path): + return frame_analysis + + def analyze_scene(self, frame_analyses): + pass + + def get_results(self): + return {} + + def analyze_activities(self, frame_analyses, scene_analysis): + return [] + + def get_summary(self): + return {} + + mock_load_plugins.return_value = [MockPlugin({})] + + analyzer = VideoAnalyzer(self.video_path, self.config) + result = analyzer.analyze() + + result_dict = result.to_dict() + + with open(self.output_json_path, "w", encoding="utf-8") as f: + json.dump(result_dict, f, indent=4, ensure_ascii=False) + + self.assertIsNone(result.error) + + self.assertIn('scene_analysis', result_dict) + self.assertIn('frame_analysis', result_dict) + self.assertIn('detected_activities', result_dict) + self.assertIn('face_recognition_summary', result_dict) + + def test_file_not_found_error(self): + non_existent_path = '/path/to/nonexistent/video.mp4' + analyzer = VideoAnalyzer(non_existent_path, self.config) + result = analyzer.analyze() + + self.assertIsNotNone(result.error) + self.assertIn("Video file not found", result.error) + self.assertEqual(result.video_file, non_existent_path) + self.assertEqual(len(result.frame_analysis), 0) + +if __name__ == 
'__main__': + unittest.main() \ No newline at end of file diff --git a/python/tests/test_transcribe.py b/python/tests/test_transcribe.py index deda4a57..71a399a5 100644 --- a/python/tests/test_transcribe.py +++ b/python/tests/test_transcribe.py @@ -10,7 +10,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from transcribe import run_transcription +from transcribe import run_transcription, TranscriptionService class MockWhisperSegment: @@ -86,11 +86,13 @@ def tearDown(self): if os.path.exists(self.output_json_path): os.remove(self.output_json_path) + @patch('transcribe.TranscriptionService.is_model_cached') @patch('transcribe.WhisperModel') @patch('transcribe.Path.exists') - def test_transcribe_video(self, mock_path_exists, mock_whisper_model): + def test_transcribe_video(self, mock_path_exists, mock_whisper_model, mock_is_cached): """Test transcription with mocked Whisper model""" mock_path_exists.return_value = True + mock_is_cached.return_value = True # Create mock model instance mock_model_instance = MagicMock() @@ -165,11 +167,13 @@ def test_transcribe_video(self, mock_path_exists, mock_whisper_model): self.assertIn('word', word, f"Segment {i}, word {j} missing word text") + @patch('transcribe.TranscriptionService.is_model_cached') @patch('transcribe.WhisperModel') @patch('transcribe.Path.exists') - def test_transcribe_empty_audio(self, mock_path_exists, mock_whisper_model): + def test_transcribe_empty_audio(self, mock_path_exists, mock_whisper_model, mock_is_cached): """Test transcription with video that has no audio""" mock_path_exists.return_value = True + mock_is_cached.return_value = True # Pretend model is already cached # Mock model to raise RuntimeError for no audio mock_model_instance = MagicMock() @@ -184,11 +188,13 @@ def test_transcribe_empty_audio(self, mock_path_exists, mock_whisper_model): self.assertEqual(len(result['segments']), 0, "Expected no segments for no audio") self.assertEqual(result['language'], 
'N/A', "Expected N/A language for no audio") + @patch('transcribe.TranscriptionService.is_model_cached') @patch('transcribe.WhisperModel') @patch('transcribe.Path.exists') - def test_transcribe_with_progress_callback(self, mock_path_exists, mock_whisper_model): + def test_transcribe_with_progress_callback(self, mock_path_exists, mock_whisper_model, mock_is_cached): """Test transcription with progress callback""" mock_path_exists.return_value = True + mock_is_cached.return_value = True # Pretend model is already cached mock_model_instance = MagicMock() mock_model_instance.transcribe.return_value = ( diff --git a/python/tests/test_video.mp4 b/python/tests/test_video.mp4 index d62b6d50..587079e0 100644 Binary files a/python/tests/test_video.mp4 and b/python/tests/test_video.mp4 differ diff --git a/python/tests/test_video_analysis.json b/python/tests/test_video_analysis.json new file mode 100644 index 00000000..9d1e0c17 --- /dev/null +++ b/python/tests/test_video_analysis.json @@ -0,0 +1,3660 @@ +{ + "scene_analysis": { + "dominant_color": { + "name": "Black", + "hex": "#000000", + "percentage": 50.4, + "is_vibrant": false, + "is_muted": true + }, + "overall_brightness": 29.27, + "overall_saturation": 21.67, + "overall_warmth": 3.7, + "color_mood": "dark", + "color_harmony": "analogous", + "environment": "snow", + "environment_confidence": 0.5, + "object_distribution": { + "person": 52, + "car": 4, + "laptop": 12, + "potted plant": 11, + "tv": 11, + "vase": 11, + "keyboard": 8, + "cell phone": 4, + "mouse": 7, + "apple": 4, + "orange": 4, + "refrigerator": 4, + "oven": 4 + }, + "total_frames": 60 + }, + "detected_activities": [], + "face_recognition_summary": { + "known_people_identified": [ + "Aiony Haust", + "Ilias" + ], + "known_appearances": { + "Aiony Haust": { + "count": 4, + "first_seen": 0.0, + "last_seen": 3.0 + }, + "Ilias": { + "count": 4, + "first_seen": 20.0, + "last_seen": 23.0 + } + }, + "unknown_faces_detected": 20, + "unique_unknown_faces": 5, + 
"unknown_faces_saved": 5, + "total_faces_detected": 28, + "unknown_faces_directory": "unknown_faces", + "unknown_faces_details": { + "Unknown_000": { + "image_path": "unknown_faces/Unknown_000_first_4000ms.jpg", + "json_path": "unknown_faces/Unknown_000_first_4000ms.json", + "total_appearances": 4 + }, + "Unknown_001": { + "image_path": "unknown_faces/Unknown_001_first_8000ms.jpg", + "json_path": "unknown_faces/Unknown_001_first_8000ms.json", + "total_appearances": 4 + }, + "Unknown_002": { + "image_path": "unknown_faces/Unknown_002_first_12000ms.jpg", + "json_path": "unknown_faces/Unknown_002_first_12000ms.json", + "total_appearances": 4 + }, + "Unknown_003": { + "image_path": "unknown_faces/Unknown_003_first_37000ms.jpg", + "json_path": "unknown_faces/Unknown_003_first_37000ms.json", + "total_appearances": 4 + }, + "Unknown_004": { + "image_path": "unknown_faces/Unknown_004_first_37000ms.jpg", + "json_path": "unknown_faces/Unknown_004_first_37000ms.json", + "total_appearances": 4 + } + } + }, + "frame_analysis": [ + { + "start_time_ms": 0, + "end_time_ms": 1000, + "duration_ms": 1000, + "frame_idx": 0, + "objects": [ + { + "label": "person", + "confidence": 0.9628351926803589, + "bbox": { + "x": 352.805908203125, + "y": 105.13653564453125, + "width": 574.772705078125, + "height": 607.6312866210938 + } + } + ], + "faces": [ + { + "name": "Aiony Haust", + "location": [ + 168, + 832, + 456, + 544 + ], + "confidence": 100.0, + "bbox": { + "x": 544.0, + "y": 168.0, + "width": 288.0, + "height": 288.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.5970541834831238 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a woman with a black shirt and a pink background" + }, + { + "start_time_ms": 1000, + "end_time_ms": 2000, + "duration_ms": 1000, + "frame_idx": 30, + "objects": [ + { + "label": "person", + "confidence": 0.9628399014472961, + "bbox": { + "x": 352.805419921875, + "y": 
105.13812255859375, + "width": 574.77197265625, + "height": 607.6322631835938 + } + } + ], + "faces": [ + { + "name": "Aiony Haust", + "location": [ + 168, + 830, + 456, + 542 + ], + "confidence": 100.0, + "bbox": { + "x": 542.0, + "y": 168.0, + "width": 288.0, + "height": 288.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.6164354681968689 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#000000", + "percentage": 50.4, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#000000", + "percentage": 50.4, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Medium Purple", + "hex": "#8481be", + "percentage": 29.8, + "is_vibrant": false, + "is_muted": false + } + ], + "brightness": 17.48, + "saturation": 19.12, + "color_temperature": "neutral", + "environment_caption": "a woman with a black shirt and a pink background" + }, + { + "start_time_ms": 2000, + "end_time_ms": 3000, + "duration_ms": 1000, + "frame_idx": 60, + "objects": [ + { + "label": "person", + "confidence": 0.9628399014472961, + "bbox": { + "x": 352.805419921875, + "y": 105.13812255859375, + "width": 574.77197265625, + "height": 607.6322631835938 + } + } + ], + "faces": [ + { + "name": "Aiony Haust", + "location": [ + 168, + 830, + 456, + 542 + ], + "confidence": 100.0, + "bbox": { + "x": 542.0, + "y": 168.0, + "width": 288.0, + "height": 288.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.6164354681968689 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a woman with a black shirt and a pink background", + "detected_text": [] + }, + { + "start_time_ms": 3000, + "end_time_ms": 4000, + "duration_ms": 1000, + "frame_idx": 90, + "objects": [ + { + "label": "person", + "confidence": 0.9628399014472961, + "bbox": { + "x": 352.805419921875, + 
"y": 105.13812255859375, + "width": 574.77197265625, + "height": 607.6322631835938 + } + } + ], + "faces": [ + { + "name": "Aiony Haust", + "location": [ + 168, + 830, + 456, + 542 + ], + "confidence": 100.0, + "bbox": { + "x": 542.0, + "y": 168.0, + "width": 288.0, + "height": 288.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.6164354681968689 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#03050a", + "percentage": 66.8, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Black", + "hex": "#03050a", + "percentage": 66.8, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Gray", + "hex": "#6f6da2", + "percentage": 17.2, + "is_vibrant": false, + "is_muted": false + } + ], + "brightness": 17.48, + "saturation": 19.12, + "color_temperature": "neutral", + "environment_caption": "a woman with a black shirt and a pink background" + }, + { + "start_time_ms": 4000, + "end_time_ms": 5000, + "duration_ms": 1000, + "frame_idx": 120, + "objects": [ + { + "label": "person", + "confidence": 0.9587202072143555, + "bbox": { + "x": 462.8580322265625, + "y": 15.13702392578125, + "width": 712.801513671875, + "height": 699.4501342773438 + } + } + ], + "faces": [ + { + "name": "Unknown_000", + "location": [ + 108, + 984, + 416, + 676 + ], + "confidence": 0.0, + "bbox": { + "x": 676.0, + "y": 108.0, + "width": 308.0, + "height": 308.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "happy", + "confidence": 0.9712995256542594 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a man with a beard and glasses on" + }, + { + "start_time_ms": 5000, + "end_time_ms": 6000, + "duration_ms": 1000, + "frame_idx": 150, + "objects": [ + { + "label": "person", + "confidence": 0.9580745100975037, + "bbox": { + "x": 462.9479675292969, + "y": 14.6707763671875, + "width": 
712.7727355957031, + "height": 699.81494140625 + } + } + ], + "faces": [ + { + "name": "Unknown_000", + "location": [ + 108, + 984, + 418, + 674 + ], + "confidence": 0.9974060524685332, + "bbox": { + "x": 674.0, + "y": 108.0, + "width": 310.0, + "height": 310.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "happy", + "confidence": 0.9703518152236938 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Gainsboro", + "hex": "#d8dddf", + "percentage": 65.6, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Gainsboro", + "hex": "#d8dddf", + "percentage": 65.6, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Dark Gray", + "hex": "#a29f9a", + "percentage": 22.4, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 72.48, + "saturation": 13.26, + "color_temperature": "neutral", + "environment_caption": "a man with glasses and a green shirt", + "detected_text": [] + }, + { + "start_time_ms": 6000, + "end_time_ms": 7000, + "duration_ms": 1000, + "frame_idx": 180, + "objects": [ + { + "label": "person", + "confidence": 0.9580745100975037, + "bbox": { + "x": 462.9479675292969, + "y": 14.6707763671875, + "width": 712.7727355957031, + "height": 699.81494140625 + } + } + ], + "faces": [ + { + "name": "Unknown_000", + "location": [ + 108, + 984, + 418, + 674 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 674.0, + "y": 108.0, + "width": 310.0, + "height": 310.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "happy", + "confidence": 0.9703518152236938 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a man with glasses and a green shirt" + }, + { + "start_time_ms": 7000, + "end_time_ms": 8000, + "duration_ms": 1000, + "frame_idx": 210, + "objects": [ + { + "label": "person", + "confidence": 0.9580745100975037, + "bbox": { + "x": 462.9479675292969, + "y": 14.6707763671875, + 
"width": 712.7727355957031, + "height": 699.81494140625 + } + } + ], + "faces": [ + { + "name": "Unknown_000", + "location": [ + 108, + 984, + 418, + 674 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 674.0, + "y": 108.0, + "width": 310.0, + "height": 310.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "happy", + "confidence": 0.9703518152236938 + } + } + ], + "shot_type": "medium-shot", + "dominant_color": { + "name": "Gainsboro", + "hex": "#d4d9db", + "percentage": 71.0, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Gainsboro", + "hex": "#d4d9db", + "percentage": 71.0, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Gray", + "hex": "#8b908a", + "percentage": 18.0, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 72.48, + "saturation": 13.26, + "color_temperature": "neutral", + "environment_caption": "a man with glasses and a green shirt" + }, + { + "start_time_ms": 8000, + "end_time_ms": 9000, + "duration_ms": 1000, + "frame_idx": 240, + "objects": [ + { + "label": "person", + "confidence": 0.9500717520713806, + "bbox": { + "x": 392.6436767578125, + "y": 1.4569091796875, + "width": 763.62890625, + "height": 710.5550537109375 + } + } + ], + "faces": [ + { + "name": "Unknown_001", + "location": [ + 124, + 976, + 430, + 670 + ], + "confidence": 0.0, + "bbox": { + "x": 670.0, + "y": 124.0, + "width": 306.0, + "height": 306.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.6319472193717957 + } + } + ], + "shot_type": "medium-shot", + "environment_caption": "a woman with long hair and a black shirt", + "detected_text": [] + }, + { + "start_time_ms": 9000, + "end_time_ms": 10000, + "duration_ms": 1000, + "frame_idx": 270, + "objects": [ + { + "label": "person", + "confidence": 0.9472954273223877, + "bbox": { + "x": 392.6351318359375, + "y": 1.94036865234375, + "width": 
763.622802734375, + "height": 710.1270141601562 + } + } + ], + "faces": [ + { + "name": "Unknown_001", + "location": [ + 126, + 976, + 430, + 672 + ], + "confidence": 0.9969557314857264, + "bbox": { + "x": 672.0, + "y": 126.0, + "width": 304.0, + "height": 304.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.5150555372238159 + } + } + ], + "shot_type": "medium-shot", + "dominant_color": { + "name": "Gray", + "hex": "#7f7d7d", + "percentage": 54.6, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Gray", + "hex": "#7f7d7d", + "percentage": 54.6, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Black", + "hex": "#0d1410", + "percentage": 27.0, + "is_vibrant": false, + "is_muted": false + } + ], + "brightness": 34.99, + "saturation": 25.18, + "color_temperature": "neutral", + "environment_caption": "a woman with long hair and blue eyes stands against a brick wall" + }, + { + "start_time_ms": 10000, + "end_time_ms": 11000, + "duration_ms": 1000, + "frame_idx": 300, + "objects": [ + { + "label": "person", + "confidence": 0.9472954273223877, + "bbox": { + "x": 392.6351318359375, + "y": 1.94036865234375, + "width": 763.622802734375, + "height": 710.1270141601562 + } + } + ], + "faces": [ + { + "name": "Unknown_001", + "location": [ + 126, + 976, + 430, + 672 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 672.0, + "y": 126.0, + "width": 304.0, + "height": 304.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.5150555372238159 + } + } + ], + "shot_type": "medium-shot", + "environment_caption": "a woman with long hair and blue eyes stands against a brick wall" + }, + { + "start_time_ms": 11000, + "end_time_ms": 12000, + "duration_ms": 1000, + "frame_idx": 330, + "objects": [ + { + "label": "person", + "confidence": 0.9472954273223877, + "bbox": { + "x": 392.6351318359375, 
+ "y": 1.94036865234375, + "width": 763.622802734375, + "height": 710.1270141601562 + } + } + ], + "faces": [ + { + "name": "Unknown_001", + "location": [ + 126, + 976, + 430, + 672 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 672.0, + "y": 126.0, + "width": 304.0, + "height": 304.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.5150555372238159 + } + } + ], + "shot_type": "medium-shot", + "dominant_color": { + "name": "Gray", + "hex": "#7d7d7d", + "percentage": 54.0, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Gray", + "hex": "#7d7d7d", + "percentage": 54.0, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Black", + "hex": "#0c1310", + "percentage": 24.2, + "is_vibrant": false, + "is_muted": false + } + ], + "brightness": 34.99, + "saturation": 25.18, + "color_temperature": "neutral", + "environment_caption": "a woman with long hair and blue eyes stands against a brick wall", + "detected_text": [] + }, + { + "start_time_ms": 12000, + "end_time_ms": 13000, + "duration_ms": 1000, + "frame_idx": 360, + "objects": [ + { + "label": "person", + "confidence": 0.9509174823760986, + "bbox": { + "x": 355.1968688964844, + "y": 59.98150634765625, + "width": 569.1416320800781, + "height": 653.9951782226562 + } + } + ], + "faces": [ + { + "name": "Unknown_002", + "location": [ + 256, + 772, + 522, + 506 + ], + "confidence": 0.0, + "bbox": { + "x": 506.0, + "y": 256.0, + "width": 266.0, + "height": 266.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "happy", + "confidence": 0.9999781847000122 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a woman with blue makeup and a white shirt" + }, + { + "start_time_ms": 13000, + "end_time_ms": 14000, + "duration_ms": 1000, + "frame_idx": 390, + "objects": [ + { + "label": "person", + "confidence": 0.9485589861869812, + "bbox": { + 
"x": 355.2869873046875, + "y": 60.8836669921875, + "width": 569.02685546875, + "height": 653.12060546875 + } + } + ], + "faces": [ + { + "name": "Unknown_002", + "location": [ + 256, + 774, + 522, + 508 + ], + "confidence": 0.9971492183963553, + "bbox": { + "x": 508.0, + "y": 256.0, + "width": 266.0, + "height": 266.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "happy", + "confidence": 0.999967098236084 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#010101", + "percentage": 64.0, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#010101", + "percentage": 64.0, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Gainsboro", + "hex": "#dedfdb", + "percentage": 30.0, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 33.73, + "saturation": 4.1, + "color_temperature": "neutral", + "environment_caption": "a woman with blue makeup and a white shirt" + }, + { + "start_time_ms": 14000, + "end_time_ms": 15000, + "duration_ms": 1000, + "frame_idx": 420, + "objects": [ + { + "label": "person", + "confidence": 0.9485589861869812, + "bbox": { + "x": 355.2869873046875, + "y": 60.8836669921875, + "width": 569.02685546875, + "height": 653.12060546875 + } + } + ], + "faces": [ + { + "name": "Unknown_002", + "location": [ + 256, + 774, + 522, + 508 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 508.0, + "y": 256.0, + "width": 266.0, + "height": 266.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "happy", + "confidence": 0.999967098236084 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a woman with blue makeup and a white shirt", + "detected_text": [] + }, + { + "start_time_ms": 15000, + "end_time_ms": 16000, + "duration_ms": 1000, + "frame_idx": 450, + "objects": [ + { + "label": "person", + "confidence": 0.9485589861869812, + "bbox": { 
+ "x": 355.2869873046875, + "y": 60.8836669921875, + "width": 569.02685546875, + "height": 653.12060546875 + } + } + ], + "faces": [ + { + "name": "Unknown_002", + "location": [ + 256, + 774, + 522, + 508 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 508.0, + "y": 256.0, + "width": 266.0, + "height": 266.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "happy", + "confidence": 0.999967098236084 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#010101", + "percentage": 60.6, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#010101", + "percentage": 60.6, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Gainsboro", + "hex": "#dfe0dd", + "percentage": 34.2, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 33.73, + "saturation": 4.1, + "color_temperature": "neutral", + "environment_caption": "a woman with blue makeup and a white shirt" + }, + { + "start_time_ms": 16000, + "end_time_ms": 17000, + "duration_ms": 1000, + "frame_idx": 480, + "objects": [ + { + "label": "person", + "confidence": 0.9499943852424622, + "bbox": { + "x": 458.72833251953125, + "y": 102.4029541015625, + "width": 420.7724609375, + "height": 610.1551513671875 + } + }, + { + "label": "car", + "confidence": 0.5991251468658447, + "bbox": { + "x": 400.7100830078125, + "y": 396.457763671875, + "width": 167.8426513671875, + "height": 308.276611328125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a man with a beard and glasses" + }, + { + "start_time_ms": 17000, + "end_time_ms": 18000, + "duration_ms": 1000, + "frame_idx": 510, + "objects": [ + { + "label": "person", + "confidence": 0.9493444561958313, + "bbox": { + "x": 458.6393737792969, + "y": 102.02294921875, + "width": 420.8230285644531, + "height": 610.55419921875 + } + }, + { + "label": "car", + "confidence": 0.5729292631149292, + 
"bbox": { + "x": 400.73651123046875, + "y": 396.8541564941406, + "width": 167.8951416015625, + "height": 307.9218444824219 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#040503", + "percentage": 76.4, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Black", + "hex": "#040503", + "percentage": 76.4, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Thistle", + "hex": "#d8c1c3", + "percentage": 13.2, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 15.57, + "saturation": 12.56, + "color_temperature": "neutral", + "environment_caption": "a man with glasses and a beard", + "detected_text": [] + }, + { + "start_time_ms": 18000, + "end_time_ms": 19000, + "duration_ms": 1000, + "frame_idx": 540, + "objects": [ + { + "label": "person", + "confidence": 0.9493444561958313, + "bbox": { + "x": 458.6393737792969, + "y": 102.02294921875, + "width": 420.8230285644531, + "height": 610.55419921875 + } + }, + { + "label": "car", + "confidence": 0.5729292631149292, + "bbox": { + "x": 400.73651123046875, + "y": 396.8541564941406, + "width": 167.8951416015625, + "height": 307.9218444824219 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a man with glasses and a beard" + }, + { + "start_time_ms": 19000, + "end_time_ms": 20000, + "duration_ms": 1000, + "frame_idx": 570, + "objects": [ + { + "label": "person", + "confidence": 0.9493444561958313, + "bbox": { + "x": 458.6393737792969, + "y": 102.02294921875, + "width": 420.8230285644531, + "height": 610.55419921875 + } + }, + { + "label": "car", + "confidence": 0.5729292631149292, + "bbox": { + "x": 400.73651123046875, + "y": 396.8541564941406, + "width": 167.8951416015625, + "height": 307.9218444824219 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#040503", + "percentage": 73.4, + "is_vibrant": false, + "is_muted": false 
+ }, + "color_palette": [ + { + "name": "Black", + "hex": "#040503", + "percentage": 73.4, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Dim Gray", + "hex": "#7a5f4d", + "percentage": 13.6, + "is_vibrant": false, + "is_muted": false + } + ], + "brightness": 15.57, + "saturation": 12.56, + "color_temperature": "neutral", + "environment_caption": "a man with glasses and a beard" + }, + { + "start_time_ms": 20000, + "end_time_ms": 21000, + "duration_ms": 1000, + "frame_idx": 600, + "objects": [ + { + "label": "person", + "confidence": 0.9015654921531677, + "bbox": { + "x": 243.37310791015625, + "y": 29.81060791015625, + "width": 396.38580322265625, + "height": 685.0991821289062 + } + }, + { + "label": "laptop", + "confidence": 0.8121369481086731, + "bbox": { + "x": 486.2861328125, + "y": 283.58929443359375, + "width": 367.5047607421875, + "height": 211.75140380859375 + } + }, + { + "label": "potted plant", + "confidence": 0.5900073051452637, + "bbox": { + "x": 111.02456665039062, + "y": 245.43099975585938, + "width": 56.27459716796875, + "height": 58.0830078125 + } + } + ], + "faces": [ + { + "name": "Ilias", + "location": [ + 108, + 598, + 248, + 458 + ], + "confidence": 100.0, + "bbox": { + "x": 458.0, + "y": 108.0, + "width": 140.0, + "height": 140.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.5334884524345398 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a man sitting on a couch using a laptop", + "detected_text": [] + }, + { + "start_time_ms": 21000, + "end_time_ms": 22000, + "duration_ms": 1000, + "frame_idx": 630, + "objects": [ + { + "label": "person", + "confidence": 0.901495635509491, + "bbox": { + "x": 243.90869140625, + "y": 30.0322265625, + "width": 395.6014404296875, + "height": 684.9541015625 + } + }, + { + "label": "laptop", + "confidence": 0.8135069608688354, + "bbox": { + "x": 484.7569580078125, + "y": 283.59283447265625, + "width": 
369.3446044921875, + "height": 211.7928466796875 + } + }, + { + "label": "potted plant", + "confidence": 0.5596430897712708, + "bbox": { + "x": 111.18846130371094, + "y": 245.4981689453125, + "width": 55.78643798828125, + "height": 58.0672607421875 + } + } + ], + "faces": [ + { + "name": "Ilias", + "location": [ + 106, + 598, + 246, + 458 + ], + "confidence": 100.0, + "bbox": { + "x": 458.0, + "y": 106.0, + "width": 140.0, + "height": 140.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.9020096063613892 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Dim Gray", + "hex": "#7a6d4f", + "percentage": 42.8, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Dim Gray", + "hex": "#7a6d4f", + "percentage": 42.8, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Silver", + "hex": "#bfbba6", + "percentage": 30.2, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 43.42, + "saturation": 27.95, + "color_temperature": "neutral", + "environment_caption": "a man sitting on a couch using a laptop" + }, + { + "start_time_ms": 22000, + "end_time_ms": 23000, + "duration_ms": 1000, + "frame_idx": 660, + "objects": [ + { + "label": "person", + "confidence": 0.901495635509491, + "bbox": { + "x": 243.90869140625, + "y": 30.0322265625, + "width": 395.6014404296875, + "height": 684.9541015625 + } + }, + { + "label": "laptop", + "confidence": 0.8135069608688354, + "bbox": { + "x": 484.7569580078125, + "y": 283.59283447265625, + "width": 369.3446044921875, + "height": 211.7928466796875 + } + }, + { + "label": "potted plant", + "confidence": 0.5596430897712708, + "bbox": { + "x": 111.18846130371094, + "y": 245.4981689453125, + "width": 55.78643798828125, + "height": 58.0672607421875 + } + } + ], + "faces": [ + { + "name": "Ilias", + "location": [ + 106, + 598, + 246, + 458 + ], + "confidence": 100.0, + "bbox": { + "x": 458.0, + "y": 
106.0, + "width": 140.0, + "height": 140.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.9020096063613892 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a man sitting on a couch using a laptop" + }, + { + "start_time_ms": 23000, + "end_time_ms": 24000, + "duration_ms": 1000, + "frame_idx": 690, + "objects": [ + { + "label": "person", + "confidence": 0.901495635509491, + "bbox": { + "x": 243.90869140625, + "y": 30.0322265625, + "width": 395.6014404296875, + "height": 684.9541015625 + } + }, + { + "label": "laptop", + "confidence": 0.8135069608688354, + "bbox": { + "x": 484.7569580078125, + "y": 283.59283447265625, + "width": 369.3446044921875, + "height": 211.7928466796875 + } + }, + { + "label": "potted plant", + "confidence": 0.5596430897712708, + "bbox": { + "x": 111.18846130371094, + "y": 245.4981689453125, + "width": 55.78643798828125, + "height": 58.0672607421875 + } + } + ], + "faces": [ + { + "name": "Ilias", + "location": [ + 106, + 598, + 246, + 458 + ], + "confidence": 100.0, + "bbox": { + "x": 458.0, + "y": 106.0, + "width": 140.0, + "height": 140.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "neutral", + "confidence": 0.9020096063613892 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Dim Gray", + "hex": "#7c7256", + "percentage": 47.8, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Dim Gray", + "hex": "#7c7256", + "percentage": 47.8, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Black", + "hex": "#2b2720", + "percentage": 26.6, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 43.42, + "saturation": 27.95, + "color_temperature": "neutral", + "environment_caption": "a man sitting on a couch using a laptop", + "detected_text": [] + }, + { + "start_time_ms": 24000, + "end_time_ms": 25000, + "duration_ms": 1000, + 
"frame_idx": 720, + "objects": [], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a black background with a white and red flower" + }, + { + "start_time_ms": 25000, + "end_time_ms": 26000, + "duration_ms": 1000, + "frame_idx": 750, + "objects": [ + { + "label": "tv", + "confidence": 0.9024169445037842, + "bbox": { + "x": 441.0497741699219, + "y": 281.669677734375, + "width": 398.4394836425781, + "height": 236.312255859375 + } + }, + { + "label": "vase", + "confidence": 0.8301916122436523, + "bbox": { + "x": 171.33966064453125, + "y": 442.7064208984375, + "width": 73.32754516601562, + "height": 134.6492919921875 + } + }, + { + "label": "laptop", + "confidence": 0.8062654733657837, + "bbox": { + "x": 903.8436279296875, + "y": 550.888427734375, + "width": 252.3330078125, + "height": 47.7503662109375 + } + }, + { + "label": "potted plant", + "confidence": 0.7861149907112122, + "bbox": { + "x": 39.194091796875, + "y": 306.49383544921875, + "width": 304.70477294921875, + "height": 272.39306640625 + } + }, + { + "label": "keyboard", + "confidence": 0.7242661714553833, + "bbox": { + "x": 519.7657470703125, + "y": 617.1258544921875, + "width": 207.818603515625, + "height": 36.46337890625 + } + }, + { + "label": "cell phone", + "confidence": 0.7105028033256531, + "bbox": { + "x": 374.19024658203125, + "y": 489.6094970703125, + "width": 51.353515625, + "height": 100.920654296875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Dark Olive Green", + "hex": "#3e383a", + "percentage": 41.4, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Dark Olive Green", + "hex": "#3e383a", + "percentage": 41.4, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Dim Gray", + "hex": "#766186", + "percentage": 41.2, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 38.56, + "saturation": 34.39, + "color_temperature": "neutral", + "environment_caption": "a computer on a desk 
with a flower in a vase" + }, + { + "start_time_ms": 26000, + "end_time_ms": 27000, + "duration_ms": 1000, + "frame_idx": 780, + "objects": [ + { + "label": "tv", + "confidence": 0.9023927450180054, + "bbox": { + "x": 441.05108642578125, + "y": 281.6634521484375, + "width": 398.4364013671875, + "height": 236.34381103515625 + } + }, + { + "label": "vase", + "confidence": 0.8300076723098755, + "bbox": { + "x": 171.34829711914062, + "y": 442.7603759765625, + "width": 73.321044921875, + "height": 134.596923828125 + } + }, + { + "label": "laptop", + "confidence": 0.8062610030174255, + "bbox": { + "x": 903.8638916015625, + "y": 550.8924560546875, + "width": 252.315185546875, + "height": 47.746826171875 + } + }, + { + "label": "potted plant", + "confidence": 0.7857850790023804, + "bbox": { + "x": 39.188720703125, + "y": 306.49444580078125, + "width": 304.7701416015625, + "height": 272.4002685546875 + } + }, + { + "label": "keyboard", + "confidence": 0.7233920693397522, + "bbox": { + "x": 519.7647094726562, + "y": 617.1199340820312, + "width": 207.8187255859375, + "height": 36.4774169921875 + } + }, + { + "label": "cell phone", + "confidence": 0.7098793983459473, + "bbox": { + "x": 374.1974792480469, + "y": 489.61572265625, + "width": 51.3460693359375, + "height": 100.94921875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a computer on a desk with a flower in a vase", + "detected_text": [] + }, + { + "start_time_ms": 27000, + "end_time_ms": 28000, + "duration_ms": 1000, + "frame_idx": 810, + "objects": [ + { + "label": "tv", + "confidence": 0.9023927450180054, + "bbox": { + "x": 441.05108642578125, + "y": 281.6634521484375, + "width": 398.4364013671875, + "height": 236.34381103515625 + } + }, + { + "label": "vase", + "confidence": 0.8300076723098755, + "bbox": { + "x": 171.34829711914062, + "y": 442.7603759765625, + "width": 73.321044921875, + "height": 134.596923828125 + } + }, + { + "label": "laptop", + "confidence": 
0.8062610030174255, + "bbox": { + "x": 903.8638916015625, + "y": 550.8924560546875, + "width": 252.315185546875, + "height": 47.746826171875 + } + }, + { + "label": "potted plant", + "confidence": 0.7857850790023804, + "bbox": { + "x": 39.188720703125, + "y": 306.49444580078125, + "width": 304.7701416015625, + "height": 272.4002685546875 + } + }, + { + "label": "keyboard", + "confidence": 0.7233920693397522, + "bbox": { + "x": 519.7647094726562, + "y": 617.1199340820312, + "width": 207.8187255859375, + "height": 36.4774169921875 + } + }, + { + "label": "cell phone", + "confidence": 0.7098793983459473, + "bbox": { + "x": 374.1974792480469, + "y": 489.61572265625, + "width": 51.3460693359375, + "height": 100.94921875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Dark Olive Green", + "hex": "#423c41", + "percentage": 45.4, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Dark Olive Green", + "hex": "#423c41", + "percentage": 45.4, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Dim Gray", + "hex": "#72608a", + "percentage": 36.2, + "is_vibrant": false, + "is_muted": false + } + ], + "brightness": 38.56, + "saturation": 34.37, + "color_temperature": "neutral", + "environment_caption": "a computer on a desk with a flower in a vase" + }, + { + "start_time_ms": 28000, + "end_time_ms": 29000, + "duration_ms": 1000, + "frame_idx": 840, + "objects": [ + { + "label": "tv", + "confidence": 0.9023927450180054, + "bbox": { + "x": 441.05108642578125, + "y": 281.6634521484375, + "width": 398.4364013671875, + "height": 236.34381103515625 + } + }, + { + "label": "vase", + "confidence": 0.8300076723098755, + "bbox": { + "x": 171.34829711914062, + "y": 442.7603759765625, + "width": 73.321044921875, + "height": 134.596923828125 + } + }, + { + "label": "laptop", + "confidence": 0.8062610030174255, + "bbox": { + "x": 903.8638916015625, + "y": 550.8924560546875, + "width": 252.315185546875, + 
"height": 47.746826171875 + } + }, + { + "label": "potted plant", + "confidence": 0.7857850790023804, + "bbox": { + "x": 39.188720703125, + "y": 306.49444580078125, + "width": 304.7701416015625, + "height": 272.4002685546875 + } + }, + { + "label": "keyboard", + "confidence": 0.7233920693397522, + "bbox": { + "x": 519.7647094726562, + "y": 617.1199340820312, + "width": 207.8187255859375, + "height": 36.4774169921875 + } + }, + { + "label": "cell phone", + "confidence": 0.7098793983459473, + "bbox": { + "x": 374.1974792480469, + "y": 489.61572265625, + "width": 51.3460693359375, + "height": 100.94921875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a computer on a desk with a flower in a vase" + }, + { + "start_time_ms": 29000, + "end_time_ms": 30000, + "duration_ms": 1000, + "frame_idx": 870, + "objects": [ + { + "label": "mouse", + "confidence": 0.8866870999336243, + "bbox": { + "x": 909.430419921875, + "y": 423.8297119140625, + "width": 71.1005859375, + "height": 30.4603271484375 + } + }, + { + "label": "keyboard", + "confidence": 0.6217185258865356, + "bbox": { + "x": 601.2000732421875, + "y": 427.2948303222656, + "width": 296.57763671875, + "height": 60.66168212890625 + } + }, + { + "label": "tv", + "confidence": 0.5394118428230286, + "bbox": { + "x": 1083.0751953125, + "y": 216.0004119873047, + "width": 196.2890625, + "height": 195.14967346191406 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#0c110d", + "percentage": 75.0, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#0c110d", + "percentage": 75.0, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Dark Olive Green", + "hex": "#645341", + "percentage": 24.2, + "is_vibrant": false, + "is_muted": false + } + ], + "brightness": 13.55, + "saturation": 50.06, + "color_temperature": "neutral", + "environment_caption": "a desk with a computer and a 
monitor", + "detected_text": [ + { + "text": "RZEWo]", + "confidence": 0.39622962994327204, + "bounding_box": [ + [ + 530, + 4 + ], + [ + 748, + 4 + ], + [ + 748, + 88 + ], + [ + 530, + 88 + ] + ], + "bbox": { + "x": 530, + "y": 4, + "width": 218, + "height": 84 + } + } + ] + }, + { + "start_time_ms": 30000, + "end_time_ms": 31000, + "duration_ms": 1000, + "frame_idx": 900, + "objects": [ + { + "label": "mouse", + "confidence": 0.886897087097168, + "bbox": { + "x": 909.4288330078125, + "y": 423.8321533203125, + "width": 71.1025390625, + "height": 30.458984375 + } + }, + { + "label": "keyboard", + "confidence": 0.621772825717926, + "bbox": { + "x": 601.205322265625, + "y": 427.28790283203125, + "width": 296.637451171875, + "height": 60.68896484375 + } + }, + { + "label": "tv", + "confidence": 0.5393102169036865, + "bbox": { + "x": 1083.075439453125, + "y": 216.0001220703125, + "width": 196.289306640625, + "height": 195.1778564453125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a desk with a computer and a monitor" + }, + { + "start_time_ms": 31000, + "end_time_ms": 32000, + "duration_ms": 1000, + "frame_idx": 930, + "objects": [ + { + "label": "mouse", + "confidence": 0.886897087097168, + "bbox": { + "x": 909.4288330078125, + "y": 423.8321533203125, + "width": 71.1025390625, + "height": 30.458984375 + } + }, + { + "label": "keyboard", + "confidence": 0.621772825717926, + "bbox": { + "x": 601.205322265625, + "y": 427.28790283203125, + "width": 296.637451171875, + "height": 60.68896484375 + } + }, + { + "label": "tv", + "confidence": 0.5393102169036865, + "bbox": { + "x": 1083.075439453125, + "y": 216.0001220703125, + "width": 196.289306640625, + "height": 195.1778564453125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#0b110e", + "percentage": 73.6, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Black", + "hex": "#0b110e", + 
"percentage": 73.6, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Dark Olive Green", + "hex": "#634f3d", + "percentage": 25.4, + "is_vibrant": false, + "is_muted": false + } + ], + "brightness": 13.55, + "saturation": 50.02, + "color_temperature": "neutral", + "environment_caption": "a desk with a computer and a monitor" + }, + { + "start_time_ms": 32000, + "end_time_ms": 33000, + "duration_ms": 1000, + "frame_idx": 960, + "objects": [ + { + "label": "mouse", + "confidence": 0.886897087097168, + "bbox": { + "x": 909.4288330078125, + "y": 423.8321533203125, + "width": 71.1025390625, + "height": 30.458984375 + } + }, + { + "label": "keyboard", + "confidence": 0.621772825717926, + "bbox": { + "x": 601.205322265625, + "y": 427.28790283203125, + "width": 296.637451171875, + "height": 60.68896484375 + } + }, + { + "label": "tv", + "confidence": 0.5393102169036865, + "bbox": { + "x": 1083.075439453125, + "y": 216.0001220703125, + "width": 196.289306640625, + "height": 195.1778564453125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a desk with a computer and a monitor", + "detected_text": [ + { + "text": "RZEWo]", + "confidence": 0.3645427713455217, + "bounding_box": [ + [ + 530, + 4 + ], + [ + 748, + 4 + ], + [ + 748, + 88 + ], + [ + 530, + 88 + ] + ], + "bbox": { + "x": 530, + "y": 4, + "width": 218, + "height": 84 + } + } + ] + }, + { + "start_time_ms": 33000, + "end_time_ms": 34000, + "duration_ms": 1000, + "frame_idx": 990, + "objects": [ + { + "label": "apple", + "confidence": 0.6653153896331787, + "bbox": { + "x": 401.8502197265625, + "y": 115.9088134765625, + "width": 475.73583984375, + "height": 475.2803955078125 + } + }, + { + "label": "orange", + "confidence": 0.615795910358429, + "bbox": { + "x": 402.082763671875, + "y": 114.66912841796875, + "width": 474.0806884765625, + "height": 476.17718505859375 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": 
"#030303", + "percentage": 81.4, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#030303", + "percentage": 81.4, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Dark Olive Green", + "hex": "#505050", + "percentage": 9.6, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 10.25, + "saturation": 0.0, + "color_temperature": "neutral", + "environment_caption": "the moon is seen in this image taken from the camera" + }, + { + "start_time_ms": 34000, + "end_time_ms": 35000, + "duration_ms": 1000, + "frame_idx": 1020, + "objects": [ + { + "label": "apple", + "confidence": 0.6658236384391785, + "bbox": { + "x": 401.8471984863281, + "y": 115.91490173339844, + "width": 475.7424011230469, + "height": 475.27357482910156 + } + }, + { + "label": "orange", + "confidence": 0.6149478554725647, + "bbox": { + "x": 402.0781555175781, + "y": 114.6710205078125, + "width": 474.0817565917969, + "height": 476.172607421875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "the moon is seen in this image taken from the camera" + }, + { + "start_time_ms": 35000, + "end_time_ms": 36000, + "duration_ms": 1000, + "frame_idx": 1050, + "objects": [ + { + "label": "apple", + "confidence": 0.6658236384391785, + "bbox": { + "x": 401.8471984863281, + "y": 115.91490173339844, + "width": 475.7424011230469, + "height": 475.27357482910156 + } + }, + { + "label": "orange", + "confidence": 0.6149478554725647, + "bbox": { + "x": 402.0781555175781, + "y": 114.6710205078125, + "width": 474.0817565917969, + "height": 476.172607421875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#030303", + "percentage": 82.8, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#030303", + "percentage": 82.8, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Dark Olive Green", + "hex": "#4f4f4f", 
+ "percentage": 9.4, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 10.25, + "saturation": 0.0, + "color_temperature": "neutral", + "environment_caption": "the moon is seen in this image taken from the camera", + "detected_text": [] + }, + { + "start_time_ms": 36000, + "end_time_ms": 37000, + "duration_ms": 1000, + "frame_idx": 1080, + "objects": [ + { + "label": "apple", + "confidence": 0.6658236384391785, + "bbox": { + "x": 401.8471984863281, + "y": 115.91490173339844, + "width": 475.7424011230469, + "height": 475.27357482910156 + } + }, + { + "label": "orange", + "confidence": 0.6149478554725647, + "bbox": { + "x": 402.0781555175781, + "y": 114.6710205078125, + "width": 474.0817565917969, + "height": 476.172607421875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "the moon is seen in this image taken from the camera" + }, + { + "start_time_ms": 37000, + "end_time_ms": 38000, + "duration_ms": 1000, + "frame_idx": 1110, + "objects": [ + { + "label": "person", + "confidence": 0.9668274521827698, + "bbox": { + "x": 587.9398193359375, + "y": 1.2698974609375, + "width": 618.8538818359375, + "height": 711.7847900390625 + } + }, + { + "label": "person", + "confidence": 0.8326878547668457, + "bbox": { + "x": 226.10601806640625, + "y": 105.094482421875, + "width": 371.0274658203125, + "height": 518.998779296875 + } + }, + { + "label": "refrigerator", + "confidence": 0.8258785605430603, + "bbox": { + "x": 0.3137969970703125, + "y": 4.10003662109375, + "width": 156.9606170654297, + "height": 518.9912109375 + } + }, + { + "label": "vase", + "confidence": 0.8127474784851074, + "bbox": { + "x": 0.072845458984375, + "y": 621.9625244140625, + "width": 149.15328979492188, + "height": 98.0374755859375 + } + }, + { + "label": "oven", + "confidence": 0.6676654815673828, + "bbox": { + "x": 1159.55908203125, + "y": 335.5508117675781, + "width": 119.927734375, + "height": 380.3027038574219 + } + } + ], + "faces": [ + { + "name": 
"Unknown_003", + "location": [ + 186, + 488, + 322, + 352 + ], + "confidence": 0.0, + "bbox": { + "x": 352.0, + "y": 186.0, + "width": 136.0, + "height": 136.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "sad", + "confidence": 0.6397718191146851 + } + }, + { + "name": "Unknown_004", + "location": [ + 8, + 904, + 204, + 708 + ], + "confidence": 0.0, + "bbox": { + "x": 708.0, + "y": 8.0, + "width": 196.0, + "height": 196.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "sad", + "confidence": 0.9755483894886506 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Dark Gray", + "hex": "#b4a797", + "percentage": 35.0, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Dark Gray", + "hex": "#b4a797", + "percentage": 35.0, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Sienna", + "hex": "#8a4d32", + "percentage": 34.6, + "is_vibrant": true, + "is_muted": false + } + ], + "brightness": 41.78, + "saturation": 44.67, + "color_temperature": "neutral", + "environment_caption": "a woman in a blue jacket" + }, + { + "start_time_ms": 38000, + "end_time_ms": 39000, + "duration_ms": 1000, + "frame_idx": 1140, + "objects": [ + { + "label": "person", + "confidence": 0.9668616652488708, + "bbox": { + "x": 587.944580078125, + "y": 1.2705078125, + "width": 618.8514404296875, + "height": 711.781982421875 + } + }, + { + "label": "person", + "confidence": 0.8330450057983398, + "bbox": { + "x": 226.09925842285156, + "y": 105.11700439453125, + "width": 371.05162048339844, + "height": 519.531982421875 + } + }, + { + "label": "refrigerator", + "confidence": 0.8253546357154846, + "bbox": { + "x": 0.3138427734375, + "y": 4.10382080078125, + "width": 156.91375732421875, + "height": 518.9447021484375 + } + }, + { + "label": "vase", + "confidence": 0.8120298981666565, + "bbox": { + "x": 0.072235107421875, + "y": 621.9801025390625, + 
"width": 149.1405029296875, + "height": 98.0198974609375 + } + }, + { + "label": "oven", + "confidence": 0.6659095883369446, + "bbox": { + "x": 1159.54736328125, + "y": 335.5455322265625, + "width": 119.939453125, + "height": 380.2899169921875 + } + } + ], + "faces": [ + { + "name": "Unknown_003", + "location": [ + 188, + 486, + 322, + 352 + ], + "confidence": 0.9973879484574109, + "bbox": { + "x": 352.0, + "y": 188.0, + "width": 134.0, + "height": 134.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "sad", + "confidence": 0.7972616794989491 + } + }, + { + "name": "Unknown_004", + "location": [ + 8, + 904, + 204, + 708 + ], + "confidence": 0.9975271930302738, + "bbox": { + "x": 708.0, + "y": 8.0, + "width": 196.0, + "height": 196.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "sad", + "confidence": 0.9754615411437476 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a woman in a blue jacket", + "detected_text": [] + }, + { + "start_time_ms": 39000, + "end_time_ms": 40000, + "duration_ms": 1000, + "frame_idx": 1170, + "objects": [ + { + "label": "person", + "confidence": 0.9668616652488708, + "bbox": { + "x": 587.944580078125, + "y": 1.2705078125, + "width": 618.8514404296875, + "height": 711.781982421875 + } + }, + { + "label": "person", + "confidence": 0.8330450057983398, + "bbox": { + "x": 226.09925842285156, + "y": 105.11700439453125, + "width": 371.05162048339844, + "height": 519.531982421875 + } + }, + { + "label": "refrigerator", + "confidence": 0.8253546357154846, + "bbox": { + "x": 0.3138427734375, + "y": 4.10382080078125, + "width": 156.91375732421875, + "height": 518.9447021484375 + } + }, + { + "label": "vase", + "confidence": 0.8120298981666565, + "bbox": { + "x": 0.072235107421875, + "y": 621.9801025390625, + "width": 149.1405029296875, + "height": 98.0198974609375 + } + }, + { + "label": "oven", + "confidence": 0.6659095883369446, + "bbox": { 
+ "x": 1159.54736328125, + "y": 335.5455322265625, + "width": 119.939453125, + "height": 380.2899169921875 + } + } + ], + "faces": [ + { + "name": "Unknown_003", + "location": [ + 188, + 486, + 322, + 352 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 352.0, + "y": 188.0, + "width": 134.0, + "height": 134.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "sad", + "confidence": 0.7972616794989491 + } + }, + { + "name": "Unknown_004", + "location": [ + 8, + 904, + 204, + 708 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 708.0, + "y": 8.0, + "width": 196.0, + "height": 196.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "sad", + "confidence": 0.9754615411437476 + } + } + ], + "shot_type": "long-shot", + "dominant_color": { + "name": "Dark Olive Green", + "hex": "#4d2a1c", + "percentage": 41.6, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Dark Olive Green", + "hex": "#4d2a1c", + "percentage": 41.6, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Dark Gray", + "hex": "#b5afa4", + "percentage": 29.8, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 41.78, + "saturation": 44.67, + "color_temperature": "neutral", + "environment_caption": "a woman in a blue jacket" + }, + { + "start_time_ms": 40000, + "end_time_ms": 41000, + "duration_ms": 1000, + "frame_idx": 1200, + "objects": [ + { + "label": "person", + "confidence": 0.9668616652488708, + "bbox": { + "x": 587.944580078125, + "y": 1.2705078125, + "width": 618.8514404296875, + "height": 711.781982421875 + } + }, + { + "label": "person", + "confidence": 0.8330450057983398, + "bbox": { + "x": 226.09925842285156, + "y": 105.11700439453125, + "width": 371.05162048339844, + "height": 519.531982421875 + } + }, + { + "label": "refrigerator", + "confidence": 0.8253546357154846, + "bbox": { + "x": 0.3138427734375, + "y": 4.10382080078125, + 
"width": 156.91375732421875, + "height": 518.9447021484375 + } + }, + { + "label": "vase", + "confidence": 0.8120298981666565, + "bbox": { + "x": 0.072235107421875, + "y": 621.9801025390625, + "width": 149.1405029296875, + "height": 98.0198974609375 + } + }, + { + "label": "oven", + "confidence": 0.6659095883369446, + "bbox": { + "x": 1159.54736328125, + "y": 335.5455322265625, + "width": 119.939453125, + "height": 380.2899169921875 + } + } + ], + "faces": [ + { + "name": "Unknown_003", + "location": [ + 188, + 486, + 322, + 352 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 352.0, + "y": 188.0, + "width": 134.0, + "height": 134.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "sad", + "confidence": 0.7972616794989491 + } + }, + { + "name": "Unknown_004", + "location": [ + 8, + 904, + 204, + 708 + ], + "confidence": 0.9975273768433653, + "bbox": { + "x": 708.0, + "y": 8.0, + "width": 196.0, + "height": 196.0 + }, + "frame_dimensions": { + "width": 1280, + "height": 720 + }, + "emotion": { + "label": "sad", + "confidence": 0.9754615411437476 + } + } + ], + "shot_type": "long-shot", + "environment_caption": "a woman in a blue jacket" + }, + { + "start_time_ms": 41000, + "end_time_ms": 42000, + "duration_ms": 1000, + "frame_idx": 1230, + "objects": [ + { + "label": "mouse", + "confidence": 0.8133947849273682, + "bbox": { + "x": 705.120361328125, + "y": 444.5634765625, + "width": 32.68798828125, + "height": 20.60784912109375 + } + }, + { + "label": "tv", + "confidence": 0.7941663265228271, + "bbox": { + "x": 480.699951171875, + "y": 288.009765625, + "width": 170.217041015625, + "height": 174.0390625 + } + }, + { + "label": "vase", + "confidence": 0.7732422947883606, + "bbox": { + "x": 699.1503295898438, + "y": 341.97613525390625, + "width": 45.7423095703125, + "height": 79.1451416015625 + } + }, + { + "label": "potted plant", + "confidence": 0.7437313199043274, + "bbox": { + "x": 797.5447387695312, + "y": 
388.0364990234375, + "width": 33.6376953125, + "height": 45.54327392578125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#030202", + "percentage": 77.4, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Black", + "hex": "#030202", + "percentage": 77.4, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Gainsboro", + "hex": "#e2e1df", + "percentage": 14.8, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 19.1, + "saturation": 6.12, + "color_temperature": "neutral", + "environment_caption": "a desk with a computer and a plant", + "detected_text": [] + }, + { + "start_time_ms": 42000, + "end_time_ms": 43000, + "duration_ms": 1000, + "frame_idx": 1260, + "objects": [ + { + "label": "mouse", + "confidence": 0.8130481243133545, + "bbox": { + "x": 705.154296875, + "y": 444.5655517578125, + "width": 32.6572265625, + "height": 20.62066650390625 + } + }, + { + "label": "tv", + "confidence": 0.7942075729370117, + "bbox": { + "x": 480.7119140625, + "y": 288.0045166015625, + "width": 170.2042236328125, + "height": 173.94793701171875 + } + }, + { + "label": "vase", + "confidence": 0.7744741439819336, + "bbox": { + "x": 699.1466064453125, + "y": 341.90911865234375, + "width": 45.750732421875, + "height": 79.2159423828125 + } + }, + { + "label": "potted plant", + "confidence": 0.7450681328773499, + "bbox": { + "x": 797.5067138671875, + "y": 388.0240783691406, + "width": 33.6748046875, + "height": 45.5902099609375 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a desk with a computer and a plant" + }, + { + "start_time_ms": 43000, + "end_time_ms": 44000, + "duration_ms": 1000, + "frame_idx": 1290, + "objects": [ + { + "label": "mouse", + "confidence": 0.8130481243133545, + "bbox": { + "x": 705.154296875, + "y": 444.5655517578125, + "width": 32.6572265625, + "height": 20.62066650390625 + } + }, + { + "label": "tv", + 
"confidence": 0.7942075729370117, + "bbox": { + "x": 480.7119140625, + "y": 288.0045166015625, + "width": 170.2042236328125, + "height": 173.94793701171875 + } + }, + { + "label": "vase", + "confidence": 0.7744741439819336, + "bbox": { + "x": 699.1466064453125, + "y": 341.90911865234375, + "width": 45.750732421875, + "height": 79.2159423828125 + } + }, + { + "label": "potted plant", + "confidence": 0.7450681328773499, + "bbox": { + "x": 797.5067138671875, + "y": 388.0240783691406, + "width": 33.6748046875, + "height": 45.5902099609375 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#040302", + "percentage": 76.2, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Black", + "hex": "#040302", + "percentage": 76.2, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Gainsboro", + "hex": "#e1e2e1", + "percentage": 17.2, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 19.1, + "saturation": 6.12, + "color_temperature": "neutral", + "environment_caption": "a desk with a computer and a plant" + }, + { + "start_time_ms": 44000, + "end_time_ms": 45000, + "duration_ms": 1000, + "frame_idx": 1320, + "objects": [ + { + "label": "person", + "confidence": 0.857846200466156, + "bbox": { + "x": 313.9718322753906, + "y": 2.7562255859375, + "width": 696.9882507324219, + "height": 461.4671630859375 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a woman ' s eye is covered in snow", + "detected_text": [] + }, + { + "start_time_ms": 45000, + "end_time_ms": 46000, + "duration_ms": 1000, + "frame_idx": 1350, + "objects": [ + { + "label": "person", + "confidence": 0.8586587905883789, + "bbox": { + "x": 313.891357421875, + "y": 2.761383056640625, + "width": 697.05126953125, + "height": 461.28582763671875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#050405", + "percentage": 
54.4, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#050405", + "percentage": 54.4, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Silver", + "hex": "#cac3c3", + "percentage": 29.0, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 30.08, + "saturation": 14.08, + "color_temperature": "neutral", + "environment_caption": "a woman ' s eye is covered in snow" + }, + { + "start_time_ms": 46000, + "end_time_ms": 47000, + "duration_ms": 1000, + "frame_idx": 1380, + "objects": [ + { + "label": "person", + "confidence": 0.8586587905883789, + "bbox": { + "x": 313.891357421875, + "y": 2.761383056640625, + "width": 697.05126953125, + "height": 461.28582763671875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a woman ' s eye is covered in snow" + }, + { + "start_time_ms": 47000, + "end_time_ms": 48000, + "duration_ms": 1000, + "frame_idx": 1410, + "objects": [ + { + "label": "person", + "confidence": 0.8586686253547668, + "bbox": { + "x": 313.9041748046875, + "y": 2.7619781494140625, + "width": 697.032470703125, + "height": 461.26512145996094 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#050404", + "percentage": 53.6, + "is_vibrant": false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#050404", + "percentage": 53.6, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Silver", + "hex": "#c8c0bf", + "percentage": 24.6, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 30.08, + "saturation": 14.08, + "color_temperature": "neutral", + "environment_caption": "a woman ' s eye is covered in snow", + "detected_text": [] + }, + { + "start_time_ms": 48000, + "end_time_ms": 49000, + "duration_ms": 1000, + "frame_idx": 1440, + "objects": [ + { + "label": "person", + "confidence": 0.9548642039299011, + "bbox": { + "x": 401.7174072265625, + "y": 
194.9915771484375, + "width": 476.5882568359375, + "height": 517.625244140625 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a woman covering her face with her hands" + }, + { + "start_time_ms": 49000, + "end_time_ms": 50000, + "duration_ms": 1000, + "frame_idx": 1470, + "objects": [ + { + "label": "person", + "confidence": 0.9548318982124329, + "bbox": { + "x": 401.71435546875, + "y": 194.993408203125, + "width": 476.601318359375, + "height": 517.6309814453125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#000101", + "percentage": 65.0, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Black", + "hex": "#000101", + "percentage": 65.0, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Dark Olive Green", + "hex": "#46433e", + "percentage": 22.8, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 13.32, + "saturation": 8.47, + "color_temperature": "neutral", + "environment_caption": "a woman covering her face with her hands" + }, + { + "start_time_ms": 50000, + "end_time_ms": 51000, + "duration_ms": 1000, + "frame_idx": 1500, + "objects": [ + { + "label": "person", + "confidence": 0.9548318982124329, + "bbox": { + "x": 401.71435546875, + "y": 194.993408203125, + "width": 476.601318359375, + "height": 517.6309814453125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a woman covering her face with her hands", + "detected_text": [] + }, + { + "start_time_ms": 51000, + "end_time_ms": 52000, + "duration_ms": 1000, + "frame_idx": 1530, + "objects": [ + { + "label": "person", + "confidence": 0.9548318982124329, + "bbox": { + "x": 401.71435546875, + "y": 194.993408203125, + "width": 476.601318359375, + "height": 517.6309814453125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#000000", + "percentage": 60.0, + "is_vibrant": 
false, + "is_muted": true + }, + "color_palette": [ + { + "name": "Black", + "hex": "#000000", + "percentage": 60.0, + "is_vibrant": false, + "is_muted": true + }, + { + "name": "Dark Olive Green", + "hex": "#3f403c", + "percentage": 21.2, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 13.32, + "saturation": 8.47, + "color_temperature": "neutral", + "environment_caption": "a woman covering her face with her hands" + }, + { + "start_time_ms": 52000, + "end_time_ms": 53000, + "duration_ms": 1000, + "frame_idx": 1560, + "objects": [ + { + "label": "person", + "confidence": 0.9246562123298645, + "bbox": { + "x": 223.675537109375, + "y": 28.230224609375, + "width": 598.3995361328125, + "height": 686.0201416015625 + } + }, + { + "label": "laptop", + "confidence": 0.9024955034255981, + "bbox": { + "x": 524.2127685546875, + "y": 407.7945251464844, + "width": 278.358154296875, + "height": 195.31912231445312 + } + }, + { + "label": "person", + "confidence": 0.7939924001693726, + "bbox": { + "x": 474.5268859863281, + "y": 81.31982421875, + "width": 590.2978210449219, + "height": 632.7647705078125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a woman sitting on a couch" + }, + { + "start_time_ms": 53000, + "end_time_ms": 54000, + "duration_ms": 1000, + "frame_idx": 1590, + "objects": [ + { + "label": "person", + "confidence": 0.9247915744781494, + "bbox": { + "x": 223.71224975585938, + "y": 28.230712890625, + "width": 598.3650207519531, + "height": 686.0186767578125 + } + }, + { + "label": "laptop", + "confidence": 0.9021109342575073, + "bbox": { + "x": 524.215087890625, + "y": 407.81317138671875, + "width": 278.71923828125, + "height": 195.2994384765625 + } + }, + { + "label": "person", + "confidence": 0.7942219376564026, + "bbox": { + "x": 474.6187744140625, + "y": 81.3194580078125, + "width": 590.2081298828125, + "height": 632.760986328125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + 
"name": "Dark Olive Green", + "hex": "#3f291e", + "percentage": 42.8, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Dark Olive Green", + "hex": "#3f291e", + "percentage": 42.8, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Sienna", + "hex": "#a47443", + "percentage": 36.6, + "is_vibrant": true, + "is_muted": false + } + ], + "brightness": 38.04, + "saturation": 52.75, + "color_temperature": "neutral", + "environment_caption": "a woman sitting on a couch", + "detected_text": [] + }, + { + "start_time_ms": 54000, + "end_time_ms": 55000, + "duration_ms": 1000, + "frame_idx": 1620, + "objects": [ + { + "label": "person", + "confidence": 0.9247915744781494, + "bbox": { + "x": 223.71224975585938, + "y": 28.230712890625, + "width": 598.3650207519531, + "height": 686.0186767578125 + } + }, + { + "label": "laptop", + "confidence": 0.9021109342575073, + "bbox": { + "x": 524.215087890625, + "y": 407.81317138671875, + "width": 278.71923828125, + "height": 195.2994384765625 + } + }, + { + "label": "person", + "confidence": 0.7942219376564026, + "bbox": { + "x": 474.6187744140625, + "y": 81.3194580078125, + "width": 590.2081298828125, + "height": 632.760986328125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a woman sitting on a couch" + }, + { + "start_time_ms": 55000, + "end_time_ms": 56000, + "duration_ms": 1000, + "frame_idx": 1650, + "objects": [ + { + "label": "person", + "confidence": 0.9247915744781494, + "bbox": { + "x": 223.71224975585938, + "y": 28.230712890625, + "width": 598.3650207519531, + "height": 686.0186767578125 + } + }, + { + "label": "laptop", + "confidence": 0.9021109342575073, + "bbox": { + "x": 524.215087890625, + "y": 407.81317138671875, + "width": 278.71923828125, + "height": 195.2994384765625 + } + }, + { + "label": "person", + "confidence": 0.7942219376564026, + "bbox": { + "x": 474.6187744140625, + "y": 81.3194580078125, + "width": 590.2081298828125, + 
"height": 632.760986328125 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Dark Olive Green", + "hex": "#40291e", + "percentage": 43.6, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Dark Olive Green", + "hex": "#40291e", + "percentage": 43.6, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Sienna", + "hex": "#a5743f", + "percentage": 36.0, + "is_vibrant": true, + "is_muted": false + } + ], + "brightness": 38.04, + "saturation": 52.75, + "color_temperature": "warm", + "environment_caption": "a woman sitting on a couch" + }, + { + "start_time_ms": 56000, + "end_time_ms": 57000, + "duration_ms": 1000, + "frame_idx": 1680, + "objects": [ + { + "label": "person", + "confidence": 0.948384165763855, + "bbox": { + "x": 401.0283508300781, + "y": 177.92779541015625, + "width": 477.2428894042969, + "height": 534.5269165039062 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a man in a suit and sunglasses is dancing", + "detected_text": [] + }, + { + "start_time_ms": 57000, + "end_time_ms": 58000, + "duration_ms": 1000, + "frame_idx": 1710, + "objects": [ + { + "label": "person", + "confidence": 0.9483422636985779, + "bbox": { + "x": 401.0250244140625, + "y": 177.9107666015625, + "width": 477.246337890625, + "height": 534.54296875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#010302", + "percentage": 65.8, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Black", + "hex": "#010302", + "percentage": 65.8, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Dim Gray", + "hex": "#766d53", + "percentage": 20.2, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 16.73, + "saturation": 12.41, + "color_temperature": "neutral", + "environment_caption": "a man in a suit and sunglasses is dancing" + }, + { + "start_time_ms": 58000, + 
"end_time_ms": 59000, + "duration_ms": 1000, + "frame_idx": 1740, + "objects": [ + { + "label": "person", + "confidence": 0.9483422636985779, + "bbox": { + "x": 401.0250244140625, + "y": 177.9107666015625, + "width": 477.246337890625, + "height": 534.54296875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "environment_caption": "a man in a suit and sunglasses is dancing" + }, + { + "start_time_ms": 59000, + "end_time_ms": 59967, + "duration_ms": 967, + "frame_idx": 1770, + "objects": [ + { + "label": "person", + "confidence": 0.9483422636985779, + "bbox": { + "x": 401.0250244140625, + "y": 177.9107666015625, + "width": 477.246337890625, + "height": 534.54296875 + } + } + ], + "faces": [], + "shot_type": "long-shot", + "dominant_color": { + "name": "Black", + "hex": "#010202", + "percentage": 70.8, + "is_vibrant": false, + "is_muted": false + }, + "color_palette": [ + { + "name": "Black", + "hex": "#010202", + "percentage": 70.8, + "is_vibrant": false, + "is_muted": false + }, + { + "name": "Dark Gray", + "hex": "#b6b09a", + "percentage": 14.8, + "is_vibrant": false, + "is_muted": true + } + ], + "brightness": 16.73, + "saturation": 12.41, + "color_temperature": "neutral", + "environment_caption": "a man in a suit and sunglasses is dancing", + "detected_text": [] + } + ] +} \ No newline at end of file diff --git a/python/transcribe.py b/python/transcribe.py index 87ce312b..d7c2ebda 100644 --- a/python/transcribe.py +++ b/python/transcribe.py @@ -189,7 +189,15 @@ def is_model_cached(self) -> bool: """Check if model is already downloaded""" model_repo = self._get_model_path() cache_path = Path(self.cache_dir) / model_repo.replace("/", "--") - return cache_path.exists() and any(cache_path.iterdir()) + + # Check if path exists first before calling iterdir() + if not cache_path.exists(): + return False + + try: + return any(cache_path.iterdir()) + except (OSError, FileNotFoundError): + return False def download_model_async(self) -> None: """Download model in 
background thread"""