diff --git a/README.md b/README.md index 9400c4e..1bf97ac 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Amazon Photos CLI -A Laravel-based CLI tool to find photos in your **Amazon Photos** library that are not classified in any album. +A Laravel-based CLI tool to find photos in your **Amazon Photos** library that are not classified in any album, and to detect possible duplicate photos using visual comparison. > **Note:** This project uses Amazon Photos' unofficial API (reverse-engineered). It requires manual cookie extraction from your browser session — there is no OAuth flow. @@ -9,7 +9,8 @@ A Laravel-based CLI tool to find photos in your **Amazon Photos** library that a ## Features - Detect all photos that do not belong to any album -- Four combinable filters: +- Detect duplicate photos using perceptual hashing ([sapientpro/image-comparator](https://github.com/sapientpro/image-comparator)) +- Combinable filters for both commands: - Skip photos already analyzed recently (`--skip-analyzed-days`) - Only analyze photos uploaded in the last N days (`--uploaded-last-days`) - Filter by upload date range (`--uploaded-between`) @@ -122,7 +123,90 @@ php artisan photos:find-unclassified \ --- -## CSV output format +## Find duplicate photos + +```bash +php artisan photos:find-duplicates +``` + +Detection works in two steps: + +1. **Candidate grouping** — photos are grouped by a lightweight metadata criterion (`--group-by`) to avoid comparing every photo against every other. +2. **Visual comparison** — each candidate pair is downloaded and compared using perceptual hashing. Only pairs whose similarity score meets the threshold (`--similarity`) are reported as duplicates. 
+ +### Grouping criteria (`--group-by`) + +| Value | Groups candidates when… | +|-------|--------------------------| +| `name` *(default)* | Two or more photos share the same file name | +| `taken-at` | Two or more photos share the exact same EXIF capture timestamp | +| `name-and-taken-at` | Same file name **and** same EXIF capture timestamp (strictest) | + +Photos without an EXIF date are excluded when using `taken-at` or `name-and-taken-at`. + +### Direct comparison by ID + +If you already know the IDs of two photos and want to compare them immediately — without fetching your entire library — pass both IDs as arguments: + +```bash +php artisan photos:find-duplicates {photo1-id} {photo2-id} +``` + +The command fetches both photos directly via `/nodes/{id}`, compares them visually, and reports the similarity percentage together with whether they meet the threshold. + +```bash +# Compare two specific photos (default 90% threshold) +php artisan photos:find-duplicates AbCdEf1234 XyZwVu5678 + +# Use a custom threshold +php artisan photos:find-duplicates AbCdEf1234 XyZwVu5678 --similarity=85 +``` + +### Examples + +```bash +# By file name (default), minimum 90% similarity +php artisan photos:find-duplicates + +# By capture date, stricter threshold +php artisan photos:find-duplicates --group-by=taken-at --similarity=95 + +# Strictest criteria, scoped to a date range +php artisan photos:find-duplicates --group-by=name-and-taken-at --taken-between=01/01/2024,31/12/2024 + +# Export to CSV +php artisan photos:find-duplicates --output=csv --csv-path=exports/duplicates.csv +``` + +### Available options + +| Option | Description | Default | +|--------|-------------|---------| +| `--group-by=CRITERIA` | Candidate grouping: `name`, `taken-at`, `name-and-taken-at` | `name` | +| `--similarity=N` | Minimum similarity percentage to confirm a duplicate (0–100) | `90` | +| `--uploaded-last-days=N` | Only photos uploaded in the last N days | disabled | +| `--uploaded-between=FROM,TO` | 
Only photos uploaded between two dates (`dd/mm/yyyy`) | — | +| `--taken-between=FROM,TO` | Only photos taken between two dates (`dd/mm/yyyy`, EXIF) | — | +| `--output=console\|csv` | Output format | `console` | +| `--csv-path=PATH` | CSV file path (relative to `storage/app`) | `amazon-photos/duplicates.csv` | + +### CSV output format (duplicates) + +| Column | Description | +|--------|-------------| +| `pair` | Pair number — the two rows with the same number are the duplicate pair | +| `similarity` | Visual similarity percentage (0–100) | +| `id` | Amazon Photos node ID | +| `name` | File name | +| `uploaded_at` | Upload timestamp (ISO 8601) | +| `taken_at` | Capture timestamp from EXIF (ISO 8601, empty if unavailable) | +| `url` | Temporary download URL | + +> **Note:** Comparison downloads each candidate image. On large libraries, narrow the scope with `--uploaded-between` or `--taken-between` to keep runs fast. + +--- + +## CSV output format (unclassified) | Column | Description | |--------|-------------| @@ -163,19 +247,23 @@ Logs are written to `storage/logs/laravel.log`. 
Set `LOG_LEVEL=debug` in `.env` ``` app/ ├── Console/Commands/ -│ └── FindUnclassifiedPhotosCommand.php # Main Artisan command +│ ├── FindUnclassifiedPhotosCommand.php # Artisan command: photos:find-unclassified +│ └── FindDuplicatePhotosCommand.php # Artisan command: photos:find-duplicates ├── DTOs/ │ ├── Photo.php # Immutable photo value object -│ └── Album.php # Immutable album value object +│ ├── Album.php # Immutable album value object +│ └── DuplicatePair.php # Confirmed duplicate pair with similarity score ├── Services/ │ ├── AmazonPhotos/ │ │ ├── AmazonPhotosClient.php # HTTP client (auth, retries) -│ │ ├── PhotoService.php # Fetch all photos (paginated) +│ │ ├── PhotoService.php # Fetch all photos (paginated) + fetch by ID │ │ └── AlbumService.php # Fetch albums + their children -│ └── Cache/ -│ └── AnalysisHistoryCache.php # File-based analysis history +│ ├── Cache/ +│ │ └── AnalysisHistoryCache.php # File-based analysis history +│ └── ImageComparatorService.php # Perceptual hash comparison wrapper └── Support/ ├── DateParser.php # European date format (dd/mm/yyyy) + ├── DuplicateDetector.php # Candidate grouping by metadata └── PhotoFilter.php # Apply all CLI filters config/ └── amazon-photos.php # Package configuration diff --git a/app/Console/Commands/FindDuplicatePhotosCommand.php b/app/Console/Commands/FindDuplicatePhotosCommand.php new file mode 100644 index 0000000..d991ca2 --- /dev/null +++ b/app/Console/Commands/FindDuplicatePhotosCommand.php @@ -0,0 +1,316 @@ +credentialsConfigured()) { + $this->error('Amazon Photos credentials are not configured. Please set AMAZON_PHOTOS_SESSION_ID, AMAZON_PHOTOS_UBID, and AMAZON_PHOTOS_AT in your .env file.'); + + return self::FAILURE; + } + + $photo1Id = $this->argument('photo1'); + $photo2Id = $this->argument('photo2'); + + if ($photo1Id !== null || $photo2Id !== null) { + if ($photo1Id === null || $photo2Id === null) { + $this->error('You must provide both photo IDs to compare. 
Usage: photos:find-duplicates {photo1} {photo2}'); + + return self::FAILURE; + } + + return $this->compareTwoPhotosById($photo1Id, $photo2Id, (float) $this->option('similarity')); + } + + $groupBy = $this->option('group-by'); + $validGroupBy = [DuplicateDetector::GROUP_BY_NAME, DuplicateDetector::GROUP_BY_TAKEN_AT, DuplicateDetector::GROUP_BY_NAME_AND_TAKEN_AT]; + if (! in_array($groupBy, $validGroupBy)) { + $this->error("Invalid --group-by value: \"{$groupBy}\". Use \"name\", \"taken-at\", or \"name-and-taken-at\"."); + + return self::FAILURE; + } + + $threshold = (float) $this->option('similarity'); + if ($threshold < 0 || $threshold > 100) { + $this->error('Invalid --similarity value. Must be between 0 and 100.'); + + return self::FAILURE; + } + + try { + $filters = $this->parseFilters(); + } catch (\InvalidArgumentException $e) { + $this->error($e->getMessage()); + + return self::FAILURE; + } + + Log::info('photos:find-duplicates started', ['group_by' => $groupBy, 'similarity' => $threshold, 'filters' => $filters]); + + // Step 1 — fetch all photos + $this->info('Fetching all photos...'); + $allPhotos = $this->withProgress( + 'Fetching photos', + fn () => $this->photoService->fetchAll() + ); + + // Step 2 — apply filters + $filteredPhotos = PhotoFilter::apply($allPhotos, $filters, $this->history); + $this->line(" → {$filteredPhotos->count()} photos after filters (of {$allPhotos->count()} total)."); + + // Step 3 — group candidates by metadata + $candidateGroups = DuplicateDetector::candidateGroups($filteredPhotos, $groupBy); + $totalCandidatePairs = $candidateGroups->sum(fn (Collection $g) => intdiv($g->count() * ($g->count() - 1), 2)); + $this->line(" → {$totalCandidatePairs} candidate pairs across {$candidateGroups->count()} groups (grouped by \"{$groupBy}\")."); + + if ($totalCandidatePairs === 0) { + $this->newLine(); + $this->info('No candidate pairs found. 
No duplicates to report.'); + + return self::SUCCESS; + } + + // Step 4 — compare each candidate pair visually + $this->newLine(); + $this->info('Comparing images… (downloads each candidate — may take a while)'); + $pairs = $this->compareCandidates($candidateGroups, $threshold); + + $this->newLine(); + $this->info("Found {$pairs->count()} duplicate pairs above {$threshold}% similarity."); + + if ($pairs->isEmpty()) { + $this->line('No visual duplicates confirmed.'); + + return self::SUCCESS; + } + + $output = $this->option('output'); + + if ($output === 'csv') { + return $this->exportCsv($pairs); + } + + $this->renderConsoleTable($pairs); + + return self::SUCCESS; + } + + private function compareTwoPhotosById(string $id1, string $id2, float $threshold): int + { + $this->info("Fetching photo \"{$id1}\"..."); + $a = $this->photoService->fetchById($id1); + + $this->info("Fetching photo \"{$id2}\"..."); + $b = $this->photoService->fetchById($id2); + + $this->info('Comparing images…'); + $similarity = $this->comparatorService->compare($a, $b); + + if ($similarity === null) { + $this->warn('Could not compare the two photos (missing URL or download failed).'); + + return self::FAILURE; + } + + $this->newLine(); + $this->renderConsoleTable(collect([new DuplicatePair($a, $b, $similarity)])); + $this->line("Similarity: {$similarity}% (threshold: {$threshold}%)"); + + if ($similarity >= $threshold) { + $this->info('These photos are duplicates.'); + } else { + $this->info('These photos are NOT duplicates.'); + } + + return self::SUCCESS; + } + + /** + * Compare all pairs within each candidate group and return confirmed duplicates. 
+ * + * @param Collection> $candidateGroups + * @return Collection + */ + private function compareCandidates(Collection $candidateGroups, float $threshold): Collection + { + $totalPairs = $candidateGroups->sum(fn (Collection $g) => intdiv($g->count() * ($g->count() - 1), 2)); + $bar = $this->output->createProgressBar($totalPairs); + $bar->setFormat(" %current%/%max% [%bar%] %percent:3s%% — %message%\n"); + $bar->setMessage('starting…'); + $bar->start(); + + $confirmed = collect(); + + $candidateGroups->each(function (Collection $group) use ($threshold, $bar, $confirmed) { + $photos = $group->values(); + + for ($i = 0; $i < $photos->count(); $i++) { + for ($j = $i + 1; $j < $photos->count(); $j++) { + $a = $photos[$i]; + $b = $photos[$j]; + + $bar->setMessage("\"{$a->name}\""); + $similarity = $this->comparatorService->compare($a, $b); + $bar->advance(); + + if ($similarity !== null && $similarity >= $threshold) { + $confirmed->push(new DuplicatePair($a, $b, $similarity)); + } + } + } + }); + + $bar->finish(); + + return $confirmed; + } + + /** + * @param Collection $pairs + */ + private function renderConsoleTable(Collection $pairs): void + { + $this->newLine(); + + $pairs->each(function (DuplicatePair $pair, int $index) { + $label = 'Pair '.($index + 1)." — {$pair->similarity}% similar"; + $this->line("{$label}"); + $this->table( + ['ID', 'Name', 'Uploaded At', 'Taken At', 'URL'], + collect([$pair->first, $pair->second])->map(fn (Photo $p) => [ + $p->id, + mb_strimwidth($p->name, 0, 50, '…'), + $p->uploadedAt->format('d/m/Y H:i'), + $p->takenAt?->format('d/m/Y H:i') ?? '—', + $p->url ? 
mb_strimwidth($p->url, 0, 60, '…') : '—', + ])->toArray() + ); + $this->newLine(); + }); + } + + /** + * @param Collection $pairs + */ + private function exportCsv(Collection $pairs): int + { + $path = $this->option('csv-path') ?: 'amazon-photos/duplicates.csv'; + + $rows = []; + $rows[] = implode(',', ['pair', 'similarity', 'id', 'name', 'uploaded_at', 'taken_at', 'url']); + + $pairs->each(function (DuplicatePair $pair, int $index) use (&$rows) { + $pairNumber = (string) ($index + 1); + $similarity = (string) $pair->similarity; + + foreach ([$pair->first, $pair->second] as $photo) { + $row = array_merge( + ['pair' => $pairNumber, 'similarity' => $similarity], + $photo->toCsvRow() + ); + $rows[] = implode(',', array_map( + fn (string $v) => '"'.str_replace('"', '""', $v).'"', + $row + )); + } + }); + + Storage::put($path, implode("\n", $rows)); + + $absolutePath = Storage::path($path); + $this->info("CSV exported to: {$absolutePath}"); + Log::info("CSV exported to: {$absolutePath}", ['pairs' => $pairs->count()]); + + return self::SUCCESS; + } + + /** + * @return array + */ + private function parseFilters(): array + { + $filters = []; + + $uploadedLastDays = (int) $this->option('uploaded-last-days'); + if ($uploadedLastDays > 0) { + $filters['uploaded_last_days'] = $uploadedLastDays; + } + + $uploadedBetween = $this->option('uploaded-between'); + if (! empty($uploadedBetween)) { + $filters['uploaded_between'] = DateParser::parseBetween($uploadedBetween); + } + + $takenBetween = $this->option('taken-between'); + if (! 
empty($takenBetween)) { + $filters['taken_between'] = DateParser::parseBetween($takenBetween); + } + + return $filters; + } + + /** + * @template T + * + * @param callable(): T $callable + * @return T + */ + private function withProgress(string $label, callable $callable): mixed + { + $bar = $this->output->createProgressBar(); + $bar->setFormat(" %message% [%bar%] %elapsed:6s%\n"); + $bar->setMessage($label); + $bar->start(); + + $result = $callable(); + + $bar->finish(); + + return $result; + } + + private function credentialsConfigured(): bool + { + return ! empty(config('amazon-photos.session_id')) + && ! empty(config('amazon-photos.ubid')) + && ! empty(config('amazon-photos.at')); + } +} diff --git a/app/DTOs/DuplicatePair.php b/app/DTOs/DuplicatePair.php new file mode 100644 index 0000000..9140aa9 --- /dev/null +++ b/app/DTOs/DuplicatePair.php @@ -0,0 +1,12 @@ +client->get("/nodes/{$id}", [ + 'tempLink' => 'true', + 'resourceVersion' => 'V2', + 'ContentType' => 'JSON', + ]); + + return Photo::fromApiResponse($response); + } + /** * Fetch all photos from Amazon Photos (paginates automatically). 
* diff --git a/app/Services/ImageComparatorService.php b/app/Services/ImageComparatorService.php new file mode 100644 index 0000000..2acc93a --- /dev/null +++ b/app/Services/ImageComparatorService.php @@ -0,0 +1,43 @@ +url === null || $b->url === null) { + Log::debug('Skipping image comparison: one or both photos have no URL.', [ + 'photo_a' => $a->id, + 'photo_b' => $b->id, + ]); + + return null; + } + + try { + return $this->comparator->compare($a->url, $b->url); + } catch (ImageResourceException $e) { + Log::warning('Image comparison failed: could not load image resource.', [ + 'photo_a' => $a->id, + 'photo_b' => $b->id, + 'error' => $e->getMessage(), + ]); + + return null; + } + } +} diff --git a/app/Support/DuplicateDetector.php b/app/Support/DuplicateDetector.php new file mode 100644 index 0000000..937a99b --- /dev/null +++ b/app/Support/DuplicateDetector.php @@ -0,0 +1,51 @@ + $photos + * @param string $groupBy One of: 'name', 'taken-at', 'name-and-taken-at' + * @return Collection> + */ + public static function candidateGroups(Collection $photos, string $groupBy = self::GROUP_BY_NAME): Collection + { + $eligible = match ($groupBy) { + self::GROUP_BY_TAKEN_AT, self::GROUP_BY_NAME_AND_TAKEN_AT => $photos->filter( + fn (Photo $p) => $p->takenAt !== null + ), + default => $photos, + }; + + return $eligible + ->groupBy(fn (Photo $p) => self::key($p, $groupBy)) + ->filter(fn (Collection $group) => $group->count() > 1) + ->values(); + } + + private static function key(Photo $photo, string $groupBy): string + { + return match ($groupBy) { + self::GROUP_BY_TAKEN_AT => $photo->takenAt->toIso8601String(), + self::GROUP_BY_NAME_AND_TAKEN_AT => $photo->name.'|'.$photo->takenAt->toIso8601String(), + default => $photo->name, + }; + } +} diff --git a/composer.json b/composer.json index c38fc94..3ddcbdc 100644 --- a/composer.json +++ b/composer.json @@ -14,7 +14,8 @@ "laravel/fortify": "^1.34", "laravel/framework": "^13.0", "laravel/tinker": "^3.0", - "laravel/wayfinder": 
"^0.1.14" + "laravel/wayfinder": "^0.1.14", + "sapientpro/image-comparator": "*" }, "require-dev": { "fakerphp/faker": "^1.24", diff --git a/composer.lock b/composer.lock index a1cbcf4..9964594 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "d227c5575f3ada2cda4908c88c2e2050", + "content-hash": "07fa5933c9513b89d38c8489b8a7cbf7", "packages": [ { "name": "bacon/bacon-qr-code", @@ -3770,6 +3770,52 @@ }, "time": "2025-12-14T04:43:48+00:00" }, + { + "name": "sapientpro/image-comparator", + "version": "v1.0.1", + "source": { + "type": "git", + "url": "https://github.com/sapientpro/image-comparator.git", + "reference": "19219b83f4a729e29baae2c490bb8f032cb2d0d7" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sapientpro/image-comparator/zipball/19219b83f4a729e29baae2c490bb8f032cb2d0d7", + "reference": "19219b83f4a729e29baae2c490bb8f032cb2d0d7", + "shasum": "" + }, + "require": { + "ext-gd": "*", + "php": "^8.1" + }, + "require-dev": { + "phpunit/phpunit": "10.0.0", + "squizlabs/php_codesniffer": "3.7.2" + }, + "type": "library", + "autoload": { + "psr-4": { + "SapientPro\\ImageComparator\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "SapientPro", + "email": "info@sapient.pro", + "homepage": "https://sapient.pro/" + } + ], + "description": "Compare images using PHP", + "support": { + "issues": "https://github.com/sapientpro/image-comparator/issues", + "source": "https://github.com/sapientpro/image-comparator/tree/v1.0.1" + }, + "time": "2023-04-27T07:54:28+00:00" + }, { "name": "symfony/clock", "version": "v8.0.8", diff --git a/tests/Feature/FindDuplicatePhotosTest.php b/tests/Feature/FindDuplicatePhotosTest.php new file mode 100644 index 0000000..ecf38f4 --- /dev/null +++ 
b/tests/Feature/FindDuplicatePhotosTest.php @@ -0,0 +1,397 @@ +withValidCredentials(); + } + + // ------------------------------------------------------------------------- + // Happy path: visual comparison confirms duplicates + // ------------------------------------------------------------------------- + + public function test_duplicate_pair_is_confirmed_when_similarity_meets_threshold(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', url: 'https://example.com/2.jpg'); + $c = $this->makePhoto('photo-3', name: 'IMG_9999.jpg', url: 'https://example.com/3.jpg'); + + $this->mockPhotoService(collect([$a, $b, $c])); + $this->mockComparatorService([[$a, $b, 95.0]]); + + $this->artisan('photos:find-duplicates') + ->assertSuccessful() + ->expectsOutputToContain('1 duplicate pairs'); + } + + // ------------------------------------------------------------------------- + // Pair below threshold is excluded + // ------------------------------------------------------------------------- + + public function test_pair_below_threshold_is_not_reported(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', url: 'https://example.com/2.jpg'); + + $this->mockPhotoService(collect([$a, $b])); + $this->mockComparatorService([[$a, $b, 60.0]]); + + $this->artisan('photos:find-duplicates') + ->assertSuccessful() + ->expectsOutputToContain('No visual duplicates confirmed'); + } + + // ------------------------------------------------------------------------- + // Custom similarity threshold + // ------------------------------------------------------------------------- + + public function test_custom_similarity_threshold_is_applied(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 
'IMG_1234.jpg', url: 'https://example.com/2.jpg'); + + $this->mockPhotoService(collect([$a, $b])); + $this->mockComparatorService([[$a, $b, 75.0]]); + + // 75% is below the default 90 but above our custom 70 + $this->artisan('photos:find-duplicates --similarity=70') + ->assertSuccessful() + ->expectsOutputToContain('1 duplicate pairs'); + } + + // ------------------------------------------------------------------------- + // Photos without URL are skipped + // ------------------------------------------------------------------------- + + public function test_photos_without_url_are_skipped(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', url: null); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', url: null); + + $this->mockPhotoService(collect([$a, $b])); + // Comparator returns null (skipped) — service is still called but returns null + $this->mockComparatorService([[$a, $b, null]]); + + $this->artisan('photos:find-duplicates') + ->assertSuccessful() + ->expectsOutputToContain('No visual duplicates confirmed'); + } + + // ------------------------------------------------------------------------- + // No candidate pairs found (all names unique) + // ------------------------------------------------------------------------- + + public function test_reports_no_candidates_when_all_names_are_unique(): void + { + $this->mockPhotoService(collect([ + $this->makePhoto('photo-1', name: 'IMG_001.jpg'), + $this->makePhoto('photo-2', name: 'IMG_002.jpg'), + ])); + + $this->artisan('photos:find-duplicates') + ->assertSuccessful() + ->expectsOutputToContain('No candidate pairs found'); + } + + // ------------------------------------------------------------------------- + // group-by: taken-at + // ------------------------------------------------------------------------- + + public function test_candidates_grouped_by_taken_at(): void + { + $takenAt = Carbon::parse('2024-06-15 10:00:00'); + $a = $this->makePhoto('photo-1', name: 'IMG_001.jpg', takenAt: 
$takenAt, url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 'IMG_002.jpg', takenAt: $takenAt, url: 'https://example.com/2.jpg'); + + $this->mockPhotoService(collect([$a, $b])); + $this->mockComparatorService([[$a, $b, 92.0]]); + + $this->artisan('photos:find-duplicates --group-by=taken-at') + ->assertSuccessful() + ->expectsOutputToContain('1 duplicate pairs'); + } + + // ------------------------------------------------------------------------- + // group-by: name-and-taken-at + // ------------------------------------------------------------------------- + + public function test_candidates_grouped_by_name_and_taken_at(): void + { + $takenAt = Carbon::parse('2024-06-15 10:00:00'); + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', takenAt: $takenAt, url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', takenAt: $takenAt, url: 'https://example.com/2.jpg'); + // Same name but different takenAt — different candidate group, no comparison + $c = $this->makePhoto('photo-3', name: 'IMG_1234.jpg', takenAt: Carbon::parse('2020-01-01'), url: 'https://example.com/3.jpg'); + + $this->mockPhotoService(collect([$a, $b, $c])); + $this->mockComparatorService([[$a, $b, 97.0]]); + + $this->artisan('photos:find-duplicates --group-by=name-and-taken-at') + ->assertSuccessful() + ->expectsOutputToContain('1 duplicate pairs'); + } + + // ------------------------------------------------------------------------- + // CSV export + // ------------------------------------------------------------------------- + + public function test_csv_is_exported_with_pair_and_similarity_columns(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', url: 'https://example.com/2.jpg'); + + $this->mockPhotoService(collect([$a, $b])); + $this->mockComparatorService([[$a, $b, 95.0]]); + + $this->artisan('photos:find-duplicates 
--output=csv') + ->assertSuccessful(); + + Storage::assertExists('amazon-photos/duplicates.csv'); + + $csv = Storage::get('amazon-photos/duplicates.csv'); + $this->assertStringContainsString('pair,similarity,id,name,uploaded_at,taken_at,url', $csv); + $this->assertStringContainsString('photo-1', $csv); + $this->assertStringContainsString('photo-2', $csv); + $this->assertStringContainsString('95', $csv); + } + + public function test_csv_custom_path_is_used(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', url: 'https://example.com/2.jpg'); + + $this->mockPhotoService(collect([$a, $b])); + $this->mockComparatorService([[$a, $b, 95.0]]); + + $this->artisan('photos:find-duplicates --output=csv --csv-path=custom/dupes.csv') + ->assertSuccessful(); + + Storage::assertExists('custom/dupes.csv'); + } + + // ------------------------------------------------------------------------- + // Filters + // ------------------------------------------------------------------------- + + public function test_uploaded_last_days_filter_excludes_older_photos(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', uploadedAt: Carbon::now()->subDays(3)); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', uploadedAt: Carbon::now()->subDays(3)); + $old = $this->makePhoto('photo-3', name: 'IMG_1234.jpg', uploadedAt: Carbon::now()->subDays(60)); + + $this->mockPhotoService(collect([$a, $b, $old])); + $this->mockComparatorService([[$a, $b, 95.0]]); + + $this->artisan('photos:find-duplicates --uploaded-last-days=7') + ->assertSuccessful() + ->expectsOutputToContain('1 duplicate pairs'); + } + + public function test_uploaded_between_filter(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', uploadedAt: Carbon::createFromFormat('d/m/Y', '10/01/2024')); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', uploadedAt: Carbon::createFromFormat('d/m/Y', 
'20/01/2024')); + $out = $this->makePhoto('photo-3', name: 'IMG_1234.jpg', uploadedAt: Carbon::createFromFormat('d/m/Y', '01/03/2024')); + + $this->mockPhotoService(collect([$a, $b, $out])); + $this->mockComparatorService([[$a, $b, 95.0]]); + + $this->artisan('photos:find-duplicates --uploaded-between=01/01/2024,31/01/2024') + ->assertSuccessful() + ->expectsOutputToContain('1 duplicate pairs'); + } + + public function test_taken_between_filter(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_1234.jpg', takenAt: Carbon::createFromFormat('d/m/Y', '10/06/2023')); + $b = $this->makePhoto('photo-2', name: 'IMG_1234.jpg', takenAt: Carbon::createFromFormat('d/m/Y', '20/06/2023')); + $out = $this->makePhoto('photo-3', name: 'IMG_1234.jpg', takenAt: Carbon::createFromFormat('d/m/Y', '10/01/2020')); + + $this->mockPhotoService(collect([$a, $b, $out])); + $this->mockComparatorService([[$a, $b, 95.0]]); + + $this->artisan('photos:find-duplicates --taken-between=01/01/2023,31/12/2023') + ->assertSuccessful() + ->expectsOutputToContain('1 duplicate pairs'); + } + + // ------------------------------------------------------------------------- + // Direct comparison by ID + // ------------------------------------------------------------------------- + + public function test_compares_two_photos_by_id_when_both_arguments_are_given(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_001.jpg', url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 'IMG_002.jpg', url: 'https://example.com/2.jpg'); + + $mock = $this->createMock(PhotoService::class); + $mock->method('fetchById') + ->willReturnMap([['photo-1', $a], ['photo-2', $b]]); + $this->app->instance(PhotoService::class, $mock); + + $this->mockComparatorService([[$a, $b, 92.0]]); + + $this->artisan('photos:find-duplicates photo-1 photo-2') + ->assertSuccessful() + ->expectsOutputToContain('92%') + ->expectsOutputToContain('These photos are duplicates'); + } + + public function 
test_direct_comparison_reports_not_duplicate_when_below_threshold(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_001.jpg', url: 'https://example.com/1.jpg'); + $b = $this->makePhoto('photo-2', name: 'IMG_002.jpg', url: 'https://example.com/2.jpg'); + + $mock = $this->createMock(PhotoService::class); + $mock->method('fetchById') + ->willReturnMap([['photo-1', $a], ['photo-2', $b]]); + $this->app->instance(PhotoService::class, $mock); + + $this->mockComparatorService([[$a, $b, 50.0]]); + + $this->artisan('photos:find-duplicates photo-1 photo-2') + ->assertSuccessful() + ->expectsOutputToContain('NOT duplicates'); + } + + public function test_direct_comparison_fails_when_only_one_id_is_given(): void + { + $this->artisan('photos:find-duplicates photo-1') + ->assertFailed() + ->expectsOutputToContain('both photo IDs'); + } + + public function test_direct_comparison_fails_when_comparison_returns_null(): void + { + $a = $this->makePhoto('photo-1', name: 'IMG_001.jpg', url: null); + $b = $this->makePhoto('photo-2', name: 'IMG_002.jpg', url: null); + + $mock = $this->createMock(PhotoService::class); + $mock->method('fetchById') + ->willReturnMap([['photo-1', $a], ['photo-2', $b]]); + $this->app->instance(PhotoService::class, $mock); + + $this->mockComparatorService([[$a, $b, null]]); + + $this->artisan('photos:find-duplicates photo-1 photo-2') + ->assertFailed() + ->expectsOutputToContain('Could not compare'); + } + + // ------------------------------------------------------------------------- + // Validation errors + // ------------------------------------------------------------------------- + + public function test_fails_when_credentials_are_missing(): void + { + config(['amazon-photos.session_id' => '', 'amazon-photos.ubid' => '', 'amazon-photos.at' => '']); + + $this->artisan('photos:find-duplicates') + ->assertFailed() + ->expectsOutputToContain('credentials are not configured'); + } + + public function test_fails_with_invalid_group_by_option(): void + { + 
$this->mockPhotoService(collect()); + + $this->artisan('photos:find-duplicates --group-by=invalid') + ->assertFailed() + ->expectsOutputToContain('Invalid --group-by'); + } + + public function test_fails_with_similarity_out_of_range(): void + { + $this->mockPhotoService(collect()); + + $this->artisan('photos:find-duplicates --similarity=150') + ->assertFailed() + ->expectsOutputToContain('Invalid --similarity'); + } + + public function test_fails_with_invalid_date_format(): void + { + $this->artisan('photos:find-duplicates --uploaded-between=2024-01-01,2024-01-31') + ->assertFailed() + ->expectsOutputToContain('Invalid'); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private function makePhoto( + string $id, + string $name = 'image.jpg', + ?Carbon $uploadedAt = null, + ?Carbon $takenAt = null, + ?string $url = null, + ): Photo { + return new Photo( + id: $id, + name: $name, + uploadedAt: $uploadedAt ?? Carbon::now(), + takenAt: $takenAt, + url: $url, + parentIds: [], + ); + } + + /** + * @param Collection $photos + */ + private function mockPhotoService(Collection $photos): void + { + $mock = $this->createMock(PhotoService::class); + $mock->method('fetchAll')->willReturn($photos); + $this->app->instance(PhotoService::class, $mock); + } + + /** + * Mock ImageComparatorService. + * $expectations is an array of [Photo $a, Photo $b, float|null $returnValue]. 
+ * + * Pairs are matched by photo id in argument order; any pair that is not listed yields null. + * + * @param array $expectations + */ + private function mockComparatorService(array $expectations): void + { + $mock = $this->createMock(ImageComparatorService::class); + + $mock->method('compare')->willReturnCallback( + function (Photo $a, Photo $b) use ($expectations): ?float { + foreach ($expectations as [$expA, $expB, $result]) { + if ($a->id === $expA->id && $b->id === $expB->id) { + return $result; + } + } + + return null; + } + ); + + $this->app->instance(ImageComparatorService::class, $mock); + } + /** + * Sets the amazon-photos session_id, ubid, at and tld config entries to placeholder test values. + */ + private function withValidCredentials(): void + { + config([ + 'amazon-photos.session_id' => 'test-session-id', + 'amazon-photos.ubid' => 'test-ubid', + 'amazon-photos.at' => 'test-at', + 'amazon-photos.tld' => 'com', + ]); + } +} diff --git a/tests/Unit/DuplicateDetectorTest.php b/tests/Unit/DuplicateDetectorTest.php new file mode 100644 index 0000000..f011420 --- /dev/null +++ b/tests/Unit/DuplicateDetectorTest.php @@ -0,0 +1,133 @@ +photo('p1', 'IMG_1234.jpg'), + $this->photo('p2', 'IMG_1234.jpg'), + $this->photo('p3', 'IMG_9999.jpg'), + ])); + + $this->assertCount(1, $groups); + $this->assertCount(2, $groups->first()); + } + /** + * Two photos with different names must produce no candidate groups. + */ + public function test_returns_empty_when_no_name_duplicates(): void + { + $groups = DuplicateDetector::candidateGroups(collect([ + $this->photo('p1', 'IMG_001.jpg'), + $this->photo('p2', 'IMG_002.jpg'), + ])); + + $this->assertCount(0, $groups); + } + /** + * Two names that each appear twice give two groups; the single IMG_CCC.jpg photo adds none. + */ + public function test_groups_multiple_name_candidate_sets(): void + { + $groups = DuplicateDetector::candidateGroups(collect([ + $this->photo('p1', 'IMG_AAA.jpg'), + $this->photo('p2', 'IMG_AAA.jpg'), + $this->photo('p3', 'IMG_BBB.jpg'), + $this->photo('p4', 'IMG_BBB.jpg'), + $this->photo('p5', 'IMG_CCC.jpg'), + ])); + + $this->assertCount(2, $groups); + } + /** + * candidateGroups() called without a grouping argument must still group two same-named photos. + */ + public function test_defaults_to_group_by_name(): void + { + $groups = DuplicateDetector::candidateGroups(collect([ + $this->photo('p1', 'IMG_1234.jpg'), + $this->photo('p2', 'IMG_1234.jpg'), + ])); + + $this->assertCount(1, 
$groups); + } + + // ------------------------------------------------------------------------- + // GROUP_BY_TAKEN_AT + // ------------------------------------------------------------------------- + /** + * Two photos sharing one takenAt form a single group of two; the photo taken an hour later is left out. + */ + public function test_groups_photos_with_same_taken_at(): void + { + $takenAt = Carbon::parse('2024-06-15 10:00:00'); + + $groups = DuplicateDetector::candidateGroups(collect([ + $this->photo('p1', 'IMG_001.jpg', $takenAt), + $this->photo('p2', 'IMG_002.jpg', $takenAt), + $this->photo('p3', 'IMG_003.jpg', Carbon::parse('2024-06-15 11:00:00')), + ]), DuplicateDetector::GROUP_BY_TAKEN_AT); + + $this->assertCount(1, $groups); + $this->assertCount(2, $groups->first()); + } + /** + * Photos whose takenAt is null must produce no groups under GROUP_BY_TAKEN_AT. + */ + public function test_excludes_photos_without_taken_at_when_grouping_by_taken_at(): void + { + $groups = DuplicateDetector::candidateGroups(collect([ + $this->photo('p1', 'IMG_001.jpg', null), + $this->photo('p2', 'IMG_002.jpg', null), + ]), DuplicateDetector::GROUP_BY_TAKEN_AT); + + $this->assertCount(0, $groups); + } + + // ------------------------------------------------------------------------- + // GROUP_BY_NAME_AND_TAKEN_AT + // ------------------------------------------------------------------------- + /** + * Under GROUP_BY_NAME_AND_TAKEN_AT only the two photos matching on both name and takenAt are grouped. + */ + public function test_groups_photos_with_same_name_and_taken_at(): void + { + $takenAt = Carbon::parse('2024-06-15 10:00:00'); + + $groups = DuplicateDetector::candidateGroups(collect([ + $this->photo('p1', 'IMG_1234.jpg', $takenAt), + $this->photo('p2', 'IMG_1234.jpg', $takenAt), + // Same name but different takenAt — separate group, not returned (only 1 photo) + $this->photo('p3', 'IMG_1234.jpg', Carbon::parse('2020-01-01 00:00:00')), + ]), DuplicateDetector::GROUP_BY_NAME_AND_TAKEN_AT); + + $this->assertCount(1, $groups); + $this->assertCount(2, $groups->first()); + } + /** + * Same-named photos whose takenAt is null must produce no groups under GROUP_BY_NAME_AND_TAKEN_AT. + */ + public function test_excludes_photos_without_taken_at_when_grouping_by_name_and_taken_at(): void + { + $groups = DuplicateDetector::candidateGroups(collect([ + $this->photo('p1', 'IMG_1234.jpg', null), + $this->photo('p2', 
'IMG_1234.jpg', null), + ]), DuplicateDetector::GROUP_BY_NAME_AND_TAKEN_AT); + + $this->assertCount(0, $groups); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + /** + * Builds a Photo fixture with uploadedAt set to Carbon::now(), a null url and empty parentIds. + */ + private function photo(string $id, string $name, ?Carbon $takenAt = null): Photo + { + return new Photo( + id: $id, + name: $name, + uploadedAt: Carbon::now(), + takenAt: $takenAt, + url: null, + parentIds: [], + ); + } +}