diff --git a/.github/workflows/ecosystem-sync.yml b/.github/workflows/ecosystem-sync.yml new file mode 100644 index 0000000..eb27c77 --- /dev/null +++ b/.github/workflows/ecosystem-sync.yml @@ -0,0 +1,94 @@ +name: Ecosystem Cross-Repo Sync + +on: + issues: + types: [labeled] + +jobs: + create-partner-issue: + if: github.event.label.name == 'ecosystem' + runs-on: ubuntu-latest + steps: + - name: Extract cross-repo references + id: extract + uses: actions/github-script@v7 + with: + script: | + const body = context.payload.issue.body || ''; + const title = context.payload.issue.title || ''; + const issueNumber = context.payload.issue.number; + const thisRepo = `${context.repo.owner}/${context.repo.repo}`; + + // Find references to other deucebucket repos: deucebucket/repo-name#123 + const refPattern = /deucebucket\/([a-zA-Z0-9_-]+)#(\d+)/g; + const refs = []; + let match; + while ((match = refPattern.exec(body)) !== null) { + const targetRepo = `deucebucket/${match[1]}`; + if (targetRepo !== thisRepo) { + refs.push({ repo: match[1], number: parseInt(match[2]) }); + } + } + + // Find partner repos mentioned but without existing issues + const repoPattern = /deucebucket\/([a-zA-Z0-9_-]+)/g; + const partnerRepos = new Set(); + while ((match = repoPattern.exec(body)) !== null) { + const repo = match[1]; + if (`deucebucket/${repo}` !== thisRepo) { + partnerRepos.add(repo); + } + } + + core.setOutput('has_refs', refs.length > 0 ? 'true' : 'false'); + core.setOutput('partner_repos', JSON.stringify([...partnerRepos])); + core.setOutput('this_repo', context.repo.repo); + core.setOutput('issue_number', issueNumber); + core.setOutput('issue_title', title); + + - name: Comment with ecosystem links + if: steps.extract.outputs.has_refs == 'true' + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.ECOSYSTEM_PAT }} + script: | + const partnerRepos = JSON.parse('${{ steps.extract.outputs.partner_repos }}'); + const thisRepo = '${{ steps.extract.outputs.this_repo }}'; + const issueNumber = ${{ steps.extract.outputs.issue_number }}; + const issueTitle = '${{ steps.extract.outputs.issue_title }}'; + + for (const repo of partnerRepos) { + // Check if a tracking comment already exists in the partner repo's referenced issue + const body = context.payload.issue.body || ''; + const refMatch = body.match(new RegExp(`deucebucket/${repo}#(\\d+)`)); + + if (refMatch) { + const partnerIssueNumber = parseInt(refMatch[1]); + try { + // Add a cross-reference comment on the partner issue + const comments = await github.rest.issues.listComments({ + owner: 'deucebucket', + repo: repo, + issue_number: partnerIssueNumber + }); + + const alreadyLinked = comments.data.some(c => + c.body.includes(`deucebucket/${thisRepo}#${issueNumber}`) + ); + + if (!alreadyLinked) { + await github.rest.issues.createComment({ + owner: 'deucebucket', + repo: repo, + issue_number: partnerIssueNumber, + body: `### Ecosystem Link\n\nThis issue is linked to deucebucket/${thisRepo}#${issueNumber} — **${issueTitle}**\n\nBoth issues are tracked on [The Mead Hall](https://github.com/users/deucebucket/projects/1) project board.` + }); + console.log(`Linked ${repo}#${partnerIssueNumber} <-> ${thisRepo}#${issueNumber}`); + } else { + console.log(`Already linked: ${repo}#${partnerIssueNumber}`); + } + } catch (err) { + console.log(`Could not comment on ${repo}#${partnerIssueNumber}: ${err.message}`); + } + } + } diff --git a/CHANGELOG.md b/CHANGELOG.md index 226fac1..07491bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,271 @@ All 
notable changes to Library Manager will be documented in this file. +## [0.9.0-beta.149] - 2026-04-17 + +### Fixed + +- **Issue #211: Watch-folder failure tracking silently dropped** — Three + `INSERT` statements in `process_watch_folder` (`app.py:6906`, `6914`, + `6944`) referenced `added_at` on the `books` table, but the schema column + is `created_at`. Every insert raised `sqlite3.OperationalError: table + books has no column named added_at`. Two `except` blocks caught it with + `logger.debug`, hiding the error at default log levels. Effects: + - Successful watch-folder moves never produced a `pending` or + `needs_attention` row in the books table. + - Failed watch-folder moves never produced a `watch_folder_error` row — + users had no UI surface for the failure, only a log line. + - Fix: renamed `added_at` to `created_at` in all three INSERTs; + raised both swallow-except blocks from `logger.debug` to + `logger.warning(..., exc_info=True)` so the same class of silent + failure can't rot unnoticed again. + - Surfaced during live testing of #209. Bug has existed since the + watch-folder feature was introduced. + +--- + +## [0.9.0-beta.148] - 2026-04-17 + +### Fixed + +- **Issue #208: Watch-folder retry loop survives restarts** — The watch-folder + worker used an in-memory `set()` to remember which files it had already + processed. Every LM restart wiped the set, so whenever a file couldn't be + processed (unknown author, ambiguous match, move failure, mtime churn), the + worker would re-submit it on every scan forever. Server-side evidence showed + one LM instance generating ~48% of all Skaldleita `/match` traffic — 2,840 + requests in a single day on the same filename. Fix: + - New `watch_folder_processed` SQLite table (`path`, `processed_at`, + `outcome`, `error_message`) persists dedup across restarts. `outcome` + values: `moved`, `move_failed`, `aborted_by_server`. + - Added `watch_folder_is_processed()` / `watch_folder_mark_processed()` + helpers in `library_manager/database.py`; watch worker switched from + `set()` ops to these helpers. +- **Issue #208: Skaldleita `server_notice` handler** — Skaldleita responses + can now carry a `server_notice` block (severity/code/message/action/ + upgrade_url). `library_manager/providers/bookdb.py` logs every notice + (with upgrade URL) and, on `action=abort_task`, stashes it in a + `threading.local()` slot. The watch-folder worker reads that slot after + each identify attempt and, if an abort was signalled, marks the item as + `aborted_by_server` and skips the rest of the pipeline — no 30-second + retry loop. + +--- + +## [0.9.0-beta.147] - 2026-04-17 + +### Fixed + +- **Issue #209: Hard link failure silently copies and deletes originals** — When + `Use hard links` was enabled and the watch folder and library lived on different + filesystems, `os.link()` raised `EXDEV` and the code silently fell back to + `shutil.copy2()` followed by deleting each original file. That destroyed the + source data (breaking torrent seeds, doubling disk use, violating the user's + explicit "hard link" preference). Fix: + - Added a filesystem-compatibility pre-check at the start of + `move_to_output_folder`. When hard links are requested but source and library + are on different `st_dev`s, the function returns a clear, actionable error + ("Move your library to the same volume as the watch folder, or disable 'Use + hard links' in Settings") and does not touch source files. 
+ - Removed the EXDEV copy+delete fallback from both the single-file and + directory-loop branches. Remaining `OSError`s (permission, `ENOSPC`, etc.) + propagate to the outer handler with source files intact, and the watch + worker records the failure as `watch_folder_error` with the error message + visible in the UI. + - Reported by `@kyleviloria` — files weren't lost because copies still existed + at the library destination, but the deletion of originals broke their + download workflow and burned disk. + +--- + +## [0.9.0-beta.146] - 2026-04-07 + +### Added + +- **Issue #110: Folder triage UI** - Dashboard now shows messy/garbage folder counts in an + info banner. Library view displays triage badges (Messy/Garbage) on affected books. Added + Settings toggle to enable/disable folder triage. Triage data now included in "all" library + view API responses. Split push corrections feature to #205 (blocked on Skaldleita). + +--- + +## [0.9.0-beta.145] - 2026-04-07 + +### Added + +- **Issue #203: Plugin system documentation and discoverability** - Added Python drop-in + plugin guide with manifest.json and BasePlugin interface examples directly in the Plugins + settings tab. Added secrets management card explaining secrets.json usage for Docker and + bare metal. Added ready-to-use API configurations for Google Books and Open Library. + Shipped example-logger plugin to `examples/plugins/` with comprehensive README covering + plugin creation, manifest fields, BasePlugin interface, configuration, and behavior. + Added new hint entries for plugin-related tooltips. + +--- + +## [0.9.0-beta.144] - 2026-04-07 + +### Fixed + +- **Issue #201: SAFETY BLOCK error for files in library root** - Fixed path normalization + mismatch where Windows mapped drives (e.g. `R:\`) resolve to UNC paths but config paths + were compared without `.resolve()`, causing library matching to fail. Fixed fallback logic + that assumed 2-level directory structure (`parent.parent`), which went above the library + root for loose files. Applied fix across all 4 path-matching locations in `app.py`, + `layer_ai_queue.py`, and `layer_audio_credits.py`. + +--- + +## [0.9.0-beta.143] - 2026-03-21 + +### Changed + +- **Issue #198: Comprehensive UI overhaul** - Extracted 728 lines of inline CSS from base.html + into `static/css/style.css` with CSS custom properties design system (spacing scale, border + radius tokens, transition timing). Consolidated duplicate `escapeHtml()` and `showToast()` + helpers from 5 templates into `static/js/common.js`. Reorganized Settings from 7 tabs + (Library, Processing, AI Setup, Safety, Advanced, Post-Processing, Plugins) into 4 tabs + (Library, Engine, Pipeline, Integrations) with section headers. Added mobile responsive + breakpoints for tables, nav-tabs, cards, and stat numbers. Replaced hardcoded hex colors + with CSS variables throughout all templates. Changed accent success color from `#00ff00` + to `#2ecc71` for professional appearance. Added sticky settings save bar with backdrop blur. + Replaced all inline `font-size` styles with utility classes (`fs-icon-lg`, `fs-icon-xl`). + Setup wizard styles extracted with `setup-mode` body class for navbar hiding. All modal + backgrounds now use theme CSS variables instead of hardcoded `#16213e`. + +--- + +## [0.9.0-beta.142] - 2026-03-21 + +### Added + +- **Issue #188: Drop-in Python plugin system** - New plugin loader that discovers and loads + Python plugins from a configurable directory (`/data/plugins` for Docker). 
Plugins extend + a simple `BasePlugin` class with `setup()`, `can_process()`, `process()`, and `teardown()` + methods. The loader handles manifest validation, dynamic module importing via `importlib`, + exception isolation (bad plugins never crash the app), timeout enforcement via + `ThreadPoolExecutor`, and deep-copying book data before passing to plugins. Each plugin is + wrapped in a `PluginAdapter` that implements the `LayerAdapter` interface, making plugins + fully compatible with the modular pipeline orchestrator. Plugins are registered in the + `LayerRegistry` and tracked by the existing health dashboard with auto-disable circuit + breaker support. + +- **Plugin manifest system** - Each plugin requires a `manifest.json` with metadata (id, + name, version, description), entry point configuration, ordering, and dependency + declarations (required config keys and secrets). Manifests are strictly validated on + discovery -- invalid plugins are logged as warnings and skipped. + +- **Plugin configuration** - New `plugin_dir` config key (default: `/data/plugins`) and + `plugin_configs` dict for per-plugin configuration overrides. Plugin-specific secrets are + read from `secrets.json`. + +- **Example plugin** - Template plugin at `test-env/example-plugin/` demonstrating the + `BasePlugin` interface with manifest.json and a simple logging implementation. + +--- + +## [0.9.0-beta.141] - 2026-03-21 + +### Added + +- **Issue #189: Plugin Health Dashboard** - New health monitoring section in the Plugins tab + showing real-time status for each custom API source. Tracks success rate (last 50 runs), + average response time, items processed/resolved, and last run timestamp. Health cards use + color-coded status indicators (green=active, yellow=errored, red=auto-disabled). Expandable + error logs show the 5 most recent failures per plugin with timestamps. Full metric log modal + shows the last 20 execution entries with status, duration, and error details. + +- **Auto-disable circuit breaker** - Plugins are automatically disabled after 5 consecutive + failures to prevent repeated errors from slowing the pipeline. Auto-disabled plugins show + a red status badge and a "Re-enable" button that resets the failure counter and re-enables + the layer. Toast notification logged when a plugin is auto-disabled. + +- **Plugin metrics recording** - New `plugin_metrics` database table tracks every custom + layer execution with timestamp, success/failure, duration, error message, and item counts. + Metrics are recorded automatically after each `CustomApiLayer.run()` batch with minimal + overhead (single INSERT, no aggregation on write path). Three new API endpoints: + `GET /api/plugins/health` (aggregated stats), `GET /api/plugins/health//logs` + (last 20 entries), `POST /api/plugins/health//reset` (re-enable disabled plugin). + +--- + +## [0.9.0-beta.140] - 2026-03-21 + +### Added + +- **Issue #186: Custom Layer Builder wizard UI** - New "Plugins" tab in settings with a 4-step + wizard for creating custom HTTP API metadata sources without writing code. Step 1 collects name + and description, step 2 configures URL template with variable placeholders, HTTP method, timeout, + and authentication (none/bearer/API key header/basic auth), step 3 maps API response fields to + book profile fields via JSONPath expressions with a configurable confidence weight slider, and + step 4 provides live API testing with sample book data showing HTTP status, response time, mapped + field values, and raw response. 
Full CRUD via `/api/plugins/` endpoints: list, save, delete, and + toggle layers. Each custom layer is stored in `config.json` under `custom_layers` and feeds into + the existing `CustomApiLayer` processing pipeline. + +--- + +## [0.9.0-beta.139] - 2026-03-21 + +### Added + +- **Issue #187: Expanded hook events with filtering and custom payloads** - Hooks now fire on 8 + event types (`scan_started`, `scan_completed`, `book_discovered`, `rename_proposed`, + `rename_applied`, `rename_rejected`, `processing_failed`, `queue_empty`) instead of just + `fixed`. Each hook supports a `run_on` list for per-hook event filtering, so a single hook + can subscribe to only the events it cares about. New `body_template` field enables custom + webhook payloads with full template variable support (enables Discord/Slack/Home Assistant + without code). All events use a standardized envelope format with `event`, `timestamp`, + `app_version`, and event-specific `payload`. New `emit_event()` helper centralizes event + dispatching across the codebase. Fully backward compatible — existing hooks default to + `run_on: ["rename_applied"]` and the legacy `"fixed"` event name is aliased automatically. + +--- + +## [0.9.0-beta.138] - 2026-03-21 + +### Fixed + +- **Issue #185: Custom layer infinite reprocessing** - `CustomApiLayer._apply_result` now + advances `verification_layer` to `self.order + 1` after updating the book profile. Previously + the layer never advanced the item, causing `_fetch_batch` to pick up the same books every cycle. + +--- + +## [0.9.0-beta.137] - 2026-03-10 + +### Added + +- **Issue #66: Standalone layer execution** - Play button next to each layer in the pipeline + settings section. Runs a single pipeline layer on demand via `POST /api/pipeline/run-layer/`. + Shows spinner during execution and result badge with processed/resolved counts. + +--- + +## [0.9.0-beta.136] - 2026-03-10 + +### Added + +- **Issue #66: Pipeline configuration UI in settings** - New "Processing Pipeline Order" section + in Settings > Processing with drag-and-drop and arrow button reordering of processing layers. + Each layer shows enable/disable toggle that saves to config. Experimental `use_modular_pipeline` + feature flag toggle. "Reset to Default Order" button. Pipeline order saved as JSON to config. + +--- + +## [0.9.0-beta.135] - 2026-03-10 + +### Added + +- **Issue #110: File validation in scan pipeline** - The existing `file_validation.py` module is now + integrated into the scan pipeline. Audio files are validated with ffprobe before queuing — corrupt, + truncated, or too-short files are marked `validation_failed` and skipped. Enabled by default, + requires ffprobe (gracefully skips if unavailable). Configurable thresholds in Settings: minimum + duration (default 10 min) and minimum file size (default 1 MB). Dashboard shows a warning when + validation failures exist. Books with `validation_failed` status are excluded from re-queuing. 
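
  For reference, a minimal sketch of the ffprobe check this entry describes. This is not the
  shipped code (that lives in `validate_audio_file()` in `library_manager/file_validation.py`);
  the helper name and the simplified return shape below are assumptions.

  ```python
  # Illustrative only: mirrors the documented defaults (10 min, 1 MB) but omits
  # the TOCTOU, truncation, and end-seek checks the real validator performs.
  import json
  import subprocess
  from pathlib import Path

  def sketch_validate_audio(path: str, min_duration: float = 600,
                            min_size: int = 1_000_000):
      file_path = Path(path)
      if file_path.stat().st_size < min_size:
          return False, "too_small"
      probe = subprocess.run(
          ["ffprobe", "-v", "quiet", "-print_format", "json",
           "-show_format", "-show_streams", str(file_path)],
          capture_output=True, text=True, timeout=30,
      )
      if probe.returncode != 0:
          return False, "ffprobe_failed"
      info = json.loads(probe.stdout)
      if not any(s.get("codec_type") == "audio" for s in info.get("streams", [])):
          return False, "no_audio_stream"
      duration = float(info.get("format", {}).get("duration", 0) or 0)
      if duration < min_duration:
          return False, "too_short"
      return True, "valid"
  ```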
+ +--- + ## [0.9.0-beta.134] - 2026-02-28 ### Fixed diff --git a/README.md b/README.md index 145c6ce..e9cd420 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ **Smart Audiobook Library Organizer with Multi-Source Metadata & AI Verification** -[![Version](https://img.shields.io/badge/version-0.9.0--beta.134-blue.svg)](CHANGELOG.md) +[![Version](https://img.shields.io/badge/version-0.9.0--beta.149-blue.svg)](CHANGELOG.md) [![Docker](https://img.shields.io/badge/docker-ghcr.io-blue.svg)](https://ghcr.io/deucebucket/library-manager) [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE) @@ -16,6 +16,23 @@ ## Recent Changes (stable) +> **beta.149** - **Fix: Watch-Folder Move Failures Now Appear in the UI** (Issue #211) +> - Three `INSERT` statements in the watch-folder worker referenced a phantom `added_at` column on the `books` table (the real column is `created_at`). Every insert silently raised `OperationalError`, caught by a `logger.debug` that hid the error. Result: watch-folder move failures never produced a `watch_folder_error` row — the UI never showed the failure, users only saw it in logs. +> - Fixed the column name and raised the swallow-except log level to `warning` with full traceback so future DB errors don't rot silently. + +> **beta.148** - **Fix: Watch-Folder Retry Loop Across Restarts + Skaldleita server_notice** (Issue #208) +> - **Persistent watch-folder dedup** - `watch_folder_processed` is now a SQLite table instead of an in-memory `set()`. Restarts no longer wipe it, killing the retry loop that had one LM instance hammering Skaldleita's `/match` every 30 seconds on the same file for days. +> - **Honors Skaldleita's abort signal** - When the server detects a retry loop it sends a `server_notice` in the response. LM now logs it (with an upgrade URL) and, on `action=abort_task`, stops retrying that file immediately. + +> **beta.147** - **Critical Fix: Hard Link Safety** (Issue #209) +> - **Stop silent copy+delete** - When "Use hard links" was enabled and the watch folder / library sat on different filesystems, LM used to copy every file and delete the originals. That broke torrent seeding and doubled disk use. Now LM fails fast with a clear error and leaves source files untouched. +> - **Pre-check filesystem compatibility** - Verifies `st_dev` match before any file operations when hard links are enabled. 
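
A rough sketch of that pre-check, for illustration only (the actual logic sits in `move_to_output_folder()` in `app.py` and returns an error tuple instead of raising; the function name and paths below are hypothetical):

```python
import os

def same_filesystem(source: str, library: str) -> bool:
    # os.link() can only create hard links within a single filesystem,
    # so compare the device IDs of both paths before touching anything.
    return os.stat(source).st_dev == os.stat(library).st_dev

# Refuse to touch source files when the volumes differ.
if not same_filesystem("/watch/incoming", "/library"):
    raise RuntimeError(
        "Hard links require the watch folder and library on the same volume. "
        "Move the library or disable 'Use hard links' in Settings."
    )
```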
+ +> **beta.140** - **Feature: Custom Layer Builder** (Issue #186) +> - **Plugins tab** - New settings tab with 4-step wizard to add custom HTTP API metadata sources +> - **No-code API integration** - Configure URL templates, authentication, JSONPath response mapping, and confidence weights +> - **Live testing** - Test API calls with sample book data before saving, with mapped field preview + > **beta.134** - **Hotfix: Settings Page Crash** (Issue #173) > - Jinja2 template recursion bug in hooks_settings.html caused blank settings page for all users on beta.133 diff --git a/app.py b/app.py index 78562d3..fd5962e 100644 --- a/app.py +++ b/app.py @@ -11,7 +11,7 @@ - Multi-provider AI (Gemini, OpenRouter, Ollama) """ -APP_VERSION = "0.9.0-beta.134" +APP_VERSION = "0.9.0-beta.149" GITHUB_REPO = "deucebucket/library-manager" # Your GitHub repo # Versioning Guide: @@ -52,7 +52,8 @@ from library_manager.database import ( init_db, get_db, set_db_path, cleanup_garbage_entries, cleanup_duplicate_history_entries, insert_history_entry, - should_requeue_book + should_requeue_book, + watch_folder_is_processed, watch_folder_mark_processed ) from library_manager.models.book_profile import ( SOURCE_WEIGHTS, FIELD_WEIGHTS, FieldValue, BookProfile, @@ -118,8 +119,11 @@ get_system_info, store_feedback, sanitize_string as feedback_sanitize, ) from library_manager.folder_triage import triage_folder, triage_book_path, should_use_path_hints, confidence_modifier +from library_manager.file_validation import validate_audio_file, check_ffmpeg_available from library_manager.hints import get_all_hints from library_manager.hooks import hooks_bp, run_hooks, build_hook_context +from library_manager.plugins import plugins_bp +from library_manager.plugin_loader import register_plugins, teardown_plugins # Try to import P2P cache (optional - gracefully degrades if not available) try: @@ -560,6 +564,7 @@ def extract_narrator_from_folder(folder_path): app = Flask(__name__) app.secret_key = 'library-manager-secret-key-2024' app.register_blueprint(hooks_bp) +app.register_blueprint(plugins_bp) # ============== INTERNATIONALIZATION (i18n) ============== # Flask-Babel for UI translations - book metadata (author/title) is NOT translated @@ -4859,6 +4864,53 @@ def compare_book_folders(source_path, dest_path, deep_analysis=True): return result +def _validate_book_audio(book_path, config, ffmpeg_available): + """Issue #110: Validate a book's audio file before queueing. + + Finds the first audio file in the book folder and validates it. 
+ + Args: + book_path: Path to the book folder (or file for loose files) + config: Configuration dictionary + ffmpeg_available: Whether ffprobe/ffmpeg are available + + Returns: + (status, reason) where: + - ('valid', 'valid') - file passed validation + - ('invalid', reason) - file failed validation + - ('skipped', reason) - validation was skipped (disabled or ffmpeg unavailable) + """ + if not config.get('enable_file_validation', True): + return 'skipped', 'validation_disabled' + + if not ffmpeg_available: + return 'skipped', 'ffprobe_not_available' + + # Determine audio file to validate + path = Path(book_path) + if path.is_file(): + audio_file = str(path) + else: + audio_file = get_first_audio_file(str(path)) + + if not audio_file: + return 'skipped', 'no_audio_file_found' + + # Get config thresholds + min_duration = config.get('min_audio_duration_seconds', 600) + min_size_mb = config.get('min_audio_file_size_mb', 1) + min_size_bytes = min_size_mb * 1_000_000 + + is_valid, reason, metadata = validate_audio_file( + audio_file, min_duration=min_duration, min_size=min_size_bytes + ) + + if is_valid: + return 'valid', 'valid' + else: + return 'invalid', reason + + def deep_scan_library(config): """ Deep scan library - the AUTISTIC LIBRARIAN approach. @@ -4870,8 +4922,18 @@ def deep_scan_library(config): checked = 0 # Total book folders examined scanned = 0 # New books added to tracking queued = 0 # Books added to fix queue + validation_counts = {'valid': 0, 'invalid': 0, 'skipped': 0} # Issue #110: File validation stats issues_found = {} # path -> list of issues triage_counts = {'clean': 0, 'messy': 0, 'garbage': 0} # Issue #110: Folder triage stats + triage_enabled = config.get('enable_folder_triage', True) # Issue #110: Folder triage toggle + + # Issue #110: Check ffmpeg availability once at scan start + ffmpeg_available, ffmpeg_msg = check_ffmpeg_available() + if config.get('enable_file_validation', True): + if ffmpeg_available: + logger.info("[VALIDATION] ffprobe available - file validation enabled") + else: + logger.warning(f"[VALIDATION] {ffmpeg_msg} - file validation will be skipped") # Track files for duplicate detection file_signatures = {} # signature -> list of paths @@ -4940,6 +5002,17 @@ def deep_scan_library(config): book_id = c.lastrowid reset_layer = 1 # New books always start at layer 1 + # Issue #110: Validate audio file before queueing + v_status, v_reason = _validate_book_audio(path_str, config, ffmpeg_available) + validation_counts[v_status] = validation_counts.get(v_status, 0) + 1 + c.execute('UPDATE books SET validation_status = ?, validation_reason = ? WHERE id = ?', + (v_status, v_reason, book_id)) + if v_status == 'invalid': + c.execute('UPDATE books SET status = ? 
WHERE id = ?', ('validation_failed', book_id)) + conn.commit() + logger.info(f"[VALIDATION] Skipping invalid loose file ({v_reason}): {filename}") + continue + # Add to queue with special "loose_file" reason c.execute('''INSERT OR REPLACE INTO queue (book_id, reason, added_at, priority) @@ -5040,7 +5113,7 @@ def deep_scan_library(config): # Issue #132: Resolve path to prevent duplicates flat_path = str(author_dir.resolve()) # Issue #110: Triage folder name quality - flat_triage = triage_folder(author) + flat_triage = triage_folder(author) if triage_enabled else 'clean' checked += 1 @@ -5068,6 +5141,17 @@ def deep_scan_library(config): reset_layer = 1 logger.info(f"Added flat book: {flat_author} - {flat_title} (triage: {flat_triage})") + # Issue #110: Validate audio file before queueing + v_status, v_reason = _validate_book_audio(flat_path, config, ffmpeg_available) + validation_counts[v_status] = validation_counts.get(v_status, 0) + 1 + c.execute('UPDATE books SET validation_status = ?, validation_reason = ? WHERE id = ?', + (v_status, v_reason, flat_book_id)) + if v_status == 'invalid': + c.execute('UPDATE books SET status = ? WHERE id = ?', ('validation_failed', flat_book_id)) + conn.commit() + logger.info(f"[VALIDATION] Skipping invalid flat book ({v_reason}): {flat_author} - {flat_title}") + continue + # Queue for processing c.execute('SELECT id FROM queue WHERE book_id = ?', (flat_book_id,)) if not c.fetchone(): @@ -5211,7 +5295,7 @@ def deep_scan_library(config): checked += 1 # Issue #110: Triage folder name quality - series_book_triage = triage_folder(book_title) + series_book_triage = triage_folder(book_title) if triage_enabled else 'clean' # Check if already tracked c.execute('''SELECT id, status, profile, user_locked, attempt_count, @@ -5236,6 +5320,17 @@ def deep_scan_library(config): scanned += 1 reset_layer = 1 + # Issue #110: Validate audio file before queueing + v_status, v_reason = _validate_book_audio(book_path, config, ffmpeg_available) + validation_counts[v_status] = validation_counts.get(v_status, 0) + 1 + c.execute('UPDATE books SET validation_status = ?, validation_reason = ? WHERE id = ?', + (v_status, v_reason, book_id)) + if v_status == 'invalid': + c.execute('UPDATE books SET status = ? WHERE id = ?', ('validation_failed', book_id)) + conn.commit() + logger.info(f"[VALIDATION] Skipping invalid series book ({v_reason}): {author}/{book_title}") + continue + # Queue for processing c.execute('SELECT id FROM queue WHERE book_id = ?', (book_id,)) if not c.fetchone(): @@ -5277,7 +5372,7 @@ def deep_scan_library(config): checked += 1 # Issue #110: Triage folder name quality - folder_triage_result = triage_folder(title) + folder_triage_result = triage_folder(title) if triage_enabled else 'clean' triage_counts[folder_triage_result] = triage_counts.get(folder_triage_result, 0) + 1 if folder_triage_result != 'clean': logger.info(f"Folder triage: {folder_triage_result} - {title[:60]}") @@ -5356,6 +5451,17 @@ def deep_scan_library(config): scanned += 1 reset_layer = 1 + # Issue #110: Validate audio file before queueing + v_status, v_reason = _validate_book_audio(path, config, ffmpeg_available) + validation_counts[v_status] = validation_counts.get(v_status, 0) + 1 + c.execute('UPDATE books SET validation_status = ?, validation_reason = ? WHERE id = ?', + (v_status, v_reason, book_id)) + if v_status == 'invalid': + c.execute('UPDATE books SET status = ? 
WHERE id = ?', ('validation_failed', book_id)) + conn.commit() + logger.info(f"[VALIDATION] Skipping invalid book ({v_reason}): {author}/{title}") + continue + # Add to queue if has issues if all_issues: # Skip multi-book collections - they need manual splitting, not renaming @@ -5412,6 +5518,7 @@ def deep_scan_library(config): logger.info(f"Queued: {queued} books need fixing") logger.info(f"Already correct: {checked - queued} books") logger.info(f"Folder triage: {triage_counts['clean']} clean, {triage_counts['messy']} messy, {triage_counts['garbage']} garbage") + logger.info(f"File validation: {validation_counts['valid']} valid, {validation_counts['invalid']} invalid, {validation_counts['skipped']} skipped") return checked, scanned, queued @@ -6326,8 +6433,8 @@ def get_circuit_breaker(api_name): # WATCH FOLDER FUNCTIONALITY # ============================================================================ -# Track processed watch folder items to avoid reprocessing -watch_folder_processed = set() +# Issue #208: watch-folder dedup now lives in the watch_folder_processed +# SQLite table (see library_manager.database) so restarts don't reset state. watch_folder_last_scan = 0 def get_watch_folder_items(watch_folder: str, min_age_seconds: int = 30) -> list: @@ -6350,8 +6457,8 @@ def get_watch_folder_items(watch_folder: str, min_age_seconds: int = 30) -> list for item in watch_path.iterdir(): item_path = str(item.resolve()) - # Skip if already processed - if item_path in watch_folder_processed: + # Skip if already processed (persisted in SQLite, Issue #208) + if watch_folder_is_processed(item_path): continue # Check if folder contains audio files or is an audio file @@ -6414,6 +6521,22 @@ def move_to_output_folder(source_path: str, output_folder: str, author: str, tit except Exception as e: return False, None, f"Cannot create output folder: {e}" + # Issue #209: Fail fast if hard links requested across filesystems. + # Without this, a later os.link EXDEV would silently fall back to copy+delete, + # destroying the user's originals (e.g. breaking torrent seeds, doubling disk). + if use_hard_links: + try: + if source.stat().st_dev != output.stat().st_dev: + return False, None, ( + "Hard link failed: watch folder and library are on different " + "filesystems. Hard links require both paths on the same volume. " + "Move your library to the same volume as the watch folder, or " + "disable 'Use hard links' in Settings. Source files were not " + "modified." 
+ ) + except OSError as e: + return False, None, f"Cannot verify filesystem compatibility: {e}" + # Sanitize author and title for filesystem safe_author = sanitize_path_component(author) if author else "Unknown" safe_title = sanitize_path_component(title) if title else source.name @@ -6492,10 +6615,6 @@ def move_to_output_folder(source_path: str, output_folder: str, author: str, tit if not atomic_move_done: dest_folder.mkdir(parents=True, exist_ok=True) - # Track if we fell back to copy (need to delete originals afterward) - used_copy_fallback = False - files_to_delete = [] - if atomic_move_done: # Atomic move succeeded - nothing more to do for the files pass @@ -6503,17 +6622,9 @@ def move_to_output_folder(source_path: str, output_folder: str, author: str, tit # Single file - move/link to destination folder dest_file = dest_folder / source.name if use_hard_links: - try: - os.link(source, dest_file) - except OSError as e: - if "Invalid cross-device link" in str(e) or e.errno == 18: - # Cross-filesystem - fall back to copy, then delete original - logger.warning(f"Hard link failed (cross-filesystem), falling back to copy+delete: {source.name}") - shutil.copy2(source, dest_file) - used_copy_fallback = True - files_to_delete.append(source) - else: - raise + # Pre-check guarantees same filesystem; other OSErrors (perm, ENOSPC) + # propagate to the outer handler with source intact. + os.link(source, dest_file) else: shutil.move(str(source), str(dest_file)) else: @@ -6531,23 +6642,13 @@ def move_to_output_folder(source_path: str, output_folder: str, author: str, tit dest_file.parent.mkdir(parents=True, exist_ok=True) if use_hard_links: - try: - os.link(src_file, dest_file) - except OSError as e: - if "Invalid cross-device link" in str(e) or e.errno == 18: - logger.warning(f"Hard link failed, copy+delete: {src_file.name}") - shutil.copy2(src_file, dest_file) - used_copy_fallback = True - files_to_delete.append(src_file) - else: - raise + os.link(src_file, dest_file) else: shutil.move(str(src_file), str(dest_file)) - # Clean up empty source folder if not using hard links OR if we used copy fallback - if (not use_hard_links or used_copy_fallback) and delete_empty: + # Clean up empty source folder when we moved files out (not for hardlinks — originals stay) + if not use_hard_links and delete_empty: try: - # Remove empty directories bottom-up for dirpath, dirnames, filenames in os.walk(str(source), topdown=False): if not filenames and not dirnames: os.rmdir(dirpath) @@ -6556,16 +6657,6 @@ def move_to_output_folder(source_path: str, output_folder: str, author: str, tit except Exception as e: logger.debug(f"Could not clean up empty folder {source}: {e}") - # Delete originals if we used copy fallback (handles both single files and directories) - if used_copy_fallback and delete_empty: - for f in files_to_delete: - try: - if f.exists(): - f.unlink() - logger.debug(f"Deleted source after copy fallback: {f}") - except Exception as e: - logger.warning(f"Could not delete source {f}: {e}") - return True, str(dest_folder), None except Exception as e: @@ -6578,7 +6669,7 @@ def process_watch_folder(config: dict) -> int: Process items in the watch folder. Returns number of items processed. 
""" - global watch_folder_processed, watch_folder_last_scan + global watch_folder_last_scan watch_folder = config.get('watch_folder', '').strip() output_folder = config.get('watch_output_folder', '').strip() @@ -6738,6 +6829,18 @@ def norm_conf(c): except Exception as e: logger.debug(f"Watch folder: API lookup failed, using path analysis: {e}") + # Issue #208: Skaldleita may have signalled 'abort_task' during the + # lookup above (retry-loop protection). Stop retrying this item and + # persist it so future scans skip it until the user upgrades / fixes + # the source. The warning + upgrade URL are already in the logs. + from library_manager.providers.bookdb import get_and_clear_server_abort + server_abort = get_and_clear_server_abort() + if server_abort: + abort_msg = server_abort.get('message', 'Skaldleita requested task abort') + logger.warning(f"Watch folder: Aborting '{item.name}' per Skaldleita server notice") + watch_folder_mark_processed(item_path, 'aborted_by_server', abort_msg) + continue + # Issue #57: Verify drastic author changes before accepting if needs_verification and api_author and api_title: try: @@ -6790,7 +6893,7 @@ def norm_conf(c): if success: logger.info(f"Watch folder: Moved to {new_path}") - watch_folder_processed.add(item_path) + watch_folder_mark_processed(item_path, 'moved') processed += 1 # Add to books table @@ -6800,7 +6903,7 @@ def norm_conf(c): # Unknown author - requires user intervention before processing # Issue #57: Include source_type for watch folder tracking c.execute('''INSERT OR REPLACE INTO books - (path, current_author, current_title, status, error_message, source_type, added_at, updated_at) + (path, current_author, current_title, status, error_message, source_type, created_at, updated_at) VALUES (?, ?, ?, 'needs_attention', ?, 'watch_folder', datetime('now'), datetime('now'))''', (new_path, author, title, 'Watch folder: Could not determine author - please review and correct')) logger.info(f"Watch folder: Flagged for attention (unknown author): {title}") @@ -6808,7 +6911,7 @@ def norm_conf(c): # Known author - normal processing # Issue #57: Include source_type for watch folder tracking c.execute('''INSERT OR REPLACE INTO books - (path, current_author, current_title, status, source_type, added_at, updated_at) + (path, current_author, current_title, status, source_type, created_at, updated_at) VALUES (?, ?, ?, 'pending', 'watch_folder', datetime('now'), datetime('now'))''', (new_path, author, title)) # Issue #126: Auto-enqueue for full pipeline processing @@ -6820,12 +6923,13 @@ def norm_conf(c): logger.info(f"Watch folder: Auto-enqueued for processing: {author}/{title}") conn.commit() except Exception as e: - logger.debug(f"Watch folder: Could not add to books table: {e}") + # Issue #211: was logger.debug which hid the real exception. 
+ logger.warning(f"Watch folder: Could not add to books table: {e}", exc_info=True) else: logger.error(f"Watch folder: Failed to move {item.name}: {error}") # Issue #49: Track failed items in the database so user can see and fix them - # Add to watch_folder_processed to prevent infinite retry loop - watch_folder_processed.add(item_path) + # Issue #208: persist dedup so the retry loop dies across restarts too + watch_folder_mark_processed(item_path, 'move_failed', error) try: # Check if this item is already tracked c.execute('SELECT id FROM books WHERE path = ?', (item_path,)) @@ -6838,13 +6942,15 @@ def norm_conf(c): else: # Insert new record for the failed item c.execute('''INSERT INTO books - (path, current_author, current_title, status, error_message, source_type, added_at, updated_at) + (path, current_author, current_title, status, error_message, source_type, created_at, updated_at) VALUES (?, ?, ?, 'watch_folder_error', ?, 'watch_folder', datetime('now'), datetime('now'))''', (item_path, author, title, f'Watch folder: {error}')) conn.commit() logger.info(f"Watch folder: Tracked failure for user review: {item.name}") except Exception as db_err: - logger.debug(f"Watch folder: Could not track failure in DB: {db_err}") + # Issue #211: was logger.debug which hid the real exception; raise level + # so failures surface in normal operation instead of rotting silently. + logger.warning(f"Watch folder: Could not track failure in DB: {db_err}", exc_info=True) except Exception as e: logger.error(f"Watch folder: Error processing {item_path}: {e}") @@ -6923,6 +7029,15 @@ def dashboard(): WHERE h.status = 'pending_fix' ''') pending_fixes = c.fetchone()['count'] + # Issue #110: Count validation failures + c.execute("SELECT COUNT(*) as count FROM books WHERE validation_status = 'invalid'") + validation_failed_count = c.fetchone()['count'] + + # Issue #110: Count folder triage categories + c.execute("SELECT folder_triage, COUNT(*) as count FROM books WHERE folder_triage != 'clean' GROUP BY folder_triage") + triage_rows = c.fetchall() + triage_counts = {row['folder_triage']: row['count'] for row in triage_rows} + # Get recent history (use LEFT JOIN in case book was deleted) c.execute('''SELECT h.*, b.path FROM history h LEFT JOIN books b ON h.book_id = b.id @@ -6944,6 +7059,8 @@ def dashboard(): fixed_count=fixed_count, verified_count=verified_count, pending_fixes=pending_fixes, + validation_failed_count=validation_failed_count, + triage_counts=triage_counts, recent_history=recent_history, daily_stats=daily_stats, config=config, @@ -7200,12 +7317,34 @@ def settings_page(): # P2P cache setting (Issue #62) config['enable_p2p_cache'] = 'enable_p2p_cache' in request.form + # Issue #110: File validation settings + config['enable_file_validation'] = 'enable_file_validation' in request.form + config['min_audio_duration_seconds'] = int(request.form.get('min_audio_duration_seconds', 600)) + config['min_audio_file_size_mb'] = int(request.form.get('min_audio_file_size_mb', 1)) + config['enable_folder_triage'] = 'enable_folder_triage' in request.form + # Provider chain settings - parse comma-separated values into lists audio_chain_str = request.form.get('audio_provider_chain', 'bookdb,gemini').strip() config['audio_provider_chain'] = [p.strip() for p in audio_chain_str.split(',') if p.strip()] text_chain_str = request.form.get('text_provider_chain', 'gemini,openrouter').strip() config['text_provider_chain'] = [p.strip() for p in text_chain_str.split(',') if p.strip()] + # Pipeline order and modular pipeline feature 
flag + config['use_modular_pipeline'] = 'use_modular_pipeline' in request.form + pipeline_order_json = request.form.get('pipeline_order', '').strip() + if pipeline_order_json: + try: + proposed_order = json.loads(pipeline_order_json) + if isinstance(proposed_order, list): + from library_manager.pipeline.registry import default_registry + is_valid, errors = default_registry.validate_order(proposed_order) + if is_valid: + config['pipeline_order'] = proposed_order + else: + logger.warning(f"Invalid pipeline_order from settings form: {errors}") + except (ValueError, TypeError) as e: + logger.warning(f"Failed to parse pipeline_order from settings form: {e}") + # Save config (without secrets) save_config(config) @@ -7238,7 +7377,15 @@ def settings_page(): config['openrouter_api_key'] = secrets.get('openrouter_api_key', '') config['google_books_api_key'] = secrets.get('google_books_api_key', '') config['bookdb_api_key'] = secrets.get('bookdb_api_key', '') - return render_template('settings.html', config=config, version=APP_VERSION) + # Pipeline layer info for settings UI + from library_manager.pipeline.registry import default_registry + pipeline_layers = default_registry.get_ordered_layers(config) + pipeline_order = [layer.layer_id for layer in pipeline_layers] + pipeline_default_order = default_registry.get_all_layer_ids() + return render_template('settings.html', config=config, version=APP_VERSION, + pipeline_layers=pipeline_layers, + pipeline_order=pipeline_order, + pipeline_default_order=pipeline_default_order) # ============== PATH DIAGNOSTIC ============== @@ -7928,6 +8075,95 @@ def api_process_status(): return jsonify(get_processing_status()) +@app.route('/api/pipeline/run-layer/', methods=['POST']) +def api_run_single_layer(layer_id): + """Run a single pipeline layer on demand. + + Executes one batch of the specified layer synchronously. + The request blocks until the layer finishes processing its batch. + """ + global _bg_processing_active + + # Validate layer_id exists in registry + from library_manager.pipeline.registry import default_registry + layer_info = default_registry.get_layer(layer_id) + if layer_info is None: + return jsonify({ + 'success': False, + 'message': f'Unknown layer: {layer_id}' + }), 404 + + # Check that background processing is not currently running + if _bg_processing_active: + return jsonify({ + 'success': False, + 'message': 'Background processing is already running. Wait for it to finish.' + }), 409 + + config = load_config() + + # Check that the layer is enabled in config + if not config.get(layer_info.config_enable_key, True): + return jsonify({ + 'success': False, + 'message': f'Layer "{layer_info.layer_name}" is disabled. Enable it in settings first.' + }), 400 + + # Map layer_id to the corresponding processing function + layer_functions = { + 'audio_id': lambda: process_layer_1_audio(config), + 'audio_credits': lambda: process_layer_3_audio(config, verification_layer=2), + 'sl_requeue': lambda: process_sl_requeue_verification(config), + 'api_lookup': lambda: process_layer_1_api(config), + 'ai_verify': lambda: process_queue(config, verification_layer=4), + } + + layer_func = layer_functions.get(layer_id) + if layer_func is None: + return jsonify({ + 'success': False, + 'message': f'Layer "{layer_id}" does not have a processing function mapped.' 
+ }), 501 + + # Update status to show this layer is running + update_processing_status('active', True) + update_processing_status('layer_name', layer_info.layer_name) + update_processing_status('current', f'Running {layer_info.layer_name} (manual)...') + + try: + processed, resolved = layer_func() + # process_queue returns -1 when rate-limited + if processed == -1: + processed = 0 + message = f'{layer_info.layer_name}: Rate limited, try again later.' + elif processed == 0: + message = f'{layer_info.layer_name}: No items to process at this layer.' + else: + message = f'{layer_info.layer_name}: {processed} processed, {resolved} resolved.' + + log_action("run_layer", detail=f"layer={layer_id} processed={processed} resolved={resolved}", result="success") + + return jsonify({ + 'success': True, + 'layer_id': layer_id, + 'layer_name': layer_info.layer_name, + 'processed': max(0, processed), + 'resolved': resolved, + 'message': message + }) + except Exception as e: + logger.error(f"Error running layer {layer_id}: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'message': f'Error running {layer_info.layer_name}: {str(e)}' + }), 500 + finally: + update_processing_status('active', False) + update_processing_status('layer_name', 'Idle') + update_processing_status('current', 'Idle') + clear_current_book() + + @app.route('/api/live_status') def api_live_status(): """Get comprehensive live status for the status bar. @@ -8722,6 +8958,10 @@ def api_stats(): c.execute("SELECT COUNT(*) as count FROM books WHERE status = 'verified'") verified = c.fetchone()['count'] + # Issue #110: Validation failure count + c.execute("SELECT COUNT(*) as count FROM books WHERE validation_status = 'invalid'") + validation_failed = c.fetchone()['count'] + conn.close() return jsonify({ @@ -8730,6 +8970,7 @@ def api_stats(): 'fixed': fixed, 'pending_fixes': pending, 'verified': verified, + 'validation_failed': validation_failed, 'worker_running': is_worker_running(), 'processing': get_processing_status() }) @@ -9230,6 +9471,10 @@ def build_order_by(sort_cols, default_order): c.execute("SELECT COUNT(*) FROM books WHERE user_locked = 1") counts['locked'] = c.fetchone()[0] + # Issue #110: Count validation failures + c.execute("SELECT COUNT(*) FROM books WHERE validation_status = 'invalid'") + counts['validation_failed'] = c.fetchone()[0] + # Count orphans (detected on-the-fly) orphan_list = [] for lib_path in config.get('library_paths', []): @@ -9468,6 +9713,27 @@ def build_order_by(sort_cols, default_order): 'user_locked': True }) + # Issue #110: Validation failed filter + elif status_filter == 'validation_failed': + order = build_order_by(BOOK_SORT_COLS, 'current_author, current_title') + c.execute('''SELECT id, path, current_author, current_title, status, updated_at, + validation_status, validation_reason + FROM books + WHERE validation_status = 'invalid' + ''' + order + ''' + LIMIT ? 
OFFSET ?''', (per_page, offset)) + for row in c.fetchall(): + items.append({ + 'id': row['id'], + 'type': 'book', + 'book_id': row['id'], + 'author': row['current_author'], + 'title': row['current_title'], + 'path': row['path'], + 'status': 'validation_failed', + 'validation_reason': row['validation_reason'] + }) + # Issue #53: Media type filters elif status_filter == 'audiobook_only': order = build_order_by(BOOK_SORT_COLS, 'current_author, current_title') @@ -9559,7 +9825,7 @@ def build_order_by(sort_cols, default_order): else: # 'all' - show everything from books table order = build_order_by(BOOK_SORT_COLS, 'current_author ASC, current_title ASC') c.execute('''SELECT b.id, b.path, b.current_author, b.current_title, b.status, - b.user_locked, b.confidence, b.media_type, + b.user_locked, b.confidence, b.media_type, b.folder_triage, h.old_author, h.old_title, h.new_author, h.new_title, h.old_path, h.new_path, h.status as history_status, h.fixed_at, h.error_message @@ -9598,7 +9864,8 @@ def build_order_by(sort_cols, default_order): 'status': history_status or book_status, 'confidence': row['confidence'] or 0, 'user_locked': row['user_locked'] == 1, - 'media_type': row['media_type'] or 'audiobook' + 'media_type': row['media_type'] or 'audiobook', + 'folder_triage': row['folder_triage'] or 'clean' } # Overlay history data when present if history_status: @@ -9635,6 +9902,8 @@ def build_order_by(sort_cols, default_order): total = counts['attention'] elif status_filter == 'locked': total = counts['locked'] + elif status_filter == 'validation_failed': + total = counts['validation_failed'] elif status_filter == 'search': total = counts.get('search', 0) # Issue #53: Media type filters @@ -11465,17 +11734,22 @@ def api_manual_match(): # If not from watch folder, find which library it belongs to if lib_path is None: + old_path_resolved = Path(old_path).resolve() for lp in config.get('library_paths', []): - lp_path = Path(lp) + lp_path = Path(lp).resolve() try: - Path(old_path).relative_to(lp_path) + old_path_resolved.relative_to(lp_path) lib_path = lp_path break except ValueError: continue if lib_path is None: - lib_path = Path(old_path).parent.parent + old_p = Path(old_path) + if old_p.is_file(): + lib_path = old_p.parent + else: + lib_path = old_p.parent.parent # Detect language from title for multi-language naming lang_code = detect_title_language(new_title) if new_title else None @@ -11634,17 +11908,22 @@ def api_edit_book(): return jsonify({'success': False, 'error': 'No output folder configured for watch folder items'}) else: # Normal library item - find which library it belongs to + old_path_resolved = Path(old_path).resolve() for lp in config.get('library_paths', []): - lp_path = Path(lp) + lp_path = Path(lp).resolve() try: - Path(old_path).relative_to(lp_path) + old_path_resolved.relative_to(lp_path) lib_path = lp_path break except ValueError: continue if lib_path is None: - lib_path = Path(old_path).parent.parent + old_p = Path(old_path) + if old_p.is_file(): + lib_path = old_p.parent + else: + lib_path = old_p.parent.parent # Detect language from title for multi-language naming lang_code = detect_title_language(new_title) if new_title else None @@ -11883,6 +12162,18 @@ def _setup_user_packages(): init_db() cleanup_garbage_entries() # Remove @eaDir, #recycle, etc. 
from database (Issue #88) cleanup_duplicate_history_entries() # Remove duplicate history entries (Issue #79) + + # Issue #188: Load drop-in Python plugins + try: + from library_manager.pipeline.registry import default_registry + _startup_config = load_config() + _loaded_plugins = register_plugins(default_registry, _startup_config, get_db) + if _loaded_plugins: + logger.info(f"[PLUGIN] {len(_loaded_plugins)} plugin(s) loaded and registered") + except Exception as e: + logger.error(f"[PLUGIN] Failed to load plugins: {e}") + _loaded_plugins = [] + start_worker() port = int(os.environ.get('PORT', 5757)) app.run(host='0.0.0.0', port=port, debug=False) diff --git a/examples/plugins/README.md b/examples/plugins/README.md new file mode 100644 index 0000000..ac6b573 --- /dev/null +++ b/examples/plugins/README.md @@ -0,0 +1,121 @@ +# Example Plugins + +Drop-in Python plugins for Library Manager. Copy a plugin folder to `/data/plugins/` (Docker) or `plugins/` (bare metal) and restart. + +## example-logger + +A minimal template plugin that logs each book it processes. Use as a starting point for your own plugins. + +**Install:** +```bash +cp -r example-logger /data/plugins/ +# Restart Library Manager +``` + +## Creating Your Own Plugin + +Each plugin needs two files in its own folder: + +### manifest.json + +```json +{ + "id": "my-plugin", + "name": "My Plugin", + "version": "1.0.0", + "description": "What it does", + "type": "layer", + "entry_point": "layer.py", + "class_name": "MyPlugin", + "default_order": 35, + "requires_config": [], + "requires_secrets": ["my_api_key"], + "permissions": { + "network": ["api.example.com"], + "database": "read" + } +} +``` + +| Field | Required | Description | +|-------|----------|-------------| +| `id` | Yes | Unique identifier (alphanumeric, hyphens, underscores) | +| `name` | Yes | Display name | +| `version` | No | Semver version string | +| `description` | No | What the plugin does | +| `type` | Yes | Must be `layer` | +| `entry_point` | Yes | Python file containing the plugin class | +| `class_name` | No | Class name to load (auto-detected if omitted) | +| `default_order` | No | Pipeline position 1-999 (default: 50). Lower = runs earlier | +| `requires_config` | No | Config keys your plugin reads | +| `requires_secrets` | No | Secret keys your plugin needs (stored in secrets.json) | +| `permissions.network` | No | Domains your plugin connects to | +| `permissions.database` | No | `read` or `write` | + +### layer.py + +```python +from library_manager.plugin_loader import BasePlugin + +class MyPlugin(BasePlugin): + name = "My Plugin" + description = "What it does" + version = "1.0.0" + + def setup(self, config, secrets): + """Called once on startup. Store config/secrets you need.""" + self.api_key = secrets.get('my_api_key') + + def can_process(self, book_data): + """Return True to process this book, False to skip.""" + return True + + def process(self, book_data): + """Main logic. Return dict of matched fields, or empty dict for no match. + + book_data contains: + - current_title: Current title (from path or prior identification) + - current_author: Current author + - current_narrator: Current narrator (if known) + - path: Full filesystem path to the book + - book_id: Database ID + + Return any of: title, author, narrator, series, series_num, year, language + """ + return {} + + def teardown(self): + """Called on shutdown. 
Clean up resources.""" + pass +``` + +## Plugin Behavior + +- Plugins run with a **30 second timeout** per `process()` call +- Default confidence weight: **60** (configurable via `plugin_configs` in config.json) +- Auto-disabled after **5 consecutive failures** (re-enable from Plugin Health dashboard) +- Plugins never crash the app - exceptions are caught and logged +- Results feed into the book profile system alongside built-in sources + +## Configuration + +Per-plugin settings in `config.json`: + +```json +{ + "plugin_configs": { + "my-plugin": { + "timeout": 30, + "custom_setting": "value" + } + } +} +``` + +Secrets in `secrets.json`: + +```json +{ + "my_api_key": "your-key-here" +} +``` diff --git a/examples/plugins/example-logger/layer.py b/examples/plugins/example-logger/layer.py new file mode 100644 index 0000000..cea74a6 --- /dev/null +++ b/examples/plugins/example-logger/layer.py @@ -0,0 +1,78 @@ +"""Example plugin for Library Manager. + +This is a minimal plugin that demonstrates the BasePlugin interface. +It logs each book it sees and returns empty results (no modifications). + +To use this as a template: +1. Copy this directory to /data/plugins/your-plugin-name/ +2. Edit manifest.json with your plugin's metadata +3. Implement process() with your logic +4. Restart Library Manager + +The plugin loader will discover and load your plugin automatically. +""" + +import logging + +# Import BasePlugin from the plugin loader +from library_manager.plugin_loader import BasePlugin + +logger = logging.getLogger(__name__) + + +class ExampleLoggerPlugin(BasePlugin): + """A simple plugin that logs book information. + + This demonstrates: + - setup() for one-time initialization + - can_process() for filtering books + - process() for the main logic + - teardown() for cleanup + """ + + name = "Example Logger" + description = "Logs book data for debugging" + version = "1.0.0" + + def setup(self, config, secrets): + """Store config for later use.""" + self.log_level = config.get('log_level', 'info') + self.books_seen = 0 + logger.info("[ExamplePlugin] Setup complete") + + def can_process(self, book_data): + """Process all books.""" + return True + + def process(self, book_data): + """Log the book data and return empty (no changes). + + In a real plugin, you would: + 1. Extract info from book_data (title, author, path, etc.) + 2. Query your data source (API, database, file, etc.) + 3. Return a dict with matched fields + + Example return for a match: + return { + 'title': 'The Corrected Title', + 'author': 'Correct Author Name', + 'narrator': 'Narrator Name', + } + """ + self.books_seen += 1 + title = book_data.get('current_title', 'Unknown') + author = book_data.get('current_author', 'Unknown') + + logger.info( + f"[ExamplePlugin] Book #{self.books_seen}: " + f"'{title}' by {author}" + ) + + # Return empty dict = no changes (this is just a logger) + return {} + + def teardown(self): + """Log summary on shutdown.""" + logger.info( + f"[ExamplePlugin] Shutting down. Saw {self.books_seen} books total." + ) diff --git a/examples/plugins/example-logger/manifest.json b/examples/plugins/example-logger/manifest.json new file mode 100644 index 0000000..4c6c417 --- /dev/null +++ b/examples/plugins/example-logger/manifest.json @@ -0,0 +1,16 @@ +{ + "id": "example-logger", + "name": "Example Logger Plugin", + "version": "1.0.0", + "description": "A minimal example plugin that logs book data and returns empty results. 
Use as a template for building your own plugins.", + "type": "layer", + "entry_point": "layer.py", + "class_name": "ExampleLoggerPlugin", + "default_order": 35, + "requires_config": [], + "requires_secrets": [], + "permissions": { + "network": [], + "database": "read" + } +} diff --git a/library_manager/config.py b/library_manager/config.py index a3b89cc..4abb3a8 100644 --- a/library_manager/config.py +++ b/library_manager/config.py @@ -116,6 +116,9 @@ def _detect_data_dir(): # Text providers: "gemini", "openrouter", "ollama" "audio_provider_chain": ["bookdb", "gemini"], # Order to try audio identification (bookdb = Skaldleita) "text_provider_chain": ["gemini", "openrouter"], # Order to try text-based AI + # Pipeline layer ordering - controls the sequence layers execute in + "pipeline_order": ["audio_id", "audio_credits", "sl_requeue", "api_lookup", "ai_verify"], + "use_modular_pipeline": False, # Feature flag: use PipelineOrchestrator instead of hardcoded process_all_queue "deep_scan_mode": False, # Always use all enabled layers regardless of confidence "profile_confidence_threshold": 85, # Minimum confidence to skip remaining layers (0-100) "multibook_ai_fallback": True, # Use AI for ambiguous chapter/multibook detection @@ -136,7 +139,16 @@ def _detect_data_dir(): "watch_delete_empty_folders": True, # Remove empty source folders after moving "watch_min_file_age_seconds": 30, # Minimum file age before processing (wait for downloads to complete) # Post-processing hooks - run commands/webhooks after a book is renamed (Issue #166) - "post_processing_hooks": [] + "post_processing_hooks": [], + # Custom HTTP API layers - user-defined external API processing layers (Issue #185) + "custom_layers": [], + # Drop-in Python plugins (Issue #188) + "plugin_dir": "/data/plugins", # Directory to scan for drop-in plugins (Docker: /data/plugins) + "plugin_configs": {}, # Per-plugin config overrides: {"plugin-id": {"key": "value"}} + # Issue #110: File validation - check audio files before processing + "enable_file_validation": True, # Validate audio files with ffprobe before queueing + "min_audio_duration_seconds": 600, # Minimum duration (seconds) to consider a valid audiobook (default: 10 min) + "min_audio_file_size_mb": 1, # Minimum file size (MB) to consider a valid audiobook } DEFAULT_SECRETS = { diff --git a/library_manager/database.py b/library_manager/database.py index f09d68d..b25e040 100644 --- a/library_manager/database.py +++ b/library_manager/database.py @@ -153,6 +153,17 @@ def init_db(db_path=None): except: pass + # Issue #110: File validation columns - track whether audio files are valid + validation_columns = [ + ('validation_status', "TEXT"), # NULL=not validated, 'valid', 'invalid', 'skipped' + ('validation_reason', "TEXT"), # Why it failed (e.g., 'too_short', 'corrupt', 'no_audio_stream') + ] + for col_name, col_type in validation_columns: + try: + c.execute(f'ALTER TABLE books ADD COLUMN {col_name} {col_type}') + except: + pass # Column already exists + # Stats table - daily stats c.execute('''CREATE TABLE IF NOT EXISTS stats ( id INTEGER PRIMARY KEY, @@ -164,6 +175,17 @@ def init_db(db_path=None): api_calls INTEGER DEFAULT 0 )''') + # Issue #208: Persistent watch-folder dedup + # Was an in-memory set(), wiped on restart, which caused the watch worker + # to re-submit the same failing file every cycle (ate ~48% of Skaldleita + # traffic from a single LM instance before server-side cache absorbed it). 
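    # Illustrative usage (helpers defined below in this module): the watch
    # worker records each handled path, e.g.
    #   watch_folder_mark_processed(path, 'move_failed', error_message)
    # and later scans skip it once watch_folder_is_processed(path) is True.
    # Typical outcome values: 'moved', 'move_failed', 'aborted_by_server'.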
+ c.execute('''CREATE TABLE IF NOT EXISTS watch_folder_processed ( + path TEXT PRIMARY KEY, + processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + outcome TEXT, + error_message TEXT + )''') + conn.commit() conn.close() @@ -171,6 +193,52 @@ def init_db(db_path=None): from library_manager.hooks import init_hook_tables init_hook_tables(path) + # Initialize plugin metrics table (Issue #189) + from library_manager.plugins import init_plugin_metrics_table + init_plugin_metrics_table(path) + + +def watch_folder_is_processed(path, db_path=None): + """Return True if the watch-folder path has already been handled. + + Issue #208: replaces the in-memory set. Survives restarts so the worker + doesn't re-submit the same failing file every scan cycle. + """ + p = db_path or _db_path + if not p: + return False + conn = sqlite3.connect(p, timeout=30) + try: + c = conn.execute( + 'SELECT 1 FROM watch_folder_processed WHERE path = ? LIMIT 1', + (path,) + ) + return c.fetchone() is not None + finally: + conn.close() + + +def watch_folder_mark_processed(path, outcome, error_message=None, db_path=None): + """Record that a watch-folder path has been handled. + + outcome: 'moved' | 'move_failed' | 'unknown_author' | 'aborted_by_server' + Issue #208. + """ + p = db_path or _db_path + if not p: + return + conn = sqlite3.connect(p, timeout=30) + try: + conn.execute( + '''INSERT OR REPLACE INTO watch_folder_processed + (path, processed_at, outcome, error_message) + VALUES (?, CURRENT_TIMESTAMP, ?, ?)''', + (path, outcome, error_message) + ) + conn.commit() + finally: + conn.close() + def cleanup_garbage_entries(db_path=None): """Remove garbage entries from database on startup. @@ -379,7 +447,7 @@ def should_requeue_book(book_row, max_retries=3): max_layer = max_layer or 0 # Never requeue these statuses - skip_statuses = {'user_locked', 'needs_attention', 'needs_split', 'series_folder', 'multi_book_files'} + skip_statuses = {'user_locked', 'needs_attention', 'needs_split', 'series_folder', 'multi_book_files', 'validation_failed'} if status in skip_statuses: return (False, None) diff --git a/library_manager/file_validation.py b/library_manager/file_validation.py index f55c91a..3157721 100644 --- a/library_manager/file_validation.py +++ b/library_manager/file_validation.py @@ -32,16 +32,27 @@ def check_ffmpeg_available() -> Tuple[bool, str]: return True, "ok" -def validate_audio_file(path: str) -> Tuple[bool, str, Dict[str, Any]]: +def validate_audio_file(path: str, min_duration: Optional[float] = None, + min_size: Optional[int] = None) -> Tuple[bool, str, Dict[str, Any]]: """ Validate an audio file using ffprobe. 
+ Args: + path: Path to the audio file + min_duration: Minimum duration in seconds (default: MIN_DURATION_SECONDS) + min_size: Minimum file size in bytes (default: MIN_FILE_SIZE_BYTES) + Returns: (is_valid, reason, metadata) - is_valid: True if file is a valid audiobook - reason: "valid" or error description - metadata: Dict with duration, size, format info (empty if invalid) """ + if min_duration is None: + min_duration = MIN_DURATION_SECONDS + if min_size is None: + min_size = MIN_FILE_SIZE_BYTES + file_path = Path(path) # Basic checks with TOCTOU protection @@ -57,7 +68,7 @@ def validate_audio_file(path: str) -> Tuple[bool, str, Dict[str, Any]]: logger.warning(f"File disappeared during validation {path}: {e}") return False, "file_disappeared", {} - if file_size < MIN_FILE_SIZE_BYTES: + if file_size < min_size: return False, "too_small", {"size": file_size} # Run ffprobe @@ -137,7 +148,7 @@ def validate_audio_file(path: str) -> Tuple[bool, str, Dict[str, Any]]: if duration == 0: return False, "no_duration_truncated", metadata - if duration < MIN_DURATION_SECONDS: + if duration < min_duration: return False, "too_short", metadata # Try to seek to end (catches truncated files) diff --git a/library_manager/hints.py b/library_manager/hints.py index 74ed456..3d26d6d 100644 --- a/library_manager/hints.py +++ b/library_manager/hints.py @@ -108,6 +108,15 @@ # === Post-Processing Hooks === 'post_processing': 'Run external scripts or webhooks after a book is successfully renamed. Use for M4B conversion, Audiobookshelf library scans, Discord notifications, backup scripts, etc. Hook failures never undo a successful rename.', + + # === Folder Triage === + 'folder_triage': 'Classifies folder names as clean, messy (scene tags, torrent markers), or garbage (hashes, generic names). Messy and garbage folders skip path-based hints and rely on audio/metadata identification only.', + + # === Plugins === + 'custom_api_sources': 'Add your own book metadata APIs as processing layers. Each source queries an HTTP endpoint and maps the response into the book profile system.', + 'python_plugins': 'Drop-in Python plugins for advanced users. Place a plugin folder in /data/plugins/ with a manifest.json and a Python file extending BasePlugin. Plugins are auto-discovered on startup.', + 'plugin_health': 'Monitor the health and performance of your custom API sources and Python plugins. Plugins are auto-disabled after 5 consecutive failures to protect your processing pipeline.', + 'plugin_secrets': 'API keys and passwords are stored in secrets.json (not config.json) so they are never exposed in backups or logs. Add your key as a named entry, then reference the key name here.', } diff --git a/library_manager/hooks.py b/library_manager/hooks.py index e041095..bcfd2ef 100644 --- a/library_manager/hooks.py +++ b/library_manager/hooks.py @@ -1,8 +1,11 @@ -"""Post-processing hooks for Library Manager (Issue #166). +"""Post-processing hooks for Library Manager (Issue #166, #187). Runs external commands or webhooks after a book is successfully renamed. Use cases: m4binder conversion, ABS library scan, Discord notifications, etc. +Issue #187: Extended event system with run_on filtering, body_template support, +standardized event envelope, and emit_event() helper. + This is a self-contained Flask Blueprint - routes, logic, DB schema all in one file. 
""" import json @@ -13,7 +16,7 @@ import subprocess import threading import time -from datetime import datetime +from datetime import datetime, timezone import requests as http_requests from flask import Blueprint, request, jsonify @@ -41,6 +44,23 @@ 'old_author', 'old_title', ] +# Supported hook events with descriptions (Issue #187) +HOOK_EVENTS = { + 'scan_started': 'Library scan has started', + 'scan_completed': 'Library scan finished', + 'book_discovered': 'A new book was found during scanning', + 'rename_proposed': 'A rename fix has been proposed for review', + 'rename_applied': 'A rename was successfully applied (formerly "fixed")', + 'rename_rejected': 'A proposed rename was rejected by the user', + 'processing_failed': 'Book processing encountered an error', + 'queue_empty': 'The processing queue is empty', +} + +# Backward compat: map old event name to new +_EVENT_ALIASES = { + 'fixed': 'rename_applied', +} + # ============== DATABASE ============== @@ -115,7 +135,14 @@ def build_hook_context(book_id, history_id, old_path, new_path, new_author='', new_title='', new_narrator='', new_series='', new_series_num='', new_year='', media_type='audiobook', event='fixed'): - """Build the template variable dict from fix data. All values stringified.""" + """Build the template variable dict from fix data. All values stringified. + + Note: the ``event`` param still accepts 'fixed' for backward compat; it is + normalised to 'rename_applied' internally. + """ + # Normalize legacy event name + event = _EVENT_ALIASES.get(event, event) + ctx = { 'book_id': str(book_id), 'history_id': str(history_id), @@ -131,7 +158,7 @@ def build_hook_context(book_id, history_id, old_path, new_path, 'new_year': str(new_year or ''), 'media_type': str(media_type or 'audiobook'), 'event': str(event), - 'timestamp': datetime.now().isoformat(), + 'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'), } # Convenience aliases ctx['author'] = ctx['new_author'] @@ -143,6 +170,43 @@ def build_hook_context(book_id, history_id, old_path, new_path, return ctx +def build_event_context(event_name, payload=None): + """Build a generic event context dict for non-rename events. + + For events like scan_started or queue_empty that don't have book-specific data. + All values are stringified for template substitution. + """ + event_name = _EVENT_ALIASES.get(event_name, event_name) + ctx = { + 'event': str(event_name), + 'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'), + } + # Merge any event-specific payload fields + if payload: + for key, value in payload.items(): + ctx[key] = str(value) if value is not None else '' + return ctx + + +def _build_webhook_envelope(event_name, payload_dict, app_version=None): + """Wrap a payload dict in the standardized event envelope (Issue #187). + + Envelope format: + { + "event": "rename_applied", + "timestamp": "2026-03-21T14:32:00Z", + "app_version": "0.9.0-beta.133", + "payload": { ... } + } + """ + return { + 'event': event_name, + 'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'), + 'app_version': app_version or '', + 'payload': payload_dict, + } + + def substitute_template(template, context, shell_escape=False): """Replace {{variable}} placeholders in a template string. 
@@ -226,9 +290,29 @@ def execute_command_hook(hook, context): # ============== WEBHOOK EXECUTION ============== -def execute_webhook_hook(hook, context, secrets=None): +def _resolve_body_template(body_template, context): + """Recursively substitute {{variable}} placeholders in a body_template structure. + + body_template can be a dict, list, or string. All string values get substitution. + Returns a new structure with substituted values. + """ + if isinstance(body_template, str): + return substitute_template(body_template, context) + elif isinstance(body_template, dict): + return {k: _resolve_body_template(v, context) for k, v in body_template.items()} + elif isinstance(body_template, list): + return [_resolve_body_template(item, context) for item in body_template] + else: + return body_template + + +def execute_webhook_hook(hook, context, secrets=None, app_version=None): """Send an HTTP webhook with context as JSON payload. + If ``body_template`` is set on the hook, it is used as the payload with + template variables substituted. Otherwise, the context is wrapped in the + standardised event envelope (Issue #187). + Returns dict with: success, exit_code (HTTP status), stdout (response body), error, duration_ms """ url = hook.get('url', '') @@ -247,13 +331,20 @@ def execute_webhook_hook(hook, context, secrets=None): # Substitute template variables in URL (no shell escaping needed for URLs) resolved_url = substitute_template(url, context) - # Build payload - payload = {k: v for k, v in context.items()} + # Build payload - body_template takes priority (Issue #187) + body_template = hook.get('body_template') + if body_template: + payload = _resolve_body_template(body_template, context) + else: + # Wrap in standardized envelope + event_name = context.get('event', 'rename_applied') + payload = _build_webhook_envelope(event_name, context, app_version) start = time.monotonic() try: if method == 'GET': - resp = http_requests.get(resolved_url, params=payload, headers=headers, timeout=timeout) + resp = http_requests.get(resolved_url, params=payload if isinstance(payload, dict) else {}, + headers=headers, timeout=timeout) else: headers.setdefault('Content-Type', 'application/json') resp = http_requests.post(resolved_url, json=payload, headers=headers, timeout=timeout) @@ -283,7 +374,19 @@ def execute_webhook_hook(hook, context, secrets=None): # ============== ORCHESTRATOR ============== -def run_hooks(context, config, get_db, secrets=None): +def _normalize_run_on(run_on_list): + """Normalize a hook's run_on list, mapping legacy event names. + + Handles backward compat: 'fixed' -> 'rename_applied'. + Returns a set for fast membership testing. + """ + normalized = set() + for event in run_on_list: + normalized.add(_EVENT_ALIASES.get(event, event)) + return normalized + + +def run_hooks(context, config, get_db, secrets=None, app_version=None): """Main orchestrator - called from apply_fix() after a successful rename. Iterates enabled hooks, routes to correct executor, handles sync/async. 
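For illustration, a hook entry exercising the new Issue #187 fields might look like the sketch below. This is only a sketch: `run_on`, `body_template`, `type`, `mode`, `url`, and `enabled` are the keys read by the code above, while `name`, the Discord URL, and the payload shape are placeholder values.

```json
{
  "name": "Discord notify on rename",
  "type": "webhook",
  "mode": "async",
  "enabled": true,
  "url": "https://discord.com/api/webhooks/XXXX/YYYY",
  "run_on": ["rename_applied", "processing_failed"],
  "body_template": {
    "content": "{{event}}: {{new_author}} - {{new_title}}"
  }
}
```

With `body_template` set, the resolved structure is sent as the JSON payload after `{{variable}}` substitution; without it, the context is wrapped in the standardized envelope (`event`, `timestamp`, `app_version`, `payload`) described above.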
@@ -293,7 +396,9 @@ def run_hooks(context, config, get_db, secrets=None): if not hooks: return - event = context.get('event', 'fixed') + event = context.get('event', 'rename_applied') + # Normalize legacy event name in context + event = _EVENT_ALIASES.get(event, event) history_id = context.get('history_id') book_id = context.get('book_id') @@ -304,8 +409,9 @@ def run_hooks(context, config, get_db, secrets=None): if not hook.get('enabled', True): continue - # Check if this hook should run for this event type - run_on = hook.get('run_on', ['fixed']) + # Check if this hook should run for this event type (Issue #187) + # Default to ['rename_applied'] for backward compat (covers old 'fixed' hooks) + run_on = _normalize_run_on(hook.get('run_on', ['rename_applied'])) if event not in run_on: continue @@ -313,18 +419,18 @@ def run_hooks(context, config, get_db, secrets=None): hook_type = hook.get('type', 'command') mode = hook.get('mode', 'sync') - logger.info(f"[HOOKS] Running {hook_type} hook: {hook_name} (mode={mode})") + logger.info(f"[HOOKS] Running {hook_type} hook: {hook_name} (mode={mode}, event={event})") if mode == 'async': # Fire and forget in a background thread t = threading.Thread( target=_run_single_hook, - args=(hook, hook_name, hook_type, context, secrets, get_db, history_id, book_id), + args=(hook, hook_name, hook_type, context, secrets, get_db, history_id, book_id, app_version), daemon=True, ) t.start() else: - result = _run_single_hook(hook, hook_name, hook_type, context, secrets, get_db, history_id, book_id) + result = _run_single_hook(hook, hook_name, hook_type, context, secrets, get_db, history_id, book_id, app_version) if result and not result.get('success'): any_error = True if not first_error: @@ -348,11 +454,11 @@ def run_hooks(context, config, get_db, secrets=None): logger.error(f"[HOOKS] Failed to update history hook status: {e}") -def _run_single_hook(hook, hook_name, hook_type, context, secrets, get_db, history_id, book_id): +def _run_single_hook(hook, hook_name, hook_type, context, secrets, get_db, history_id, book_id, app_version=None): """Execute a single hook and log the result.""" try: if hook_type == 'webhook': - result = execute_webhook_hook(hook, context, secrets) + result = execute_webhook_hook(hook, context, secrets, app_version=app_version) else: result = execute_command_hook(hook, context) @@ -371,6 +477,37 @@ def _run_single_hook(hook, hook_name, hook_type, context, secrets, get_db, histo return error_result +# ============== EVENT EMISSION (Issue #187) ============== + +def emit_event(event_name, context_dict, config, get_db, secrets=None, app_version=None): + """Emit a hook event, filtering hooks by their run_on list. + + This is the primary entry point for triggering hooks from anywhere in the app. + It normalizes the event name, ensures the context has the event field set, + and delegates to run_hooks() which handles filtering, sync/async, and logging. + + Args: + event_name: One of the HOOK_EVENTS keys (e.g. 'rename_applied', 'scan_started') + context_dict: Dict of template variables for this event. + For rename events, use build_hook_context(). + For other events, use build_event_context() or pass a plain dict. 
+ config: The app config dict (must contain 'post_processing_hooks') + get_db: Database connection factory + secrets: Optional secrets dict for webhook auth + app_version: Optional app version string for webhook envelope + """ + # Normalize legacy event names + event_name = _EVENT_ALIASES.get(event_name, event_name) + + # Ensure context has the event and timestamp fields + context_dict['event'] = event_name + if 'timestamp' not in context_dict: + context_dict['timestamp'] = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') + + logger.debug(f"[HOOKS] Emitting event: {event_name}") + run_hooks(context_dict, config, get_db, secrets=secrets, app_version=app_version) + + # ============== TEST HOOK ============== def test_hook(hook, secrets=None): @@ -393,7 +530,7 @@ def test_hook(hook, secrets=None): new_series_num='', new_year='1977', media_type='audiobook', - event='fixed', + event='rename_applied', ) hook_type = hook.get('type', 'command') @@ -463,6 +600,12 @@ def api_hooks_log_clear(): return jsonify({'success': False, 'error': str(e)}), 500 +@hooks_bp.route('/api/hooks/events') +def api_hooks_events(): + """Return available hook events with descriptions (Issue #187).""" + return jsonify({'events': HOOK_EVENTS}) + + @hooks_bp.route('/api/hooks/save', methods=['POST']) def api_hooks_save(): """Save hooks array to config.json.""" diff --git a/library_manager/pipeline/__init__.py b/library_manager/pipeline/__init__.py index dd4cc6f..9252701 100644 --- a/library_manager/pipeline/__init__.py +++ b/library_manager/pipeline/__init__.py @@ -5,6 +5,13 @@ """ from library_manager.pipeline.base_layer import ProcessingLayer, LayerResult, LayerAction +from library_manager.pipeline.layer_info import LayerInfo +from library_manager.pipeline.registry import LayerRegistry, default_registry, build_default_registry +from library_manager.pipeline.orchestrator import PipelineOrchestrator +from library_manager.pipeline.adapters import ( + LayerAdapter, AudioIdAdapter, ApiLookupAdapter, AudioCreditsAdapter, + AiVerifyAdapter, SlRequeueAdapter, build_default_adapters, +) from library_manager.pipeline.layer_content import process_layer_4_content from library_manager.pipeline.layer_audio_credits import process_layer_3_audio from library_manager.pipeline.layer_api import process_layer_1_api, process_sl_requeue_verification @@ -15,6 +22,18 @@ 'ProcessingLayer', 'LayerResult', 'LayerAction', + 'LayerInfo', + 'LayerRegistry', + 'default_registry', + 'build_default_registry', + 'PipelineOrchestrator', + 'LayerAdapter', + 'AudioIdAdapter', + 'ApiLookupAdapter', + 'AudioCreditsAdapter', + 'AiVerifyAdapter', + 'SlRequeueAdapter', + 'build_default_adapters', 'process_layer_4_content', 'process_layer_3_audio', 'process_layer_1_api', diff --git a/library_manager/pipeline/adapters.py b/library_manager/pipeline/adapters.py new file mode 100644 index 0000000..de2687c --- /dev/null +++ b/library_manager/pipeline/adapters.py @@ -0,0 +1,202 @@ +"""Layer adapters -- uniform interface wrappers around existing layer functions. + +Each adapter wraps one battle-tested layer function with a consistent interface +so the PipelineOrchestrator can run them generically. The adapters do NOT +reimplement any processing logic -- they translate between the orchestrator's +uniform call convention and each layer's specific function signature. 
+ +Adapter Pattern: + Orchestrator -> adapter.run(config, deps) -> existing layer function -> (processed, resolved) +""" + +import logging +from typing import Callable, Dict, Optional, Tuple + +logger = logging.getLogger(__name__) + + +class LayerAdapter: + """Base adapter interface for pipeline layers. + + Every layer adapter must define a ``layer_id`` matching the registry + and implement ``run()`` which delegates to the real layer function. + + Attributes: + layer_id: Must match a registered LayerInfo.layer_id in the registry. + """ + + layer_id: str = "" + + def run(self, config: Dict, deps: Dict) -> Tuple[int, int]: + """Run one batch cycle of this layer. + + Args: + config: Current application configuration dict. + deps: Dictionary of injected dependencies (get_db, load_config, + is_circuit_open, etc.). Each adapter picks what it needs. + + Returns: + Tuple of (processed_count, resolved_count). + """ + raise NotImplementedError(f"{self.__class__.__name__}.run() not implemented") + + def __repr__(self): + return f"<{self.__class__.__name__} layer_id={self.layer_id!r}>" + + +class AudioIdAdapter(LayerAdapter): + """Wraps process_layer_1_audio -- audio transcription + AI parsing. + + This is the primary identification method. It transcribes audiobook + intros via Skaldleita/Whisper and identifies from narrator announcements. + + Dependencies required in deps dict: + - process_layer_1_audio: The app.py wrapper that already has + all app-level deps injected (get_db, identify_audio_with_bookdb, etc.) + """ + + layer_id = "audio_id" + + def run(self, config: Dict, deps: Dict) -> Tuple[int, int]: + """Run one batch of audio identification. + + Delegates to the app-level process_layer_1_audio wrapper which + already injects get_db, identify_audio_with_bookdb, transcribe_audio_intro, + parse_transcript_with_ai, is_circuit_open, get_circuit_breaker, + load_config, build_new_path, update_processing_status, and set_current_book. + """ + process_fn = deps.get('process_layer_1_audio') + if not process_fn: + logger.warning(f"[{self.layer_id}] No process function provided, skipping") + return 0, 0 + return process_fn(config) + + +class ApiLookupAdapter(LayerAdapter): + """Wraps process_layer_1_api -- API database lookups. + + Queries Skaldleita, Audnexus, OpenLibrary, and Google Books to enrich + book metadata. Faster and cheaper than AI verification. + + Dependencies required in deps dict: + - process_layer_1_api: The app.py wrapper with get_db, + gather_all_api_candidates, and set_current_book injected. + """ + + layer_id = "api_lookup" + + def run(self, config: Dict, deps: Dict) -> Tuple[int, int]: + """Run one batch of API lookups.""" + process_fn = deps.get('process_layer_1_api') + if not process_fn: + logger.warning(f"[{self.layer_id}] No process function provided, skipping") + return 0, 0 + return process_fn(config) + + +class AudioCreditsAdapter(LayerAdapter): + """Wraps process_layer_3_audio -- AI audio clip analysis. + + Sends longer audio samples to Gemini AI for deeper analysis when + transcription was unclear. This is an expensive layer. + + Dependencies required in deps dict: + - process_layer_3_audio: The app.py wrapper with get_db, + find_audio_files, analyze_audio_for_credits, auto_save_narrator, + contribute_audio_extraction, and standardize_initials injected. + """ + + layer_id = "audio_credits" + + def run(self, config: Dict, deps: Dict) -> Tuple[int, int]: + """Run one batch of audio credits analysis. + + Passes verification_layer=2 to process Layer 2 items (unclear L1 results). 
+ The underlying function checks the enable_audio_analysis config key. + """ + process_fn = deps.get('process_layer_3_audio') + if not process_fn: + logger.warning(f"[{self.layer_id}] No process function provided, skipping") + return 0, 0 + # audio_credits processes items at verification_layer=2 + return process_fn(config, verification_layer=2) + + +class AiVerifyAdapter(LayerAdapter): + """Wraps process_queue -- AI verification of folder-based guesses. + + Uses AI to verify identification as a last resort. Folder names CAN be + wrong, so confidence is set LOW for folder-derived identifications. + + Dependencies required in deps dict: + - process_queue: The app.py wrapper with get_db, check_rate_limit, + call_ai, detect_multibook_vs_chapters, auto_save_narrator, + standardize_initials, extract_series_from_title, is_placeholder_author, + build_new_path, is_drastic_author_change, verify_drastic_change, + analyze_audio_for_credits, compare_book_folders, sanitize_path_component, + extract_narrator_from_folder, build_metadata_for_embedding, + embed_tags_for_path, BookProfile, audio_extensions, and set_current_book + injected. + """ + + layer_id = "ai_verify" + + def run(self, config: Dict, deps: Dict) -> Tuple[int, int]: + """Run one batch of AI verification. + + Passes verification_layer=4 because at this point in the pipeline, + we're trusting folder names as a last resort. + """ + process_fn = deps.get('process_queue') + if not process_fn: + logger.warning(f"[{self.layer_id}] No process function provided, skipping") + return 0, 0 + return process_fn(config, verification_layer=4) + + +class SlRequeueAdapter(LayerAdapter): + """Wraps process_sl_requeue_verification -- re-verify after nightly merge. + + Books with sl_requeue set had partial ID from Skaldleita. After the + nightly database merge, we re-check to see if they're now fully identified. + + Dependencies required in deps dict: + - process_sl_requeue_verification: The app.py wrapper with get_db + and search_bookdb injected. + """ + + layer_id = "sl_requeue" + + def run(self, config: Dict, deps: Dict) -> Tuple[int, int]: + """Run SL requeue verification (single pass, not a batch loop).""" + process_fn = deps.get('process_sl_requeue_verification') + if not process_fn: + logger.debug(f"[{self.layer_id}] No process function provided, skipping") + return 0, 0 + return process_fn(config) + + +def build_default_adapters(): + """Build the default set of layer adapters. + + Returns: + List of LayerAdapter instances for all known layers. + """ + return [ + AudioIdAdapter(), + AudioCreditsAdapter(), + SlRequeueAdapter(), + ApiLookupAdapter(), + AiVerifyAdapter(), + ] + + +__all__ = [ + 'LayerAdapter', + 'AudioIdAdapter', + 'ApiLookupAdapter', + 'AudioCreditsAdapter', + 'AiVerifyAdapter', + 'SlRequeueAdapter', + 'build_default_adapters', +] diff --git a/library_manager/pipeline/custom_layer.py b/library_manager/pipeline/custom_layer.py new file mode 100644 index 0000000..2653739 --- /dev/null +++ b/library_manager/pipeline/custom_layer.py @@ -0,0 +1,705 @@ +"""Custom HTTP API layers for the processing pipeline. + +Allows users to define custom API endpoints that act as processing layers. +Each custom layer makes HTTP requests to external services and maps the +response fields back into the book profile system. 
+ +Config schema (in config.json under "custom_layers"): + { + "layer_id": "my_bookdb", + "layer_name": "My Book Database", + "enabled": true, + "order": 35, + "url_template": "https://api.example.com/search?title={{title}}&author={{author}}", + "method": "GET", + "timeout": 10, + "auth": {"type": "bearer", "token_secret_key": "my_bookdb_key"}, + "request_fields": ["title", "author", "narrator", "path"], + "response_mapping": { + "title": "$.results[0].title", + "author": "$.results[0].author_name" + }, + "source_weight": 55, + "on_error": "skip", + "circuit_breaker": {"max_failures": 3, "cooldown": 300} + } +""" + +import json +import logging +import re +import time +from base64 import b64encode +from typing import Any, Callable, Dict, List, Optional, Tuple +from urllib.parse import quote as url_quote + +import requests as http_requests + +from library_manager.config import load_secrets +from library_manager.models.book_profile import BookProfile + +logger = logging.getLogger(__name__) + +# Default limits +DEFAULT_TIMEOUT = 10 +MAX_TIMEOUT = 60 +DEFAULT_BATCH_SIZE = 10 + +# Circuit breaker state: layer_id -> {"failures": int, "cooldown_until": float} +_circuit_breakers = {} + + +# ============== JSONPATH EXTRACTION ============== + +def extract_jsonpath(data: Any, path: str) -> Any: + """Extract a value from nested data using a simple JSONPath expression. + + Supports: + $.key - top-level access + $.nested.key - dot-notation nesting + $.array[0].field - array index access + + Args: + data: Parsed JSON data (dict, list, or primitive) + path: JSONPath expression starting with $ + + Returns: + Extracted value, or None if path doesn't resolve + """ + if not path or not path.startswith('$'): + return None + + # Strip the leading $ + remaining = path[1:] + if not remaining: + return data + + # Strip leading dot if present + if remaining.startswith('.'): + remaining = remaining[1:] + + current = data + + # Tokenize: split on dots but handle array indices like [0] + # e.g. "results[0].title" -> ["results[0]", "title"] + tokens = _tokenize_jsonpath(remaining) + + for token in tokens: + if current is None: + return None + + # Check for array index: fieldname[N] or just [N] + match = re.match(r'^([^\[]*)\[(\d+)\]$', token) + if match: + field_name = match.group(1) + index = int(match.group(2)) + + # Navigate to field first (if present) + if field_name: + if isinstance(current, dict): + current = current.get(field_name) + else: + return None + + # Then index into array + if isinstance(current, (list, tuple)): + if 0 <= index < len(current): + current = current[index] + else: + return None + else: + return None + else: + # Simple field access + if isinstance(current, dict): + current = current.get(token) + else: + return None + + return current + + +def _tokenize_jsonpath(path: str) -> List[str]: + """Split a JSONPath remainder into tokens, respecting array brackets. 
+ + Examples: + "results[0].title" -> ["results[0]", "title"] + "a.b.c" -> ["a", "b", "c"] + "items[2].nested[0].value" -> ["items[2]", "nested[0]", "value"] + """ + tokens = [] + current = '' + + i = 0 + while i < len(path): + ch = path[i] + if ch == '.': + if current: + tokens.append(current) + current = '' + elif ch == '[': + # Consume until closing bracket + bracket_start = i + while i < len(path) and path[i] != ']': + i += 1 + current += path[bracket_start:i + 1] # include ] + else: + current += ch + i += 1 + + if current: + tokens.append(current) + + return tokens + + +# ============== CIRCUIT BREAKER ============== + +def _check_circuit_breaker(layer_id: str) -> bool: + """Check if the circuit breaker allows requests for this layer. + + Returns: + True if requests are allowed, False if circuit is open (cooldown active) + """ + state = _circuit_breakers.get(layer_id) + if not state: + return True + + cooldown_until = state.get('cooldown_until', 0) + if time.monotonic() < cooldown_until: + return False + + # Cooldown expired, reset + if state.get('failures', 0) > 0: + state['failures'] = 0 + return True + + +def _record_failure(layer_id: str, max_failures: int, cooldown: int): + """Record a failure and potentially trip the circuit breaker.""" + if layer_id not in _circuit_breakers: + _circuit_breakers[layer_id] = {'failures': 0, 'cooldown_until': 0} + + state = _circuit_breakers[layer_id] + state['failures'] = state.get('failures', 0) + 1 + + if state['failures'] >= max_failures: + state['cooldown_until'] = time.monotonic() + cooldown + logger.warning( + f"[CUSTOM:{layer_id}] Circuit breaker tripped after {state['failures']} failures, " + f"cooldown {cooldown}s" + ) + + +def _record_success(layer_id: str): + """Record a success, resetting the failure counter.""" + if layer_id in _circuit_breakers: + _circuit_breakers[layer_id]['failures'] = 0 + + +def init_circuit_breaker(layer_id: str): + """Initialize circuit breaker state for a layer.""" + if layer_id not in _circuit_breakers: + _circuit_breakers[layer_id] = {'failures': 0, 'cooldown_until': 0} + + +# ============== REQUEST BUILDING ============== + +def _build_template_context(item: Dict) -> Dict[str, str]: + """Build template variable context from a queue/book item. + + Args: + item: Database row dict with book info + + Returns: + Dict of template variables (all strings, safe for URL substitution) + """ + return { + 'title': str(item.get('current_title') or ''), + 'author': str(item.get('current_author') or ''), + 'narrator': str(item.get('narrator') or ''), + 'path': str(item.get('path') or ''), + 'isbn': str(item.get('isbn') or ''), + } + + +def _substitute_url_template(template: str, context: Dict[str, str]) -> str: + """Replace {{variable}} placeholders in URL template with URL-encoded values. + + Args: + template: URL string with {{variable}} placeholders + context: Dict of variable values + + Returns: + URL with placeholders replaced by URL-encoded values + """ + result = template + for key, value in context.items(): + placeholder = '{{' + key + '}}' + if placeholder in result: + result = result.replace(placeholder, url_quote(str(value), safe='')) + return result + + +def _build_auth_headers(auth_config: Optional[Dict], secrets: Dict) -> Dict[str, str]: + """Build authentication headers based on auth config. 
+ + Supports: + none - no auth headers + bearer - Authorization: Bearer + api_key_header - X-API-Key: (or custom header name) + basic - Authorization: Basic + + Args: + auth_config: Auth configuration dict with type and token_secret_key + secrets: Loaded secrets dict + + Returns: + Dict of headers to add to the request + """ + if not auth_config: + return {} + + auth_type = auth_config.get('type', 'none') + if auth_type == 'none': + return {} + + token_key = auth_config.get('token_secret_key', '') + token = secrets.get(token_key, '') if token_key else '' + + if auth_type == 'bearer': + if not token: + logger.warning(f"Bearer auth configured but secret '{token_key}' is empty") + return {} + return {'Authorization': f'Bearer {token}'} + + elif auth_type == 'api_key_header': + header_name = auth_config.get('header_name', 'X-API-Key') + if not token: + logger.warning(f"API key auth configured but secret '{token_key}' is empty") + return {} + return {header_name: token} + + elif auth_type == 'basic': + username = auth_config.get('username', '') + password = secrets.get(token_key, '') if token_key else '' + credentials = b64encode(f'{username}:{password}'.encode()).decode() + return {'Authorization': f'Basic {credentials}'} + + return {} + + +# ============== CUSTOM API LAYER ============== + +class CustomApiLayer: + """A user-defined HTTP API processing layer. + + Fetches book items from the queue, queries a custom API endpoint, + and maps the response back into book profile fields. + """ + + def __init__(self, layer_config: Dict, get_db: Callable, secrets_loader: Callable = None): + """Initialize the custom API layer. + + Args: + layer_config: Layer configuration dict (see module docstring for schema) + get_db: Callable that returns a database connection + secrets_loader: Callable that returns secrets dict (defaults to load_secrets) + """ + self.layer_config = layer_config + self.get_db = get_db + self._load_secrets = secrets_loader or load_secrets + + self.layer_id = layer_config.get('layer_id', 'unknown') + self.layer_name = layer_config.get('layer_name', f'Custom: {self.layer_id}') + self.order = layer_config.get('order', 50) + self.enabled = layer_config.get('enabled', True) + + self.url_template = layer_config.get('url_template', '') + self.method = layer_config.get('method', 'GET').upper() + self.timeout = min(layer_config.get('timeout', DEFAULT_TIMEOUT), MAX_TIMEOUT) + + self.auth_config = layer_config.get('auth') + self.request_fields = layer_config.get('request_fields', ['title', 'author']) + self.response_mapping = layer_config.get('response_mapping', {}) + self.source_weight = layer_config.get('source_weight', 55) + self.on_error = layer_config.get('on_error', 'skip') + + cb_config = layer_config.get('circuit_breaker', {}) + self.cb_max_failures = cb_config.get('max_failures', 3) + self.cb_cooldown = cb_config.get('cooldown', 300) + + self.log_prefix = f"[CUSTOM:{self.layer_id}]" + + def run(self, config: Dict, deps: Optional[Dict] = None) -> Tuple[int, int]: + """Run one processing cycle for this custom layer. + + Matches the LayerAdapter interface: accepts config and deps, + returns (processed_count, resolved_count). + + Records metrics after each batch for the plugin health dashboard. 
+ + Args: + config: App configuration dict + deps: Optional dependencies dict (unused, for interface compatibility) + + Returns: + Tuple of (processed_count, resolved_count) + """ + if not self.enabled: + logger.debug(f"{self.log_prefix} Layer disabled, skipping") + return 0, 0 + + if not self.url_template: + logger.warning(f"{self.log_prefix} No url_template configured, skipping") + return 0, 0 + + # Check circuit breaker + if not _check_circuit_breaker(self.layer_id): + logger.debug(f"{self.log_prefix} Circuit breaker open, skipping") + return 0, 0 + + # Fetch batch from queue + batch = self._fetch_batch(config) + if not batch: + return 0, 0 + + logger.info(f"{self.log_prefix} Processing {len(batch)} items") + + secrets = self._load_secrets() + auth_headers = _build_auth_headers(self.auth_config, secrets) + + processed = 0 + resolved = 0 + error_message = None + start_time = time.monotonic() + + for item in batch: + try: + result = self._process_item(item, auth_headers) + if result is not None: + if self._apply_result(item, result): + resolved += 1 + processed += 1 + except Exception as e: + logger.error(f"{self.log_prefix} Exception processing item {item.get('book_id')}: {e}", + exc_info=True) + processed += 1 + if not error_message: + error_message = str(e)[:500] + + duration_ms = int((time.monotonic() - start_time) * 1000) + success = error_message is None and processed > 0 + + logger.info(f"{self.log_prefix} Processed {processed}, resolved {resolved}") + + # Record metrics for health dashboard (Issue #189) + try: + from library_manager.plugins import record_plugin_metric + was_disabled = record_plugin_metric( + self.get_db, self.layer_id, + success=success, + duration_ms=duration_ms, + error_message=error_message, + items_processed=processed, + items_resolved=resolved + ) + if was_disabled: + self.enabled = False + logger.warning(f"{self.log_prefix} Auto-disabled due to consecutive failures") + except Exception as e: + logger.debug(f"{self.log_prefix} Failed to record metric: {e}") + + return processed, resolved + + def _fetch_batch(self, config: Dict) -> List[Dict]: + """Fetch items from the queue that are at or below this layer's order. + + Uses the same query pattern as base_layer.py but filters by + verification_layer matching the custom layer order position. + + Args: + config: App configuration dict + + Returns: + List of item dicts from database + """ + batch_size = config.get('batch_size', DEFAULT_BATCH_SIZE) + + conn = self.get_db() + c = conn.cursor() + + # Custom layers process items at their assigned verification_layer + # The order field maps to a verification_layer value + c.execute('''SELECT q.id as queue_id, q.book_id, q.reason, + b.path, b.current_author, b.current_title, + b.verification_layer, b.status, b.profile, + b.confidence + FROM queue q + JOIN books b ON q.book_id = b.id + WHERE b.verification_layer = ? + AND b.status NOT IN ('verified', 'fixed', 'series_folder', + 'multi_book_files', 'needs_attention') + AND (b.user_locked IS NULL OR b.user_locked = 0) + ORDER BY q.priority, q.added_at + LIMIT ?''', (self.order, batch_size)) + + batch = [dict(row) for row in c.fetchall()] + conn.close() + + return batch + + def _process_item(self, item: Dict, auth_headers: Dict) -> Optional[Dict]: + """Process a single item by calling the external API. 
+ + Args: + item: Database row dict with book info + auth_headers: Pre-built authentication headers + + Returns: + Dict of mapped response fields, or None on error/skip + """ + # Build template context from item + context = _build_template_context(item) + + # Check we have at least one required field populated + has_data = any(context.get(f) for f in self.request_fields) + if not has_data: + logger.debug(f"{self.log_prefix} No request data for book_id={item.get('book_id')}, skipping") + return None + + # Build the request URL + url = _substitute_url_template(self.url_template, context) + + # Make the HTTP request + headers = dict(auth_headers) + headers.setdefault('Accept', 'application/json') + + start = time.monotonic() + try: + if self.method == 'POST': + # POST with JSON body containing the context fields + body = {f: context.get(f, '') for f in self.request_fields} + headers.setdefault('Content-Type', 'application/json') + resp = http_requests.post(url, json=body, headers=headers, timeout=self.timeout) + else: + resp = http_requests.get(url, headers=headers, timeout=self.timeout) + + duration_ms = int((time.monotonic() - start) * 1000) + logger.info(f"{self.log_prefix} {self.method} {url} -> {resp.status_code} ({duration_ms}ms)") + + if not (200 <= resp.status_code < 300): + _record_failure(self.layer_id, self.cb_max_failures, self.cb_cooldown) + logger.warning(f"{self.log_prefix} HTTP {resp.status_code} from {url}") + return None + + _record_success(self.layer_id) + + # Parse response + try: + data = resp.json() + except (ValueError, json.JSONDecodeError): + logger.warning(f"{self.log_prefix} Non-JSON response from {url}") + return None + + # Map response fields using JSONPath + mapped = {} + for field_name, jsonpath in self.response_mapping.items(): + value = extract_jsonpath(data, jsonpath) + if value is not None: + mapped[field_name] = str(value) + + if not mapped: + logger.debug(f"{self.log_prefix} No fields mapped from response for book_id={item.get('book_id')}") + return None + + return mapped + + except http_requests.Timeout: + duration_ms = int((time.monotonic() - start) * 1000) + logger.warning(f"{self.log_prefix} Timeout after {duration_ms}ms: {url}") + _record_failure(self.layer_id, self.cb_max_failures, self.cb_cooldown) + return None + + except http_requests.ConnectionError as e: + duration_ms = int((time.monotonic() - start) * 1000) + logger.warning(f"{self.log_prefix} Connection error ({duration_ms}ms): {e}") + _record_failure(self.layer_id, self.cb_max_failures, self.cb_cooldown) + return None + + except Exception as e: + duration_ms = int((time.monotonic() - start) * 1000) + logger.error(f"{self.log_prefix} Request error ({duration_ms}ms): {e}") + _record_failure(self.layer_id, self.cb_max_failures, self.cb_cooldown) + return None + + def _apply_result(self, item: Dict, mapped_fields: Dict) -> bool: + """Apply mapped response fields to the book profile in the database. 
+ + Args: + item: Original item dict from database + mapped_fields: Dict of field_name -> value from API response + + Returns: + True if the item was resolved (profile updated), False otherwise + """ + # Build a source name for this custom layer + source_name = f'custom_{self.layer_id}' + + # Load or create profile + profile = BookProfile() + if item.get('profile'): + try: + existing = json.loads(item['profile']) if isinstance(item['profile'], str) else item['profile'] + profile = BookProfile.from_dict(existing) if hasattr(BookProfile, 'from_dict') else profile + except (json.JSONDecodeError, TypeError): + pass + + # Apply mapped fields to profile + fields_applied = 0 + for field_name, value in mapped_fields.items(): + if not value: + continue + + if field_name == 'author': + if profile.add_author(source_name, value, self.source_weight): + fields_applied += 1 + elif field_name == 'title': + if profile.add_title(source_name, value, self.source_weight): + fields_applied += 1 + elif field_name == 'narrator': + profile.narrator.add_source(source_name, value, self.source_weight) + fields_applied += 1 + elif field_name == 'series': + profile.series.add_source(source_name, value, self.source_weight) + fields_applied += 1 + elif field_name == 'series_num': + profile.series_num.add_source(source_name, value, self.source_weight) + fields_applied += 1 + elif field_name == 'year': + profile.year.add_source(source_name, value, self.source_weight) + fields_applied += 1 + elif field_name == 'language': + profile.language.add_source(source_name, value, self.source_weight) + fields_applied += 1 + + if fields_applied == 0: + return False + + # Add this layer to verification history + if source_name not in profile.verification_layers_used: + profile.verification_layers_used.append(source_name) + + profile.finalize() + + # Write updated profile to database + profile_json = json.dumps(profile.to_dict()) + confidence = profile.overall_confidence + + conn = self.get_db() + c = conn.cursor() + + try: + # Update the book with new profile data + # Advance verification_layer past this layer so _fetch_batch + # (which queries WHERE verification_layer = self.order) won't + # pick up the same item again next cycle + next_layer = self.order + 1 + c.execute('''UPDATE books SET + profile = ?, + confidence = ?, + verification_layer = ?, + max_layer_reached = MAX(COALESCE(max_layer_reached, 0), ?) + WHERE id = ?''', + (profile_json, confidence, next_layer, self.order, item['book_id'])) + conn.commit() + + logger.info( + f"{self.log_prefix} Updated profile for book_id={item['book_id']} " + f"({fields_applied} fields, confidence={confidence}%)" + ) + return True + + except Exception as e: + logger.error(f"{self.log_prefix} Failed to update book {item['book_id']}: {e}") + conn.rollback() + return False + finally: + conn.close() + + +# ============== REGISTRATION ============== + +def register_custom_layers( + registry: Any, + config: Dict, + get_db: Callable +) -> List[CustomApiLayer]: + """Read custom_layers from config, register each in the LayerRegistry. 
+ + Args: + registry: LayerRegistry instance (must have .register() method) + config: App configuration dict (should contain "custom_layers" list) + get_db: Callable that returns a database connection + + Returns: + List of instantiated CustomApiLayer adapters + """ + custom_configs = config.get('custom_layers', []) + if not custom_configs: + return [] + + adapters = [] + + for layer_cfg in custom_configs: + layer_id = layer_cfg.get('layer_id') + if not layer_id: + logger.warning("[CUSTOM] Skipping custom layer with no layer_id") + continue + + if not layer_cfg.get('enabled', True): + logger.debug(f"[CUSTOM:{layer_id}] Layer disabled, skipping registration") + continue + + if not layer_cfg.get('url_template'): + logger.warning(f"[CUSTOM:{layer_id}] No url_template, skipping registration") + continue + + # Create the adapter + adapter = CustomApiLayer(layer_cfg, get_db) + + # Initialize circuit breaker + init_circuit_breaker(layer_id) + + # Register in the layer registry if it has the expected interface + order = layer_cfg.get('order', 50) + layer_name = layer_cfg.get('layer_name', f'Custom: {layer_id}') + + if hasattr(registry, 'register'): + try: + # LayerInfo-style registration: pass the info the registry needs + registry.register( + layer_id=layer_id, + layer_name=layer_name, + order=order, + adapter=adapter, + layer_type='custom' + ) + logger.info(f"[CUSTOM:{layer_id}] Registered custom layer '{layer_name}' at order {order}") + except TypeError: + # Registry might have a different signature - try simpler registration + try: + registry.register(layer_id, adapter) + logger.info(f"[CUSTOM:{layer_id}] Registered custom layer '{layer_name}'") + except Exception as e: + logger.error(f"[CUSTOM:{layer_id}] Failed to register: {e}") + continue + else: + logger.debug(f"[CUSTOM:{layer_id}] Registry has no register method, adapter created but not registered") + + adapters.append(adapter) + + if adapters: + logger.info(f"[CUSTOM] Registered {len(adapters)} custom layer(s)") + + return adapters diff --git a/library_manager/pipeline/layer_ai_queue.py b/library_manager/pipeline/layer_ai_queue.py index 06a7987..aa294d3 100644 --- a/library_manager/pipeline/layer_ai_queue.py +++ b/library_manager/pipeline/layer_ai_queue.py @@ -450,10 +450,11 @@ def process_queue( # (Don't assume 2-level structure - series_grouping uses 3 levels) lib_path = None is_from_watch_folder = False + old_path_resolved = old_path.resolve() for lp in config.get('library_paths', []): - lp_path = Path(lp) + lp_path = Path(lp).resolve() try: - old_path.relative_to(lp_path) + old_path_resolved.relative_to(lp_path) lib_path = lp_path break except ValueError: @@ -479,7 +480,12 @@ def process_queue( # Fallback if not found in configured libraries if lib_path is None: - lib_path = old_path.parent.parent + # For loose files in library root, parent is the library itself + # Only go up 2 levels for normal Author/Title structure + if old_path.is_file(): + lib_path = old_path.parent + else: + lib_path = old_path.parent.parent logger.warning(f"Book path {old_path} not under any configured library, guessing lib_path={lib_path}") # Detect language for multi-language naming diff --git a/library_manager/pipeline/layer_audio_credits.py b/library_manager/pipeline/layer_audio_credits.py index 50bf93c..9d6008f 100644 --- a/library_manager/pipeline/layer_audio_credits.py +++ b/library_manager/pipeline/layer_audio_credits.py @@ -219,10 +219,11 @@ def process_layer_3_audio( else: # Audio suggests different values - build new path lib_path = None + 
book_path_resolved = book_path.resolve() for lp in config.get('library_paths', []): - lp_path = Path(lp) + lp_path = Path(lp).resolve() try: - book_path.relative_to(lp_path) + book_path_resolved.relative_to(lp_path) lib_path = lp_path break except ValueError: @@ -240,7 +241,10 @@ def process_layer_3_audio( logger.debug(f"Watch folder path check failed: {e}") if lib_path is None: - lib_path = book_path.parent.parent + if book_path.is_file(): + lib_path = book_path.parent + else: + lib_path = book_path.parent.parent logger.warning(f"[LAYER 3] Book path {book_path} not under any configured library, guessing lib_path={lib_path}") # Detect language for multi-language naming diff --git a/library_manager/pipeline/layer_content.py b/library_manager/pipeline/layer_content.py index b408a92..210f8fe 100644 --- a/library_manager/pipeline/layer_content.py +++ b/library_manager/pipeline/layer_content.py @@ -113,16 +113,21 @@ def process_layer_4_content( # Build new path lib_path = None + book_path_resolved = book_path.resolve() for lp in config.get('library_paths', []): + lp_path = Path(lp).resolve() try: - book_path.relative_to(Path(lp)) - lib_path = Path(lp) + book_path_resolved.relative_to(lp_path) + lib_path = lp_path break except ValueError: continue if not lib_path: - lib_path = book_path.parent.parent + if book_path.is_file(): + lib_path = book_path.parent + else: + lib_path = book_path.parent.parent # Build target path with series grouping if applicable if new_series and config.get('series_grouping', True): diff --git a/library_manager/pipeline/layer_info.py b/library_manager/pipeline/layer_info.py new file mode 100644 index 0000000..668af91 --- /dev/null +++ b/library_manager/pipeline/layer_info.py @@ -0,0 +1,44 @@ +"""LayerInfo dataclass for describing processing layers. + +Each processing layer in the pipeline has metadata describing its identity, +configuration, and capabilities. LayerInfo captures this without any +runtime behavior -- it's purely descriptive. +""" + +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class LayerInfo: + """Metadata describing a single processing layer. + + This is a pure data object -- it knows WHAT a layer is, not HOW it runs. + The registry uses these to track ordering, enable/disable state, and + circuit breaker dependencies. + + Attributes: + layer_id: Unique identifier (e.g. "audio_id", "api_lookup"). + layer_name: Human-readable display name. + description: What this layer does, shown in UI/logs. + config_enable_key: Config key that enables/disables this layer. + default_order: Default position in the pipeline (1-based). + supports_circuit_breaker: Whether this layer uses circuit breakers. + circuit_breaker_apis: Which API circuit breakers this layer depends on. + """ + + layer_id: str + layer_name: str + description: str + config_enable_key: str + default_order: int + supports_circuit_breaker: bool = False + circuit_breaker_apis: tuple = field(default_factory=tuple) + + def __post_init__(self): + if not self.layer_id: + raise ValueError("layer_id cannot be empty") + if self.default_order < 1: + raise ValueError("default_order must be >= 1") + + +__all__ = ['LayerInfo'] diff --git a/library_manager/pipeline/orchestrator.py b/library_manager/pipeline/orchestrator.py new file mode 100644 index 0000000..f9ad50b --- /dev/null +++ b/library_manager/pipeline/orchestrator.py @@ -0,0 +1,627 @@ +"""PipelineOrchestrator -- configurable replacement for hardcoded process_all_queue. 
+ +This module provides a PipelineOrchestrator that executes processing layers +in the order defined by the LayerRegistry and config, using LayerAdapter +objects to delegate to the existing (battle-tested) layer functions. + +The orchestrator handles: + - Layer ordering (from registry + config pipeline_order) + - Enable/disable checks (from config enable keys) + - Circuit breaker waits (per-layer, checking _worker_running) + - Batch loops with configurable delays + - Rate limit handling (for AI verify layer) + - Disabled-layer fallback (advancing stuck items) + - Stuck item recovery + - Worker stop checks + - Status updates and logging + +Feature flag: controlled by config key ``use_modular_pipeline`` (default False). +When False, the existing process_all_queue runs unchanged. When True, the +orchestrator takes over. +""" + +import logging +import time +from typing import Callable, Dict, List, Optional, Tuple + +from library_manager.pipeline.adapters import LayerAdapter +from library_manager.pipeline.registry import LayerRegistry + +logger = logging.getLogger(__name__) + +# Import worker state -- needed for stop checks and status updates +from library_manager.worker import ( + _processing_status, + update_processing_status, + LAYER_NAMES, +) + + +# Layer display numbers for status bar (maps layer_id to legacy layer number) +_LAYER_DISPLAY_NUMBERS = { + 'audio_id': 1, + 'audio_credits': 2, + 'sl_requeue': 2, # Runs between L2 and L3, show as L2 + 'api_lookup': 3, + 'ai_verify': 4, +} + +# Delay between batches per layer (seconds) +_BATCH_DELAYS = { + 'audio_id': 2, # Audio processing needs more time + 'audio_credits': 2, # Audio processing needs more time + 'sl_requeue': 0, # Single pass, no batch loop + 'api_lookup': 0.5, # APIs are fast + 'ai_verify': None, # Uses rate-limit-based delay (calculated at runtime) +} + +# Status messages shown when each layer starts +_LAYER_STATUS_MESSAGES = { + 'audio_id': "Transcribing audio intro via Skaldleita...", + 'audio_credits': "Sending audio clip to Gemini AI...", + 'sl_requeue': "Checking SL requeue verifications...", + 'api_lookup': "Looking up metadata from Skaldleita/Audnexus...", + 'ai_verify': "Verifying identification with AI...", +} + +_LAYER_ACTIVITY_MESSAGES = { + 'audio_id': "Started audio identification", + 'audio_credits': "Started AI audio analysis", + 'sl_requeue': "Re-verifying books after nightly merge", + 'api_lookup': "Started API metadata lookup", + 'ai_verify': "Started AI verification of folder names", +} + +# Log banner messages +_LAYER_LOG_BANNERS = { + 'audio_id': "LAYER 1: Audio Transcription + AI Parsing", + 'audio_credits': "LAYER 2: AI Audio Clip Analysis (for unclear L1 results)", + 'sl_requeue': "SL REQUEUE CHECK: Re-verifying pending books after nightly merge", + 'api_lookup': "LAYER 3: API Enrichment (adding metadata to identified books)", + 'ai_verify': "LAYER 4: Folder Name Fallback (last resort, low confidence)", +} + + +class PipelineOrchestrator: + """Executes processing layers in configured order using adapter pattern. + + The orchestrator replaces the hardcoded layer sequence in + worker.process_all_queue with a configurable, registry-driven pipeline. + Each layer runs through its adapter, which delegates to the existing + layer function with all its battle-tested logic intact. + + Args: + registry: LayerRegistry defining available layers and ordering. + adapters: List of LayerAdapter instances (one per layer). + config: Application configuration dict. 
+ deps: Dictionary of injected dependencies passed to each adapter. + Must include: get_db, load_config, is_circuit_open, + get_circuit_breaker, check_rate_limit, and the layer wrapper + functions (process_layer_1_audio, process_layer_1_api, etc.). + """ + + def __init__( + self, + registry: LayerRegistry, + adapters: List[LayerAdapter], + config: Dict, + deps: Dict, + ): + self.registry = registry + self.adapters = {a.layer_id: a for a in adapters} + self.config = config + self.deps = deps + + def run_pipeline(self) -> Tuple[int, int]: + """Execute all enabled layers in configured order. + + This is the main entry point, replacing the hardcoded sequence in + process_all_queue. It handles: + - Queue empty check + - Stuck item cleanup + - Disabled layer fallback (advancing items past disabled layers) + - Per-layer batch loops with circuit breaker waits + - Status bar updates + - Final status reset + + Returns: + Tuple of (total_processed, total_fixed). + """ + global _processing_status + import library_manager.worker as _worker_mod + + get_db = self.deps['get_db'] + + # Check if queue has items + conn = get_db() + c = conn.cursor() + c.execute('SELECT COUNT(*) as count FROM queue') + total = c.fetchone()['count'] + conn.close() + + if total == 0: + logger.info("Queue is empty, nothing to process") + return 0, 0 + + # Calculate delay for rate-limited layers + user_max = self.config.get('max_requests_per_hour', 30) + max_per_hour = max(10, min(user_max, 500)) + min_delay = max(2, 3600 // max_per_hour) + logger.info(f"Rate limit: {max_per_hour}/hour, delay between batches: {min_delay}s") + + # Initialize processing status + _processing_status.update({ + "active": True, + "processed": 0, + "total": total, + "current": "Starting processing...", + "current_book": "", + "current_author": "", + "errors": [], + "layer": 1, + "layer_name": LAYER_NAMES[1], + "queue_remaining": total, + "last_activity": f"Starting processing of {total} items", + "last_activity_time": time.time(), + }) + logger.info(f"=== STARTING AUDIO-FIRST PROCESSING: {total} items in queue ===") + + # Issue #62: Clean up stuck queue items + conn = get_db() + c = conn.cursor() + c.execute('''DELETE FROM queue WHERE book_id IN ( + SELECT b.id FROM books b WHERE b.status IN ('needs_attention', 'verified', 'fixed') + )''') + cleaned = c.rowcount + if cleaned > 0: + logger.info(f"Cleaned {cleaned} stuck items from queue (already needs_attention/verified/fixed)") + total -= cleaned + conn.commit() + conn.close() + + # Advance items stuck at Layer 2 if audio analysis is disabled + if not self.config.get('enable_audio_analysis', False): + self._advance_stuck_layer2_items() + + total_processed = 0 + total_fixed = 0 + + # Get layers in configured order + ordered_layers = self.registry.get_ordered_layers(self.config) + + for layer_info in ordered_layers: + # Check worker stop + if not _worker_mod._worker_running: + logger.info("Worker stop requested, breaking pipeline") + break + + adapter = self.adapters.get(layer_info.layer_id) + if not adapter: + logger.debug(f"No adapter for layer '{layer_info.layer_id}', skipping") + continue + + # Check if layer is enabled + if not self.registry.is_enabled(layer_info.layer_id, self.config): + self._handle_disabled_layer(layer_info) + continue + + # Run the layer + layer_processed, layer_fixed = self._run_layer( + layer_info, adapter, min_delay + ) + total_processed += layer_processed + total_fixed += layer_fixed + + # Reset status to idle + _processing_status.update({ + "active": False, + "layer": 0, + 
"layer_name": "Idle", + "current": "Processing complete", + "current_book": "", + "current_author": "", + "queue_remaining": 0, + "last_activity": f"Completed: {total_processed} processed, {total_fixed} fixed", + "last_activity_time": time.time(), + }) + logger.info(f"=== LAYERED PROCESSING COMPLETE: {total_processed} processed, {total_fixed} fixed ===") + return total_processed, total_fixed + + def _run_layer( + self, + layer_info, + adapter: LayerAdapter, + min_delay: int, + ) -> Tuple[int, int]: + """Run a single layer with its batch loop and circuit breaker logic. + + Handles: + - Status bar updates before starting + - Circuit breaker waits (for layers that support them) + - Batch loop with per-layer delay + - Special rate-limit handling for ai_verify + - Worker stop checks between iterations + + Args: + layer_info: LayerInfo describing this layer. + adapter: The LayerAdapter to execute. + min_delay: Rate-limit-based delay for ai_verify layer. + + Returns: + Tuple of (layer_processed, layer_fixed). + """ + import library_manager.worker as _worker_mod + global _processing_status + + layer_id = layer_info.layer_id + display_num = _LAYER_DISPLAY_NUMBERS.get(layer_id, 0) + banner = _LAYER_LOG_BANNERS.get(layer_id, f"Layer: {layer_info.layer_name}") + status_msg = _LAYER_STATUS_MESSAGES.get(layer_id, f"Processing {layer_info.layer_name}...") + activity_msg = _LAYER_ACTIVITY_MESSAGES.get(layer_id, f"Started {layer_info.layer_name}") + + logger.info(f"=== {banner} ===") + _processing_status["layer"] = display_num + _processing_status["layer_name"] = layer_info.layer_name + _processing_status["current"] = status_msg + _processing_status["last_activity"] = activity_msg + _processing_status["last_activity_time"] = time.time() + + # Special case: sl_requeue is single-pass, not a batch loop + if layer_id == 'sl_requeue': + return self._run_single_pass(adapter) + + # Special case: ai_verify has its own complex rate-limit loop + if layer_id == 'ai_verify': + return self._run_ai_verify_loop(adapter, min_delay) + + # Standard batch loop (audio_id, audio_credits, api_lookup) + return self._run_batch_loop(layer_info, adapter) + + def _run_batch_loop( + self, + layer_info, + adapter: LayerAdapter, + ) -> Tuple[int, int]: + """Standard batch loop: run adapter in a loop until it returns 0. + + Handles circuit breaker waits for layers that support them. + + Args: + layer_info: LayerInfo for this layer. + adapter: The adapter to call. + + Returns: + Tuple of (total_processed, total_resolved). 
+ """ + import library_manager.worker as _worker_mod + global _processing_status + + layer_id = layer_info.layer_id + batch_delay = _BATCH_DELAYS.get(layer_id, 1) + layer_processed = 0 + layer_resolved = 0 + + is_circuit_open = self.deps.get('is_circuit_open') + get_circuit_breaker = self.deps.get('get_circuit_breaker') + + while True: + if not _worker_mod._worker_running: + break + + # Circuit breaker wait for layers that need it + if layer_info.supports_circuit_breaker and is_circuit_open and get_circuit_breaker: + should_continue = self._wait_for_circuit_breaker( + layer_info, is_circuit_open, get_circuit_breaker + ) + if should_continue == 'break': + break + if should_continue == 'continue': + continue + + processed, resolved = adapter.run(self.config, self.deps) + if processed == 0: + break + layer_processed += processed + layer_resolved += resolved + _processing_status["processed"] = _processing_status.get("processed", 0) + processed + if batch_delay: + time.sleep(batch_delay) + + logger.info( + f"{layer_info.layer_name} complete: {layer_processed} items processed, " + f"{layer_resolved} resolved" + ) + return layer_processed, layer_resolved + + def _run_single_pass(self, adapter: LayerAdapter) -> Tuple[int, int]: + """Run a layer once (no batch loop). Used for sl_requeue. + + Args: + adapter: The adapter to call. + + Returns: + Tuple of (processed, resolved). + """ + processed, resolved = adapter.run(self.config, self.deps) + if processed > 0: + logger.info( + f"SL Requeue Check complete: {processed} processed, {resolved} upgraded" + ) + return processed, resolved + + def _run_ai_verify_loop( + self, + adapter: LayerAdapter, + min_delay: int, + ) -> Tuple[int, int]: + """Run AI verify layer with rate limiting and 3-strike exhaustion. + + This replicates the complex Layer 4 logic from process_all_queue: + - Rate limit checks with exponential backoff + - processed == -1 means rate-limited (don't count toward exhaustion) + - 3 consecutive empty batches -> mark remaining as needs_attention + - Circuit breaker awareness for AI providers + - Worker stop checks + + Args: + adapter: The AiVerifyAdapter. + min_delay: Rate-limit-based delay between batches. + + Returns: + Tuple of (layer_processed, layer_fixed). + """ + import library_manager.worker as _worker_mod + global _processing_status + + get_db = self.deps['get_db'] + load_config = self.deps['load_config'] + check_rate_limit = self.deps['check_rate_limit'] + is_circuit_open = self.deps.get('is_circuit_open') + + batch_num = 0 + rate_limit_hits = 0 + empty_batch_count = 0 + layer_processed = 0 + layer_fixed = 0 + + while True: + if not _worker_mod._worker_running: + break + + config = load_config() + + allowed, calls_made, max_calls = check_rate_limit(config) + if not allowed: + rate_limit_hits += 1 + wait_time = min(300 * rate_limit_hits, 1800) + logger.info(f"Rate limit reached ({calls_made}/{max_calls}), waiting {wait_time // 60} minutes...") + _processing_status["current"] = f"Rate limited, waiting {wait_time // 60}min..." + time.sleep(wait_time) + continue + + batch_num += 1 + logger.info(f"--- Layer 4 batch {batch_num} (API: {calls_made}/{max_calls}) ---") + + processed, fixed = adapter.run(config, self.deps) + + # Issue #160: processed == -1 means rate-limited + if processed == -1: + logger.info("Batch skipped due to rate limiting - not counting toward exhaustion") + _processing_status["current"] = "Rate limited, waiting for cooldown..." 
+ _processing_status["last_activity"] = "Waiting for rate limit cooldown" + _processing_status["last_activity_time"] = time.time() + time.sleep(30) + continue + + if processed == 0: + # Check if AI providers are circuit-broken + ai_provider = config.get('ai_provider', 'gemini') + providers_to_check = [ai_provider] + if ai_provider != 'bookdb': + providers_to_check.append('bookdb') + + any_circuit_open = False + if is_circuit_open: + any_circuit_open = any(is_circuit_open(p) for p in providers_to_check) + + if any_circuit_open: + broken = ', '.join(p for p in providers_to_check if is_circuit_open(p)) + logger.info(f"AI providers circuit-broken ({broken}) - waiting for recovery") + _processing_status["current"] = "AI provider cooling down, waiting..." + _processing_status["last_activity"] = "Waiting for circuit breaker recovery" + _processing_status["last_activity_time"] = time.time() + time.sleep(30) + continue + + conn = get_db() + c = conn.cursor() + c.execute('SELECT COUNT(*) as count FROM queue') + remaining = c.fetchone()['count'] + conn.close() + + if remaining == 0: + logger.info("Queue is now empty") + break + else: + empty_batch_count += 1 + logger.warning(f"No items processed but {remaining} remain (attempt {empty_batch_count}/3)") + if empty_batch_count >= 3: + self._mark_orphaned_items(remaining) + break + time.sleep(10) + continue + + empty_batch_count = 0 + layer_processed += processed + layer_fixed += fixed + _processing_status["processed"] = _processing_status.get("processed", 0) + processed + logger.info(f"Layer 4 Batch {batch_num}: {processed} processed, {fixed} fixed") + time.sleep(min_delay) + + logger.info(f"Layer 4 complete: {layer_processed} items processed, {layer_fixed} fixed via folder fallback") + return layer_processed, layer_fixed + + def _wait_for_circuit_breaker( + self, + layer_info, + is_circuit_open: Callable, + get_circuit_breaker: Callable, + ) -> Optional[str]: + """Wait for circuit breakers to close before processing. + + Checks all circuit breaker APIs for the layer. If any are open, + waits up to 60s at a time, checking _worker_running. + + Args: + layer_info: LayerInfo with circuit_breaker_apis. + is_circuit_open: Function to check if a circuit breaker is open. + get_circuit_breaker: Function to get circuit breaker state dict. + + Returns: + None if no circuit breaker is open (proceed normally). + 'continue' if we waited and should re-check. + 'break' if worker was stopped during wait. + """ + import library_manager.worker as _worker_mod + global _processing_status + + for api_name in layer_info.circuit_breaker_apis: + if is_circuit_open(api_name): + cb = get_circuit_breaker(api_name) + remaining = int(cb.get('circuit_open_until', 0) - time.time()) + if remaining > 0: + wait_time = min(remaining, 60) + logger.info( + f"[{layer_info.layer_name}] {api_name} circuit breaker open, " + f"waiting {wait_time}s ({remaining}s total remaining)" + ) + _processing_status["current"] = ( + f"{layer_info.layer_name}: Waiting for {api_name} ({remaining}s)" + ) + + # Sleep in small increments to check worker stop + for _ in range(wait_time): + if not _worker_mod._worker_running: + return 'break' + time.sleep(1) + + return 'continue' + + return None # No circuit breaker is open + + def _handle_disabled_layer(self, layer_info) -> None: + """Handle a disabled layer by advancing stuck items past it. + + When audio_credits (Layer 2) is disabled, items stuck at + verification_layer=2 need to be advanced to Layer 4 so they + get processed by the folder fallback. 
+ + Args: + layer_info: LayerInfo for the disabled layer. + """ + if layer_info.layer_id == 'audio_credits': + get_db = self.deps['get_db'] + conn = get_db() + c = conn.cursor() + c.execute('SELECT id FROM books WHERE verification_layer = 2 AND status = "pending"') + layer2_books = [row['id'] for row in c.fetchall()] + if layer2_books: + c.execute('UPDATE books SET verification_layer = 4 WHERE verification_layer = 2 AND status = "pending"') + for book_id in layer2_books: + c.execute('SELECT id FROM queue WHERE book_id = ?', (book_id,)) + if not c.fetchone(): + c.execute( + 'INSERT INTO queue (book_id, reason, priority) VALUES (?, ?, ?)', + (book_id, 'layer2_fallback', 5), + ) + conn.commit() + logger.info(f"Layer 2 disabled - advanced {len(layer2_books)} items to Layer 4 (folder fallback)") + conn.close() + + def _advance_stuck_layer2_items(self) -> None: + """Advance items stuck at verification_layer=2 when audio analysis is disabled. + + This runs once at pipeline start, before any layers execute. + Identical to the logic at the top of process_all_queue. + """ + get_db = self.deps['get_db'] + conn = get_db() + c = conn.cursor() + c.execute('SELECT id FROM books WHERE verification_layer = 2 AND status = "pending"') + stuck_books = [row['id'] for row in c.fetchall()] + if stuck_books: + c.execute('UPDATE books SET verification_layer = 4 WHERE verification_layer = 2 AND status = "pending"') + for book_id in stuck_books: + c.execute('SELECT id FROM queue WHERE book_id = ?', (book_id,)) + if not c.fetchone(): + c.execute( + 'INSERT INTO queue (book_id, reason, priority) VALUES (?, ?, ?)', + (book_id, 'startup_layer2_recovery', 5), + ) + conn.commit() + logger.info(f"Advanced {len(stuck_books)} stuck items from Layer 2 to Layer 4") + conn.close() + + def _mark_orphaned_items(self, remaining: int) -> None: + """Mark orphaned queue items as needs_attention after 3-strike exhaustion. + + Replicates the Issue #131 logic from process_all_queue. + + Args: + remaining: Number of items remaining in queue. + """ + get_db = self.deps['get_db'] + logger.info(f"Layer 4 cannot process remaining {remaining} items - marking as needs_attention") + conn = get_db() + try: + c = conn.cursor() + # Issue #168: Increment attempt_count and record last_attempted + c.execute('''UPDATE books SET status = 'needs_attention', + error_message = 'All processing layers exhausted - could not identify this book automatically', + attempt_count = COALESCE(attempt_count, 0) + 1, + last_attempted = CURRENT_TIMESTAMP, + max_layer_reached = MAX(COALESCE(max_layer_reached, 0), COALESCE(verification_layer, 0)) + WHERE id IN ( + SELECT q.book_id FROM queue q + JOIN books b ON q.book_id = b.id + WHERE b.status NOT IN ('verified', 'fixed', 'series_folder', 'multi_book_files', 'needs_attention') + AND (b.user_locked IS NULL OR b.user_locked = 0) + )''') + orphaned = c.rowcount + c.execute('''DELETE FROM queue WHERE book_id IN ( + SELECT id FROM books WHERE status = 'needs_attention' + )''') + conn.commit() + if orphaned: + logger.info(f"Marked {orphaned} orphaned queue items as needs_attention") + finally: + conn.close() + + def run_single_layer(self, layer_id: str) -> Tuple[int, int]: + """Run just one specific layer (for standalone/manual execution). + + Useful for testing or manually triggering a single layer from + the API without running the full pipeline. + + Args: + layer_id: The layer_id to run (must be registered and have an adapter). + + Returns: + Tuple of (processed, resolved). 
+ + Raises: + KeyError: If layer_id is not registered. + ValueError: If no adapter exists for the layer. + """ + layer_info = self.registry.get_layer(layer_id) + if layer_info is None: + raise KeyError(f"Unknown layer_id: {layer_id}") + + adapter = self.adapters.get(layer_id) + if adapter is None: + raise ValueError(f"No adapter registered for layer: {layer_id}") + + return adapter.run(self.config, self.deps) + + +__all__ = ['PipelineOrchestrator'] diff --git a/library_manager/pipeline/registry.py b/library_manager/pipeline/registry.py new file mode 100644 index 0000000..48acb88 --- /dev/null +++ b/library_manager/pipeline/registry.py @@ -0,0 +1,222 @@ +"""LayerRegistry -- knows about all processing layers and their ordering. + +The registry is the single source of truth for which layers exist, what +order they run in, and which config keys control them. Nothing in this +module executes layers -- it only describes and queries them. +""" + +import logging +from typing import Dict, List, Optional + +from library_manager.pipeline.layer_info import LayerInfo + +logger = logging.getLogger(__name__) + + +class LayerRegistry: + """Registry of all processing layers in the pipeline. + + Provides lookup by layer_id, ordered listing, and enable/disable + checks against a config dict. + """ + + def __init__(self): + self._layers: Dict[str, LayerInfo] = {} + + def register(self, info: LayerInfo) -> None: + """Register a layer. + + Args: + info: LayerInfo describing the layer. + + Raises: + ValueError: If a layer with the same layer_id is already registered. + """ + if info.layer_id in self._layers: + raise ValueError(f"Layer '{info.layer_id}' is already registered") + self._layers[info.layer_id] = info + logger.debug(f"Registered layer: {info.layer_id} (order={info.default_order})") + + def get_layer(self, layer_id: str) -> Optional[LayerInfo]: + """Return LayerInfo for a given layer_id, or None if not found.""" + return self._layers.get(layer_id) + + def get_ordered_layers(self, config: Optional[dict] = None) -> List[LayerInfo]: + """Return all layers in pipeline order. + + If *config* contains a ``pipeline_order`` list of layer_ids, that + ordering is used (unknown ids are skipped with a warning). Otherwise + layers are sorted by their ``default_order``. + + Args: + config: Optional config dict. If None or missing pipeline_order, + default ordering is used. + + Returns: + List of LayerInfo in execution order. + """ + if config and 'pipeline_order' in config: + ordered = [] + for layer_id in config['pipeline_order']: + info = self._layers.get(layer_id) + if info: + ordered.append(info) + else: + logger.warning(f"pipeline_order references unknown layer: {layer_id}") + return ordered + + return sorted(self._layers.values(), key=lambda li: li.default_order) + + def get_enabled_layers(self, config: dict) -> List[LayerInfo]: + """Return only enabled layers, in pipeline order. + + A layer is enabled when its ``config_enable_key`` is truthy in + *config* (or defaults to True if the key is absent). + + Args: + config: Config dict to check enable keys against. + + Returns: + List of enabled LayerInfo in execution order. + """ + return [ + info for info in self.get_ordered_layers(config) + if self.is_enabled(info.layer_id, config) + ] + + def is_enabled(self, layer_id: str, config: dict) -> bool: + """Check whether a layer is enabled in *config*. + + Looks up the layer's ``config_enable_key`` in *config*. 
If the key + is missing from config, the layer is considered enabled (safe + default -- existing behavior before registry existed). + + Args: + layer_id: The layer to check. + config: Config dict. + + Returns: + True if enabled, False otherwise. + + Raises: + KeyError: If layer_id is not registered. + """ + info = self._layers.get(layer_id) + if info is None: + raise KeyError(f"Unknown layer_id: {layer_id}") + return bool(config.get(info.config_enable_key, True)) + + def get_all_layer_ids(self) -> List[str]: + """Return all registered layer IDs in default order.""" + return [info.layer_id for info in sorted( + self._layers.values(), key=lambda li: li.default_order + )] + + def validate_order(self, order: List[str]) -> tuple: + """Validate a proposed pipeline_order list. + + Checks that every id in *order* is registered and that there are no + duplicates. Layers missing from *order* are noted as warnings (they + won't run). + + Args: + order: List of layer_ids representing the desired execution order. + + Returns: + Tuple of (is_valid, errors) where errors is a list of strings. + """ + errors: List[str] = [] + seen = set() + + for layer_id in order: + if layer_id in seen: + errors.append(f"Duplicate layer_id in order: {layer_id}") + seen.add(layer_id) + + if layer_id not in self._layers: + errors.append(f"Unknown layer_id: {layer_id}") + + # Warn about registered layers not present in the order + missing = set(self._layers.keys()) - seen + for m in sorted(missing): + errors.append(f"Registered layer '{m}' is missing from order (it will not run)") + + return (len(errors) == 0, errors) + + def __len__(self) -> int: + return len(self._layers) + + def __contains__(self, layer_id: str) -> bool: + return layer_id in self._layers + + +def build_default_registry() -> LayerRegistry: + """Build and return a registry pre-populated with all current layers. + + The ordering matches the execution sequence in worker.process_all_queue: + 1. audio_id -- Audio transcription + BookDB identification + 2. audio_credits -- AI audio clip analysis (Gemini) + 3. sl_requeue -- Skaldleita re-verification after nightly merge + 4. api_lookup -- API database lookups (Audnexus, OpenLibrary, etc.) + 5. ai_verify -- AI verification of folder-based guesses + + Config enable keys are taken from config.py DEFAULT_CONFIG. 
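+
+    Illustrative use (the ``pipeline_order`` and enable values below are just
+    an example, not required settings)::
+
+        registry = build_default_registry()
+        config = {
+            'pipeline_order': ['audio_id', 'api_lookup', 'ai_verify'],
+            'enable_ai_verification': False,
+        }
+        [li.layer_id for li in registry.get_enabled_layers(config)]
+        # -> ['audio_id', 'api_lookup']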
+ """ + registry = LayerRegistry() + + registry.register(LayerInfo( + layer_id="audio_id", + layer_name="Audio ID", + description="Transcribe audiobook intro via Skaldleita/Whisper and identify from narrator announcement.", + config_enable_key="enable_audio_identification", + default_order=1, + supports_circuit_breaker=True, + circuit_breaker_apis=("bookdb",), + )) + + registry.register(LayerInfo( + layer_id="audio_credits", + layer_name="AI Audio Analysis", + description="Send longer audio clip to Gemini AI for deeper analysis when transcription was unclear.", + config_enable_key="enable_audio_analysis", + default_order=2, + supports_circuit_breaker=True, + circuit_breaker_apis=("gemini",), + )) + + registry.register(LayerInfo( + layer_id="sl_requeue", + layer_name="SL Requeue Check", + description="Re-verify books against Skaldleita after nightly database merge.", + config_enable_key="enable_api_lookups", + default_order=3, + supports_circuit_breaker=True, + circuit_breaker_apis=("bookdb",), + )) + + registry.register(LayerInfo( + layer_id="api_lookup", + layer_name="API Lookup", + description="Look up book metadata from Skaldleita, Audnexus, OpenLibrary, and Google Books.", + config_enable_key="enable_api_lookups", + default_order=4, + supports_circuit_breaker=False, + )) + + registry.register(LayerInfo( + layer_id="ai_verify", + layer_name="AI Verify", + description="Use AI to verify folder-name-based identification as a last resort.", + config_enable_key="enable_ai_verification", + default_order=5, + supports_circuit_breaker=False, + )) + + return registry + + +# Module-level default instance -- importable from anywhere +default_registry = build_default_registry() + + +__all__ = ['LayerRegistry', 'LayerInfo', 'build_default_registry', 'default_registry'] diff --git a/library_manager/plugin_loader.py b/library_manager/plugin_loader.py new file mode 100644 index 0000000..c2c9858 --- /dev/null +++ b/library_manager/plugin_loader.py @@ -0,0 +1,775 @@ +"""Drop-in Python plugin system for Library Manager. + +Discovers, validates, and loads plugins from a configurable directory +(default: /data/plugins for Docker). Plugins extend the processing +pipeline with custom book identification logic. + +Plugin structure: + /data/plugins/ + my_plugin/ + manifest.json # metadata, config schema + layer.py # class extending BasePlugin + +See BasePlugin for the simplified interface plugins should implement. +""" + +import copy +import importlib.util +import json +import logging +import sys +import time +import traceback +from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Tuple + +from library_manager.config import load_secrets +from library_manager.pipeline.layer_info import LayerInfo + +logger = logging.getLogger(__name__) + +LOG_PREFIX = "[PLUGIN]" + +# Manifest required fields +REQUIRED_MANIFEST_FIELDS = ('id', 'name', 'entry_point') + +# Default timeout for plugin process() calls (seconds) +DEFAULT_PLUGIN_TIMEOUT = 30 + +# Default batch size for plugin processing +DEFAULT_PLUGIN_BATCH_SIZE = 10 + + +# ============== BASE PLUGIN CLASS ============== + +class BasePlugin: + """Simple base class for drop-in plugins. + + Plugins extend this class and implement process() at minimum. + The plugin loader wraps BasePlugin subclasses in a PluginAdapter + that makes them compatible with the pipeline's LayerAdapter interface. 
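+
+    A minimal plugin might look like this (an illustrative sketch -- the
+    returned field names are the ones documented on process() below)::
+
+        class MyPlugin(BasePlugin):
+            def process(self, book_data):
+                title = book_data.get('current_title') or ''
+                if 'mistborn' in title.lower():
+                    return {'title': 'Mistborn', 'author': 'Brandon Sanderson'}
+                return {}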
+ + Attributes: + name: Human-readable plugin name (set from manifest). + description: What this plugin does (set from manifest). + version: Plugin version string (set from manifest). + """ + + name = "unnamed" + description = "" + version = "0.0.1" + + def setup(self, config: dict, secrets: dict): + """Called once on load. Store config, create sessions, etc. + + Args: + config: Plugin-specific configuration from plugin_configs. + secrets: Full secrets dict from secrets.json. + """ + pass + + def can_process(self, book_data: dict) -> bool: + """Return True if this plugin should process this book. + + Args: + book_data: Dict with book info (current_title, current_author, + path, status, profile, etc.) + + Returns: + True to process this book, False to skip. + """ + return True + + def process(self, book_data: dict) -> dict: + """Process a book. Return dict with metadata fields. + + The returned dict can contain any of these keys: + title, author, narrator, series, series_num, year, language + + Return empty dict or None to skip (no changes). + + Args: + book_data: Deep copy of book info dict. Safe to modify. + + Returns: + Dict of metadata fields, or None/empty dict to skip. + """ + return {} + + def teardown(self): + """Called on shutdown. Clean up resources.""" + pass + + +# ============== PLUGIN INFO ============== + +@dataclass +class PluginInfo: + """Metadata about a discovered plugin. + + Populated from manifest.json during discovery. + """ + plugin_id: str + name: str + version: str + description: str + plugin_dir: Path + entry_point: str + class_name: str = "" + default_order: int = 35 + plugin_type: str = "layer" + requires_config: List[str] = field(default_factory=list) + requires_secrets: List[str] = field(default_factory=list) + permissions: Dict[str, Any] = field(default_factory=dict) + manifest: Dict[str, Any] = field(default_factory=dict) + + @property + def entry_point_path(self) -> Path: + return self.plugin_dir / self.entry_point + + +# ============== PLUGIN ADAPTER ============== + +class PluginAdapter: + """Wraps a BasePlugin instance into a LayerAdapter-compatible object. + + Handles: + - Fetching batch items from the database + - Deep copying book data before passing to plugins + - Timeout enforcement via ThreadPoolExecutor + - Exception isolation (bad plugins never crash the app) + - Recording metrics via record_plugin_metric() + """ + + def __init__(self, plugin: BasePlugin, plugin_info: PluginInfo, + get_db: Callable, timeout: int = DEFAULT_PLUGIN_TIMEOUT): + """Initialize the plugin adapter. + + Args: + plugin: Instantiated BasePlugin subclass. + plugin_info: PluginInfo from manifest discovery. + get_db: Callable that returns a database connection. + timeout: Max seconds for a single process() call. + """ + self.plugin = plugin + self.plugin_info = plugin_info + self.get_db = get_db + self.timeout = timeout + self.layer_id = f"plugin_{plugin_info.plugin_id}" + self.log_prefix = f"[PLUGIN:{plugin_info.plugin_id}]" + self.enabled = True + + def run(self, config: Dict, deps: Optional[Dict] = None) -> Tuple[int, int]: + """Run one processing cycle for this plugin. + + Matches the LayerAdapter interface: accepts config and deps, + returns (processed_count, resolved_count). + + Args: + config: App configuration dict. + deps: Optional dependencies dict (unused, for interface compat). + + Returns: + Tuple of (processed_count, resolved_count). 
+ """ + if not self.enabled: + logger.debug(f"{self.log_prefix} Plugin disabled, skipping") + return 0, 0 + + # Fetch batch from queue + batch = self._fetch_batch(config) + if not batch: + return 0, 0 + + logger.info(f"{self.log_prefix} Processing {len(batch)} items") + + processed = 0 + resolved = 0 + error_message = None + start_time = time.monotonic() + + for item in batch: + try: + result = self._process_item(item) + if result: + if self._apply_result(item, result): + resolved += 1 + processed += 1 + except Exception as e: + logger.error( + f"{self.log_prefix} Exception processing item " + f"{item.get('book_id')}: {e}", exc_info=True + ) + processed += 1 + if not error_message: + error_message = str(e)[:500] + + duration_ms = int((time.monotonic() - start_time) * 1000) + success = error_message is None and processed > 0 + + logger.info( + f"{self.log_prefix} Processed {processed}, resolved {resolved} " + f"({duration_ms}ms)" + ) + + # Record metrics for health dashboard + try: + from library_manager.plugins import record_plugin_metric + was_disabled = record_plugin_metric( + self.get_db, self.layer_id, + success=success, + duration_ms=duration_ms, + error_message=error_message, + items_processed=processed, + items_resolved=resolved, + ) + if was_disabled: + self.enabled = False + logger.warning( + f"{self.log_prefix} Auto-disabled due to consecutive failures" + ) + except Exception as e: + logger.debug(f"{self.log_prefix} Failed to record metric: {e}") + + return processed, resolved + + def _fetch_batch(self, config: Dict) -> List[Dict]: + """Fetch items from the queue at this plugin's order position.""" + batch_size = config.get('batch_size', DEFAULT_PLUGIN_BATCH_SIZE) + + conn = self.get_db() + c = conn.cursor() + + c.execute('''SELECT q.id as queue_id, q.book_id, q.reason, + b.path, b.current_author, b.current_title, + b.verification_layer, b.status, b.profile, + b.confidence + FROM queue q + JOIN books b ON q.book_id = b.id + WHERE b.verification_layer = ? + AND b.status NOT IN ('verified', 'fixed', 'series_folder', + 'multi_book_files', 'needs_attention') + AND (b.user_locked IS NULL OR b.user_locked = 0) + ORDER BY q.priority, q.added_at + LIMIT ?''', (self.plugin_info.default_order, batch_size)) + + batch = [dict(row) for row in c.fetchall()] + conn.close() + + return batch + + def _process_item(self, item: Dict) -> Optional[Dict]: + """Process a single item through the plugin with timeout enforcement. + + Deep copies book data before passing to the plugin. Runs the + plugin's process() method in a thread pool with a timeout. + + Args: + item: Database row dict with book info. + + Returns: + Dict of metadata fields from the plugin, or None on error/skip. 
+ """ + # Deep copy the item so plugins can't mutate our data + book_data = copy.deepcopy(item) + + # Check if plugin wants to process this book + try: + if not self.plugin.can_process(book_data): + logger.debug( + f"{self.log_prefix} Plugin skipped book_id={item.get('book_id')}" + ) + return None + except Exception as e: + logger.warning( + f"{self.log_prefix} can_process() raised: {e}" + ) + return None + + # Run process() with timeout enforcement + try: + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(self.plugin.process, book_data) + result = future.result(timeout=self.timeout) + except FuturesTimeoutError: + logger.warning( + f"{self.log_prefix} process() timed out after {self.timeout}s " + f"for book_id={item.get('book_id')}" + ) + return None + except Exception as e: + logger.error( + f"{self.log_prefix} process() raised: {e}", + exc_info=True + ) + return None + + if not result: + return None + + # Validate result is a dict + if not isinstance(result, dict): + logger.warning( + f"{self.log_prefix} process() returned {type(result).__name__}, " + f"expected dict" + ) + return None + + return result + + def _apply_result(self, item: Dict, mapped_fields: Dict) -> bool: + """Apply plugin results to the book profile in the database. + + Args: + item: Original item dict from database. + mapped_fields: Dict of field_name -> value from plugin. + + Returns: + True if the item was updated, False otherwise. + """ + from library_manager.models.book_profile import BookProfile + + source_name = f'plugin_{self.plugin_info.plugin_id}' + source_weight = 60 # Plugin default weight + + # Load or create profile + profile = BookProfile() + if item.get('profile'): + try: + existing = json.loads(item['profile']) if isinstance(item['profile'], str) else item['profile'] + if hasattr(BookProfile, 'from_dict'): + profile = BookProfile.from_dict(existing) + except (json.JSONDecodeError, TypeError): + pass + + # Apply fields + fields_applied = 0 + for field_name, value in mapped_fields.items(): + if not value: + continue + + if field_name == 'author': + if profile.add_author(source_name, value, source_weight): + fields_applied += 1 + elif field_name == 'title': + if profile.add_title(source_name, value, source_weight): + fields_applied += 1 + elif field_name == 'narrator': + profile.narrator.add_source(source_name, value, source_weight) + fields_applied += 1 + elif field_name == 'series': + profile.series.add_source(source_name, value, source_weight) + fields_applied += 1 + elif field_name == 'series_num': + profile.series_num.add_source(source_name, value, source_weight) + fields_applied += 1 + elif field_name == 'year': + profile.year.add_source(source_name, value, source_weight) + fields_applied += 1 + elif field_name == 'language': + profile.language.add_source(source_name, value, source_weight) + fields_applied += 1 + + if fields_applied == 0: + return False + + # Add to verification history + if source_name not in profile.verification_layers_used: + profile.verification_layers_used.append(source_name) + + profile.finalize() + + # Write to database + profile_json = json.dumps(profile.to_dict()) + confidence = profile.overall_confidence + + conn = self.get_db() + c = conn.cursor() + + try: + next_layer = self.plugin_info.default_order + 1 + c.execute('''UPDATE books SET + profile = ?, + confidence = ?, + verification_layer = ?, + max_layer_reached = MAX(COALESCE(max_layer_reached, 0), ?) 
+ WHERE id = ?''', + (profile_json, confidence, next_layer, + self.plugin_info.default_order, item['book_id'])) + conn.commit() + + logger.info( + f"{self.log_prefix} Updated profile for book_id={item['book_id']} " + f"({fields_applied} fields, confidence={confidence}%)" + ) + return True + + except Exception as e: + logger.error( + f"{self.log_prefix} Failed to update book {item['book_id']}: {e}" + ) + conn.rollback() + return False + finally: + conn.close() + + +# ============== MANIFEST VALIDATION ============== + +def _validate_manifest(manifest: Dict, plugin_dir: Path) -> List[str]: + """Validate a plugin manifest. + + Args: + manifest: Parsed manifest dict. + plugin_dir: Path to the plugin directory. + + Returns: + List of error strings. Empty list means valid. + """ + errors = [] + + for field_name in REQUIRED_MANIFEST_FIELDS: + if not manifest.get(field_name): + errors.append(f"Missing required field: {field_name}") + + # Validate entry_point exists + entry_point = manifest.get('entry_point', '') + if entry_point: + entry_path = plugin_dir / entry_point + if not entry_path.exists(): + errors.append(f"Entry point file not found: {entry_point}") + elif not entry_path.suffix == '.py': + errors.append(f"Entry point must be a .py file: {entry_point}") + + # Validate plugin ID format (alphanumeric, hyphens, underscores) + plugin_id = manifest.get('id', '') + if plugin_id: + import re + if not re.match(r'^[a-zA-Z0-9_-]+$', plugin_id): + errors.append( + f"Invalid plugin id '{plugin_id}': use only letters, numbers, " + f"hyphens, underscores" + ) + + # Validate type + plugin_type = manifest.get('type', 'layer') + if plugin_type not in ('layer',): + errors.append(f"Unsupported plugin type: {plugin_type}") + + # Validate default_order is reasonable + order = manifest.get('default_order', 35) + if not isinstance(order, int) or order < 1 or order > 999: + errors.append("default_order must be an integer between 1 and 999") + + return errors + + +# ============== DISCOVERY ============== + +def discover_plugins(plugin_dir: Path) -> List[PluginInfo]: + """Scan a directory for valid plugins. + + Looks for subdirectories containing manifest.json, validates them, + and returns PluginInfo objects for valid plugins. + + Args: + plugin_dir: Directory to scan for plugins. + + Returns: + List of PluginInfo for valid plugins. Invalid plugins are + logged as warnings and skipped. 
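+
+    An illustrative manifest.json (only ``id``, ``name`` and ``entry_point``
+    are required; the other keys are optional and shown with made-up values)::
+
+        {
+            "id": "my_plugin",
+            "name": "My Plugin",
+            "version": "1.0.0",
+            "entry_point": "layer.py",
+            "class_name": "MyPlugin",
+            "default_order": 35,
+            "requires_secrets": ["my_api_key"]
+        }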
+ """ + if not plugin_dir.exists(): + logger.info(f"{LOG_PREFIX} Plugin directory does not exist: {plugin_dir}") + return [] + + if not plugin_dir.is_dir(): + logger.warning(f"{LOG_PREFIX} Plugin path is not a directory: {plugin_dir}") + return [] + + plugins = [] + + for subdir in sorted(plugin_dir.iterdir()): + if not subdir.is_dir(): + continue + + # Skip hidden directories and __pycache__ + if subdir.name.startswith('.') or subdir.name == '__pycache__': + continue + + manifest_path = subdir / 'manifest.json' + if not manifest_path.exists(): + logger.debug( + f"{LOG_PREFIX} Skipping {subdir.name}: no manifest.json" + ) + continue + + # Parse manifest + try: + with open(manifest_path) as f: + manifest = json.load(f) + except (json.JSONDecodeError, OSError) as e: + logger.warning( + f"{LOG_PREFIX} Invalid manifest.json in {subdir.name}: {e}" + ) + continue + + # Validate manifest + errors = _validate_manifest(manifest, subdir) + if errors: + for err in errors: + logger.warning(f"{LOG_PREFIX} {subdir.name}: {err}") + continue + + info = PluginInfo( + plugin_id=manifest['id'], + name=manifest['name'], + version=manifest.get('version', '0.0.1'), + description=manifest.get('description', ''), + plugin_dir=subdir, + entry_point=manifest['entry_point'], + class_name=manifest.get('class_name', ''), + default_order=manifest.get('default_order', 35), + plugin_type=manifest.get('type', 'layer'), + requires_config=manifest.get('requires_config', []), + requires_secrets=manifest.get('requires_secrets', []), + permissions=manifest.get('permissions', {}), + manifest=manifest, + ) + + logger.info( + f"{LOG_PREFIX} Discovered plugin: {info.name} v{info.version} " + f"({info.plugin_id})" + ) + plugins.append(info) + + if plugins: + logger.info(f"{LOG_PREFIX} Discovered {len(plugins)} plugin(s)") + else: + logger.debug(f"{LOG_PREFIX} No plugins found in {plugin_dir}") + + return plugins + + +# ============== LOADING ============== + +def load_plugin(plugin_info: PluginInfo, config: dict = None, + secrets: dict = None) -> Optional[BasePlugin]: + """Load and instantiate a plugin from its PluginInfo. + + Uses importlib to dynamically load the plugin module, finds the + target class (by class_name or auto-discovery), instantiates it, + and calls setup(). + + Args: + plugin_info: PluginInfo from discovery. + config: Plugin-specific config (from plugin_configs). + secrets: Full secrets dict. + + Returns: + Instantiated BasePlugin, or None on failure. 
+ """ + entry_path = plugin_info.entry_point_path + module_name = f"lm_plugin_{plugin_info.plugin_id}" + + try: + # Load the module + spec = importlib.util.spec_from_file_location(module_name, entry_path) + if spec is None or spec.loader is None: + logger.error( + f"{LOG_PREFIX} Failed to create module spec for " + f"{plugin_info.plugin_id}: {entry_path}" + ) + return None + + module = importlib.util.module_from_spec(spec) + + # Add to sys.modules so relative imports work within plugins + sys.modules[module_name] = module + + try: + spec.loader.exec_module(module) + except Exception as e: + logger.error( + f"{LOG_PREFIX} Failed to execute module for " + f"{plugin_info.plugin_id}: {e}" + ) + # Clean up on failure + sys.modules.pop(module_name, None) + return None + + # Find the plugin class + plugin_class = None + + if plugin_info.class_name: + # Look up by explicit class name + plugin_class = getattr(module, plugin_info.class_name, None) + if plugin_class is None: + logger.error( + f"{LOG_PREFIX} Class '{plugin_info.class_name}' not found " + f"in {entry_path}" + ) + return None + else: + # Auto-discover: find first BasePlugin subclass + for attr_name in dir(module): + attr = getattr(module, attr_name) + if (isinstance(attr, type) + and issubclass(attr, BasePlugin) + and attr is not BasePlugin): + plugin_class = attr + break + + if plugin_class is None: + logger.error( + f"{LOG_PREFIX} No BasePlugin subclass found in {entry_path}" + ) + return None + + # Instantiate + try: + instance = plugin_class() + except Exception as e: + logger.error( + f"{LOG_PREFIX} Failed to instantiate {plugin_class.__name__} " + f"for {plugin_info.plugin_id}: {e}" + ) + return None + + # Set metadata from manifest + instance.name = plugin_info.name + instance.description = plugin_info.description + instance.version = plugin_info.version + + # Call setup + try: + instance.setup(config or {}, secrets or {}) + except Exception as e: + logger.error( + f"{LOG_PREFIX} setup() failed for {plugin_info.plugin_id}: {e}" + ) + return None + + logger.info( + f"{LOG_PREFIX} Loaded plugin: {plugin_info.name} v{plugin_info.version} " + f"({plugin_class.__name__})" + ) + return instance + + except Exception as e: + logger.error( + f"{LOG_PREFIX} Unexpected error loading {plugin_info.plugin_id}: {e}", + exc_info=True + ) + return None + + +# ============== REGISTRATION ============== + +def register_plugins(registry, config: dict, get_db: Callable) -> List[PluginAdapter]: + """Discover, load, and register all plugins. + + This is the main entry point called from app.py on startup. It: + 1. Reads plugin_dir from config + 2. Discovers plugins in that directory + 3. Loads each plugin (with config and secrets) + 4. Wraps each in a PluginAdapter + 5. Registers each in the LayerRegistry + + Args: + registry: LayerRegistry instance. + config: Full app configuration dict. + get_db: Callable that returns a database connection. + + Returns: + List of PluginAdapter instances for loaded plugins. 
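+
+    Illustrative call site (a sketch only -- the exact wiring in app.py is not
+    shown here)::
+
+        registry = build_default_registry()
+        plugin_adapters = register_plugins(registry, config, get_db)
+        # plugin_adapters can then be handed to the PipelineOrchestrator
+        # alongside the built-in layer adapters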
+ """ + plugin_dir = Path(config.get('plugin_dir', '/data/plugins')) + plugin_configs = config.get('plugin_configs', {}) + + # Discover + discovered = discover_plugins(plugin_dir) + if not discovered: + return [] + + # Load secrets once + secrets = load_secrets() + + adapters = [] + + for info in discovered: + # Get per-plugin config + plugin_config = plugin_configs.get(info.plugin_id, {}) + + # Check for missing required secrets + missing_secrets = [ + s for s in info.requires_secrets + if not secrets.get(s) + ] + if missing_secrets: + logger.warning( + f"{LOG_PREFIX} {info.plugin_id}: missing required secrets: " + f"{', '.join(missing_secrets)} -- skipping" + ) + continue + + # Load the plugin + instance = load_plugin(info, config=plugin_config, secrets=secrets) + if instance is None: + continue + + # Determine timeout from plugin config or default + timeout = plugin_config.get('timeout', DEFAULT_PLUGIN_TIMEOUT) + + # Wrap in adapter + adapter = PluginAdapter(instance, info, get_db, timeout=timeout) + + # Register in the layer registry + layer_id = adapter.layer_id + try: + registry.register(LayerInfo( + layer_id=layer_id, + layer_name=info.name, + description=info.description or f"Plugin: {info.name}", + config_enable_key=f"plugin_{info.plugin_id}_enabled", + default_order=info.default_order, + supports_circuit_breaker=False, + )) + logger.info( + f"{LOG_PREFIX} Registered {info.name} as '{layer_id}' " + f"at order {info.default_order}" + ) + except ValueError as e: + # Already registered (duplicate plugin ID) + logger.warning(f"{LOG_PREFIX} {info.plugin_id}: {e}") + continue + + adapters.append(adapter) + + if adapters: + logger.info(f"{LOG_PREFIX} Loaded {len(adapters)} plugin(s)") + + return adapters + + +def teardown_plugins(adapters: List[PluginAdapter]): + """Call teardown() on all loaded plugins. + + Called during app shutdown to clean up plugin resources. + + Args: + adapters: List of PluginAdapter instances. + """ + for adapter in adapters: + try: + adapter.plugin.teardown() + logger.debug( + f"{LOG_PREFIX} Teardown complete: {adapter.plugin_info.plugin_id}" + ) + except Exception as e: + logger.warning( + f"{LOG_PREFIX} teardown() failed for " + f"{adapter.plugin_info.plugin_id}: {e}" + ) diff --git a/library_manager/plugins.py b/library_manager/plugins.py new file mode 100644 index 0000000..e0b6b99 --- /dev/null +++ b/library_manager/plugins.py @@ -0,0 +1,566 @@ +"""Custom Layer Builder API routes (Issue #186) + Plugin Health Dashboard (Issue #189). + +Flask Blueprint providing CRUD, test, and health monitoring endpoints for custom HTTP +API layers. These layers let users add their own book metadata sources without writing code. 
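+
+An illustrative ``custom_layers`` entry as stored in config.json (the defaults
+match the save-layer route below; the URL and JSONPath values are placeholders,
+and the exact template/mapping syntax is handled by pipeline.custom_layer)::
+
+    {
+        "layer_id": "my_metadata_api",
+        "layer_name": "My Metadata API",
+        "enabled": true,
+        "url_template": "https://api.example.com/lookup?title={title}",
+        "method": "GET",
+        "timeout": 10,
+        "source_weight": 55,
+        "order": 35,
+        "on_error": "skip",
+        "request_fields": ["title", "author"],
+        "response_mapping": {"title": "$.best_match.title"},
+        "circuit_breaker": {"max_failures": 3, "cooldown": 300}
+    }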
+""" +import json +import logging +import re +import sqlite3 +import time + +import requests as http_requests +from flask import Blueprint, request, jsonify + +from library_manager.config import CONFIG_PATH, load_config, load_secrets +from library_manager.pipeline.custom_layer import ( + extract_jsonpath, _build_auth_headers, _substitute_url_template +) + +logger = logging.getLogger(__name__) + +plugins_bp = Blueprint('plugins', __name__) + +# Keys that must never be written to config.json +SECRETS_KEYS = ['openrouter_api_key', 'gemini_api_key', 'google_books_api_key', + 'abs_api_token', 'bookdb_api_key', 'webhook_secret'] + +# Auto-disable threshold: consecutive failures before a plugin is auto-disabled +AUTO_DISABLE_THRESHOLD = 5 + + +def _slugify(name): + """Convert a layer name to a safe layer_id slug.""" + slug = name.lower().strip() + slug = re.sub(r'[^a-z0-9]+', '_', slug) + slug = slug.strip('_') + return slug or 'custom_layer' + + +def _save_config_safe(config): + """Save config.json, stripping secret keys.""" + config_only = {k: v for k, v in config.items() if k not in SECRETS_KEYS} + with open(CONFIG_PATH, 'w') as f: + json.dump(config_only, f, indent=2) + + +# ============== PLUGIN METRICS DATABASE ============== + +def init_plugin_metrics_table(db_path): + """Create plugin_metrics table. Called from database.py init_db().""" + conn = sqlite3.connect(db_path, timeout=30) + c = conn.cursor() + + c.execute('''CREATE TABLE IF NOT EXISTS plugin_metrics ( + id INTEGER PRIMARY KEY, + plugin_id TEXT NOT NULL, + timestamp REAL NOT NULL, + success INTEGER DEFAULT 0, + duration_ms INTEGER, + error_message TEXT, + items_processed INTEGER DEFAULT 0, + items_resolved INTEGER DEFAULT 0 + )''') + + c.execute('''CREATE INDEX IF NOT EXISTS idx_plugin_metrics_plugin + ON plugin_metrics(plugin_id, timestamp)''') + + conn.commit() + conn.close() + + +def record_plugin_metric(get_db, plugin_id, success, duration_ms, + error_message=None, items_processed=0, items_resolved=0): + """Record a single plugin execution metric. + + Fast INSERT only - no aggregation on write path. + Also checks for consecutive failures and triggers auto-disable if needed. + + Args: + get_db: Callable that returns a database connection + plugin_id: The custom layer's layer_id + success: Whether the run succeeded (bool) + duration_ms: Execution time in milliseconds + error_message: Error text if failed (truncated to 1000 chars) + items_processed: Number of items processed this run + items_resolved: Number of items resolved this run + + Returns: + True if the plugin was auto-disabled due to consecutive failures + """ + auto_disabled = False + try: + conn = get_db() + c = conn.cursor() + c.execute('''INSERT INTO plugin_metrics + (plugin_id, timestamp, success, duration_ms, error_message, + items_processed, items_resolved) + VALUES (?, ?, ?, ?, ?, ?, ?)''', + (plugin_id, time.time(), 1 if success else 0, duration_ms, + (error_message or '')[:1000] if error_message else None, + items_processed, items_resolved)) + conn.commit() + + # Check for consecutive failures if this run failed + if not success: + c.execute('''SELECT success FROM plugin_metrics + WHERE plugin_id = ? 
+                         ORDER BY timestamp DESC
+                         LIMIT ?''', (plugin_id, AUTO_DISABLE_THRESHOLD))
+            recent = c.fetchall()
+            if (len(recent) >= AUTO_DISABLE_THRESHOLD and
+                    all(row[0] == 0 for row in recent)):
+                # Auto-disable the plugin
+                auto_disabled = _auto_disable_plugin(plugin_id)
+
+        conn.close()
+    except Exception as e:
+        logger.error(f"[PLUGINS] Failed to record metric for {plugin_id}: {e}")
+
+    return auto_disabled
+
+
+def _auto_disable_plugin(plugin_id):
+    """Auto-disable a plugin after consecutive failures.
+
+    Sets auto_disabled: true in the layer config and saves to config.json.
+
+    Returns:
+        True if the plugin was disabled, False if not found or already disabled
+    """
+    try:
+        config = load_config()
+        custom_layers = config.get('custom_layers', [])
+
+        for layer in custom_layers:
+            if layer.get('layer_id') == plugin_id:
+                if layer.get('auto_disabled'):
+                    return False  # Already disabled
+                layer['auto_disabled'] = True
+                layer['enabled'] = False
+                config['custom_layers'] = custom_layers
+                _save_config_safe(config)
+                logger.warning(
+                    f"[PLUGINS] Auto-disabled plugin '{plugin_id}' after "
+                    f"{AUTO_DISABLE_THRESHOLD} consecutive failures"
+                )
+                return True
+
+        return False
+    except Exception as e:
+        logger.error(f"[PLUGINS] Failed to auto-disable {plugin_id}: {e}")
+        return False
+
+
+# ============== CRUD ROUTES ==============
+
+@plugins_bp.route('/api/plugins/layers')
+def api_plugins_list():
+    """List all custom layers from config."""
+    config = load_config()
+    layers = config.get('custom_layers', [])
+    return jsonify({'success': True, 'layers': layers})
+
+
+@plugins_bp.route('/api/plugins/save-layer', methods=['POST'])
+def api_plugins_save():
+    """Save or update a custom layer in config.json."""
+    data = request.get_json()
+    if not data:
+        return jsonify({'success': False, 'error': 'No data provided'}), 400
+
+    layer = data.get('layer', {})
+    if not layer.get('layer_name'):
+        return jsonify({'success': False, 'error': 'Layer name is required'}), 400
+    if not layer.get('url_template'):
+        return jsonify({'success': False, 'error': 'URL template is required'}), 400
+
+    # Auto-generate layer_id from name if not provided
+    if not layer.get('layer_id'):
+        layer['layer_id'] = _slugify(layer['layer_name'])
+
+    # Ensure defaults
+    layer.setdefault('enabled', True)
+    layer.setdefault('method', 'GET')
+    layer.setdefault('timeout', 10)
+    layer.setdefault('source_weight', 55)
+    layer.setdefault('order', 35)
+    layer.setdefault('on_error', 'skip')
+    layer.setdefault('response_mapping', {})
+    layer.setdefault('request_fields', ['title', 'author'])
+    layer.setdefault('circuit_breaker', {'max_failures': 3, 'cooldown': 300})
+
+    # Clamp timeout and source weight to sane ranges
+    layer['timeout'] = max(1, min(int(layer.get('timeout', 10)), 60))
+    layer['source_weight'] = max(0, min(int(layer.get('source_weight', 55)), 100))
+
+    try:
+        config = load_config()
+        custom_layers = config.get('custom_layers', [])
+
+        # Check if updating existing layer
+        existing_idx = None
+        for i, existing in enumerate(custom_layers):
+            if existing.get('layer_id') == layer['layer_id']:
+                existing_idx = i
+                break
+
+        if existing_idx is not None:
+            custom_layers[existing_idx] = layer
+        else:
+            custom_layers.append(layer)
+
+        config['custom_layers'] = custom_layers
+        _save_config_safe(config)
+
+        return jsonify({'success': True, 'layer_id': layer['layer_id']})
+    except Exception as e:
+        logger.error(f"[PLUGINS] Failed to save layer: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@plugins_bp.route('/api/plugins/layer/<layer_id>', methods=['DELETE'])
+def 
api_plugins_delete(layer_id): + """Remove a custom layer from config.json.""" + try: + config = load_config() + custom_layers = config.get('custom_layers', []) + original_count = len(custom_layers) + + custom_layers = [l for l in custom_layers if l.get('layer_id') != layer_id] + + if len(custom_layers) == original_count: + return jsonify({'success': False, 'error': f'Layer "{layer_id}" not found'}), 404 + + config['custom_layers'] = custom_layers + _save_config_safe(config) + + return jsonify({'success': True}) + except Exception as e: + logger.error(f"[PLUGINS] Failed to delete layer {layer_id}: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + +@plugins_bp.route('/api/plugins/toggle-layer', methods=['POST']) +def api_plugins_toggle(): + """Toggle a custom layer's enabled state.""" + data = request.get_json() + if not data or not data.get('layer_id'): + return jsonify({'success': False, 'error': 'No layer_id provided'}), 400 + + layer_id = data['layer_id'] + enabled = bool(data.get('enabled', True)) + + try: + config = load_config() + custom_layers = config.get('custom_layers', []) + + found = False + for layer in custom_layers: + if layer.get('layer_id') == layer_id: + layer['enabled'] = enabled + found = True + break + + if not found: + return jsonify({'success': False, 'error': f'Layer "{layer_id}" not found'}), 404 + + config['custom_layers'] = custom_layers + _save_config_safe(config) + + return jsonify({'success': True, 'enabled': enabled}) + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + + +@plugins_bp.route('/api/plugins/test-layer', methods=['POST']) +def api_plugins_test(): + """Test a custom layer config by making the actual API call. + + Receives a layer config and test book data (title/author), + makes the HTTP request server-side, and returns the results. 
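+
+    Illustrative request body (a sketch; the ``layer`` object uses the same
+    shape accepted by the save-layer route, and ``test_title``/``test_author``
+    fall back to the defaults below when omitted)::
+
+        {
+            "layer": {
+                "layer_name": "My API",
+                "url_template": "https://api.example.com/lookup",
+                "method": "GET",
+                "response_mapping": {"title": "$.title"}
+            },
+            "test_title": "The Final Empire",
+            "test_author": "Brandon Sanderson"
+        }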
+ """ + data = request.get_json() + if not data: + return jsonify({'success': False, 'error': 'No data provided'}), 400 + + layer = data.get('layer', {}) + test_title = data.get('test_title', 'The Final Empire') + test_author = data.get('test_author', 'Brandon Sanderson') + + url_template = layer.get('url_template', '') + if not url_template: + return jsonify({'success': False, 'error': 'No URL template provided'}), 400 + + method = layer.get('method', 'GET').upper() + timeout = max(1, min(int(layer.get('timeout', 10)), 60)) + + # Build template context from test data + context = { + 'title': test_title, + 'author': test_author, + 'narrator': '', + 'path': '/test/path', + 'isbn': '', + } + + # Substitute URL template + url = _substitute_url_template(url_template, context) + + # Build auth headers + secrets = load_secrets() + auth_config = layer.get('auth') + auth_headers = _build_auth_headers(auth_config, secrets) + + headers = dict(auth_headers) + headers['Accept'] = 'application/json' + + result = { + 'success': True, + 'url': url, + 'method': method, + 'status_code': None, + 'response_time_ms': None, + 'mapped_fields': {}, + 'raw_response': None, + 'error': None, + } + + start = time.monotonic() + try: + if method == 'POST': + body = {f: context.get(f, '') for f in layer.get('request_fields', ['title', 'author'])} + headers.setdefault('Content-Type', 'application/json') + resp = http_requests.post(url, json=body, headers=headers, timeout=timeout) + else: + resp = http_requests.get(url, headers=headers, timeout=timeout) + + duration_ms = int((time.monotonic() - start) * 1000) + result['status_code'] = resp.status_code + result['response_time_ms'] = duration_ms + + # Try to parse JSON response + try: + resp_data = resp.json() + # Truncate raw response for display (max 2000 chars) + raw_str = json.dumps(resp_data, indent=2) + result['raw_response'] = raw_str[:2000] + ('...' if len(raw_str) > 2000 else '') + + # Apply response mappings + response_mapping = layer.get('response_mapping', {}) + for field_name, jsonpath in response_mapping.items(): + value = extract_jsonpath(resp_data, jsonpath) + if value is not None: + result['mapped_fields'][field_name] = str(value) + else: + result['mapped_fields'][field_name] = None + + except (ValueError, json.JSONDecodeError): + result['raw_response'] = resp.text[:500] + result['error'] = 'Response is not valid JSON' + + if not (200 <= resp.status_code < 300): + result['success'] = False + result['error'] = f'HTTP {resp.status_code}' + + except http_requests.Timeout: + duration_ms = int((time.monotonic() - start) * 1000) + result['success'] = False + result['response_time_ms'] = duration_ms + result['error'] = f'Request timed out after {timeout}s' + + except http_requests.ConnectionError as e: + duration_ms = int((time.monotonic() - start) * 1000) + result['success'] = False + result['response_time_ms'] = duration_ms + result['error'] = f'Connection error: {str(e)[:200]}' + + except Exception as e: + duration_ms = int((time.monotonic() - start) * 1000) + result['success'] = False + result['response_time_ms'] = duration_ms + result['error'] = f'Error: {str(e)[:200]}' + + return jsonify(result) + + +# ============== HEALTH DASHBOARD API (Issue #189) ============== + +@plugins_bp.route('/api/plugins/health') +def api_plugins_health(): + """Return aggregated health stats for all custom plugins. 
+
+    For each plugin returns:
+    - success_rate (from last 50 runs)
+    - avg_duration_ms
+    - last_run timestamp
+    - total_processed, total_resolved
+    - status: active / errored / disabled / auto-disabled / no_data
+    - recent_errors (last 5 error messages)
+    """
+    from library_manager.database import get_db
+
+    config = load_config()
+    custom_layers = config.get('custom_layers', [])
+
+    if not custom_layers:
+        return jsonify({'success': True, 'plugins': []})
+
+    try:
+        conn = get_db()
+        c = conn.cursor()
+        plugins_health = []
+
+        for layer in custom_layers:
+            plugin_id = layer.get('layer_id', '')
+            if not plugin_id:
+                continue
+
+            # Get last 50 runs for success rate
+            c.execute('''SELECT success, duration_ms, timestamp, error_message,
+                                items_processed, items_resolved
+                         FROM plugin_metrics
+                         WHERE plugin_id = ?
+                         ORDER BY timestamp DESC
+                         LIMIT 50''', (plugin_id,))
+            recent_rows = c.fetchall()
+
+            if not recent_rows:
+                plugins_health.append({
+                    'plugin_id': plugin_id,
+                    'plugin_name': layer.get('layer_name', plugin_id),
+                    'enabled': layer.get('enabled', True),
+                    'auto_disabled': layer.get('auto_disabled', False),
+                    'status': 'no_data',
+                    'success_rate': None,
+                    'avg_duration_ms': None,
+                    'last_run': None,
+                    'total_processed': 0,
+                    'total_resolved': 0,
+                    'recent_errors': [],
+                })
+                continue
+
+            total_runs = len(recent_rows)
+            successes = sum(1 for r in recent_rows if r[0])
+            success_rate = round(successes / total_runs * 100, 1) if total_runs else 0
+
+            durations = [r[1] for r in recent_rows if r[1] is not None]
+            avg_duration = int(sum(durations) / len(durations)) if durations else 0
+
+            last_run = recent_rows[0][2] if recent_rows else None
+
+            total_processed = sum(r[4] or 0 for r in recent_rows)
+            total_resolved = sum(r[5] or 0 for r in recent_rows)
+
+            # Recent errors (last 5 non-null)
+            recent_errors = []
+            for r in recent_rows:
+                if r[3] and len(recent_errors) < 5:
+                    recent_errors.append({
+                        'message': r[3],
+                        'timestamp': r[2],
+                    })
+
+            # Determine status
+            auto_disabled = layer.get('auto_disabled', False)
+            enabled = layer.get('enabled', True)
+            if auto_disabled:
+                status = 'auto-disabled'
+            elif not enabled:
+                status = 'disabled'
+            elif success_rate < 50:
+                status = 'errored'
+            else:
+                status = 'active'
+
+            plugins_health.append({
+                'plugin_id': plugin_id,
+                'plugin_name': layer.get('layer_name', plugin_id),
+                'enabled': enabled,
+                'auto_disabled': auto_disabled,
+                'status': status,
+                'success_rate': success_rate,
+                'avg_duration_ms': avg_duration,
+                'last_run': last_run,
+                'total_processed': total_processed,
+                'total_resolved': total_resolved,
+                'recent_errors': recent_errors,
+            })
+
+        conn.close()
+        return jsonify({'success': True, 'plugins': plugins_health})
+
+    except Exception as e:
+        logger.error(f"[PLUGINS] Health check error: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@plugins_bp.route('/api/plugins/health/<plugin_id>/logs')
+def api_plugins_health_logs(plugin_id):
+    """Return last 20 metric entries for a specific plugin."""
+    from library_manager.database import get_db
+
+    try:
+        conn = get_db()
+        c = conn.cursor()
+        c.execute('''SELECT id, plugin_id, timestamp, success, duration_ms,
+                            error_message, items_processed, items_resolved
+                     FROM plugin_metrics
+                     WHERE plugin_id = ? 
+                     ORDER BY timestamp DESC
+                     LIMIT 20''', (plugin_id,))
+        rows = c.fetchall()
+        conn.close()
+
+        entries = []
+        for r in rows:
+            entries.append({
+                'id': r[0],
+                'plugin_id': r[1],
+                'timestamp': r[2],
+                'success': bool(r[3]),
+                'duration_ms': r[4],
+                'error_message': r[5],
+                'items_processed': r[6],
+                'items_resolved': r[7],
+            })
+
+        return jsonify({'success': True, 'entries': entries})
+
+    except Exception as e:
+        logger.error(f"[PLUGINS] Health logs error for {plugin_id}: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@plugins_bp.route('/api/plugins/health/<plugin_id>/reset', methods=['POST'])
+def api_plugins_health_reset(plugin_id):
+    """Reset failure count and re-enable an auto-disabled plugin.
+
+    Clears the auto_disabled flag in config and removes recent failure
+    metrics so the consecutive failure counter starts fresh.
+    """
+    try:
+        config = load_config()
+        custom_layers = config.get('custom_layers', [])
+
+        found = False
+        for layer in custom_layers:
+            if layer.get('layer_id') == plugin_id:
+                layer['auto_disabled'] = False
+                layer['enabled'] = True
+                found = True
+                break
+
+        if not found:
+            return jsonify({'success': False, 'error': f'Plugin "{plugin_id}" not found'}), 404
+
+        config['custom_layers'] = custom_layers
+        _save_config_safe(config)
+
+        logger.info(f"[PLUGINS] Reset and re-enabled plugin '{plugin_id}'")
+        return jsonify({'success': True, 'enabled': True})
+
+    except Exception as e:
+        logger.error(f"[PLUGINS] Reset error for {plugin_id}: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
diff --git a/library_manager/providers/bookdb.py b/library_manager/providers/bookdb.py
index 83e7345..d003733 100644
--- a/library_manager/providers/bookdb.py
+++ b/library_manager/providers/bookdb.py
@@ -15,6 +15,7 @@
 import logging
 import subprocess
 import tempfile
+import threading
 
 import requests
 from pathlib import Path
@@ -33,6 +34,22 @@
 logger = logging.getLogger(__name__)
 
+# Issue #208: Skaldleita can signal "stop retrying this task" via a server_notice
+# in the JSON response. We stash the notice in a thread-local so the caller
+# (e.g. the watch-folder worker) can pick it up and mark the item as aborted
+# without a 30-second retry loop. Thread-local keeps the signal scoped to the
+# thread that issued the matching request.
+_abort_state = threading.local()
+
+
+def get_and_clear_server_abort():
+    """Return (and clear) the last server_notice with action=abort_task seen
+    on this thread, or None. Safe to call when none was set."""
+    notice = getattr(_abort_state, 'notice', None)
+    if notice is not None:
+        _abort_state.notice = None
+    return notice
+
 # Skaldleita API endpoint (our metadata service, legacy name: BookDB)
 BOOKDB_API_URL = "https://bookdb.deucebucket.com"  # URL unchanged for backwards compatibility
 # Public API key for Library Manager users (no config needed)
@@ -168,6 +185,21 @@ def search_bookdb(title, author=None, api_key=None, retry_count=0, bookdb_url=No
         data = resp.json()
 
+        # Issue #208: honor Skaldleita server_notice. Log every notice; on
+        # action=abort_task, stash in thread-local so the watch-folder worker
+        # can stop retrying instead of hammering /match every 30s.
+ notice = data.get('server_notice') + if notice: + code = notice.get('code', 'unknown') + msg = notice.get('message', '') + upgrade_url = notice.get('upgrade_url') + severity = notice.get('severity', 'info') + logger.warning(f"[SKALDLEITA] server notice ({severity}) [{code}]: {msg}") + if upgrade_url: + logger.warning(f"[SKALDLEITA] upgrade: {upgrade_url}") + if notice.get('action') == 'abort_task': + _abort_state.notice = notice + # Check confidence threshold if data.get('confidence', 0) < 0.5: logger.debug(f"Skaldleita match below confidence threshold: {data.get('confidence')}") diff --git a/library_manager/utils/naming.py b/library_manager/utils/naming.py index 92d1511..98c18b2 100644 --- a/library_manager/utils/naming.py +++ b/library_manager/utils/naming.py @@ -113,7 +113,7 @@ def clean_search_title(messy_name): # Remove file extensions clean = re.sub(r'\.(mp3|m4b|m4a|epub|pdf|mobi|webm|opus)$', '', clean, flags=re.IGNORECASE) # Remove "by Author" at the end temporarily for searching - clean = re.sub(r'\s+by\s+[\w\s]+$', '', clean, flags=re.IGNORECASE) + clean = re.sub(r'\s+by\s+[\w\s.\'\-]+(?:\s*\([^)]*\))*$', '', clean, flags=re.IGNORECASE) # Remove audiobook-related junk (YouTube rip artifacts) clean = re.sub(r'\b(full\s+)?audiobook\b', '', clean, flags=re.IGNORECASE) clean = re.sub(r'\b(complete|unabridged|abridged)\b', '', clean, flags=re.IGNORECASE) diff --git a/library_manager/worker.py b/library_manager/worker.py index 25a8634..1e9a5a4 100644 --- a/library_manager/worker.py +++ b/library_manager/worker.py @@ -143,6 +143,11 @@ def process_all_queue( Philosophy: The audio content IS the book. Folder names can be wrong. + When config['use_modular_pipeline'] is True, delegates to + PipelineOrchestrator instead of running the hardcoded sequence below. + The orchestrator uses the same layer functions via adapter objects + and produces identical behavior, but is configurable via pipeline_order. + Args: config: Configuration dict get_db: Function to get database connection. 
Returns a new @@ -162,6 +167,33 @@ def process_all_queue( Returns: Tuple of (total_processed, total_fixed) """ + # Feature flag: use modular pipeline orchestrator when enabled + if config.get('use_modular_pipeline', False): + from library_manager.pipeline.orchestrator import PipelineOrchestrator + from library_manager.pipeline.adapters import build_default_adapters + from library_manager.pipeline.registry import default_registry + + deps = { + 'get_db': get_db, + 'load_config': load_config, + 'is_circuit_open': is_circuit_open, + 'get_circuit_breaker': get_circuit_breaker, + 'check_rate_limit': check_rate_limit, + 'process_layer_1_audio': process_layer_1_audio, + 'process_layer_3_audio': process_layer_3_audio, + 'process_layer_1_api': process_layer_1_api, + 'process_queue': process_queue, + 'process_sl_requeue_verification': process_sl_requeue_verification, + } + orchestrator = PipelineOrchestrator( + registry=default_registry, + adapters=build_default_adapters(), + config=config, + deps=deps, + ) + return orchestrator.run_pipeline() + + # === Legacy hardcoded pipeline (default, unchanged) === global _processing_status conn = get_db() diff --git a/static/css/style.css b/static/css/style.css new file mode 100644 index 0000000..844e036 --- /dev/null +++ b/static/css/style.css @@ -0,0 +1,1110 @@ +/* ===== THEME SYSTEM - CSS Variables ===== */ +:root, +[data-theme="default"] { + /* Background colors */ + --theme-bg-primary: #1a1a2e; + --theme-bg-secondary: #16213e; + --theme-bg-card: rgba(22, 33, 62, 0.8); + --theme-bg-navbar: rgba(15, 52, 96, 0.9); + --theme-bg-header: rgba(15, 52, 96, 0.5); + /* Accent colors */ + --theme-accent-primary: #e94560; + --theme-accent-primary-hover: #d63d56; + --theme-accent-secondary: #00d9ff; + --theme-accent-warning: #ffc107; + --theme-accent-success: #2ecc71; + /* Text colors */ + --theme-text-primary: #eee; + --theme-text-secondary: rgba(255, 255, 255, 0.6); + --theme-text-link: #00d9ff; + /* Border colors */ + --theme-border-primary: rgba(15, 52, 96, 0.5); + --theme-border-accent: rgba(0, 217, 255, 0.3); + /* Gradients */ + --theme-gradient-bg: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); + --theme-gradient-stat: linear-gradient(45deg, #e94560, #00d9ff); + --theme-gradient-progress: linear-gradient(90deg, #e94560, #00d9ff); + /* Fonts - Bootstrap system default */ + --theme-font-heading: system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; + --theme-font-body: system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; + /* Shadows */ + --theme-shadow-glow: rgba(0, 217, 255, 0.1); + --theme-shadow-card: rgba(0, 0, 0, 0.5); + /* Spacing scale */ + --space-xs: 0.25rem; + --space-sm: 0.5rem; + --space-md: 1rem; + --space-lg: 1.5rem; + --space-xl: 2rem; + /* Border radius */ + --radius-sm: 4px; + --radius-md: 8px; + --radius-lg: 12px; + /* Transitions */ + --transition-fast: 150ms ease; + --transition-normal: 250ms ease; +} + +/* ===== SKALDLEITA THEME - Norse Gold ===== */ +[data-theme="skaldleita"] { + /* Background colors - darker, more dramatic */ + --theme-bg-primary: #0a0c10; + --theme-bg-secondary: #12151c; + --theme-bg-card: rgba(18, 21, 28, 0.9); + --theme-bg-navbar: rgba(26, 30, 40, 0.95); + --theme-bg-header: rgba(26, 30, 40, 0.7); + /* Accent colors - gold and ice blue */ + --theme-accent-primary: #c9a55c; + --theme-accent-primary-hover: #e8c87d; + --theme-accent-secondary: #7eb8da; + --theme-accent-warning: #c9a55c; + --theme-accent-success: #7eb8da; + /* Text colors */ + 
--theme-text-primary: #e8e6e3; + --theme-text-secondary: #9ca3af; + --theme-text-link: #c9a55c; + /* Border colors */ + --theme-border-primary: rgba(201, 165, 92, 0.2); + --theme-border-accent: rgba(201, 165, 92, 0.4); + /* Gradients */ + --theme-gradient-bg: linear-gradient(135deg, #0a0c10 0%, #12151c 100%); + --theme-gradient-stat: linear-gradient(45deg, #c9a55c, #7eb8da); + --theme-gradient-progress: linear-gradient(90deg, #c9a55c, #7eb8da); + /* Fonts - Norse-inspired */ + --theme-font-heading: 'Cinzel', serif; + --theme-font-body: 'Inter', sans-serif; + /* Shadows - warmer glow */ + --theme-shadow-glow: rgba(201, 165, 92, 0.15); + --theme-shadow-card: rgba(0, 0, 0, 0.6); +} + +/* ===== Apply theme variables ===== */ +:root { + --bs-body-bg: var(--theme-bg-primary); + --bs-body-color: var(--theme-text-primary); + --bs-card-bg: var(--theme-bg-card); + --bs-border-color: var(--theme-border-primary); +} +body { + background: var(--theme-gradient-bg); + min-height: 100vh; + font-family: var(--theme-font-body); + color: var(--theme-text-primary); +} +h1, h2, h3, h4, h5, h6, .navbar-brand { + font-family: var(--theme-font-heading); +} +.navbar { + background: var(--theme-bg-navbar) !important; + backdrop-filter: blur(10px); +} +.card { + background: var(--theme-bg-card); + border: 1px solid var(--theme-border-primary); + border-radius: var(--radius-lg); + transition: border-color var(--transition-normal), box-shadow var(--transition-normal); +} +.card:hover { + border-color: var(--theme-border-accent); + box-shadow: 0 4px 20px var(--theme-shadow-glow); +} +.card-header { + background: var(--theme-bg-header); + border-bottom: 1px solid var(--theme-border-primary); +} +.table { + --bs-table-bg: transparent; + --bs-table-color: var(--theme-text-primary); +} +.btn-primary { + background: var(--theme-accent-primary); + border-color: var(--theme-accent-primary); +} +.btn-primary:hover { + background: var(--theme-accent-primary-hover); + border-color: var(--theme-accent-primary-hover); +} +.btn-success { + background: var(--theme-accent-secondary); + border-color: var(--theme-accent-secondary); + color: #000; +} +.stat-card { + transition: transform 0.2s; +} +.stat-card:hover { + transform: translateY(-5px); +} +.stat-number { + font-size: 2.5rem; + font-weight: bold; + background: var(--theme-gradient-stat); + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; +} +.badge-fix { + background: var(--theme-accent-primary); +} +.badge-ok { + background: var(--theme-accent-secondary); + color: #000; +} +.badge-pending { + background: var(--theme-accent-warning); + color: #000; +} +.status-indicator { + display: inline-block; + width: 10px; + height: 10px; + border-radius: 50%; + margin-right: 8px; +} +.status-running { + background: var(--theme-accent-success); + box-shadow: 0 0 10px var(--theme-accent-success); + animation: pulse 2s infinite; +} +.status-stopped { + background: #ff0000; +} +@keyframes pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.5; } +} +.nav-link { + color: rgba(255,255,255,0.8) !important; +} +.nav-link:hover, .nav-link.active { + color: var(--theme-text-link) !important; +} +.text-muted { + color: var(--theme-text-secondary) !important; +} + +/* ===== SKALDLEITA THEME - Comprehensive Color Overrides ===== */ +/* All Bootstrap text utilities -> Gold */ +[data-theme="skaldleita"] .text-primary, +[data-theme="skaldleita"] .text-info, +[data-theme="skaldleita"] .text-success, +[data-theme="skaldleita"] .text-warning, +[data-theme="skaldleita"] .text-danger { 
+ color: #c9a55c !important; +} +[data-theme="skaldleita"] .text-secondary { + color: #8b7340 !important; +} +/* ALL icons become gold */ +[data-theme="skaldleita"] i.bi, +[data-theme="skaldleita"] .bi { + color: #c9a55c !important; +} +/* Headings get gold accent */ +[data-theme="skaldleita"] h1, +[data-theme="skaldleita"] h2, +[data-theme="skaldleita"] h3, +[data-theme="skaldleita"] h4, +[data-theme="skaldleita"] h5, +[data-theme="skaldleita"] .card-header { + color: #c9a55c !important; +} +/* Card header text */ +[data-theme="skaldleita"] .card-header * { + color: #c9a55c !important; +} +/* Navbar brand gold */ +[data-theme="skaldleita"] .navbar-brand { + color: #c9a55c !important; +} +/* Links gold */ +[data-theme="skaldleita"] a { + color: #c9a55c !important; +} +[data-theme="skaldleita"] a:hover { + color: #e8c87d !important; +} +/* Nav links */ +[data-theme="skaldleita"] .nav-link { + color: #9ca3af !important; +} +[data-theme="skaldleita"] .nav-link:hover, +[data-theme="skaldleita"] .nav-link.active { + color: #c9a55c !important; +} +/* Background colors - all use gold */ +[data-theme="skaldleita"] .bg-primary, +[data-theme="skaldleita"] .bg-info, +[data-theme="skaldleita"] .bg-success, +[data-theme="skaldleita"] .bg-warning, +[data-theme="skaldleita"] .bg-danger { + background-color: #c9a55c !important; + color: #0a0c10 !important; +} +/* Badges - gold with dark text */ +[data-theme="skaldleita"] .badge { + background-color: #c9a55c !important; + color: #0a0c10 !important; + font-weight: 500; +} +/* Buttons */ +[data-theme="skaldleita"] .btn-primary, +[data-theme="skaldleita"] .btn-success, +[data-theme="skaldleita"] .btn-info { + background-color: #c9a55c !important; + border-color: #c9a55c !important; + color: #0a0c10 !important; +} +[data-theme="skaldleita"] .btn-primary:hover, +[data-theme="skaldleita"] .btn-success:hover, +[data-theme="skaldleita"] .btn-info:hover { + background-color: #e8c87d !important; + border-color: #e8c87d !important; +} +[data-theme="skaldleita"] .btn-outline-primary, +[data-theme="skaldleita"] .btn-outline-success, +[data-theme="skaldleita"] .btn-outline-info, +[data-theme="skaldleita"] .btn-outline-secondary, +[data-theme="skaldleita"] .btn-outline-warning { + color: #c9a55c !important; + border-color: #c9a55c !important; +} +[data-theme="skaldleita"] .btn-outline-primary:hover, +[data-theme="skaldleita"] .btn-outline-success:hover, +[data-theme="skaldleita"] .btn-outline-info:hover, +[data-theme="skaldleita"] .btn-outline-secondary:hover, +[data-theme="skaldleita"] .btn-outline-warning:hover { + background-color: #c9a55c !important; + color: #0a0c10 !important; +} +/* Status bar elements */ +[data-theme="skaldleita"] .status-book, +[data-theme="skaldleita"] .status-author { + color: #c9a55c !important; +} +[data-theme="skaldleita"] .status-layer { + background: rgba(201, 165, 92, 0.2) !important; + color: #c9a55c !important; +} +[data-theme="skaldleita"] .status-stat-value { + color: #c9a55c !important; +} +/* Stat numbers gradient -> solid gold */ +[data-theme="skaldleita"] .stat-number { + background: none !important; + -webkit-background-clip: unset !important; + -webkit-text-fill-color: #c9a55c !important; + color: #c9a55c !important; +} +/* Form focus states */ +[data-theme="skaldleita"] .form-control:focus, +[data-theme="skaldleita"] .form-select:focus { + border-color: #c9a55c !important; + box-shadow: 0 0 0 0.2rem rgba(201, 165, 92, 0.25) !important; +} +[data-theme="skaldleita"] .form-check-input:checked { + background-color: #c9a55c 
!important; + border-color: #c9a55c !important; +} +/* Gold borders around elements */ +[data-theme="skaldleita"] .card { + border: 1px solid rgba(201, 165, 92, 0.3) !important; +} +[data-theme="skaldleita"] .card-header { + border-bottom: 1px solid rgba(201, 165, 92, 0.3) !important; +} +[data-theme="skaldleita"] .form-control, +[data-theme="skaldleita"] .form-select { + border-color: rgba(201, 165, 92, 0.3) !important; +} +[data-theme="skaldleita"] .table { + border-color: rgba(201, 165, 92, 0.2) !important; +} +[data-theme="skaldleita"] .table > :not(caption) > * > * { + border-color: rgba(201, 165, 92, 0.15) !important; +} +[data-theme="skaldleita"] .navbar { + border-bottom: 1px solid rgba(201, 165, 92, 0.2) !important; +} +[data-theme="skaldleita"] .status-bar { + border-bottom: 1px solid rgba(201, 165, 92, 0.3) !important; +} +[data-theme="skaldleita"] .alert { + border-color: rgba(201, 165, 92, 0.3) !important; +} +[data-theme="skaldleita"] .list-group-item { + border-color: rgba(201, 165, 92, 0.2) !important; +} +[data-theme="skaldleita"] hr { + border-color: rgba(201, 165, 92, 0.2) !important; + opacity: 1; +} +[data-theme="skaldleita"] .modal-content { + border: 1px solid rgba(201, 165, 92, 0.3) !important; +} +[data-theme="skaldleita"] .dropdown-menu { + border: 1px solid rgba(201, 165, 92, 0.3) !important; +} +/* Skaldleita-specific: rune divider */ +[data-theme="skaldleita"] .rune-divider { + text-align: center; + color: var(--theme-accent-primary); + opacity: 0.3; + font-size: 0.8rem; + letter-spacing: 0.5em; + margin: var(--space-md) 0; +} +[data-theme="skaldleita"] .rune-divider::before { + content: "\16A0 \16A2 \16A6 \16A8 \16B1 \16B2"; +} +/* Theme-aware form controls */ +.form-select, .form-control { + transition: border-color 0.2s, box-shadow 0.2s; +} +.form-select:focus, .form-control:focus { + border-color: var(--theme-accent-primary); + box-shadow: 0 0 0 0.2rem rgba(var(--theme-accent-primary-rgb, 233, 69, 96), 0.25); +} +.form-check-input:checked { + background-color: var(--theme-accent-primary); + border-color: var(--theme-accent-primary); +} +/* Theme-aware links */ +a { + color: var(--theme-text-link); +} +a:hover { + color: var(--theme-accent-primary-hover); +} +/* Alert styling */ +.alert-dark { + background: var(--theme-bg-card); + border-color: var(--theme-border-primary); + color: var(--theme-text-primary); +} +/* Skaldleita theme enhancements */ +[data-theme="skaldleita"] .navbar-brand { + font-weight: 600; + letter-spacing: 0.05em; +} +[data-theme="skaldleita"] .card-header { + font-family: var(--theme-font-heading); + font-weight: 500; + letter-spacing: 0.02em; +} + +/* ===== Book Info Card (Hover Preview) ===== */ +.book-hover-card { + position: absolute; + z-index: 1000; + width: 380px; + background: var(--theme-gradient-bg); + border: 1px solid var(--theme-border-accent); + border-radius: var(--radius-lg); + box-shadow: 0 8px 32px var(--theme-shadow-card), 0 0 20px var(--theme-shadow-glow); + padding: 0; + display: none; + overflow: hidden; +} +.book-hover-card.show { + display: block; + animation: fadeIn 0.2s ease; +} +@keyframes fadeIn { + from { opacity: 0; transform: translateY(-10px); } + to { opacity: 1; transform: translateY(0); } +} +.book-hover-card .card-header { + background: rgba(var(--theme-accent-secondary-rgb, 0, 217, 255), 0.1); + padding: 12px 15px; + border-bottom: 1px solid var(--theme-border-accent); +} +.book-hover-card .card-body { + padding: 15px; +} +.book-hover-card .card-header .d-flex { + min-width: 0; +} +.book-hover-card 
#hover-cover-container { + flex-shrink: 0; +} +.book-hover-card .card-header .flex-grow-1 { + min-width: 0; + overflow: hidden; +} +.book-hover-card .book-cover { + width: 80px; + height: 120px; + object-fit: cover; + border-radius: 6px; + border: 2px solid var(--theme-border-accent); + background: rgba(0, 0, 0, 0.3); +} +.book-hover-card .book-cover-placeholder { + width: 80px; + height: 120px; + background: var(--theme-bg-header); + border-radius: 6px; + display: flex; + align-items: center; + justify-content: center; + color: rgba(255,255,255,0.3); + font-size: 2rem; +} +.book-hover-card .book-author, +.book-hover-card .book-series { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.book-hover-card .book-title { + font-size: 1.1rem; + font-weight: 600; + color: var(--theme-text-primary); + margin-bottom: 4px; + overflow: hidden; + text-overflow: ellipsis; + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; +} +.book-hover-card .book-author { + color: var(--theme-accent-secondary); + font-size: 0.9rem; +} +.book-hover-card .book-series { + color: var(--theme-accent-warning); + font-size: 0.85rem; +} +.book-hover-card .book-meta { + font-size: 0.8rem; + color: var(--theme-text-secondary); +} +.book-hover-card .book-description { + font-size: 0.85rem; + color: rgba(255,255,255,0.8); + max-height: 80px; + overflow: hidden; + text-overflow: ellipsis; + line-height: 1.4; +} +.book-hover-card .book-tags { + display: flex; + flex-wrap: wrap; + gap: 4px; +} +.book-hover-card .book-tag { + font-size: 0.7rem; + padding: 2px 8px; + border-radius: 10px; + background: rgba(var(--theme-accent-secondary-rgb, 0, 217, 255), 0.15); + color: var(--theme-accent-secondary); +} +.book-hover-card .abs-status { + background: rgba(0,0,0,0.2); + border-radius: var(--radius-md); + padding: 10px; + margin-top: 10px; +} +.book-hover-card .abs-user { + display: flex; + align-items: center; + gap: 8px; + font-size: 0.85rem; + padding: 4px 0; +} +.book-hover-card .abs-user .progress { + flex: 1; + height: 6px; +} +.book-hover-card .click-hint { + text-align: center; + font-size: 0.75rem; + color: rgba(255,255,255,0.4); + padding: 8px; + border-top: 1px solid var(--theme-border-accent); +} + +/* ===== Search Result Cover Thumbnails ===== */ +.search-cover-container { + flex-shrink: 0; + width: 40px; + height: 60px; +} +.search-result-cover { + width: 40px; + height: 60px; + object-fit: cover; + border-radius: var(--radius-sm); + border: 1px solid var(--theme-border-accent); +} +.search-cover-placeholder { + width: 40px; + height: 60px; + background: var(--theme-bg-header); + border-radius: var(--radius-sm); + display: flex; + align-items: center; + justify-content: center; + color: rgba(255,255,255,0.3); + font-size: 1.2rem; +} +.search-result-item { + border-color: var(--theme-border-primary) !important; +} +.search-result-item:hover { + background: var(--theme-bg-header) !important; + border-color: var(--theme-border-accent) !important; +} + +/* ===== Live Status Bar ===== */ +.status-bar { + background: var(--theme-bg-navbar); + border-bottom: 1px solid var(--theme-border-accent); + padding: 8px 0; + margin-bottom: var(--space-md); + backdrop-filter: blur(10px); + transition: all 0.3s ease; +} +.status-bar.processing { + border-bottom-color: var(--theme-accent-primary); +} +.status-bar.idle { + opacity: 0.7; +} +.status-bar.idle:hover { + opacity: 1; +} +.status-bar-content { + display: flex; + align-items: center; + justify-content: space-between; + flex-wrap: 
wrap; + gap: 10px; +} +.status-main { + display: flex; + align-items: center; + gap: 12px; + flex: 1; + min-width: 0; +} +.status-icon { + font-size: 1.2rem; + width: 28px; + text-align: center; +} +.status-icon.processing { + color: var(--theme-accent-primary); + animation: pulse-icon 1.5s ease-in-out infinite; +} +.status-icon.idle { + color: var(--theme-accent-secondary); +} +.status-icon.stopped { + color: #6c757d; +} +@keyframes pulse-icon { + 0%, 100% { opacity: 1; transform: scale(1); } + 50% { opacity: 0.6; transform: scale(0.95); } +} +.status-text { + display: flex; + flex-direction: column; + min-width: 0; + flex: 1; +} +.status-primary { + font-size: 0.9rem; + font-weight: 500; + color: var(--theme-text-primary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} +.status-secondary { + font-size: 0.75rem; + color: var(--theme-text-secondary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} +.status-book { + color: var(--theme-accent-secondary); + font-weight: 500; +} +.status-author { + color: var(--theme-accent-warning); +} +.status-layer { + display: inline-flex; + align-items: center; + gap: 4px; + background: rgba(var(--theme-accent-primary-rgb, 233, 69, 96), 0.2); + color: var(--theme-accent-primary); + padding: 2px 8px; + border-radius: 10px; + font-size: 0.7rem; + font-weight: 600; + text-transform: uppercase; +} +.status-provider { + display: inline-flex; + align-items: center; + gap: 6px; + margin-left: 8px; +} +.provider-badge { + display: inline-flex; + align-items: center; + gap: 4px; + background: rgba(0, 200, 150, 0.2); + color: #00c896; + padding: 2px 8px; + border-radius: 10px; + font-size: 0.7rem; + font-weight: 600; +} +.provider-badge.paid { + background: rgba(255, 193, 7, 0.2); + color: #ffc107; +} +.provider-free-badge { + display: inline-flex; + align-items: center; + gap: 3px; + background: rgba(0, 200, 150, 0.15); + color: #00c896; + padding: 2px 6px; + border-radius: 8px; + font-size: 0.6rem; + font-weight: 700; + text-transform: uppercase; +} +.status-step { + color: var(--theme-text-secondary); + font-size: 0.75rem; + margin-left: 8px; + font-style: italic; +} +.status-meta { + display: flex; + align-items: center; + gap: 15px; + flex-shrink: 0; +} +.status-stat { + display: flex; + flex-direction: column; + align-items: center; + padding: 0 10px; + border-left: 1px solid rgba(255, 255, 255, 0.1); +} +.status-stat:first-child { + border-left: none; +} +.status-stat-value { + font-size: 1rem; + font-weight: 600; + color: var(--theme-accent-secondary); + line-height: 1; +} +.status-stat-label { + font-size: 0.65rem; + color: var(--theme-text-secondary); + text-transform: uppercase; +} +.status-progress { + height: 3px; + background: rgba(0, 0, 0, 0.3); + border-radius: 2px; + overflow: hidden; + margin-top: 4px; + width: 100%; + max-width: 200px; +} +.status-progress-bar { + height: 100%; + background: var(--theme-gradient-progress); + border-radius: 2px; + transition: width 0.5s ease; +} + +/* ===== Hint Icon (Settings & Library) ===== */ +.hint-icon { + display: inline-flex; + align-items: center; + justify-content: center; + width: 16px; + height: 16px; + border-radius: 50%; + border: 1px solid var(--theme-border-accent); + color: var(--theme-accent-secondary); + font-size: 10px; + font-weight: bold; + font-style: normal; + cursor: help; + margin-left: 4px; + position: relative; + vertical-align: middle; + line-height: 1; + flex-shrink: 0; +} +.hint-icon:hover { + border-color: 
var(--theme-accent-secondary); + color: var(--theme-accent-secondary); +} +.hint-icon .hint-text { + display: none; + position: absolute; + bottom: calc(100% + 8px); + left: 50%; + transform: translateX(-50%); + background: rgba(15, 52, 96, 0.95); + border: 1px solid var(--theme-border-accent); + color: #eee; + padding: var(--space-sm) var(--space-md); + border-radius: 6px; + font-size: 0.8rem; + font-weight: normal; + font-style: normal; + line-height: 1.4; + white-space: normal; + width: 280px; + z-index: 1000; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4); + pointer-events: none; +} +.hint-icon .hint-text::after { + content: ''; + position: absolute; + top: 100%; + left: 50%; + transform: translateX(-50%); + border: 6px solid transparent; + border-top-color: var(--theme-border-accent); +} +.hint-icon:hover .hint-text { + display: block; +} + +/* ===== Dashboard Activity Log ===== */ +#activity-log-table { table-layout: fixed; width: 100%; } +#activity-log-table td, #activity-log-table th { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + padding: 0.35rem 0.5rem; +} +#activity-log-table td.scroll-text:hover { + overflow: visible; +} +#activity-log-table td.scroll-text:hover span { + display: inline-block; + animation: marquee 4s linear infinite; +} +@keyframes marquee { + 0% { transform: translateX(0); } + 100% { transform: translateX(-50%); } +} + +/* ===== History Page ===== */ +.resizable-table th { resize: horizontal; overflow: auto; min-width: 60px; } +.resizable-table td { word-break: break-word; } + +/* ===== Library Page ===== */ +.filter-chip { + display: inline-flex; + align-items: center; + padding: var(--space-sm) var(--space-md); + margin: var(--space-xs); + border-radius: 20px; + background: var(--theme-bg-header); + border: 1px solid var(--theme-border-accent); + color: #eee; + cursor: pointer; + transition: all var(--transition-fast); + text-decoration: none; +} +.filter-chip:hover { + background: rgba(0, 217, 255, 0.2); + border-color: rgba(0, 217, 255, 0.5); + color: #fff; +} +.filter-chip.active { + background: rgba(0, 217, 255, 0.3); + border-color: var(--theme-accent-secondary); + color: var(--theme-accent-secondary); +} +.filter-chip .count { + background: rgba(0, 0, 0, 0.3); + padding: 2px 8px; + border-radius: 10px; + margin-left: 8px; + font-size: 0.85rem; +} +.filter-chip.active .count { + background: rgba(0, 217, 255, 0.3); +} +.filter-chip.has-items .count { + background: rgba(233, 69, 96, 0.5); +} +.activity-stream { + max-height: 150px; + overflow-y: auto; + background: rgba(0, 0, 0, 0.2); + border-radius: var(--radius-md); + padding: 10px; + font-family: monospace; + font-size: 0.85rem; + display: none; +} +.activity-stream.active { + display: block; +} +.activity-stream .entry { + padding: 2px 0; + border-bottom: 1px solid rgba(255,255,255,0.05); +} +.activity-stream .entry.success { color: var(--theme-accent-secondary); } +.activity-stream .entry.error { color: var(--theme-accent-primary); } +.activity-stream .entry.info { color: var(--theme-accent-warning); } +.item-row { + transition: background var(--transition-fast); +} +.item-row:hover { + background: rgba(0, 217, 255, 0.05); +} +.status-badge { + font-size: 0.75rem; + padding: 3px 8px; +} +.action-btn { + padding: 2px 8px; + font-size: 0.8rem; +} +/* Issue #111: Sortable column headers */ +th[onclick]:hover { + color: var(--theme-accent-secondary); +} +.sort-icon { + font-size: 0.7rem; + opacity: 0.5; +} +.sort-icon.active { + opacity: 1; + color: var(--theme-accent-secondary); 
+} + +/* ===== Setup Wizard ===== */ +.setup-container { + max-width: 700px; + margin: 0 auto; +} +.step-indicators { + display: flex; + justify-content: center; + align-items: center; + margin-bottom: 2rem; + flex-wrap: wrap; + gap: 5px; +} +.step-indicator { + width: 40px; + height: 40px; + border-radius: 50%; + background: var(--theme-bg-secondary); + border: 2px solid var(--theme-border-primary); + display: flex; + align-items: center; + justify-content: center; + font-weight: bold; + color: #888; + transition: all 0.3s ease; +} +.step-indicator.active { + background: var(--theme-accent-primary); + border-color: var(--theme-accent-primary); + color: #fff; + transform: scale(1.1); +} +.step-indicator.completed { + background: var(--theme-accent-secondary); + border-color: var(--theme-accent-secondary); + color: #000; +} +.step-line { + width: 40px; + height: 2px; + background: var(--theme-border-primary); + transition: background 0.3s ease; +} +.step-line.completed { + background: var(--theme-accent-secondary); +} +.setup-step { + animation: fadeIn 0.3s ease; +} +.setup-card { + background: var(--theme-bg-card); + border: 1px solid var(--theme-border-accent); + border-radius: 16px; +} +.setup-card .card-body { + padding: 2rem; +} +.option-card { + background: var(--theme-bg-header); + border: 2px solid transparent; + border-radius: var(--radius-lg); + padding: var(--space-md); + cursor: pointer; + transition: all var(--transition-fast); +} +.option-card:hover { + border-color: rgba(0, 217, 255, 0.5); + background: rgba(15, 52, 96, 0.8); +} +.option-card.selected { + border-color: var(--theme-accent-secondary); + background: rgba(0, 217, 255, 0.1); +} +.option-card input[type="radio"] { + display: none; +} +.path-item { + background: var(--theme-bg-header); + border-radius: var(--radius-md); + padding: 0.75rem 1rem; + margin-bottom: var(--space-sm); + display: flex; + align-items: center; + justify-content: space-between; +} +.path-item.valid { + border-left: 3px solid var(--theme-accent-secondary); +} +.path-item.invalid { + border-left: 3px solid var(--theme-accent-primary); +} +.btn-nav { + min-width: 120px; +} +.feature-icon { + font-size: 2.5rem; + margin-bottom: var(--space-md); +} +.summary-item { + background: rgba(15, 52, 96, 0.3); + border-radius: var(--radius-md); + padding: var(--space-md); + margin-bottom: 0.75rem; +} +.summary-item .label { + color: var(--theme-text-secondary); + font-size: 0.85rem; +} +.summary-item .value { + font-weight: 600; + color: var(--theme-accent-secondary); +} +.setup-header { + text-align: center; + margin-bottom: var(--space-md); + padding-top: var(--space-md); +} +.setup-header h1 { + font-size: 1.5rem; + color: var(--theme-accent-secondary); +} +/* Hide navbar during setup wizard - the setup_wizard.html template adds this class */ +.setup-mode .navbar, +.setup-mode .status-bar { + display: none !important; +} + +/* ===== Utility Classes ===== */ +.fs-icon-lg { + font-size: 2rem; +} +.fs-icon-xl { + font-size: 4rem; +} +.scroll-container { + max-height: 300px; + overflow-y: auto; +} +.scroll-container-sm { + max-height: 150px; + overflow-y: auto; +} +.cursor-pointer { + cursor: pointer; +} + +/* ===== Table Container ===== */ +.table-container { + border-radius: var(--radius-md); + overflow: hidden; + border: 1px solid var(--theme-border-primary); +} + +/* ===== Settings Save Bar ===== */ +.settings-save-bar { + position: sticky; + bottom: 0; + background: var(--theme-bg-secondary); + border-top: 1px solid var(--theme-border-primary); + 
padding: var(--space-md); + z-index: 100; + backdrop-filter: blur(10px); +} + +/* ===== Spin Icon Animation ===== */ +.spin-icon { + animation: spin-anim 1s linear infinite; +} +@keyframes spin-anim { + 100% { transform: rotate(360deg); } +} + +/* ===== Toast Notification ===== */ +.toast-notification { + position: fixed; + top: 20px; + right: 20px; + z-index: 9999; + padding: 12px 20px; + border-radius: var(--radius-md); + color: #fff; + font-size: 0.9rem; + opacity: 0; + transition: opacity 0.3s; +} + +/* ===== Mobile Responsive ===== */ +@media (max-width: 768px) { + .stat-card .stat-number { font-size: 1.5rem; } + .nav-tabs { flex-wrap: nowrap; overflow-x: auto; } + .nav-tabs .nav-link { white-space: nowrap; font-size: 0.85rem; padding: 0.4rem 0.75rem; } + .table { font-size: 0.85rem; } + .card-body { padding: 0.75rem; } + .status-bar-content { + flex-direction: column; + align-items: flex-start; + } + .status-meta { + width: 100%; + justify-content: space-around; + padding-top: 8px; + border-top: 1px solid rgba(255, 255, 255, 0.1); + margin-top: 4px; + } + .status-stat { + border-left: none; + } +} +@media (max-width: 576px) { + .container { padding-left: 0.5rem; padding-right: 0.5rem; } + .modal-dialog { margin: 0.5rem; } + h2 { font-size: 1.3rem; } +} diff --git a/static/js/common.js b/static/js/common.js new file mode 100644 index 0000000..fb38a37 --- /dev/null +++ b/static/js/common.js @@ -0,0 +1,41 @@ +/** + * Common utility functions shared across all templates. + * Loaded via base.html before page-specific scripts. + */ + +/** + * Escape HTML entities to prevent XSS. + * @param {string} text - Raw text to escape + * @returns {string} HTML-safe string + */ +function escapeHtml(text) { + if (text === null || text === undefined) return ''; + var div = document.createElement('div'); + div.textContent = String(text); + return div.innerHTML; +} + +/** + * Show a toast notification. + * @param {string} message - Message to display + * @param {string} [type='info'] - One of: success, danger, info, warning + */ +function showToast(message, type) { + type = type || 'info'; + var colors = { + success: 'var(--theme-accent-success)', + danger: 'var(--theme-accent-primary)', + info: 'var(--theme-accent-secondary)', + warning: 'var(--theme-accent-warning)' + }; + var toast = document.createElement('div'); + toast.className = 'toast-notification'; + toast.style.background = colors[type] || colors.info; + toast.textContent = message; + document.body.appendChild(toast); + requestAnimationFrame(function() { toast.style.opacity = '1'; }); + setTimeout(function() { + toast.style.opacity = '0'; + setTimeout(function() { toast.remove(); }, 300); + }, 3000); +} diff --git a/templates/base.html b/templates/base.html index 546b1a0..8e31220 100644 --- a/templates/base.html +++ b/templates/base.html @@ -20,735 +20,7 @@ - +
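
---

Reviewer note: a minimal smoke-test sketch for the Issue #189 health dashboard endpoints. It assumes LM is reachable at `http://localhost:5000` and that the plugins blueprint is mounted without a URL prefix; the plugin id `my-layer` is a placeholder, not a real layer from this changeset. Field names match the JSON the endpoints return above.

```python
# Hypothetical smoke test for the Issue #189 health endpoints; the base URL,
# port, and plugin id are placeholders — adjust for your install.
import requests

BASE = "http://localhost:5000"
PLUGIN_ID = "my-layer"  # placeholder layer_id

# Aggregated health for every configured custom plugin
health = requests.get(f"{BASE}/api/plugins/health", timeout=10).json()
for plugin in health.get("plugins", []):
    print(plugin["plugin_id"], plugin["status"], plugin["success_rate"])

# Last 20 metric entries for one plugin
logs = requests.get(f"{BASE}/api/plugins/health/{PLUGIN_ID}/logs", timeout=10).json()
for entry in logs.get("entries", []):
    print(entry["timestamp"], entry["success"], entry["error_message"])

# Clear the auto-disabled flag and re-enable the plugin
reset = requests.post(f"{BASE}/api/plugins/health/{PLUGIN_ID}/reset", timeout=10)
print(reset.status_code, reset.json())
```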
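Reviewer note: the `server_notice` handling in `library_manager/providers/bookdb.py` is easier to follow with the consumer side spelled out. The sketch below only relies on `search_bookdb()` and `get_and_clear_server_abort()` as added in this diff; the notice keys it mentions (severity, code, message, action, upgrade_url) are the ones the handler reads, and the surrounding glue code is illustrative, not the actual watch-folder worker.

```python
# Hypothetical consumer-side sketch (e.g. inside the watch-folder worker loop).
# Only search_bookdb() and get_and_clear_server_abort() come from this diff;
# the rest is illustrative glue code.
from library_manager.providers.bookdb import search_bookdb, get_and_clear_server_abort

result = search_bookdb("The Final Empire", author="Brandon Sanderson")

abort_notice = get_and_clear_server_abort()
if abort_notice is not None:
    # The server asked us to stop retrying this task: mark the item aborted
    # instead of re-submitting it on the next scan.
    code = abort_notice.get('code', 'unknown')
    message = abort_notice.get('message', '')
    print(f"aborted by server [{code}]: {message}")
else:
    print("no abort signalled; continue the pipeline with", result)
```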