From a08fe16dfa7909d25d4db9f675b0b3e1bc5ac31b Mon Sep 17 00:00:00 2001
From: deucebucket <deucebucket@users.noreply.github.com>
Date: Fri, 17 Apr 2026 19:39:10 -0500
Subject: [PATCH] Fix #208: Persist watch_folder_processed + honor Skaldleita
 server_notice

Problem 1: watch_folder_processed was an in-memory set() that got wiped on
every restart. Any file that couldn't be processed (unknown author, ambiguous
match, move failure, mtime churn) got re-submitted every scan cycle after a
restart, forever. One LM instance generated ~48% of all Skaldleita /match
traffic for days on the same filename.

Problem 2: Skaldleita PR #129 added a server_notice field to /match
responses. When the server detects a retry loop it now sends
{severity, code, message, action: abort_task, upgrade_url}. LM ignored it.

Fixes:

- New watch_folder_processed SQLite table (path PK, processed_at, outcome,
  error_message). outcome in {moved, move_failed, aborted_by_server}.
- watch_folder_is_processed() / watch_folder_mark_processed() helpers in
  library_manager/database.py. get_watch_folder_items and
  process_watch_folder swapped from set ops to these helpers. Restart no
  longer resets dedup.
- bookdb.py logs every server_notice (with upgrade_url). On
  action=abort_task it stashes the notice in a threading.local() slot so
  scope is per-thread (watch worker, API endpoint, pipeline layer don't
  cross-contaminate).
- process_watch_folder reads the abort slot after each identify attempt;
  if set, marks the item as aborted_by_server and skips the pipeline.

Bumps APP_VERSION to 0.9.0-beta.148.
---
 CHANGELOG.md                        | 28 +++++++++++++++
 README.md                           |  6 +++-
 app.py                              | 33 ++++++++++++------
 library_manager/database.py         | 53 +++++++++++++++++++++++++++++
 library_manager/providers/bookdb.py | 32 +++++++++++++++++
 5 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 066d4ae..daeb120 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,34 @@
 
 All notable changes to Library Manager will be documented in this file.
 
+## [0.9.0-beta.148] - 2026-04-17
+
+### Fixed
+
+- **Issue #208: Watch-folder retry loop survives restarts** — The watch-folder
+  worker used an in-memory `set()` to remember which files it had already
+  processed. Every LM restart wiped the set, so whenever a file couldn't be
+  processed (unknown author, ambiguous match, move failure, mtime churn), the
+  worker would re-submit it on every scan forever. Server-side evidence showed
+  one LM instance generating ~48% of all Skaldleita `/match` traffic — 2,840
+  requests in a single day on the same filename. Fix:
+  - New `watch_folder_processed` SQLite table (`path`, `processed_at`,
+    `outcome`, `error_message`) persists dedup across restarts. `outcome`
+    values: `moved`, `move_failed`, `aborted_by_server`.
+  - Added `watch_folder_is_processed()` / `watch_folder_mark_processed()`
+    helpers in `library_manager/database.py`; watch worker switched from
+    `set()` ops to these helpers.
+- **Issue #208: Skaldleita `server_notice` handler** — Skaldleita responses
+  can now carry a `server_notice` block (severity/code/message/action/
+  upgrade_url). `library_manager/providers/bookdb.py` logs every notice
+  (with upgrade URL) and, on `action=abort_task`, stashes it in a
+  `threading.local()` slot. The watch-folder worker reads that slot after
+  each identify attempt and, if an abort was signalled, marks the item as
+  `aborted_by_server` and skips the rest of the pipeline — no 30-second
+  retry loop.
+
+---
+
 ## [0.9.0-beta.147] - 2026-04-17
 
 ### Fixed
diff --git a/README.md b/README.md
index 0871d1d..a15d196 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 **Smart Audiobook Library Organizer with Multi-Source Metadata & AI Verification**
 
-[![Version](https://img.shields.io/badge/version-0.9.0--beta.147-blue.svg)](CHANGELOG.md)
+[![Version](https://img.shields.io/badge/version-0.9.0--beta.148-blue.svg)](CHANGELOG.md)
 [![Docker](https://img.shields.io/badge/docker-ghcr.io-blue.svg)](https://ghcr.io/deucebucket/library-manager)
 [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
 
@@ -16,6 +16,10 @@
 
 ## Recent Changes (stable)
 
+> **beta.148** - **Fix: Watch-Folder Retry Loop Across Restarts + Skaldleita server_notice** (Issue #208)
+> - **Persistent watch-folder dedup** - `watch_folder_processed` is now a SQLite table instead of an in-memory `set()`. Restarts no longer wipe it, killing the retry loop that had one LM instance hammering Skaldleita's `/match` every 30 seconds on the same file for days.
+> - **Honors Skaldleita's abort signal** - When the server detects a retry loop it sends a `server_notice` in the response. LM now logs it (with an upgrade URL) and, on `action=abort_task`, stops retrying that file immediately.
+
 > **beta.147** - **Critical Fix: Hard Link Safety** (Issue #209)
 > - **Stop silent copy+delete** - When "Use hard links" was enabled and the watch folder / library sat on different filesystems, LM used to copy every file and delete the originals. That broke torrent seeding and doubled disk use. Now LM fails fast with a clear error and leaves source files untouched.
 > - **Pre-check filesystem compatibility** - Verifies `st_dev` match before any file operations when hard links are enabled.
diff --git a/app.py b/app.py
index 8663f0e..6972a4c 100644
--- a/app.py
+++ b/app.py
@@ -11,7 +11,7 @@
 - Multi-provider AI (Gemini, OpenRouter, Ollama)
 """
 
-APP_VERSION = "0.9.0-beta.147"
+APP_VERSION = "0.9.0-beta.148"
 GITHUB_REPO = "deucebucket/library-manager"  # Your GitHub repo
 
 # Versioning Guide:
@@ -52,7 +52,8 @@
 from library_manager.database import (
     init_db, get_db, set_db_path, cleanup_garbage_entries,
     cleanup_duplicate_history_entries, insert_history_entry,
-    should_requeue_book
+    should_requeue_book,
+    watch_folder_is_processed, watch_folder_mark_processed
 )
 from library_manager.models.book_profile import (
     SOURCE_WEIGHTS, FIELD_WEIGHTS, FieldValue, BookProfile,
@@ -6432,8 +6433,8 @@ def get_circuit_breaker(api_name):
 # WATCH FOLDER FUNCTIONALITY
 # ============================================================================
 
-# Track processed watch folder items to avoid reprocessing
-watch_folder_processed = set()
+# Issue #208: watch-folder dedup now lives in the watch_folder_processed
+# SQLite table (see library_manager.database) so restarts don't reset state.
 watch_folder_last_scan = 0
 
 def get_watch_folder_items(watch_folder: str, min_age_seconds: int = 30) -> list:
@@ -6456,8 +6457,8 @@ def get_watch_folder_items(watch_folder: str, min_age_seconds: int = 30) -> list
     for item in watch_path.iterdir():
         item_path = str(item.resolve())
 
-        # Skip if already processed
-        if item_path in watch_folder_processed:
+        # Skip if already processed (persisted in SQLite, Issue #208)
+        if watch_folder_is_processed(item_path):
             continue
 
         # Check if folder contains audio files or is an audio file
@@ -6668,7 +6669,7 @@ def process_watch_folder(config: dict) -> int:
     Process items in the watch folder.
     Returns number of items processed.
     """
-    global watch_folder_processed, watch_folder_last_scan
+    global watch_folder_last_scan
 
     watch_folder = config.get('watch_folder', '').strip()
     output_folder = config.get('watch_output_folder', '').strip()
@@ -6828,6 +6829,18 @@ def norm_conf(c):
             except Exception as e:
                 logger.debug(f"Watch folder: API lookup failed, using path analysis: {e}")
 
+            # Issue #208: Skaldleita may have signalled 'abort_task' during the
+            # lookup above (retry-loop protection). Persist the item so future
+            # scans skip it; no code path removes the row yet, so the skip is
+            # permanent. The warning + upgrade URL are already in the logs.
+            from library_manager.providers.bookdb import get_and_clear_server_abort
+            server_abort = get_and_clear_server_abort()
+            if server_abort:
+                abort_msg = server_abort.get('message', 'Skaldleita requested task abort')
+                logger.warning(f"Watch folder: Aborting '{item.name}' per Skaldleita server notice")
+                watch_folder_mark_processed(item_path, 'aborted_by_server', abort_msg)
+                continue
+
             # Issue #57: Verify drastic author changes before accepting
             if needs_verification and api_author and api_title:
                 try:
@@ -6880,7 +6893,7 @@ def norm_conf(c):
 
             if success:
                 logger.info(f"Watch folder: Moved to {new_path}")
-                watch_folder_processed.add(item_path)
+                watch_folder_mark_processed(item_path, 'moved')
                 processed += 1
 
                 # Add to books table
@@ -6914,8 +6927,8 @@ def norm_conf(c):
             else:
                 logger.error(f"Watch folder: Failed to move {item.name}: {error}")
                 # Issue #49: Track failed items in the database so user can see and fix them
-                # Add to watch_folder_processed to prevent infinite retry loop
-                watch_folder_processed.add(item_path)
+                # Issue #208: persist dedup so the retry loop dies across restarts too
+                watch_folder_mark_processed(item_path, 'move_failed', error)
                 try:
                     # Check if this item is already tracked
                     c.execute('SELECT id FROM books WHERE path = ?', (item_path,))
diff --git a/library_manager/database.py b/library_manager/database.py
index 62d4df6..b25e040 100644
--- a/library_manager/database.py
+++ b/library_manager/database.py
@@ -175,6 +175,17 @@ def init_db(db_path=None):
         api_calls INTEGER DEFAULT 0
     )''')
 
+    # Issue #208: Persistent watch-folder dedup
+    # Was an in-memory set(), wiped on restart, which caused the watch worker
+    # to re-submit the same failing file every cycle (ate ~48% of Skaldleita
+    # traffic from a single LM instance before server-side cache absorbed it).
+    c.execute('''CREATE TABLE IF NOT EXISTS watch_folder_processed (
+        path TEXT PRIMARY KEY,
+        processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+        outcome TEXT,
+        error_message TEXT
+    )''')
+
     conn.commit()
     conn.close()
 
@@ -187,6 +198,48 @@ def init_db(db_path=None):
     init_plugin_metrics_table(path)
 
 
+def watch_folder_is_processed(path, db_path=None):
+    """Return True if the watch-folder path has already been handled.
+
+    Issue #208: replaces the in-memory set. Survives restarts so the worker
+    doesn't re-submit the same failing file every scan cycle.
+    """
+    p = db_path or _db_path
+    if not p:
+        return False
+    conn = sqlite3.connect(p, timeout=30)
+    try:
+        c = conn.execute(
+            'SELECT 1 FROM watch_folder_processed WHERE path = ? LIMIT 1',
+            (path,)
+        )
+        return c.fetchone() is not None
+    finally:
+        conn.close()
+
+
+def watch_folder_mark_processed(path, outcome, error_message=None, db_path=None):
+    """Record that a watch-folder path has been handled.
+
+    outcome: 'moved' | 'move_failed' | 'aborted_by_server'
+    Issue #208.
+    """
+    p = db_path or _db_path
+    if not p:
+        return
+    conn = sqlite3.connect(p, timeout=30)
+    try:
+        conn.execute(
+            '''INSERT OR REPLACE INTO watch_folder_processed
+               (path, processed_at, outcome, error_message)
+               VALUES (?, CURRENT_TIMESTAMP, ?, ?)''',
+            (path, outcome, error_message)
+        )
+        conn.commit()
+    finally:
+        conn.close()
+
+
 def cleanup_garbage_entries(db_path=None):
     """Remove garbage entries from database on startup.
 
diff --git a/library_manager/providers/bookdb.py b/library_manager/providers/bookdb.py
index 83e7345..d003733 100644
--- a/library_manager/providers/bookdb.py
+++ b/library_manager/providers/bookdb.py
@@ -15,6 +15,7 @@
 import logging
 import subprocess
 import tempfile
+import threading
 import requests
 from pathlib import Path
 
@@ -33,6 +34,22 @@
 
 logger = logging.getLogger(__name__)
 
+# Issue #208: Skaldleita can signal "stop retrying this task" via a server_notice
+# in the JSON response. We stash the notice in a thread-local so the caller
+# (e.g. the watch-folder worker) can pick it up and mark the item as aborted
+# without a 30-second retry loop. Thread-local keeps the signal scoped to the
+# thread that issued the matching request.
+_abort_state = threading.local()
+
+
+def get_and_clear_server_abort():
+    """Return (and clear) the last abort_task server_notice set on this thread,
+    or None if none is pending. NOTE(review): notices persist until read."""
+    notice = getattr(_abort_state, 'notice', None)
+    if notice is not None:
+        _abort_state.notice = None
+    return notice
+
 # Skaldleita API endpoint (our metadata service, legacy name: BookDB)
 BOOKDB_API_URL = "https://bookdb.deucebucket.com"  # URL unchanged for backwards compatibility
 # Public API key for Library Manager users (no config needed)
@@ -168,6 +185,21 @@ def search_bookdb(title, author=None, api_key=None, retry_count=0, bookdb_url=No
 
         data = resp.json()
 
+        # Issue #208: honor Skaldleita server_notice. Log every notice; on
+        # action=abort_task, stash in thread-local so the watch-folder worker
+        # can stop retrying instead of hammering /match every 30s.
+        notice = data.get('server_notice')
+        if notice:
+            code = notice.get('code', 'unknown')
+            msg = notice.get('message', '')
+            upgrade_url = notice.get('upgrade_url')
+            severity = notice.get('severity', 'info')
+            logger.warning(f"[SKALDLEITA] server notice ({severity}) [{code}]: {msg}")
+            if upgrade_url:
+                logger.warning(f"[SKALDLEITA] upgrade: {upgrade_url}")
+            if notice.get('action') == 'abort_task':
+                _abort_state.notice = notice
+
         # Check confidence threshold
         if data.get('confidence', 0) < 0.5:
             logger.debug(f"Skaldleita match below confidence threshold: {data.get('confidence')}")