From 7fe10e9277a30a4ba77d909a0f7e2377282f43b1 Mon Sep 17 00:00:00 2001
From: thismanyboyfriends2 <impurity-robe-tilt@duck.com>
Date: Tue, 3 Feb 2026 15:42:20 +0000
Subject: [PATCH 1/7] feat: first pass at performer url cleanup

---
 .../performer-url-cleanup.yml                 |  17 ++
 .../performer_url_cleanup.py                  | 246 ++++++++++++++++++
 2 files changed, 263 insertions(+)
 create mode 100644 plugins/performer-url-cleanup/performer-url-cleanup.yml
 create mode 100644 plugins/performer-url-cleanup/performer_url_cleanup.py

diff --git a/plugins/performer-url-cleanup/performer-url-cleanup.yml b/plugins/performer-url-cleanup/performer-url-cleanup.yml
new file mode 100644
index 0000000..4cefe2c
--- /dev/null
+++ b/plugins/performer-url-cleanup/performer-url-cleanup.yml
@@ -0,0 +1,17 @@
+name: Performer URL Cleanup
+description: Normalises, deduplicates, and sorts performer URLs
+version: 1.0.0
+url: https://github.com/thismanyboyfriends2/stash-plugins
+exec:
+  - python
+  - "{pluginDir}/performer_url_cleanup.py"
+interface: raw
+tasks:
+  - name: Preview URL Cleanup
+    description: Shows what URL changes would be made without applying them
+    defaultArgs:
+      mode: preview
+  - name: Apply URL Cleanup
+    description: Normalises, deduplicates, and sorts all performer URLs
+    defaultArgs:
+      mode: apply
diff --git a/plugins/performer-url-cleanup/performer_url_cleanup.py b/plugins/performer-url-cleanup/performer_url_cleanup.py
new file mode 100644
index 0000000..35c0a66
--- /dev/null
+++ b/plugins/performer-url-cleanup/performer_url_cleanup.py
@@ -0,0 +1,246 @@
+"""Performer URL Cleanup Plugin for Stash.
+
+Normalises, deduplicates, and sorts performer URLs.
+"""
+import json
+import sys
+from urllib.parse import urlparse, urlunparse
+
+try:
+    import stashapi.log as log
+    from stashapi.stashapp import StashInterface
+except ModuleNotFoundError:
+    print(json.dumps({
+        "output": "Error: stashapp-tools not installed. Run: pip install stashapp-tools"
+    }))
+    sys.exit(1)
+
+# Sites that should not have www prefix
+REMOVE_WWW = {
+    'x.com',
+    'twitter.com',
+    'onlyfans.com',
+    'instagram.com',
+    'fansly.com',
+    'pornhub.com',
+    'xvideos.com',
+    'xhamster.com',
+}
+
+# Domain aliases - map old domains to canonical ones
+DOMAIN_ALIASES = {
+    'twitter.com': 'x.com',
+}
+
+# Sites that preserve user's chosen capitalisation in the path
+PRESERVE_CASE = {'x.com', 'twitter.com'}
+
+
+def normalise_url(url):
+    """Normalise a URL according to site-specific rules.
+
+    Returns (normalised_url, canonical_domain) tuple.
+    """
+    # Parse the URL
+    parsed = urlparse(url)
+
+    # Upgrade to HTTPS
+    scheme = 'https'
+
+    # Normalise domain
+    domain = parsed.netloc.lower()
+
+    # Remove www if site doesn't use it
+    if domain.startswith('www.'):
+        domain_without_www = domain[4:]
+        if domain_without_www in REMOVE_WWW:
+            domain = domain_without_www
+
+    # Apply domain aliases
+    if domain in DOMAIN_ALIASES:
+        domain = DOMAIN_ALIASES[domain]
+
+    # Handle path
+    path = parsed.path
+
+    # Remove trailing slash
+    if path.endswith('/') and len(path) > 1:
+        path = path.rstrip('/')
+
+    # Case handling - lowercase path unless site preserves case
+    if domain not in PRESERVE_CASE:
+        path = path.lower()
+
+    # Reconstruct URL
+    normalised = urlunparse((scheme, domain, path, '', '', ''))
+
+    return normalised, domain
+
+
+def get_canonical_url(urls):
+    """Given a list of equivalent URLs, return the canonical one.
+
+    For PRESERVE_CASE sites, keeps the first occurrence.
+    For others, returns the normalised (lowercase) version.
+    """
+    if not urls:
+        return None
+
+    # All URLs should normalise to the same thing
+    # Return the first one's normalised form
+    return urls[0]
+
+
+def deduplicate_and_sort(urls):
+    """Normalise, deduplicate, and sort URLs.
+
+    Returns (new_urls, changes) where changes is a list of change descriptions.
+    """
+    if not urls:
+        return [], []
+
+    changes = []
+    seen = {}  # normalised_lower -> (normalised_url, original_url)
+
+    for url in urls:
+        normalised, domain = normalise_url(url)
+        normalised_lower = normalised.lower()
+
+        if normalised_lower in seen:
+            # Duplicate found
+            existing_normalised, existing_original = seen[normalised_lower]
+            changes.append(f"Remove duplicate: {url} (same as {existing_original})")
+        else:
+            seen[normalised_lower] = (normalised, url)
+            if normalised != url:
+                changes.append(f"Normalise: {url} -> {normalised}")
+
+    # Extract normalised URLs and sort by domain
+    result_urls = []
+    for normalised, original in seen.values():
+        result_urls.append(normalised)
+
+    # Sort by domain, then full URL
+    def sort_key(url):
+        parsed = urlparse(url)
+        return (parsed.netloc.lower(), url.lower())
+
+    sorted_urls = sorted(result_urls, key=sort_key)
+
+    # Check if order changed
+    original_normalised = [normalise_url(u)[0] for u in urls if normalise_url(u)[0].lower() in {u.lower() for u in result_urls}]
+    if sorted_urls != list(dict.fromkeys(original_normalised)):  # Remove dups preserving order
+        changes.append("Reordered URLs alphabetically by domain")
+
+    return sorted_urls, changes
+
+
+def process_performers(stash, dry_run=True):
+    """Process all performers and clean up their URLs."""
+    # Fetch all performers with URLs
+    log.info("Fetching performers with URLs...")
+
+    result = stash.find_performers(
+        f={},
+        fragment="id name urls",
+        get_count=True
+    )
+
+    if not result:
+        log.info("No performers found")
+        return
+
+    count, performers = result
+    log.info(f"Found {count} performers to check")
+
+    performers_to_update = []
+
+    for idx, performer in enumerate(performers):
+        urls = performer.get('urls') or []
+
+        if not urls:
+            continue
+
+        new_urls, changes = deduplicate_and_sort(urls)
+
+        if changes:
+            performers_to_update.append({
+                'id': performer['id'],
+                'name': performer['name'],
+                'old_urls': urls,
+                'new_urls': new_urls,
+                'changes': changes
+            })
+
+        # Update progress
+        if count > 0:
+            log.progress((idx + 1) / count)
+
+    # Report results
+    if not performers_to_update:
+        log.info("No URL changes needed - all performers are already clean")
+        return
+
+    log.info(f"\n{'=' * 60}")
+    log.info(f"Found {len(performers_to_update)} performers with URL changes:")
+    log.info(f"{'=' * 60}\n")
+
+    for p in performers_to_update:
+        log.info(f"Performer: {p['name']} (ID: {p['id']})")
+        for change in p['changes']:
+            log.info(f"  - {change}")
+        log.info(f"  Final URLs:")
+        for url in p['new_urls']:
+            log.info(f"    - {url}")
+        log.info("")
+
+    if dry_run:
+        log.info(f"{'=' * 60}")
+        log.info(f"PREVIEW MODE - No changes applied")
+        log.info(f"Run 'Apply URL Cleanup' to apply these changes")
+        log.info(f"{'=' * 60}")
+    else:
+        log.info(f"Applying changes to {len(performers_to_update)} performers...")
+
+        for idx, p in enumerate(performers_to_update):
+            try:
+                stash.update_performer({
+                    'id': p['id'],
+                    'urls': p['new_urls']
+                })
+                log.debug(f"Updated {p['name']}")
+            except Exception as e:
+                log.error(f"Failed to update {p['name']}: {e}")
+
+            log.progress((idx + 1) / len(performers_to_update))
+
+        log.info(f"{'=' * 60}")
+        log.info(f"Applied URL cleanup to {len(performers_to_update)} performers")
+        log.info(f"{'=' * 60}")
+
+
+def main():
+    """Main entry point."""
+    # Read JSON input from Stash
+    json_input = json.loads(sys.stdin.read())
+
+    # Extract connection info and initialise client
+    server_connection = json_input["server_connection"]
+    stash = StashInterface(server_connection)
+
+    # Get mode from args
+    mode = json_input.get("args", {}).get("mode", "preview")
+
+    log.info(f"Performer URL Cleanup - Mode: {mode}")
+    log.info("")
+
+    if mode == "preview":
+        process_performers(stash, dry_run=True)
+    elif mode == "apply":
+        process_performers(stash, dry_run=False)
+    else:
+        log.error(f"Unknown mode: {mode}")
+
+
+if __name__ == "__main__":
+    main()

From 24548e79a3233eff968d1827c19ac9c5cf3052a6 Mon Sep 17 00:00:00 2001
From: thismanyboyfriends2 <impurity-robe-tilt@duck.com>
Date: Tue, 3 Feb 2026 16:33:24 +0000
Subject: [PATCH 2/7] feat: add copy-stashbox-urls plugin as a submodule

---
 plugins/copy-stashbox-urls | 1 +
 1 file changed, 1 insertion(+)
 create mode 160000 plugins/copy-stashbox-urls

diff --git a/plugins/copy-stashbox-urls b/plugins/copy-stashbox-urls
new file mode 160000
index 0000000..059eecc
--- /dev/null
+++ b/plugins/copy-stashbox-urls
@@ -0,0 +1 @@
+Subproject commit 059eeccc89c84e80f5ddf90e435b567945be0fba

From b61309580686e5c060b101c8eb88bf7654aca256 Mon Sep 17 00:00:00 2001
From: thismanyboyfriends2 <impurity-robe-tilt@duck.com>
Date: Tue, 3 Feb 2026 16:37:23 +0000
Subject: [PATCH 3/7] fix: use python3 and simplify reorder check in url cleanup

---
 .../performer-url-cleanup.yml                 |  2 +-
 .../performer_url_cleanup.py                  | 19 ++-----------------
 2 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/plugins/performer-url-cleanup/performer-url-cleanup.yml b/plugins/performer-url-cleanup/performer-url-cleanup.yml
index 4cefe2c..40613db 100644
--- a/plugins/performer-url-cleanup/performer-url-cleanup.yml
+++ b/plugins/performer-url-cleanup/performer-url-cleanup.yml
@@ -3,7 +3,7 @@ description: Normalises, deduplicates, and sorts performer URLs
 version: 1.0.0
 url: https://github.com/thismanyboyfriends2/stash-plugins
 exec:
-  - python
+  - python3
   - "{pluginDir}/performer_url_cleanup.py"
 interface: raw
 tasks:
diff --git a/plugins/performer-url-cleanup/performer_url_cleanup.py b/plugins/performer-url-cleanup/performer_url_cleanup.py
index 35c0a66..549e5ed 100644
--- a/plugins/performer-url-cleanup/performer_url_cleanup.py
+++ b/plugins/performer-url-cleanup/performer_url_cleanup.py
@@ -77,20 +77,6 @@ def normalise_url(url):
     return normalised, domain
 
 
-def get_canonical_url(urls):
-    """Given a list of equivalent URLs, return the canonical one.
-
-    For PRESERVE_CASE sites, keeps the first occurrence.
-    For others, returns the normalised (lowercase) version.
-    """
-    if not urls:
-        return None
-
-    # All URLs should normalise to the same thing
-    # Return the first one's normalised form
-    return urls[0]
-
-
 def deduplicate_and_sort(urls):
     """Normalise, deduplicate, and sort URLs.
 
@@ -127,9 +113,8 @@ def sort_key(url):
 
     sorted_urls = sorted(result_urls, key=sort_key)
 
-    # Check if order changed
-    original_normalised = [normalise_url(u)[0] for u in urls if normalise_url(u)[0].lower() in {u.lower() for u in result_urls}]
-    if sorted_urls != list(dict.fromkeys(original_normalised)):  # Remove dups preserving order
+    # Check if order changed (result_urls preserves original order after dedup)
+    if result_urls != sorted_urls:
         changes.append("Reordered URLs alphabetically by domain")
 
     return sorted_urls, changes

From a0a726dc9abda87e84c795cb3f58fec6f2b8127b Mon Sep 17 00:00:00 2001
From: thismanyboyfriends2 <impurity-robe-tilt@duck.com>
Date: Tue, 3 Feb 2026 18:16:50 +0000
Subject: [PATCH 4/7] feat: add site-specific URL rules and debug output

- Add known domains system to limit changes to configured sites
- Add debug files for reviewing changes and potential matches
- Add www handling, HTTP-only sites, path transforms, suffix removal
- Preserve case by default, only lowercase known case-insensitive sites
---
 .../performer_url_cleanup.py                  | 297 +++++++++++++++---
 1 file changed, 252 insertions(+), 45 deletions(-)

diff --git a/plugins/performer-url-cleanup/performer_url_cleanup.py b/plugins/performer-url-cleanup/performer_url_cleanup.py
index 549e5ed..1ba82df 100644
--- a/plugins/performer-url-cleanup/performer_url_cleanup.py
+++ b/plugins/performer-url-cleanup/performer_url_cleanup.py
@@ -4,8 +4,15 @@
 """
 import json
 import sys
+from collections import defaultdict
 from urllib.parse import urlparse, urlunparse
 
+# Debug output paths (temporary)
+DEBUG_DIR = r"C:\stash"
+DEBUG_BY_PERFORMER = f"{DEBUG_DIR}\\url_cleanup_by_performer.txt"
+DEBUG_BY_DOMAIN = f"{DEBUG_DIR}\\url_cleanup_by_domain.txt"
+DEBUG_POTENTIAL = f"{DEBUG_DIR}\\url_cleanup_potential.txt"
+
 try:
     import stashapi.log as log
     from stashapi.stashapp import StashInterface
@@ -20,11 +27,15 @@
     'x.com',
     'twitter.com',
     'onlyfans.com',
-    'instagram.com',
     'fansly.com',
+    'xhamster.com',
+}
+
+# Sites that should have www prefix added
+ADD_WWW = {
+    'instagram.com',
     'pornhub.com',
     'xvideos.com',
-    'xhamster.com',
 }
 
 # Domain aliases - map old domains to canonical ones
@@ -32,8 +43,66 @@
     'twitter.com': 'x.com',
 }
 
-# Sites that preserve user's chosen capitalisation in the path
-PRESERVE_CASE = {'x.com', 'twitter.com'}
+# Sites where path is case-insensitive (safe to lowercase)
+# Default behaviour: preserve original case
+LOWERCASE_PATH = {
+    'onlyfans.com',
+    'instagram.com',
+}
+
+# Sites that don't support HTTPS (keep as HTTP)
+HTTP_ONLY = {
+    'bustybuffy.com',
+    'www.bustybuffy.com',
+}
+
+# Path transformations - (domain, old_prefix, new_prefix)
+PATH_TRANSFORMS = [
+    ('eastcoasttalents.com', '/site/talent/', '/talent/'),
+]
+
+# Path suffixes to remove - (domain, suffix)
+REMOVE_PATH_SUFFIX = [
+    ('fansly.com', '/posts'),
+]
+
+# Sites that require trailing slashes
+KEEP_TRAILING_SLASH = {
+    'adultfilmdatabase.com',
+    'www.adultfilmdatabase.com',
+}
+
+
+def get_known_domains():
+    """Build set of all domains we have explicit rules for."""
+    known = set()
+    known.update(REMOVE_WWW)
+    known.update(ADD_WWW)
+    known.update(DOMAIN_ALIASES.keys())
+    known.update(LOWERCASE_PATH)
+    known.update(HTTP_ONLY)
+    known.update(KEEP_TRAILING_SLASH)
+    for domain, _, _ in PATH_TRANSFORMS:
+        known.add(domain)
+    for domain, _ in REMOVE_PATH_SUFFIX:
+        known.add(domain)
+    # Also add www variants
+    www_variants = {f'www.{d}' for d in known if not d.startswith('www.')}
+    known.update(www_variants)
+    return known
+
+
+KNOWN_DOMAINS = get_known_domains()
+
+
+def is_known_domain(domain):
+    """Check if domain has explicit rules configured."""
+    d = domain.lower()
+    if d in KNOWN_DOMAINS:
+        return True
+    if d.startswith('www.') and d[4:] in KNOWN_DOMAINS:
+        return True
+    return False
 
 
 def normalise_url(url):
@@ -41,21 +110,29 @@ def normalise_url(url):
 
     Returns (normalised_url, canonical_domain) tuple.
     """
+    # Ensure URL has a scheme before parsing (urlparse needs it to identify netloc)
+    if not url.startswith(('http://', 'https://')):
+        url = 'https://' + url
+
     # Parse the URL
     parsed = urlparse(url)
 
-    # Upgrade to HTTPS
-    scheme = 'https'
-
-    # Normalise domain
+    # Normalise domain (need this early to check HTTP_ONLY)
     domain = parsed.netloc.lower()
 
+    # Upgrade to HTTPS unless site doesn't support it
+    scheme = 'http' if domain in HTTP_ONLY else 'https'
+
     # Remove www if site doesn't use it
     if domain.startswith('www.'):
         domain_without_www = domain[4:]
         if domain_without_www in REMOVE_WWW:
             domain = domain_without_www
 
+    # Add www if site requires it
+    if not domain.startswith('www.') and domain in ADD_WWW:
+        domain = 'www.' + domain
+
     # Apply domain aliases
     if domain in DOMAIN_ALIASES:
         domain = DOMAIN_ALIASES[domain]
@@ -63,48 +140,161 @@ def normalise_url(url):
     # Handle path
     path = parsed.path
 
-    # Remove trailing slash
-    if path.endswith('/') and len(path) > 1:
+    # Apply path transformations
+    for transform_domain, old_prefix, new_prefix in PATH_TRANSFORMS:
+        if domain == transform_domain and path.startswith(old_prefix):
+            path = new_prefix + path[len(old_prefix):]
+            break
+
+    # Remove path suffixes
+    for suffix_domain, suffix in REMOVE_PATH_SUFFIX:
+        if domain == suffix_domain and path.endswith(suffix):
+            path = path[:-len(suffix)]
+            break
+
+    # Remove trailing slash (unless site requires it)
+    if path.endswith('/') and domain not in KEEP_TRAILING_SLASH:
         path = path.rstrip('/')
 
-    # Case handling - lowercase path unless site preserves case
-    if domain not in PRESERVE_CASE:
+    # Case handling - only lowercase if site is known to be case-insensitive
+    if domain in LOWERCASE_PATH:
         path = path.lower()
 
-    # Reconstruct URL
-    normalised = urlunparse((scheme, domain, path, '', '', ''))
+    # Reconstruct URL (preserve query string, drop fragment)
+    normalised = urlunparse((scheme, domain, path, parsed.params, parsed.query, ''))
 
     return normalised, domain
 
 
+def write_debug_files(performers_to_update):
+    """Write debug output files for analysis."""
+    # Per-performer output (confirmed changes only)
+    with open(DEBUG_BY_PERFORMER, 'w', encoding='utf-8') as f:
+        for p in performers_to_update:
+            if not p['changes']:
+                continue
+            f.write(f"{'=' * 60}\n")
+            f.write(f"Performer: {p['name']} (ID: {p['id']})\n")
+            f.write(f"{'=' * 60}\n")
+            f.write("Original URLs:\n")
+            for url in p['old_urls']:
+                f.write(f"  {url}\n")
+            f.write("\nChanges:\n")
+            for change in p['changes']:
+                f.write(f"  - {change}\n")
+            f.write("\nFinal URLs:\n")
+            for url in p['new_urls']:
+                f.write(f"  {url}\n")
+            f.write("\n")
+
+    # Per-domain output - group confirmed changes by domain
+    domain_changes = defaultdict(list)
+    for p in performers_to_update:
+        for url in p['old_urls']:
+            normalised, domain = normalise_url(url)
+            if normalised != url and is_known_domain(domain):
+                domain_changes[domain].append({
+                    'performer': p['name'],
+                    'original': url,
+                    'normalised': normalised
+                })
+
+    with open(DEBUG_BY_DOMAIN, 'w', encoding='utf-8') as f:
+        for domain in sorted(domain_changes.keys()):
+            changes = domain_changes[domain]
+            f.write(f"{'=' * 60}\n")
+            f.write(f"Domain: {domain} ({len(changes)} changes)\n")
+            f.write(f"{'=' * 60}\n")
+            for c in changes:
+                f.write(f"[{c['performer']}]\n")
+                f.write(f"  {c['original']}\n")
+                f.write(f"  -> {c['normalised']}\n")
+            f.write("\n")
+
+    # Potential changes - unknown domains grouped by domain
+    potential_by_domain = defaultdict(list)
+    for p in performers_to_update:
+        for url in p['old_urls']:
+            normalised, domain = normalise_url(url)
+            if normalised != url and not is_known_domain(domain):
+                potential_by_domain[domain].append({
+                    'performer': p['name'],
+                    'original': url,
+                    'normalised': normalised
+                })
+
+    with open(DEBUG_POTENTIAL, 'w', encoding='utf-8') as f:
+        f.write("POTENTIAL CHANGES - Unknown domains (review and add rules as needed)\n")
+        f.write(f"{'=' * 60}\n\n")
+        for domain in sorted(potential_by_domain.keys()):
+            changes = potential_by_domain[domain]
+            f.write(f"{'=' * 60}\n")
+            f.write(f"Domain: {domain} ({len(changes)} potential changes)\n")
+            f.write(f"{'=' * 60}\n")
+            for c in changes:
+                f.write(f"[{c['performer']}]\n")
+                f.write(f"  {c['original']}\n")
+                f.write(f"  -> {c['normalised']}\n")
+            f.write("\n")
+
+
+def has_mixed_case(url):
+    """Check if URL path has mixed case (likely from scraper, more accurate)."""
+    parsed = urlparse(url)
+    path = parsed.path
+    return path != path.lower() and path != path.upper()
+
+
 def deduplicate_and_sort(urls):
     """Normalise, deduplicate, and sort URLs.
 
-    Returns (new_urls, changes) where changes is a list of change descriptions.
+    Only applies changes to known domains. Unknown domain changes go to potential list.
+    Returns (new_urls, changes, potential_changes).
     """
     if not urls:
-        return [], []
+        return [], [], []
 
     changes = []
-    seen = {}  # normalised_lower -> (normalised_url, original_url)
+    potential_changes = []
+    seen = {}  # normalised_lower -> (normalised_url, original_url, domain, is_known)
 
     for url in urls:
         normalised, domain = normalise_url(url)
         normalised_lower = normalised.lower()
+        known = is_known_domain(domain)
 
         if normalised_lower in seen:
-            # Duplicate found
-            existing_normalised, existing_original = seen[normalised_lower]
-            changes.append(f"Remove duplicate: {url} (same as {existing_original})")
+            # Duplicate found - prefer mixed case version (likely from scraper)
+            existing_normalised, existing_original, existing_domain, existing_known = seen[normalised_lower]
+            if has_mixed_case(normalised) and not has_mixed_case(existing_normalised):
+                seen[normalised_lower] = (normalised, url, domain, known)
+                msg = f"Remove duplicate: {existing_original} (prefer mixed-case {url})"
+                if known or existing_known:
+                    changes.append(msg)
+                else:
+                    potential_changes.append(msg)
+            else:
+                msg = f"Remove duplicate: {url} (same as {existing_original})"
+                if known or existing_known:
+                    changes.append(msg)
+                else:
+                    potential_changes.append(msg)
         else:
-            seen[normalised_lower] = (normalised, url)
+            seen[normalised_lower] = (normalised, url, domain, known)
             if normalised != url:
-                changes.append(f"Normalise: {url} -> {normalised}")
+                msg = f"Normalise: {url} -> {normalised}"
+                if known:
+                    changes.append(msg)
+                else:
+                    potential_changes.append(msg)
 
-    # Extract normalised URLs and sort by domain
+    # Build result - only apply normalisations for known domains
     result_urls = []
-    for normalised, original in seen.values():
-        result_urls.append(normalised)
+    for normalised, original, domain, known in seen.values():
+        if known:
+            result_urls.append(normalised)
+        else:
+            result_urls.append(original)  # Keep original for unknown domains
 
     # Sort by domain, then full URL
     def sort_key(url):
@@ -117,7 +307,7 @@ def sort_key(url):
     if result_urls != sorted_urls:
         changes.append("Reordered URLs alphabetically by domain")
 
-    return sorted_urls, changes
+    return sorted_urls, changes, potential_changes
 
 
 def process_performers(stash, dry_run=True):
@@ -146,15 +336,16 @@ def process_performers(stash, dry_run=True):
         if not urls:
             continue
 
-        new_urls, changes = deduplicate_and_sort(urls)
+        new_urls, changes, potential_changes = deduplicate_and_sort(urls)
 
-        if changes:
+        if changes or potential_changes:
             performers_to_update.append({
                 'id': performer['id'],
                 'name': performer['name'],
                 'old_urls': urls,
                 'new_urls': new_urls,
-                'changes': changes
+                'changes': changes,
+                'potential_changes': potential_changes,
             })
 
         # Update progress
@@ -166,28 +357,44 @@ def process_performers(stash, dry_run=True):
         log.info("No URL changes needed - all performers are already clean")
         return
 
-    log.info(f"\n{'=' * 60}")
-    log.info(f"Found {len(performers_to_update)} performers with URL changes:")
-    log.info(f"{'=' * 60}\n")
+    # Write debug files
+    write_debug_files(performers_to_update)
+    log.info(f"Debug files written to {DEBUG_DIR}")
 
-    for p in performers_to_update:
-        log.info(f"Performer: {p['name']} (ID: {p['id']})")
-        for change in p['changes']:
-            log.info(f"  - {change}")
-        log.info(f"  Final URLs:")
-        for url in p['new_urls']:
-            log.info(f"    - {url}")
-        log.info("")
+    # Filter to only performers with confirmed changes
+    performers_with_changes = [p for p in performers_to_update if p['changes']]
+    performers_with_potential = [p for p in performers_to_update if p['potential_changes']]
+
+    log.info(f"Found {len(performers_with_changes)} performers with confirmed changes")
+    log.info(f"Found {len(performers_with_potential)} performers with potential changes (see {DEBUG_POTENTIAL})")
+
+    if performers_with_changes:
+        log.info(f"\n{'=' * 60}")
+        log.info(f"Confirmed changes:")
+        log.info(f"{'=' * 60}\n")
+
+        for p in performers_with_changes:
+            log.info(f"Performer: {p['name']} (ID: {p['id']})")
+            for change in p['changes']:
+                log.info(f"  - {change}")
+            log.info(f"  Final URLs:")
+            for url in p['new_urls']:
+                log.info(f"    - {url}")
 
     if dry_run:
         log.info(f"{'=' * 60}")
         log.info(f"PREVIEW MODE - No changes applied")
-        log.info(f"Run 'Apply URL Cleanup' to apply these changes")
+        if performers_with_changes:
+            log.info(f"Run 'Apply URL Cleanup' to apply {len(performers_with_changes)} confirmed changes")
         log.info(f"{'=' * 60}")
     else:
-        log.info(f"Applying changes to {len(performers_to_update)} performers...")
+        if not performers_with_changes:
+            log.info("No confirmed changes to apply")
+            return
+
+        log.info(f"Applying changes to {len(performers_with_changes)} performers...")
 
-        for idx, p in enumerate(performers_to_update):
+        for idx, p in enumerate(performers_with_changes):
             try:
                 stash.update_performer({
                     'id': p['id'],
@@ -197,10 +404,10 @@ def process_performers(stash, dry_run=True):
             except Exception as e:
                 log.error(f"Failed to update {p['name']}: {e}")
 
-            log.progress((idx + 1) / len(performers_to_update))
+            log.progress((idx + 1) / len(performers_with_changes))
 
         log.info(f"{'=' * 60}")
-        log.info(f"Applied URL cleanup to {len(performers_to_update)} performers")
+        log.info(f"Applied URL cleanup to {len(performers_with_changes)} performers")
         log.info(f"{'=' * 60}")
 
 

From fe5afd28582bd89155afc73ac8bfb2dc1f03be37 Mon Sep 17 00:00:00 2001
From: thismanyboyfriends2 <impurity-robe-tilt@duck.com>
Date: Tue, 3 Feb 2026 18:34:12 +0000
Subject: [PATCH 5/7] feat: add debug-file toggle and parallel performer updates

---
 .../performer-url-cleanup.yml                 |  5 ++
 .../performer_url_cleanup.py                  | 76 ++++++++++++-------
 2 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/plugins/performer-url-cleanup/performer-url-cleanup.yml b/plugins/performer-url-cleanup/performer-url-cleanup.yml
index 40613db..cc3153c 100644
--- a/plugins/performer-url-cleanup/performer-url-cleanup.yml
+++ b/plugins/performer-url-cleanup/performer-url-cleanup.yml
@@ -6,6 +6,11 @@ exec:
   - python3
   - "{pluginDir}/performer_url_cleanup.py"
 interface: raw
+settings:
+  writeDebugFiles:
+    displayName: Write debug files
+    description: Output debug text files to the plugin directory for reviewing changes
+    type: BOOLEAN
 tasks:
   - name: Preview URL Cleanup
     description: Shows what URL changes would be made without applying them
diff --git a/plugins/performer-url-cleanup/performer_url_cleanup.py b/plugins/performer-url-cleanup/performer_url_cleanup.py
index 1ba82df..9b2a621 100644
--- a/plugins/performer-url-cleanup/performer_url_cleanup.py
+++ b/plugins/performer-url-cleanup/performer_url_cleanup.py
@@ -5,13 +5,19 @@
 import json
 import sys
 from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from urllib.parse import urlparse, urlunparse
 
-# Debug output paths (temporary)
-DEBUG_DIR = r"C:\stash"
-DEBUG_BY_PERFORMER = f"{DEBUG_DIR}\\url_cleanup_by_performer.txt"
-DEBUG_BY_DOMAIN = f"{DEBUG_DIR}\\url_cleanup_by_domain.txt"
-DEBUG_POTENTIAL = f"{DEBUG_DIR}\\url_cleanup_potential.txt"
+# Number of parallel threads for updates
+PARALLEL_WORKERS = 10
+
+import os
+
+# Debug output paths - written to plugin directory
+PLUGIN_DIR = os.path.dirname(os.path.realpath(__file__))
+DEBUG_BY_PERFORMER = os.path.join(PLUGIN_DIR, "debug_by_performer.txt")
+DEBUG_BY_DOMAIN = os.path.join(PLUGIN_DIR, "debug_by_domain.txt")
+DEBUG_POTENTIAL = os.path.join(PLUGIN_DIR, "debug_potential.txt")
 
 try:
     import stashapi.log as log
@@ -310,7 +316,7 @@ def sort_key(url):
     return sorted_urls, changes, potential_changes
 
 
-def process_performers(stash, dry_run=True):
+def process_performers(stash, dry_run=True, write_debug=False):
     """Process all performers and clean up their URLs."""
     # Fetch all performers with URLs
     log.info("Fetching performers with URLs...")
@@ -357,16 +363,17 @@ def process_performers(stash, dry_run=True):
         log.info("No URL changes needed - all performers are already clean")
         return
 
-    # Write debug files
-    write_debug_files(performers_to_update)
-    log.info(f"Debug files written to {DEBUG_DIR}")
-
     # Filter to only performers with confirmed changes
     performers_with_changes = [p for p in performers_to_update if p['changes']]
     performers_with_potential = [p for p in performers_to_update if p['potential_changes']]
 
     log.info(f"Found {len(performers_with_changes)} performers with confirmed changes")
-    log.info(f"Found {len(performers_with_potential)} performers with potential changes (see {DEBUG_POTENTIAL})")
+    log.info(f"Found {len(performers_with_potential)} performers with potential changes")
+
+    # Write debug files if enabled
+    if write_debug:
+        write_debug_files(performers_to_update)
+        log.info(f"Debug files written to {PLUGIN_DIR}")
 
     if performers_with_changes:
         log.info(f"\n{'=' * 60}")
@@ -392,22 +399,32 @@ def process_performers(stash, dry_run=True):
             log.info("No confirmed changes to apply")
             return
 
-        log.info(f"Applying changes to {len(performers_with_changes)} performers...")
-
-        for idx, p in enumerate(performers_with_changes):
-            try:
-                stash.update_performer({
-                    'id': p['id'],
-                    'urls': p['new_urls']
-                })
-                log.debug(f"Updated {p['name']}")
-            except Exception as e:
-                log.error(f"Failed to update {p['name']}: {e}")
-
-            log.progress((idx + 1) / len(performers_with_changes))
+        log.info(f"Applying changes to {len(performers_with_changes)} performers using {PARALLEL_WORKERS} workers...")
+
+        completed = 0
+        failed = 0
+        total = len(performers_with_changes)
+
+        def update_performer(p):
+            stash.update_performer({'id': p['id'], 'urls': p['new_urls']})
+            return p['name']
+
+        with ThreadPoolExecutor(max_workers=PARALLEL_WORKERS) as executor:
+            futures = {executor.submit(update_performer, p): p for p in performers_with_changes}
+            for future in as_completed(futures):
+                p = futures[future]
+                try:
+                    future.result()
+                    completed += 1
+                    if completed % 100 == 0 or completed == total:
+                        log.info(f"Progress: {completed}/{total} performers updated")
+                except Exception as e:
+                    log.error(f"Failed to update {p['name']}: {e}")
+                    failed += 1
+                log.progress((completed + failed) / total)
 
         log.info(f"{'=' * 60}")
-        log.info(f"Applied URL cleanup to {len(performers_with_changes)} performers")
+        log.info(f"Applied URL cleanup to {completed} performers ({failed} failed)")
         log.info(f"{'=' * 60}")
 
 
@@ -423,13 +440,16 @@ def main():
     # Get mode from args
     mode = json_input.get("args", {}).get("mode", "preview")
 
+    # Get settings; `or {}` also covers keys that are present but null
+    plugin_config = (stash.get_configuration().get("plugins") or {}).get("performer-url-cleanup") or {}
+    write_debug = bool(json_input.get("server_connection", {}).get("PluginDir") and plugin_config.get("writeDebugFiles", False))
+
     log.info(f"Performer URL Cleanup - Mode: {mode}")
-    log.info("")
 
     if mode == "preview":
-        process_performers(stash, dry_run=True)
+        process_performers(stash, dry_run=True, write_debug=write_debug)
     elif mode == "apply":
-        process_performers(stash, dry_run=False)
+        process_performers(stash, dry_run=False, write_debug=write_debug)
     else:
         log.error(f"Unknown mode: {mode}")
 

From a956cd58032daf407210c781d5dc23efe8f120f9 Mon Sep 17 00:00:00 2001
From: thismanyboyfriends2 <impurity-robe-tilt@duck.com>
Date: Tue, 3 Feb 2026 18:39:42 +0000
Subject: [PATCH 6/7] ci: adding in claude review PR step

---
 .github/workflows/claude-review.yml | 41 +++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 .github/workflows/claude-review.yml

diff --git a/.github/workflows/claude-review.yml b/.github/workflows/claude-review.yml
new file mode 100644
index 0000000..fcfdf36
--- /dev/null
+++ b/.github/workflows/claude-review.yml
@@ -0,0 +1,41 @@
+name: Claude PR Review
+
+on:
+  pull_request:
+    types: [opened, synchronize]
+    branches:
+      - main
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+
+jobs:
+  review:
+    runs-on: ubuntu-latest
+    # Only run on PRs targeting main, or on @claude mentions
+    if: |
+      (github.event_name == 'pull_request') ||
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude'))
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: anthropics/claude-code-action@v1
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          prompt: |
+            Review this PR for:
+            1. Code quality and best practices
+            2. Potential bugs or edge cases
+            3. Security concerns
+            4. Alignment with Stash plugin conventions (YAML metadata, Python/JS patterns)
+
+            Be concise and actionable. Focus on substantive issues rather than style nitpicks.

From 56b1e14daaf5861254807c6110c6246461b8e1c9 Mon Sep 17 00:00:00 2001
From: thismanyboyfriends2 <impurity-robe-tilt@duck.com>
Date: Tue, 3 Feb 2026 18:52:25 +0000
Subject: [PATCH 7/7] ci: clamping down permissions

---
 .github/workflows/claude-review.yml | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/claude-review.yml b/.github/workflows/claude-review.yml
index fcfdf36..9c80a9a 100644
--- a/.github/workflows/claude-review.yml
+++ b/.github/workflows/claude-review.yml
@@ -13,11 +13,13 @@ on:
 jobs:
   review:
     runs-on: ubuntu-latest
-    # Only run on PRs targeting main, or on @claude mentions
+    # Only run when triggered by the repo owner (their PRs, or their @claude comments)
     if: |
-      (github.event_name == 'pull_request') ||
-      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
-      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude'))
+      (github.event_name == 'pull_request' && github.event.pull_request.author_association == 'OWNER') ||
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude') &&
+        github.event.comment.author_association == 'OWNER') ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude') &&
+        github.event.comment.author_association == 'OWNER')
     permissions:
       contents: read
       pull-requests: write