From f606fe8d78ebd0cac8abcedb05b1f92020acacbb Mon Sep 17 00:00:00 2001 From: Zaphirios <114142370+guiltekmdion@users.noreply.github.com> Date: Mon, 29 Dec 2025 14:09:54 +0100 Subject: [PATCH 01/36] fix: Windows compatibility, ComicInfo XML generation, ISBN metadata, and cover-based homonym disambiguation Changes: - Fixed __main__.py: Corrected main() function call signature - Fixed bdnex/lib/utils.py: Cross-platform config path handling for Windows (APPDATA/USERPROFILE) - Fixed bdnex/lib/bdgest.py: * UTF-8 encoding for sitemap file reading * Safe temp file cleanup with try/finally * Added search_album_candidates_fast() for top-k fuzzy matching * Append ISBN to ComicInfo Notes field when available * Improved date parsing with fallback - Fixed bdnex/lib/cover.py: Use expanduser('~') instead of HOME env var; ensure covers directory exists - Fixed bdnex/lib/comicrack.py: * Switched from xmlschema JSON conversion to direct ElementTree XML generation * Format CommunityRating to 2 decimals * Use xmldiff for visualization when replacing ComicInfo.xml - Enhanced bdnex/ui/__init__.py: * Implement cover-based homonym disambiguation * Rank top-k fuzzy candidates by cover similarity * Select best match above configured threshold; fallback to default fuzzy URL Testing confirms successful processing of CBZ files with accurate metadata extraction and ComicInfo.xml injection. 
--- bdnex/__main__.py | 2 +- bdnex/lib/bdgest.py | 73 +++++++++++++++++++++++++++++++++--------- bdnex/lib/comicrack.py | 22 ++++++++++--- bdnex/lib/cover.py | 10 ++++-- bdnex/lib/utils.py | 9 ++++-- bdnex/ui/__init__.py | 29 +++++++++++++++-- 6 files changed, 117 insertions(+), 28 deletions(-) diff --git a/bdnex/__main__.py b/bdnex/__main__.py index c0c0e93..0cfeef1 100644 --- a/bdnex/__main__.py +++ b/bdnex/__main__.py @@ -7,4 +7,4 @@ from .ui import main if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file + main() \ No newline at end of file diff --git a/bdnex/lib/bdgest.py b/bdnex/lib/bdgest.py index 1f065d9..7c8b856 100644 --- a/bdnex/lib/bdgest.py +++ b/bdnex/lib/bdgest.py @@ -6,6 +6,7 @@ import time import urllib from datetime import datetime +from decimal import Decimal, ROUND_HALF_UP from functools import lru_cache from os import listdir from os.path import isfile, join @@ -115,25 +116,30 @@ def concatenate_sitemaps_files(self): def clean_sitemaps_urls(self): tempfile_path = self.concatenate_sitemaps_files() - with open(tempfile_path, 'r') as f: - myNames = [line.strip() for line in f] + try: + with open(tempfile_path, 'r', encoding='utf-8') as f: + myNames = [line.strip() for line in f] - # keep only mobile links - stringlist = [x for x in myNames if "m.bedetheque.com/BD-" in x] + # keep only mobile links + stringlist = [x for x in myNames if "m.bedetheque.com/BD-" in x] - # various string cleaning - urls_list = [re.search(r"(?P<url>https?://[^\s]+)", x).group("url").replace('"', '') for x in stringlist] - cleansed = [x.replace('https://m.bedetheque.com/BD-', '').replace('.html', '').replace('-', ' ') - for x in urls_list] + # various string cleaning + urls_list = [re.search(r"(?P<url>https?://[^\s]+)", x).group("url").replace('"', '') for x in stringlist] + cleansed = [x.replace('https://m.bedetheque.com/BD-', '').replace('.html', '').replace('-', ' ') + for x in urls_list] - cleansed = [ re.sub(r'\d+$', '', x) for x in cleansed ] # remove 
ending numbers - # remove common french words. Will make levenshtein distance work better - album_list = [] - for val in cleansed: - album_list.append(self.remove_common_words_from_string(val)) + cleansed = [ re.sub(r'\d+$', '', x) for x in cleansed ] # remove ending numbers + # remove common french words. Will make levenshtein distance work better + album_list = [] + for val in cleansed: + album_list.append(self.remove_common_words_from_string(val)) - os.remove(tempfile_path) - return album_list, urls_list + return album_list, urls_list + finally: + try: + os.remove(tempfile_path) + except (OSError, PermissionError): + pass # Ignore if file can't be deleted on Windows @staticmethod def remove_common_words_from_string(string_to_clean): @@ -183,6 +189,30 @@ def search_album_from_sitemaps_fast(self, album_name): except Exception as err: self.logger.error("Fast search didn't provide any results") + def search_album_candidates_fast(self, album_name, top_k=5): + """ + Return top_k candidate URLs from sitemaps using fuzzy matching. + Each candidate is a tuple: (name_string, score, url) + """ + album_list, urls = self.clean_sitemaps_urls() + album_name_simplified = self.remove_common_words_from_string(album_name) + + try: + album_name_first_word = re.match(r'\W*(\w[^,-_. 
!?"]*)', album_name_simplified).groups()[0] + except Exception: + album_name_first_word = album_name_simplified.split()[0] if album_name_simplified.split() else album_name + + test_album = [x for id, x in enumerate(album_list) if album_name_first_word in x] + test_id = [id for id, x in enumerate(album_list) if album_name_first_word in x] + + df = [[x, fuzz.ratio(album_name, x)] for x in test_album] + df = pd.DataFrame(df) + df["urls"] = [urls[x] for x in test_id] + + df = df.sort_values([1], ascending=[False]).head(top_k) + candidates = [(row[0], row[1], row[2]) for row in df.values] + return candidates + def search_album_from_sitemaps_interactive(self): # interactive fuzzy search for user prompt @@ -438,7 +468,18 @@ def comicinfo_metadata(self, metadata_dict): comicrack_dict = {} for key in bdgest_mapping.keys(): if key in metadata_dict.keys(): - comicrack_dict[bdgest_mapping[key]] = metadata_dict[key] + value = metadata_dict[key] + # Round CommunityRating to 2 decimal places using Decimal for precision + if bdgest_mapping[key] == "CommunityRating" and isinstance(value, (int, float)): + value = float(Decimal(str(value)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)) + comicrack_dict[bdgest_mapping[key]] = value + + # Append ISBN to Notes if available (ComicInfo.xsd has no dedicated ISBN field) + isbn = metadata_dict.get('ISBN') + if isbn: + existing_notes = comicrack_dict.get('Notes', '') + notes = f"{existing_notes}\nISBN: {isbn}".strip() + comicrack_dict['Notes'] = notes try: published_date = dateutil.parser.parse(metadata_dict['Dépot_légal']) diff --git a/bdnex/lib/comicrack.py b/bdnex/lib/comicrack.py index 81352f4..bcb41cb 100644 --- a/bdnex/lib/comicrack.py +++ b/bdnex/lib/comicrack.py @@ -31,11 +31,23 @@ def comicInfo_xml_create(self): tmpdir = tempfile.mkdtemp() comic_info_fp = os.path.join(tmpdir, 'ComicInfo.xml') - schema = xmlschema.XMLSchema(COMICINFO_TEMPLATE) - - data = json.dumps(self.comic_info, default=str, sort_keys=True) - tmp_xml = 
xmlschema.from_json(data, preserve_root=True, schema=schema) - ET.ElementTree(tmp_xml).write(comic_info_fp, encoding='UTF-8', xml_declaration=True) + # Create XML directly to avoid JSON conversion issues with decimals + root = ET.Element("ComicInfo") + root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance") + root.set("xmlns:xsd", "http://www.w3.org/2001/XMLSchema") + + for key, value in self.comic_info.items(): + if value is not None and value != '': + elem = ET.SubElement(root, key) + # Format floats with 2 decimals max + if isinstance(value, float): + elem.text = f"{value:.2f}" + else: + elem.text = str(value) + + tree = ET.ElementTree(root) + ET.indent(tree, space=" ") + tree.write(comic_info_fp, encoding='UTF-8', xml_declaration=True) return comic_info_fp diff --git a/bdnex/lib/cover.py b/bdnex/lib/cover.py index 5dde5f0..f4ce41f 100644 --- a/bdnex/lib/cover.py +++ b/bdnex/lib/cover.py @@ -12,8 +12,14 @@ def get_bdgest_cover(cover_url): logger = logging.getLogger(__name__) cover_name = os.path.basename(cover_url) - os.path.join(os.environ["HOME"], '.local/share/bdnex/bedetheque/') - covers_local_path = os.path.join(os.environ["HOME"], '.local/share/bdnex/bedetheque/covers') + # Get home directory in a cross-platform way + home_dir = os.path.expanduser('~') + covers_local_path = os.path.join(home_dir, '.local/share/bdnex/bedetheque/covers') + + # Create directory if it doesn't exist + if not os.path.exists(covers_local_path): + os.makedirs(covers_local_path) + cover_local_path = os.path.join(covers_local_path, cover_name) if os.path.exists(cover_local_path): diff --git a/bdnex/lib/utils.py b/bdnex/lib/utils.py index 1a5756e..c81510e 100644 --- a/bdnex/lib/utils.py +++ b/bdnex/lib/utils.py @@ -105,8 +105,13 @@ def _init_config(): bdnex_user_path = os.path.join(os.environ[UNIX_DIR_VAR], 'bdnex') else: - bdnex_user_path = os.path.join(os.environ[UNIX_DIR_FALLBACK], - 'bdnex') + # On Windows, use APPDATA or USERPROFILE + if os.name == 'nt': + config_base 
= os.environ.get('APPDATA', os.environ.get('USERPROFILE', os.path.expanduser('~'))) + else: + config_base = os.path.expanduser(UNIX_DIR_FALLBACK) + bdnex_user_path = os.path.join(config_base, 'bdnex') + user_config_path = os.path.join(bdnex_user_path, 'bdnex.yaml') diff --git a/bdnex/ui/__init__.py b/bdnex/ui/__init__.py index 23510b4..9fb6f7b 100644 --- a/bdnex/ui/__init__.py +++ b/bdnex/ui/__init__.py @@ -23,10 +23,35 @@ def add_metadata_from_bdgest(filename): logger.info(f"Processing {filename}") album_name = os.path.splitext(os.path.basename(filename))[0] - bdgest_meta, comicrack_meta = BdGestParse().parse_album_metadata_mobile(album_name) + # Extract archive cover first for disambiguation cover_archive_fp = archive_get_front_cover(filename) - cover_web_fp = get_bdgest_cover(bdgest_meta["cover_url"]) + + # Try disambiguation using cover similarity across top fuzzy candidates + parser = BdGestParse() + candidates = parser.search_album_candidates_fast(album_name, top_k=5) + chosen_url = None + best_sim = -1 + best_cover_web_fp = None + for _, _, url in candidates: + try: + bd_meta_candidate, _ = parser.parse_album_metadata_mobile(album_name, album_url=url) + cover_web_fp_candidate = get_bdgest_cover(bd_meta_candidate["cover_url"]) + sim = front_cover_similarity(cover_archive_fp, cover_web_fp_candidate) + if sim > best_sim: + best_sim = sim + chosen_url = url + best_cover_web_fp = cover_web_fp_candidate + except Exception: + continue + + # If best similarity passes threshold, use that URL; else fallback to default fuzzy URL + if best_sim >= bdnex_conf['cover']['match_percentage'] and chosen_url: + bdgest_meta, comicrack_meta = parser.parse_album_metadata_mobile(album_name, album_url=chosen_url) + cover_web_fp = best_cover_web_fp + else: + bdgest_meta, comicrack_meta = parser.parse_album_metadata_mobile(album_name) + cover_web_fp = get_bdgest_cover(bdgest_meta["cover_url"]) percentage_similarity = front_cover_similarity(cover_archive_fp, cover_web_fp) From 
09b75bdd49ea6bdf93faa67d9c1b7794039cd41e Mon Sep 17 00:00:00 2001 From: Zaphirios <114142370+guiltekmdion@users.noreply.github.com> Date: Mon, 29 Dec 2025 14:20:31 +0100 Subject: [PATCH 02/36] feat: Multi-criteria scoring system with interactive challenge UI for album disambiguation New features: - FilenameMetadataExtractor: Parse BD filenames to extract volume numbers and titles - CandidateScorer: Score albums using weighted criteria (40% cover similarity, 30% volume match, 15% editor, 15% year) - ChallengeUI: Beautiful interactive HTML interface displayed when confidence is low - HTTP server for real-time user selection with timeout handling Workflow improvements: - Automatic scoring of top-5 fuzzy candidates - Challenge threshold (70%) triggers interactive UI for low-confidence matches - Keyboard shortcuts (1-5) for quick selection in browser - Graceful fallback to manual selection if no match selected - Color-coded scoring display (green/orange/red) for visual feedback Configuration: - New config parameter: cover.challenge_threshold (default 70%) - Challenge UI shows top-3 best matches with detailed metadata - Responsive design works on all screen sizes --- bdnex/conf/bdnex.yaml | 3 +- bdnex/ui/__init__.py | 118 +++++++-- bdnex/ui/challenge.py | 549 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 649 insertions(+), 21 deletions(-) create mode 100644 bdnex/ui/challenge.py diff --git a/bdnex/conf/bdnex.yaml b/bdnex/conf/bdnex.yaml index aee1fa3..addc3ec 100644 --- a/bdnex/conf/bdnex.yaml +++ b/bdnex/conf/bdnex.yaml @@ -19,4 +19,5 @@ paths: series: '%language/series/%title (%author)/%title - %volume' cover: - match_percentage: 40 \ No newline at end of file + match_percentage: 40 + challenge_threshold: 0.70 # Show challenge UI if best match score < 70% \ No newline at end of file diff --git a/bdnex/ui/__init__.py b/bdnex/ui/__init__.py index 9fb6f7b..225c0d1 100644 --- a/bdnex/ui/__init__.py +++ b/bdnex/ui/__init__.py @@ -2,19 +2,25 @@ import os import 
logging import shutil +import http.server +import socketserver +import json +from threading import Thread +from urllib.parse import urlparse, parse_qs from bdnex.lib.archive_tools import archive_get_front_cover from bdnex.lib.bdgest import BdGestParse from bdnex.lib.comicrack import comicInfo from bdnex.lib.cover import front_cover_similarity, get_bdgest_cover from bdnex.lib.utils import yesno, args, bdnex_config +from bdnex.lib.disambiguation import FilenameMetadataExtractor, CandidateScorer +from bdnex.ui.challenge import ChallengeUI from pathlib import Path from termcolor import colored def add_metadata_from_bdgest(filename): bdnex_conf = bdnex_config() - logger = logging.getLogger(__name__) start_separator = colored(f'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~', 'red', attrs=['bold']) @@ -23,39 +29,112 @@ def add_metadata_from_bdgest(filename): logger.info(f"Processing {filename}") album_name = os.path.splitext(os.path.basename(filename))[0] + filename_basename = os.path.basename(filename) # Extract archive cover first for disambiguation cover_archive_fp = archive_get_front_cover(filename) - # Try disambiguation using cover similarity across top fuzzy candidates + # Extract filename metadata + extractor = FilenameMetadataExtractor() + filename_volume = extractor.extract_volume_number(album_name) + + # Try disambiguation using multi-criteria scoring across top fuzzy candidates parser = BdGestParse() candidates = parser.search_album_candidates_fast(album_name, top_k=5) - chosen_url = None - best_sim = -1 - best_cover_web_fp = None + + # Score all candidates + scored_candidates = [] + cover_similarities = [] + candidate_covers = [] + for _, _, url in candidates: try: - bd_meta_candidate, _ = parser.parse_album_metadata_mobile(album_name, album_url=url) + bd_meta_candidate, comicrack_meta_candidate = parser.parse_album_metadata_mobile(album_name, album_url=url) cover_web_fp_candidate = 
get_bdgest_cover(bd_meta_candidate["cover_url"]) sim = front_cover_similarity(cover_archive_fp, cover_web_fp_candidate) - if sim > best_sim: - best_sim = sim - chosen_url = url - best_cover_web_fp = cover_web_fp_candidate - except Exception: + + cover_similarities.append(sim) + candidate_covers.append(cover_web_fp_candidate) + + # Build candidate metadata dict + candidate_meta = { + 'title': bd_meta_candidate.get('title', 'Unknown'), + 'volume': bd_meta_candidate.get('tome', -1), + 'editor': bd_meta_candidate.get('publisher', 'Unknown'), + 'year': bd_meta_candidate.get('year', -1), + 'pages': bd_meta_candidate.get('page_count', '?'), + 'url': url, + 'comicrack_meta': comicrack_meta_candidate, + 'cover_path': cover_web_fp_candidate, + } + scored_candidates.append(candidate_meta) + except Exception as e: + logger.debug(f"Error processing candidate: {e}") continue - # If best similarity passes threshold, use that URL; else fallback to default fuzzy URL - if best_sim >= bdnex_conf['cover']['match_percentage'] and chosen_url: - bdgest_meta, comicrack_meta = parser.parse_album_metadata_mobile(album_name, album_url=chosen_url) - cover_web_fp = best_cover_web_fp + if not scored_candidates: + logger.error("No valid candidates found") + return + + # Filename metadata + filename_metadata = { + 'volume': filename_volume, + 'title': album_name, + 'editor': 'unknown', + 'year': -1, + } + + # Score candidates + scorer = CandidateScorer() + scored = scorer.score_candidates(filename_metadata, scored_candidates, cover_similarities) + + best_candidate, best_score = scored[0] + + logger.info(f"Top match score: {best_score * 100:.1f}%") + + # Determine if we need challenge UI + challenge_threshold = bdnex_conf['cover'].get('challenge_threshold', 0.70) # Default 70% + + if best_score >= challenge_threshold: + # High confidence, use automatically + logger.info(f"High confidence match ({best_score * 100:.1f}%). 
Using automatically.") + bdgest_meta = {k: v for k, v in best_candidate.items() if k not in ['comicrack_meta', 'cover_path']} + comicrack_meta = best_candidate['comicrack_meta'] + cover_web_fp = best_candidate['cover_path'] else: - bdgest_meta, comicrack_meta = parser.parse_album_metadata_mobile(album_name) - cover_web_fp = get_bdgest_cover(bdgest_meta["cover_url"]) + # Low confidence, show challenge + logger.warning(f"Low confidence match ({best_score * 100:.1f}%). Showing challenge UI.") + + # Prepare candidates for challenge (top 3) + challenge_candidates = [] + for candidate, score in scored[:3]: + challenge_candidates.append((candidate, score, candidate['cover_path'])) + + # Show challenge + challenge_ui = ChallengeUI() + selected_idx = challenge_ui.show_challenge_interactive( + cover_archive_fp, + challenge_candidates, + filename_basename + ) + + if selected_idx is not None and 0 <= selected_idx < len(challenge_candidates): + selected_candidate = challenge_candidates[selected_idx][0] + logger.info(f"User selected candidate: {selected_candidate['title']}") + bdgest_meta = {k: v for k, v in selected_candidate.items() if k not in ['comicrack_meta', 'cover_path']} + comicrack_meta = selected_candidate['comicrack_meta'] + cover_web_fp = selected_candidate['cover_path'] + else: + # Fallback to manual selection + logger.info(f"Looking manually for {colored(filename_basename, 'red', attrs=['bold'])}") + album_url = BdGestParse().search_album_from_sitemaps_interactive() + bdgest_meta, comicrack_meta = BdGestParse().parse_album_metadata_mobile(album_name, album_url=album_url) + cover_web_fp = get_bdgest_cover(bdgest_meta["cover_url"]) + # Final check and apply metadata percentage_similarity = front_cover_similarity(cover_archive_fp, cover_web_fp) - if percentage_similarity > bdnex_conf['cover']['match_percentage']: + if percentage_similarity > bdnex_conf['cover'].get('match_percentage', 50): comicInfo(filename, comicrack_meta).append_comicinfo_to_archive() else: 
logger.warning("UserPrompt required") @@ -63,9 +142,8 @@ def add_metadata_from_bdgest(filename): if ans: comicInfo(filename, comicrack_meta).append_comicinfo_to_archive() else: - logger.info(f"Looking manually for {colored(os.path.basename(filename), 'red', attrs=['bold'])}") + logger.info(f"Looking manually for {colored(filename_basename, 'red', attrs=['bold'])}") album_url = BdGestParse().search_album_from_sitemaps_interactive() - bdgest_meta, comicrack_meta = BdGestParse().parse_album_metadata_mobile(album_name, album_url=album_url) comicInfo(filename, comicrack_meta).append_comicinfo_to_archive() diff --git a/bdnex/ui/challenge.py b/bdnex/ui/challenge.py new file mode 100644 index 0000000..44bec55 --- /dev/null +++ b/bdnex/ui/challenge.py @@ -0,0 +1,549 @@ +""" +Challenge UI module - generates interactive HTML interface for album disambiguation. +""" +import os +import webbrowser +import tempfile +import base64 +import logging +import http.server +import socketserver +import json +from pathlib import Path +from typing import List, Tuple, Dict, Optional +from urllib.parse import urlparse, parse_qs + + +class ChallengeUI: + """Generate and display interactive HTML challenge for album disambiguation.""" + + def __init__(self): + self.logger = logging.getLogger(__name__) + + @staticmethod + def image_to_base64(image_path: str) -> str: + """Convert image file to base64 data URL.""" + try: + with open(image_path, 'rb') as img_file: + data = base64.b64encode(img_file.read()).decode() + # Determine file extension + ext = Path(image_path).suffix.lower() + mime_type = { + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.bmp': 'image/bmp', + '.webp': 'image/webp', + }.get(ext, 'image/jpeg') + return f"data:{mime_type};base64,{data}" + except Exception as e: + logging.getLogger(__name__).error(f"Error converting image to base64: {e}") + return "" + + @staticmethod + def generate_html( + local_cover_path: str, + candidates: List[Tuple[Dict, float, 
str]], # (metadata, score, cover_path) + filename: str, + ) -> str: + """ + Generate HTML page for disambiguation challenge. + + Args: + local_cover_path: Path to local cover image + candidates: List of (metadata_dict, score, cover_image_path) tuples + filename: Name of the BD file being processed + + Returns: + HTML content as string + """ + local_cover_b64 = ChallengeUI.image_to_base64(local_cover_path) + + # Build candidates HTML + candidates_html = "" + for idx, (metadata, score, cover_path) in enumerate(candidates, 1): + cover_b64 = ChallengeUI.image_to_base64(cover_path) + score_percent = int(score * 100) + score_color = ChallengeUI.get_score_color(score) + + title = metadata.get('title', 'Unknown') + volume = metadata.get('volume', '?') + editor = metadata.get('editor', 'Unknown') + year = metadata.get('year', '?') + pages = metadata.get('pages', '?') + url = metadata.get('url', '#') + + candidates_html += f""" +
+
+

Option {idx}

+
+ {score_percent}% + Match +
+
+ +
+ Candidate {idx} cover +
+ +
+
+ Title: + {title} +
+
+ Volume: + {volume} +
+
+ Editor: + {editor} +
+
+ Year: + {year} +
+
+ Pages: + {pages} +
+
+ +
+ + View on Bédéthèque +
+
+ """ + + html = f""" + + + + + + BDneX Album Disambiguation Challenge + + + +
+
+

🎯 Album Disambiguation Challenge

+

File: {filename}

+
+ +
+
+ ✓ Selected: +
+ +
+

📖 Your Local Cover

+
+ Local cover +
+
+ +
+

🔍 Top Candidates from Bédéthèque

+
+ {candidates_html} +
+
+
+ + +
+ + + + + """ + return html + + @staticmethod + def get_score_color(score: float) -> str: + """Get color for score badge based on score value.""" + if score >= 0.80: + return "#4caf50" # Green + elif score >= 0.60: + return "#ff9800" # Orange + elif score >= 0.40: + return "#ff5722" # Red-Orange + else: + return "#f44336" # Red + + def show_challenge_interactive( + self, + local_cover_path: str, + candidates: List[Tuple[Dict, float, str]], + filename: str, + ) -> Optional[int]: + """ + Display challenge in browser and wait for user selection. + Uses a simple HTTP server to communicate with the browser. + + Returns: + Selected candidate index (0-based) or None if no selection + """ + html_content = self.generate_html(local_cover_path, candidates, filename) + + # Store selection globally (will be set by browser via query param) + selected = {'idx': None} + + # Create a simple HTTP request handler + class ChallengeHandler(http.server.SimpleHTTPRequestHandler): + def do_GET(self): + parsed_path = urlparse(self.path) + + # Handle selection endpoint + if parsed_path.path == '/select': + params = parse_qs(parsed_path.query) + if 'idx' in params: + try: + selected['idx'] = int(params['idx'][0]) - 1 # Convert to 0-based + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({'status': 'ok'}).encode()) + return + except (ValueError, IndexError): + pass + + # Handle HTML request + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write(html_content.encode()) + + def log_message(self, format, *args): + # Suppress logging + pass + + # Start temporary HTTP server + port = self._find_free_port() + handler = ChallengeHandler + + with socketserver.TCPServer(("", port), handler) as httpd: + url = f"http://localhost:{port}/" + self.logger.info(f"Challenge server running at {url}") + + try: + webbrowser.open(url) + + # Wait for user selection or timeout 
+ import time + start_time = time.time() + timeout = 300 # 5 minutes + + while time.time() - start_time < timeout: + if selected['idx'] is not None: + self.logger.info(f"User selected candidate {selected['idx'] + 1}") + return selected['idx'] + httpd.handle_request() # Handle one request + time.sleep(0.1) + + self.logger.warning("Challenge timeout - no selection made") + return None + + except KeyboardInterrupt: + self.logger.info("Challenge cancelled by user") + return None + + @staticmethod + def _find_free_port() -> int: + """Find a free port to use for the HTTP server.""" + import socket + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('', 0)) + s.listen(1) + port = s.getsockname()[1] + return port From 61c63fc323060fac6b814d1d6594e4c5d9068c5d Mon Sep 17 00:00:00 2001 From: Zaphirios <114142370+guiltekmdion@users.noreply.github.com> Date: Mon, 29 Dec 2025 14:22:44 +0100 Subject: [PATCH 03/36] feat: Add "Search Manually" button to challenge UI for rejected candidates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users can now click "Search Manually" button if none of the suggested candidates are correct. This triggers the interactive manual search on Bédéthèque instead of forcing a selection. 
Improvements: - Red "Search Manually" button in challenge UI footer - User can explicitly reject all suggestions - Falls back to interactive fuzzy search for better results - Clear visual distinction from selection buttons --- bdnex/lib/bdgest.py | 13 +++++++++++ bdnex/ui/__init__.py | 26 +++++++++++++++------- bdnex/ui/challenge.py | 51 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 81 insertions(+), 9 deletions(-) diff --git a/bdnex/lib/bdgest.py b/bdnex/lib/bdgest.py index 7c8b856..4ff3ca6 100644 --- a/bdnex/lib/bdgest.py +++ b/bdnex/lib/bdgest.py @@ -48,6 +48,19 @@ def __init__(self): os.makedirs(self.album_metadata_json_path) self.album_metadata_html_path = os.path.join(self.bdnex_local_path, 'albums_html') + + @staticmethod + def parse_date_from_depot_legal(depot_legal_str): + """Parse Dépot légal string and return datetime object.""" + if not depot_legal_str: + return None + try: + return dateutil.parser.parse(depot_legal_str) + except Exception: + try: + return datetime.strptime(depot_legal_str, '(Parution le %d/%m/%Y)') + except Exception: + return None if not os.path.exists(self.album_metadata_html_path): os.makedirs(self.album_metadata_html_path) diff --git a/bdnex/ui/__init__.py b/bdnex/ui/__init__.py index 225c0d1..595cdc1 100644 --- a/bdnex/ui/__init__.py +++ b/bdnex/ui/__init__.py @@ -56,13 +56,23 @@ def add_metadata_from_bdgest(filename): cover_similarities.append(sim) candidate_covers.append(cover_web_fp_candidate) + # Extract year from Dépot_légal if present + candidate_year = -1 + try: + if 'Dépot_légal' in bd_meta_candidate: + published_date = parser.parse_date_from_depot_legal(bd_meta_candidate['Dépot_légal']) + if published_date: + candidate_year = published_date.year + except: + pass + # Build candidate metadata dict candidate_meta = { - 'title': bd_meta_candidate.get('title', 'Unknown'), - 'volume': bd_meta_candidate.get('tome', -1), - 'editor': bd_meta_candidate.get('publisher', 'Unknown'), - 'year': 
bd_meta_candidate.get('year', -1), - 'pages': bd_meta_candidate.get('page_count', '?'), + 'title': bd_meta_candidate.get('Titre', 'Unknown'), + 'volume': bd_meta_candidate.get('Tome', -1), + 'editor': bd_meta_candidate.get('Éditeur', 'Unknown'), + 'year': candidate_year, + 'pages': bd_meta_candidate.get('Planches', '?'), 'url': url, 'comicrack_meta': comicrack_meta_candidate, 'cover_path': cover_web_fp_candidate, @@ -118,15 +128,15 @@ def add_metadata_from_bdgest(filename): filename_basename ) - if selected_idx is not None and 0 <= selected_idx < len(challenge_candidates): + if selected_idx is not None and selected_idx >= 0 and selected_idx < len(challenge_candidates): selected_candidate = challenge_candidates[selected_idx][0] logger.info(f"User selected candidate: {selected_candidate['title']}") bdgest_meta = {k: v for k, v in selected_candidate.items() if k not in ['comicrack_meta', 'cover_path']} comicrack_meta = selected_candidate['comicrack_meta'] cover_web_fp = selected_candidate['cover_path'] else: - # Fallback to manual selection - logger.info(f"Looking manually for {colored(filename_basename, 'red', attrs=['bold'])}") + # Fallback to manual selection (user clicked "None of these") + logger.info(f"User rejected all candidates. 
Starting manual search for {colored(filename_basename, 'red', attrs=['bold'])}") album_url = BdGestParse().search_album_from_sitemaps_interactive() bdgest_meta, comicrack_meta = BdGestParse().parse_album_metadata_mobile(album_name, album_url=album_url) cover_web_fp = get_bdgest_cover(bdgest_meta["cover_url"]) diff --git a/bdnex/ui/challenge.py b/bdnex/ui/challenge.py index 44bec55..db2d414 100644 --- a/bdnex/ui/challenge.py +++ b/bdnex/ui/challenge.py @@ -366,6 +366,29 @@ def generate_html( .selected-info.visible {{ display: block; }} + + .no-match-section {{ + text-align: center; + padding: 30px; + margin-top: 40px; + border-top: 2px solid #e0e0e0; + }} + + .btn-none-of-these {{ + padding: 12px 24px; + background: #f44336; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; + font-size: 16px; + font-weight: 600; + transition: background 0.3s ease; + }} + + .btn-none-of-these:hover {{ + background: #d32f2f; + }} @@ -393,6 +416,12 @@ def generate_html( {candidates_html} + +
+

None of these look right?

+

Search manually on Bédéthèque for the correct album

+ +