diff --git a/.gitignore b/.gitignore index 9e1d25d..0564e17 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,8 @@ wheels/ .venv # Custom +.data/ *_data/ *.epub +!alice.epub + diff --git a/README.md b/README.md index 5d868d7..0817ad6 100644 --- a/README.md +++ b/README.md @@ -6,21 +6,36 @@ A lightweight, self-hosted EPUB reader that lets you read through EPUB books one This project was 90% vibe coded just to illustrate how one can very easily [read books together with LLMs](https://x.com/karpathy/status/1990577951671509438). I'm not going to support it in any way, it's provided here as is for other people's inspiration and I don't intend to improve it. Code is ephemeral now and libraries are over, ask your LLM to change it in whatever way you like. -## Usage +## Simple Usage -The project uses [uv](https://docs.astral.sh/uv/). So for example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345) to this directory as `dracula.epub`, then: +The easiest way to use the reader: + +```bash + uv run book.py alice.epub +``` + +This will: +1. Process the EPUB file and extract all content, images, and metadata +2. Start the server automatically +3. Open the book at [localhost:8123](http://localhost:8123/) + +You can run it again with a different book, and the previous one will be replaced. Press `Ctrl+C` to stop the server. + +## Advanced Usage + +The project uses [uv](https://docs.astral.sh/uv/). For manual control over processing and serving: ```bash uv run reader3.py dracula.epub ``` -This creates the directory `dracula_data`, which registers the book to your local library. We can then run the server: +This creates the directory `dracula_data` with the processed book. Then run the server separately: ```bash uv run server.py ``` -And visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting the folder. It's not supposed to be complicated or complex. 
+And visit [localhost:8123](http://localhost:8123/). You can easily add more books, or delete them from your library by deleting the `*_data` folder. It's not supposed to be complicated or complex. ## License diff --git a/alice.epub b/alice.epub new file mode 100644 index 0000000..727dfe6 Binary files /dev/null and b/alice.epub differ diff --git a/book.py b/book.py new file mode 100644 index 0000000..46e6749 --- /dev/null +++ b/book.py @@ -0,0 +1,453 @@ +#!/usr/bin/env python3 +""" +Unified EPUB reader: processes books and runs the server. +Usage: python book.py +""" + +import os +import pickle +import shutil +import sys +import socket +from dataclasses import dataclass, field +from typing import List, Dict, Optional +from datetime import datetime +from urllib.parse import unquote +from functools import lru_cache + +import ebooklib +from ebooklib import epub +from bs4 import BeautifulSoup, Comment +from fastapi import FastAPI, Request, HTTPException +from fastapi.responses import HTMLResponse, FileResponse +from fastapi.templating import Jinja2Templates +import uvicorn + +from claude_code_detect import get_claude_code_status +from book_info import ( + get_ai_conclusion, + get_paragraph_summaries, +) + +# --- Data structures --- + +@dataclass +class ChapterContent: + id: str + href: str + title: str + content: str + text: str + order: int + + +@dataclass +class TOCEntry: + title: str + href: str + file_href: str + anchor: str + children: List['TOCEntry'] = field(default_factory=list) + + +@dataclass +class BookMetadata: + title: str + language: str + authors: List[str] = field(default_factory=list) + description: Optional[str] = None + publisher: Optional[str] = None + date: Optional[str] = None + identifiers: List[str] = field(default_factory=list) + subjects: List[str] = field(default_factory=list) + + +@dataclass +class Book: + metadata: BookMetadata + spine: List[ChapterContent] + toc: List[TOCEntry] + images: Dict[str, str] + source_file: str + processed_at: 
str + version: str = "3.0" + + +# --- Utilities --- + +def clean_html_content(soup: BeautifulSoup) -> BeautifulSoup: + for tag in soup(['script', 'style', 'iframe', 'video', 'nav', 'form', 'button']): + tag.decompose() + + for comment in soup.find_all(string=lambda text: isinstance(text, Comment)): + comment.extract() + + for tag in soup.find_all('input'): + tag.decompose() + + return soup + + +def extract_plain_text(soup: BeautifulSoup) -> str: + text = soup.get_text(separator=' ') + return ' '.join(text.split()) + + +def parse_toc_recursive(toc_list, depth=0) -> List[TOCEntry]: + result = [] + + for item in toc_list: + if isinstance(item, tuple): + section, children = item + entry = TOCEntry( + title=section.title, + href=section.href, + file_href=section.href.split('#')[0], + anchor=section.href.split('#')[1] if '#' in section.href else "", + children=parse_toc_recursive(children, depth + 1) + ) + result.append(entry) + elif isinstance(item, epub.Link): + entry = TOCEntry( + title=item.title, + href=item.href, + file_href=item.href.split('#')[0], + anchor=item.href.split('#')[1] if '#' in item.href else "" + ) + result.append(entry) + elif isinstance(item, epub.Section): + entry = TOCEntry( + title=item.title, + href=item.href, + file_href=item.href.split('#')[0], + anchor=item.href.split('#')[1] if '#' in item.href else "" + ) + result.append(entry) + + return result + + +def get_fallback_toc(book_obj) -> List[TOCEntry]: + toc = [] + for item in book_obj.get_items(): + if item.get_type() == ebooklib.ITEM_DOCUMENT: + name = item.get_name() + title = item.get_name().replace('.html', '').replace('.xhtml', '').replace('_', ' ').title() + toc.append(TOCEntry(title=title, href=name, file_href=name, anchor="")) + return toc + + +def extract_metadata_robust(book_obj) -> BookMetadata: + def get_list(key): + data = book_obj.get_metadata('DC', key) + return [x[0] for x in data] if data else [] + + def get_one(key): + data = book_obj.get_metadata('DC', key) + return 
data[0][0] if data else None + + return BookMetadata( + title=get_one('title') or "Untitled", + language=get_one('language') or "en", + authors=get_list('creator'), + description=get_one('description'), + publisher=get_one('publisher'), + date=get_one('date'), + identifiers=get_list('identifier'), + subjects=get_list('subject') + ) + + +# --- Main EPUB Processing --- + +def process_epub(epub_path: str, output_dir: str) -> Book: + print(f"Loading {epub_path}...") + book = epub.read_epub(epub_path) + + metadata = extract_metadata_robust(book) + + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + images_dir = os.path.join(output_dir, 'images') + os.makedirs(images_dir, exist_ok=True) + + print("Extracting images...") + image_map = {} + + for item in book.get_items(): + if item.get_type() in (ebooklib.ITEM_IMAGE, ebooklib.ITEM_COVER): + original_fname = os.path.basename(item.get_name()) + safe_fname = "".join([c for c in original_fname if c.isalpha() or c.isdigit() or c in '._-']).strip() + + local_path = os.path.join(images_dir, safe_fname) + with open(local_path, 'wb') as f: + f.write(item.get_content()) + + rel_path = f"images/{safe_fname}" + image_map[item.get_name()] = rel_path + image_map[original_fname] = rel_path + + print("Parsing Table of Contents...") + toc_structure = parse_toc_recursive(book.toc) + if not toc_structure: + print("Warning: Empty TOC, building fallback from Spine...") + toc_structure = get_fallback_toc(book) + + print("Processing chapters...") + spine_chapters = [] + + for i, spine_item in enumerate(book.spine): + item_id, _ = spine_item + item = book.get_item_with_id(item_id) + + if not item: + continue + + if item.get_type() == ebooklib.ITEM_DOCUMENT: + raw_content = item.get_content().decode('utf-8', errors='ignore') + soup = BeautifulSoup(raw_content, 'html.parser') + + for img in soup.find_all('img'): + src = img.get('src', '') + if not src: continue + + src_decoded = unquote(src) + filename = os.path.basename(src_decoded) + + 
if src_decoded in image_map: + img['src'] = image_map[src_decoded] + elif filename in image_map: + img['src'] = image_map[filename] + + soup = clean_html_content(soup) + + body = soup.find('body') + if body: + final_html = "".join([str(x) for x in body.contents]) + else: + final_html = str(soup) + + chapter = ChapterContent( + id=item_id, + href=item.get_name(), + title=f"Section {i+1}", + content=final_html, + text=extract_plain_text(soup), + order=i + ) + spine_chapters.append(chapter) + + final_book = Book( + metadata=metadata, + spine=spine_chapters, + toc=toc_structure, + images=image_map, + source_file=os.path.basename(epub_path), + processed_at=datetime.now().isoformat() + ) + + return final_book + + +def save_to_pickle(book: Book, output_dir: str): + p_path = os.path.join(output_dir, 'book.pkl') + with open(p_path, 'wb') as f: + pickle.dump(book, f) + print(f"Saved to {p_path}") + + +# --- FastAPI Server --- + +app = FastAPI() +templates = Jinja2Templates(directory="templates") + +BOOKS_DIR = ".data" +CLAUDE_CODE_STATUS = get_claude_code_status() +CURRENT_BOOK_FOLDER = None # Will be set when server starts + + +@lru_cache(maxsize=1) +def load_book_cached(folder_name: str) -> Optional[Book]: + file_path = os.path.join(BOOKS_DIR, folder_name, "book.pkl") + if not os.path.exists(file_path): + return None + + try: + with open(file_path, "rb") as f: + book = pickle.load(f) + return book + except Exception as e: + print(f"Error loading book {folder_name}: {e}") + return None + + +def _get_book_folder() -> Optional[str]: + """Get the single book folder.""" + if os.path.exists(BOOKS_DIR): + books = [item for item in os.listdir(BOOKS_DIR) + if item.endswith("_data") and os.path.isdir(os.path.join(BOOKS_DIR, item))] + if len(books) == 1: + return books[0] + return None + + +@app.get("/", response_class=HTMLResponse) +async def root(request: Request): + if not CURRENT_BOOK_FOLDER: + raise HTTPException(status_code=404, detail="No book found") + return await read_chapter(request=request, 
book_id=CURRENT_BOOK_FOLDER, chapter_index=0) + + +@app.get("/read/{book_id}", response_class=HTMLResponse) +async def redirect_to_first_chapter(request: Request, book_id: str): + return await read_chapter(request=request, book_id=book_id, chapter_index=0) + + +@app.get("/read/{book_id}/{chapter_index}", response_class=HTMLResponse) +async def read_chapter(request: Request, book_id: str, chapter_index: int): + book = load_book_cached(book_id) + if not book: + raise HTTPException(status_code=404, detail="Book not found") + + if chapter_index < 0 or chapter_index >= len(book.spine): + raise HTTPException(status_code=404, detail="Chapter not found") + + current_chapter = book.spine[chapter_index] + + prev_idx = chapter_index - 1 if chapter_index > 0 else None + next_idx = chapter_index + 1 if chapter_index < len(book.spine) - 1 else None + + # Extract clean text from current chapter (always needed for prephrase) + from book_info import _extract_text_content, get_book_summary_cached, get_chapter_prephrase, _load_cached_summary + chapter_clean = _extract_text_content(current_chapter.content, min_length=1000) + + # Check if summary is cached to avoid unnecessary first chapter extraction + summary_cache_key = f"{book_id}_summary" + summary = _load_cached_summary(summary_cache_key) + + if not summary: + # Only extract first chapter if summary not cached + first_chapter_clean = _extract_text_content(book.spine[0].content if book.spine else "", min_length=1000) + summary = get_book_summary_cached( + book_id, + book.metadata.title, + ", ".join(book.metadata.authors), + first_chapter_clean + ) + + # Get chapter prephrase (uses cached summary as context) + ai_prephrase = get_chapter_prephrase( + book_id, + book.metadata.title, + ", ".join(book.metadata.authors), + chapter_clean, + summary + ) + + ai_conclusion = get_ai_conclusion( + book_id, + chapter_clean, + book.metadata.title, + ", ".join(book.metadata.authors), + summary + ) + + paragraph_summaries = await get_paragraph_summaries( + current_chapter.content, 
+ book.metadata.title, + ", ".join(book.metadata.authors) + ) + + return templates.TemplateResponse("reader.html", { + "request": request, + "book": book, + "current_chapter": current_chapter, + "chapter_index": chapter_index, + "book_id": book_id, + "prev_idx": prev_idx, + "next_idx": next_idx, + "claude_code_enabled": CLAUDE_CODE_STATUS["enabled"], + "book_summary": summary, + "ai_prephrase": ai_prephrase, + "ai_conclusion": ai_conclusion, + "paragraph_summaries": paragraph_summaries + }) + + +@app.get("/read/{book_id}/images/{image_name}") +async def serve_image(book_id: str, image_name: str): + safe_book_id = os.path.basename(book_id) + safe_image_name = os.path.basename(image_name) + + img_path = os.path.join(BOOKS_DIR, safe_book_id, "images", safe_image_name) + + if not os.path.exists(img_path): + raise HTTPException(status_code=404, detail="Image not found") + + return FileResponse(img_path) + + +@app.get("/{image_name:path}") +async def serve_any_image(image_name: str): + if not any(image_name.lower().endswith(ext) for ext in ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg')): + raise HTTPException(status_code=404, detail="Not an image") + + if not CURRENT_BOOK_FOLDER: + raise HTTPException(status_code=404, detail="Book not found") + + safe_image_name = os.path.basename(image_name) + + img_path = os.path.join(BOOKS_DIR, CURRENT_BOOK_FOLDER, "images", safe_image_name) + + if not os.path.exists(img_path): + raise HTTPException(status_code=404, detail="Image not found") + + return FileResponse(img_path) + + +# --- Utilities --- + +def find_available_port(start_port: int = 8123, max_attempts: int = 10) -> int: + """Find an available port starting from start_port.""" + for port in range(start_port, start_port + max_attempts): + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('127.0.0.1', port)) + return port + except OSError: + continue + raise RuntimeError(f"No available ports found in range {start_port}-{start_port + 
max_attempts}") + + +# --- CLI --- + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python book.py <book.epub>") + sys.exit(1) + + epub_file = sys.argv[1] + assert os.path.exists(epub_file), "File not found." + + # Always store book data in .data folder in project directory + book_name = os.path.splitext(os.path.basename(epub_file))[0] + out_dir = os.path.join(".data", book_name + "_data") + + # Process the book + book_obj = process_epub(epub_file, out_dir) + save_to_pickle(book_obj, out_dir) + + print("\n--- Summary ---") + print(f"Title: {book_obj.metadata.title}") + print(f"Authors: {', '.join(book_obj.metadata.authors)}") + print(f"Chapters: {len(book_obj.spine)}") + print(f"Images: {len(book_obj.images)}") + + # Set current book folder for the server (just the folder name, not path) + globals()['CURRENT_BOOK_FOLDER'] = book_name + "_data" + + # Find available port (prefer 8123) + port = find_available_port() + print(f"\nStarting server at http://127.0.0.1:{port}") + print("Press Ctrl+C to stop\n") + uvicorn.run(app, host="127.0.0.1", port=port) diff --git a/book_info.py b/book_info.py new file mode 100644 index 0000000..85c4ed6 --- /dev/null +++ b/book_info.py @@ -0,0 +1,389 @@ +""" +Book info summarization module. + +Fetches compact summaries and AI context for books using Claude CLI. +Caches results for future requests. +""" + +import asyncio +import hashlib +import json +import re +import subprocess +from pathlib import Path +from typing import Optional + +# Precompile regex patterns for performance +_PARAGRAPH_PATTERN = re.compile(r'<p[^>]*>(.*?)</p>', re.DOTALL)
 +_HTML_TAG_PATTERN = re.compile(r'<[^>]+>') + +# Cache directory for book summaries +CACHE_DIR = Path.home() / ".reader3_cache" +CACHE_DIR.mkdir(exist_ok=True) + + +def _get_cache_path(book_id: str) -> Path: + """Get the cache file path for a book summary.""" + return CACHE_DIR / f"{book_id}_summary.json" + + +def _load_cached_summary(book_id: str) -> Optional[str]: + """Load cached summary if it exists.""" + cache_path = _get_cache_path(book_id) + if cache_path.exists(): + try: + with open(cache_path) as f: + data = json.load(f) + return data.get("summary") + except Exception: + pass + return None + + +def _save_summary(book_id: str, summary: str) -> None: + """Save summary to cache.""" + cache_path = _get_cache_path(book_id) + try: + with open(cache_path, "w") as f: + json.dump({"summary": summary}, f) + except Exception: + pass + + +def _is_valid_response(response: str) -> bool: + """Check if response is valid (not an error or complaint).""" + if not response: + return False + # Check for explicit rejection marker. NOTE(review): the marker literal was garbled + # in this patch; "<INSUFFICIENT>" restored here — TODO confirm it matches the token + # the prompt templates instruct the model to emit (checking "" was always True). + if "<INSUFFICIENT>" in response: + return False + # Detect explanatory/apologetic responses that bypass the marker + lower = response.lower() + explanatory_patterns = [ + "i appreciate", + "i need to clarify", + "i notice", + "i should mention", + "i should point out", + "just the front matter", + "just the table of contents", + "doesn't include any actual", + "lacks actual narrative", + ] + return not any(pattern in lower for pattern in explanatory_patterns) + + +def _fetch_from_claude(prompt: str) -> Optional[str]: + """Fetch a response from Claude Code CLI. 
+ + Args: + prompt: The prompt to send to Claude + + Returns: + The response text, or None if the request failed + """ + try: + result = subprocess.run( + ["claude", "-p", prompt], + capture_output=True, + text=True, + timeout=30 + ) + + if result.returncode == 0: + return result.stdout.strip() + + error_msg = result.stderr.lower() if result.stderr else "" + if "auth" in error_msg or "unauthorized" in error_msg or "signed in" in error_msg: + return "Sign in to Claude Code CLI to enable this feature" + return None + + except subprocess.TimeoutExpired: + return "Request timeout - try again later" + except FileNotFoundError: + return "Claude Code CLI not found" + except Exception: + return None + + +def _extract_text_content(html: str, min_length: int = 100) -> str: + """Extract and clean text content from HTML, stripping tags and excess whitespace. + + Keeps consuming content until reaching min_length of clean text, skipping images/markup. + + Args: + html: HTML content to clean + min_length: Minimum characters required for valid content + + Returns: + Cleaned text up to min_length (or more if necessary), or empty string if insufficient + """ + # Remove HTML tags + text = re.sub(r'<[^>]+>', '', html) + # Remove extra whitespace + text = re.sub(r'\s+', ' ', text).strip() + # Return text if we have enough real content + return text if len(text) >= min_length else "" + + +def get_book_summary_cached(book_id: str, title: str, author: str, first_chapter_clean: str) -> str: + """Get cached book summary or fetch if needed. Only requires first chapter.""" + summary_cache_key = f"{book_id}_summary" + cached = _load_cached_summary(summary_cache_key) + if cached: + return cached + + if not first_chapter_clean: + return "" + + prompt = f"""Write a 2-3 sentence summary that makes this book sound absolutely irresistible and makes readers DESPERATE to pick it up. 
+- Be vivid, conversational, and exciting (not formal or dull) +- Capture the core tension, conflict, or fascination that hooks readers +- Use strong verbs and concrete imagery (show, don't tell) +- Use sensory language and emotional hooks +- Sound like you're passionately recommending it to a friend, not writing a textbook +- Focus on the experience and feeling, not plot mechanics + +Book: {title} by {author} + +Sample text: +{first_chapter_clean} + +If the provided text is insufficient for creating a summary (e.g., it's just front matter, table of contents, or lacks actual narrative content), respond with: +Otherwise provide ONLY the summary, no other text.""" + + result = _fetch_from_claude(prompt) + if not result or not _is_valid_response(result): + return "" + + _save_summary(summary_cache_key, result) + return result + + +def get_chapter_prephrase(book_id: str, title: str, author: str, chapter_clean: str, book_summary: str = "") -> str: + """Get cached chapter prephrase or fetch if needed. Only requires current chapter.""" + if not chapter_clean: + return "" + + chapter_hash = hashlib.md5(chapter_clean[:500].encode()).hexdigest()[:8] + prephrase_cache_key = f"{book_id}_prephrase_{chapter_hash}" + + cached = _load_cached_summary(prephrase_cache_key) + if cached: + return cached + + context = f"Book overview: {book_summary}\n\n" if book_summary else "" + prompt = f"""Write a SHORT punchy one-sentence hook (10-20 words) that makes readers DESPERATE to read this chapter. 
+- Extract a specific intriguing detail, event, or character moment from the content +- Raise a compelling question or hint at conflict/tension/mystery +- Use vivid, sensory language (not generic) +- Focus on what actually happens in this text, not the book premise + +Book: {title} by {author} + +{context}Chapter content: +{chapter_clean} + +If the provided text is insufficient for creating a hook (e.g., it's just front matter, table of contents, or lacks actual narrative content), respond with: +Otherwise provide ONLY the one-sentence hook, no other text.""" + + result = _fetch_from_claude(prompt) + if not result or not _is_valid_response(result): + return "" + + _save_summary(prephrase_cache_key, result) + return result + + +def get_ai_conclusion(book_id: str, clean_text: str, title: str, author: str, book_summary: str = "") -> str: + """ + Get or fetch a conclusion summarizing key points from a chapter. + + Args: + book_id: Unique identifier for the book (for caching) + clean_text: Already cleaned plain text (1000+ chars, no HTML/images) + title: Book title + author: Book author + book_summary: Optional summary of the book for context + + Returns: + 2-3 sentence conclusion consolidating knowledge + """ + if not clean_text: + return "" + + # Use chapter hash as part of cache key to avoid conflicts + content_hash = hashlib.md5(clean_text[:500].encode()).hexdigest()[:8] + cache_key = f"{book_id}_conclusion_{content_hash}" + cached = _load_cached_summary(cache_key) + if cached: + return cached + + # Build prompt with book context + context = f"Book overview: {book_summary}\n\n" if book_summary else "" + prompt = f"""Write a 2-3 sentence conclusion that captures the essence and emotional impact of this chapter - what lingers with the reader. 
It should: +- Highlight the pivotal moments, revelations, or turning points +- Convey the emotional weight and significance of what happened +- Connect to the larger narrative and themes in a compelling way +- Leave the reader wanting more - tease what comes next without spoiling +- Be vivid and memorable, not just factual summary +- Sound conversational and friendly (like chatting with a friend), NOT textbook or academic + +Book: {title} by {author} + +{context}Chapter excerpt: +{clean_text} + +If the provided text is insufficient for creating a conclusion (e.g., it's just front matter, table of contents, or lacks actual narrative content), respond with: +Otherwise provide only the conclusion, no other text.""" + + # Fetch from Claude CLI + result = _fetch_from_claude(prompt) + if not result or not _is_valid_response(result): + return "" + + _save_summary(cache_key, result) + return result + + +def _split_into_paragraph_groups(content: str, min_length: int = 500, max_groups: int = 10) -> list[str]: + """Split HTML into groups with guaranteed minimum length, capped at max_groups. 
+ + Args: + content: HTML chapter content + min_length: Minimum characters per group (hard floor) + max_groups: Maximum number of groups to create (cap LLM requests) + + Returns: + List of group HTML strings + """ + p_matches = list(_PARAGRAPH_PATTERN.finditer(content)) + if not p_matches: + return [content.strip()] if content.strip() else [] + + # Extract and filter non-empty paragraphs + paragraphs = [] + for match in p_matches: + clean = _HTML_TAG_PATTERN.sub('', match.group(1)).strip() + if clean: + paragraphs.append(match.group(0)) + + if not paragraphs: + return [] + + # Calculate total content and target group length + total_length = sum(len(_HTML_TAG_PATTERN.sub('', p)) for p in paragraphs) + target_length = max(min_length, total_length // max_groups) + + # Group paragraphs to meet target length + groups = [] + current_group = [] + current_length = 0 + + for para_html in paragraphs: + para_length = len(_HTML_TAG_PATTERN.sub('', para_html)) + current_group.append(para_html) + current_length += para_length + + # Flush when we hit target length and haven't hit max groups yet + if (current_length >= target_length and len(groups) < max_groups - 1) or current_length >= target_length * 1.5: + groups.append('\n'.join(current_group)) + current_group = [] + current_length = 0 + + # Add remaining paragraphs to last group + if current_group: + groups.append('\n'.join(current_group)) + + return groups[:max_groups] + + +def _get_paragraph_group_summary(group_text: str, book_title: str = "", book_author: str = "") -> Optional[str]: + """Get an intriguing 2-6 word teaser for a paragraph group (shown before reading). 
+ + Args: + group_text: HTML text to summarize + book_title: Book title for context + book_author: Book author for context + """ + # Clean HTML and strip + clean_text = _HTML_TAG_PATTERN.sub('', group_text).strip() + + if not clean_text or len(clean_text) < 1000: + return None + + # Generate cache key from content hash + content_hash = hashlib.md5(clean_text[:200].encode()).hexdigest()[:8] + cache_key = f"para_summary_{content_hash}" + + # Check cache + cached = _load_cached_summary(cache_key) + if cached: + return cached + + # Build context string + context = "" + if book_title and book_author: + context = f"Book: {book_title} by {book_author}\n\n" + elif book_title: + context = f"Book: {book_title}\n\n" + elif book_author: + context = f"Book by {book_author}\n\n" + + # Build prompt for intriguing teaser + prompt = f"""Create an intriguing 2-6 word teaser that makes someone curious to read this section. +Use vivid verbs, tension, or mystery. Make it a hook, not just a summary. +No punctuation. Just key words. + +{context}Passage: +{clean_text} + +If the provided text is insufficient for creating a teaser (e.g., it's just front matter, table of contents, or lacks actual narrative content), respond with: +Otherwise provide ONLY the 2-6 words, nothing else.""" + + result = _fetch_from_claude(prompt) + if not result or not _is_valid_response(result): + return None + + _save_summary(cache_key, result) + return result + + +async def get_paragraph_summaries( + content: str, + book_title: str = "", + book_author: str = "" +) -> dict[int, str]: + """Get summaries for all paragraph groups in parallel. 
+ + Args: + content: HTML chapter content + book_title: Book title for context + book_author: Book author for context + + Returns: + Dict mapping group index to summary text (max 10 groups) + """ + groups = _split_into_paragraph_groups(content) + + if not groups: + return {} + + # Create tasks with book context + async def get_summary(i: int, group_text: str) -> tuple[int, Optional[str]]: + result = await asyncio.to_thread( + _get_paragraph_group_summary, + group_text, + book_title, + book_author + ) + return (i, result) + + # Run all summaries in true parallel using gather + results = await asyncio.gather(*[ + get_summary(i, group_text) + for i, group_text in enumerate(groups) + ]) + + # Build result dict, filtering None values + return {i: summary for i, summary in results if summary} diff --git a/claude_code_detect.py b/claude_code_detect.py new file mode 100644 index 0000000..c4628a7 --- /dev/null +++ b/claude_code_detect.py @@ -0,0 +1,74 @@ +""" +Claude Code detection and authentication module. + +Detects if Claude Code is available and properly authenticated via +environment variables and CLI checks. 
+""" + +import os +import shutil +import subprocess + + +def is_claude_code_available() -> bool: + """Check if Claude Code CLI is installed and available in PATH.""" + return shutil.which("claude") is not None + + +def _has_env_auth() -> bool: + """Check for authentication via environment variables.""" + return bool( + os.getenv("ANTHROPIC_API_KEY") + or os.getenv("ANTHROPIC_AUTH_TOKEN") + or os.getenv("AWS_BEARER_TOKEN_BEDROCK") + ) + + +def _has_cli_auth() -> bool: + """Check Claude Code CLI auth by testing if it can run a command.""" + try: + result = subprocess.run( + ["claude", "-p", "test"], + capture_output=True, + text=True, + timeout=10 + ) + # Exit code 0 means command succeeded (authenticated) + # Non-zero or auth errors in stderr mean not authenticated + if result.returncode != 0: + return False + # Check stderr for auth-related errors + if result.stderr and ("auth" in result.stderr.lower() or "unauthorized" in result.stderr.lower()): + return False + return True + except subprocess.TimeoutExpired: + # If it times out, assume authenticated (Claude Code is processing) + return True + except Exception: + pass + return False + + +def is_authenticated() -> bool: + """Check if Claude Code is authenticated via environment or CLI.""" + return _has_env_auth() or _has_cli_auth() + + +def get_claude_code_status() -> dict: + """ + Get the current Claude Code status. 
+ + Returns: + dict with keys: + - available: bool, whether Claude Code is installed + - authenticated: bool, whether Claude Code is authenticated + - enabled: bool, whether Claude Code is both available and authenticated + """ + available = is_claude_code_available() + authenticated = is_authenticated() if available else False + + return { + "available": available, + "authenticated": authenticated, + "enabled": available and authenticated, + } diff --git a/reader3.png b/reader3.png index 45aac09..bb4f919 100644 Binary files a/reader3.png and b/reader3.png differ diff --git a/reader3.py b/reader3.py index d0b9d3f..574e169 100644 --- a/reader3.py +++ b/reader3.py @@ -192,7 +192,7 @@ def process_epub(epub_path: str, output_dir: str) -> Book: image_map = {} # Key: internal_path, Value: local_relative_path for item in book.get_items(): - if item.get_type() == ebooklib.ITEM_IMAGE: + if item.get_type() in (ebooklib.ITEM_IMAGE, ebooklib.ITEM_COVER): # Normalize filename original_fname = os.path.basename(item.get_name()) # Sanitize filename for OS diff --git a/server.py b/server.py index 9c870dc..64456a2 100644 --- a/server.py +++ b/server.py @@ -9,6 +9,13 @@ from fastapi.templating import Jinja2Templates from reader3 import Book, BookMetadata, ChapterContent, TOCEntry +from claude_code_detect import get_claude_code_status +from book_info import ( + get_book_summary_cached as get_book_summary, + get_chapter_prephrase as get_ai_prephrase, + get_ai_conclusion, + get_paragraph_summaries, +) app = FastAPI() templates = Jinja2Templates(directory="templates") @@ -16,7 +23,10 @@ # Where are the book folders located? BOOKS_DIR = "." -@lru_cache(maxsize=10) +# Get Claude Code status once at startup +CLAUDE_CODE_STATUS = get_claude_code_status() + +@lru_cache(maxsize=1) def load_book_cached(folder_name: str) -> Optional[Book]: """ Loads the book from the pickle file. 
@@ -34,26 +44,22 @@ def load_book_cached(folder_name: str) -> Optional[Book]: print(f"Error loading book {folder_name}: {e}") return None -@app.get("/", response_class=HTMLResponse) -async def library_view(request: Request): - """Lists all available processed books.""" - books = [] - - # Scan directory for folders ending in '_data' that have a book.pkl +def _get_book_folder() -> Optional[str]: + """Get the single book folder. Returns None if not found or multiple exist.""" if os.path.exists(BOOKS_DIR): - for item in os.listdir(BOOKS_DIR): - if item.endswith("_data") and os.path.isdir(item): - # Try to load it to get the title - book = load_book_cached(item) - if book: - books.append({ - "id": item, - "title": book.metadata.title, - "author": ", ".join(book.metadata.authors), - "chapters": len(book.spine) - }) - - return templates.TemplateResponse("library.html", {"request": request, "books": books}) + books = [item for item in os.listdir(BOOKS_DIR) + if item.endswith("_data") and os.path.isdir(item)] + if len(books) == 1: + return books[0] + return None + +@app.get("/", response_class=HTMLResponse) +async def root(request: Request): + """Root endpoint - redirect to the book.""" + book_folder = _get_book_folder() + if not book_folder: + raise HTTPException(status_code=404, detail="No book found. 
Run: uv run reader3.py <file.epub>") + return await read_chapter(request=request, book_id=book_folder, chapter_index=0) @app.get("/read/{book_id}", response_class=HTMLResponse) async def redirect_to_first_chapter(book_id: str): @@ -76,6 +82,42 @@ async def read_chapter(request: Request, book_id: str, chapter_index: int): prev_idx = chapter_index - 1 if chapter_index > 0 else None next_idx = chapter_index + 1 if chapter_index < len(book.spine) - 1 else None + # Get book summary for context + content_sample = book.spine[0].content[:1000] if book.spine else "" + summary = get_book_summary( + book_id, + book.metadata.title, + ", ".join(book.metadata.authors), + content_sample + ) + + # Get AI Prephrase (before chapter) + # Use current chapter content, not book sample + chapter_sample = current_chapter.content[:1000] if current_chapter.content else "" + ai_prephrase = get_ai_prephrase( + book_id, + book.metadata.title, + ", ".join(book.metadata.authors), + chapter_sample, + summary + ) + + # Get AI Conclusion (after chapter) + ai_conclusion = get_ai_conclusion( + book_id, + current_chapter.content, + book.metadata.title, + ", ".join(book.metadata.authors), + summary + ) + + # Get paragraph summaries in parallel with book context + paragraph_summaries = await get_paragraph_summaries( + current_chapter.content, + book.metadata.title, + ", ".join(book.metadata.authors) + ) + return templates.TemplateResponse("reader.html", { "request": request, "book": book, @@ -83,15 +125,19 @@ "chapter_index": chapter_index, "book_id": book_id, "prev_idx": prev_idx, - "next_idx": next_idx + "next_idx": next_idx, + "claude_code_enabled": CLAUDE_CODE_STATUS["enabled"], + "book_summary": summary, + "ai_prephrase": ai_prephrase, + "ai_conclusion": ai_conclusion, + "paragraph_summaries": paragraph_summaries }) @app.get("/read/{book_id}/images/{image_name}") async def serve_image(book_id: str, image_name: str): """ Serves images
specifically for a book. - The HTML contains <img src="images/pic.jpg">. - The browser resolves this to /read/{book_id}/images/pic.jpg. + Supports both structured paths and loose filenames. """ # Security check: ensure book_id is clean safe_book_id = os.path.basename(book_id) @@ -104,6 +150,31 @@ async def serve_image(book_id: str, image_name: str): return FileResponse(img_path) +@app.get("/{image_name:path}") +async def serve_any_image(image_name: str): + """ + Catch-all for loose image filenames (e.g., from SVG xlink:href). + Only matches image extensions (.jpg, .png, .gif, .webp, .svg). + """ + # Only serve image files + if not any(image_name.lower().endswith(ext) for ext in ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg')): + raise HTTPException(status_code=404, detail="Not an image") + + # Get current book + book_folder = _get_book_folder() + if not book_folder: + raise HTTPException(status_code=404, detail="Book not found") + + # Security check + safe_image_name = os.path.basename(image_name) + + img_path = os.path.join(BOOKS_DIR, book_folder, "images", safe_image_name) + + if not os.path.exists(img_path): + raise HTTPException(status_code=404, detail="Image not found") + + return FileResponse(img_path) + if __name__ == "__main__": import uvicorn print("Starting server at http://127.0.0.1:8123") diff --git a/templates/library.html b/templates/library.html index e7d094d..37f7782 100644 --- a/templates/library.html +++ b/templates/library.html @@ -7,18 +7,30 @@
-

Library

+
+

Library

+ {% if claude_code_enabled %} +
+ + Claude Code +
+ {% endif %} +
{% if not books %}

No processed books found. Run reader3.py on an epub first.

@@ -32,6 +44,9 @@

Library

{{ book.author }}
{{ book.chapters }} sections
+ {% if book.summary %} +
{{ book.summary }}
+ {% endif %} Read Book {% endfor %} diff --git a/templates/reader.html b/templates/reader.html index c012edc..0abbd26 100644 --- a/templates/reader.html +++ b/templates/reader.html @@ -11,7 +11,6 @@ /* Sidebar */ #sidebar { width: 300px; background: #f8f9fa; border-right: 1px solid #e9ecef; overflow-y: auto; padding: 20px; flex-shrink: 0; } .nav-header { font-family: -apple-system, sans-serif; font-weight: bold; color: #495057; margin-bottom: 15px; padding-bottom: 10px; border-bottom: 1px solid #dee2e6; } - .nav-home { display: block; margin-bottom: 20px; color: #3498db; text-decoration: none; font-family: -apple-system, sans-serif; font-size: 0.9em; } /* TOC Tree */ ul.toc-list { list-style: none; padding-left: 0; margin: 0; } @@ -38,11 +37,16 @@ - +