diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..2b0fdfe --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +# DeepSeek API Configuration +# Get your API key from: https://platform.deepseek.com/api_keys + +OPENAI_API_KEY=your_api_key_here +OPENAI_BASE_URL=https://api.deepseek.com +OPENAI_MODEL=deepseek-chat diff --git a/.gitignore b/.gitignore index 9e1d25d..03392c4 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,36 @@ wheels/ # Virtual environments .venv +# IDE settings +.vscode/ +.idea/ + # Custom -*_data/ *.epub + +# Books directory (but keep the folder structure) +books/* +!books/.gitkeep + +# Temp directory for uploads +temp/ + +# AI Features & Data +.env +reader_data.db +test.db + +# Backup files +backups/ +*.db.backup + +# Export files +reader_data_*.json +highlights_*.csv +ai_analyses_*.csv +report_*.txt + +# OS files +.DS_Store +Thumbs.db +desktop.ini diff --git a/README.md b/README.md index 5d868d7..1600a37 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,171 @@ -# reader 3 +# Reader3 - EPUB Reader with AI Analysis - +A lightweight, self-hosted EPUB reader with integrated AI analysis capabilities. -A lightweight, self-hosted EPUB reader that lets you read through EPUB books one chapter at a time. This makes it very easy to copy paste the contents of a chapter to an LLM, to read along. Basically - get epub books (e.g. [Project Gutenberg](https://www.gutenberg.org/) has many), open them up in this reader, copy paste text around to your favorite LLM, and read together and along. +## Features -This project was 90% vibe coded just to illustrate how one can very easily [read books together with LLMs](https://x.com/karpathy/status/1990577951671509438). I'm not going to support it in any way, it's provided here as is for other people's inspiration and I don't intend to improve it. Code is ephemeral now and libraries are over, ask your LLM to change it in whatever way you like. 
+### Reading Experience +- 📚 **Clean Layout** - Three-column design (TOC, Content, AI Panel) +- 📖 **Sticky Navigation** - Top navigation bar stays visible while scrolling +- ⌨️ **Keyboard Shortcuts** - Arrow keys for prev/next chapter, ESC to close panels +- 🔗 **Internal Links** - Footnotes and author comments open in modal popups +- 🎯 **Clickable Covers** - Click book covers to start reading instantly -## Usage +### AI & Annotations +- 🤖 **AI Analysis** - Right-click on text for fact-checking or discussion (DeepSeek) +- � ***Personal Comments** - Add your own notes without AI (no API cost) +- 💾 **Manual Save** - Choose what to save to avoid clutter +- ✨ **Color-Coded Highlights** - Yellow (fact check), Blue (discussion), Green (comments) +- 🏷️ **Smart Tooltips** - Hover over highlights to see type +- 🗑️ **Edit & Delete** - Manage all your highlights and comments +- 🎨 **Markdown Support** - AI responses render with proper formatting + +### Library & Organization +- 📝 **Highlights View** - See all your notes and analyses for each book +- 📤 **Export to Markdown** - Export highlights with AI context warnings +- 🌐 **Web Upload** - Upload EPUB files via click or drag & drop +- 🖼️ **Cover Images** - Automatic cover extraction and display +- 🔍 **Search** - Find books by title or author +- 🗂️ **Organized Storage** - All books in `books/` directory, data in SQLite + +## Quick Start + +### 1. Configure API Key + +Edit `.env` file: +```bash +OPENAI_API_KEY=your_deepseek_key +OPENAI_BASE_URL=https://api.deepseek.com +OPENAI_MODEL=deepseek-chat +``` + +Get your key from: https://platform.deepseek.com/api_keys -The project uses [uv](https://docs.astral.sh/uv/). So for example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345) to this directory as `dracula.epub`, then: +### 2. Add Books +**Option A: Upload via Web Interface (Recommended)** +1. Start server: `uv run server.py` +2. Open http://127.0.0.1:8123 +3. Click the "+" card OR drag & drop EPUB file +4. 
Wait for automatic processing + +**Option B: Command Line** ```bash -uv run reader3.py dracula.epub +uv run reader3.py your_book.epub ``` -This creates the directory `dracula_data`, which registers the book to your local library. We can then run the server: +### 3. Start Server ```bash uv run server.py ``` -And visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting the folder. It's not supposed to be complicated or complex. +### 4. Read and Analyze + +1. Open http://127.0.0.1:8123 +2. Select a book +3. Right-click on text → Choose analysis type +4. Review AI response in side panel +5. Save if important +6. Highlights appear on next visit! + +## Usage + +### AI Analysis +- Select text → Right-click → Choose: + - **📋 Fact Check** - Verify facts and get context + - **💡 Discussion** - Deep analysis and insights + - **💬 Add Comment** - Your personal notes (no AI) +- View response in right panel +- Click "Save" for important insights + +### Highlights +- **Yellow** - Fact checks +- **Blue** - Discussions +- **Green** - Your comments +- Hover to see type, click to view/edit +- All highlights are editable and deletable + +### View & Export Highlights +- Click ⋮ menu on any book → "View Highlights" +- See all your notes and analyses in one page +- Filter by type (Fact Check, Discussion, Comment) +- Export to markdown for AI processing +- Context length warnings for large exports +- Jump directly to any chapter + +### Keyboard Shortcuts +- **← →** - Navigate between chapters +- **ESC** - Close panels and modals +- Works anywhere except when typing in text fields + +## Project Structure + +``` +reader3/ +├── reader3.py # EPUB processor +├── server.py # Web server +├── database.py # SQLite operations +├── ai_service.py # AI integration +├── books/ # All book data here +│ └── book_name_data/ +│ ├── book.pkl +│ └── images/ +├── templates/ # HTML templates +├── reader_data.db # SQLite 
database +└── .env # API configuration +``` + +## Data Management + +### View Your Highlights +- Click ⋮ menu on any book → "View Highlights" +- See all notes, comments, and analyses in one page +- Filter by type and jump to chapters + +### View Database (Advanced) +```bash +uv run check_database.py +``` + +### Backup +```bash +# Double-click: backup.bat +# Or manually: +copy reader_data.db backups\reader_data_backup.db +``` + +## Tools + +- `check_database.py` - View raw database contents (advanced) +- `backup.bat` - Quick database backup + +## Why DeepSeek? + +- ✅ Cost-effective (¥1/M tokens input, ¥2/M output) +- ✅ Excellent Chinese language support +- ✅ Fast response in China +- ✅ OpenAI-compatible API + +## Troubleshooting + +### API Key Error +1. Check `.env` file exists and has correct key +2. Restart server + +### No Highlights Showing +1. Check browser console (F12) for errors +2. Verify data exists: `uv run check_database.py` +3. Hard refresh (Ctrl+Shift+R) + +### Server Won't Start +1. Check if port 8123 is available +2. Verify `.env` configuration ## License -MIT \ No newline at end of file +MIT + +--- + +**Note**: This project is designed to be simple and hackable. Ask your LLM to modify it however you like! diff --git a/TECHNICAL_CHALLENGES.md b/TECHNICAL_CHALLENGES.md new file mode 100644 index 0000000..21b1a86 --- /dev/null +++ b/TECHNICAL_CHALLENGES.md @@ -0,0 +1,158 @@ +# Technical Challenges Solved + +This document outlines the key technical challenges we encountered and solved while building this AI-powered EPUB reader. + +## 1. EPUB Cover Image Extraction + +**Challenge**: Cover images weren't being extracted from EPUB files. Some books had covers marked as `ITEM_COVER` type instead of `ITEM_IMAGE`, causing them to be skipped. 
+ +**Solution**: +- Modified image extraction to handle both `ITEM_COVER` and `ITEM_IMAGE` types +- Implemented multi-method cover detection: check ITEM_COVER type → search by filename pattern → use first large image as fallback +- Added size filtering (>10KB) to avoid using small icons as covers + +**Code**: `reader3.py` lines 190-230 + +## 2. Multi-Paragraph Text Highlighting + +**Challenge**: When users highlighted text spanning multiple paragraphs, the highlight wouldn't display because wrapping `
`<p>` tags in a `<span>` creates invalid HTML that browsers reject.
+
+**Solution**:
+- Detect when highlighted text spans block elements
+- Apply highlight class directly to the paragraph elements instead of wrapping
+- Use Range API with whitespace-tolerant regex matching to handle text across multiple elements
+- Normalize whitespace in search patterns to handle variations in HTML structure
+
+**Code**: `templates/reader.html` - `applyHighlights()` and `findTextRange()` functions
+
+## 3. FastAPI Route Ordering for Image Serving
+
+**Challenge**: Image URLs like `/read/{book_id}/images/{image_name}` were returning 404 because the catch-all route `/read/{book_id}/{chapter_ref:path}` was matching first.
+
+**Solution**:
+- Moved the specific image route definition before the generic chapter route
+- FastAPI matches routes in order, so more specific routes must come first
+- Also fixed path handling to preserve spaces in book folder names (removed incorrect `os.path.basename()` usage)
+
+**Code**: `server.py` - route ordering around line 125-175
+
+## 4. Reading Progress with Precise Scroll Position
+
+**Challenge**:
+- `scrollTop` was always returning 0 when read directly
+- `beforeunload` event doesn't fire reliably
+- Need to track exact scroll position within chapters, not just chapter numbers
+
+**Solution**:
+- Use scroll event listener to continuously track `currentScrollPosition` variable
+- Intercept navigation clicks with `preventDefault()` to ensure save completes before navigation
+- Add `pagehide` event as backup for mobile browsers
+- Store both chapter index and scroll position in database
+- Implement retry mechanism for scroll restoration to handle content loading delays
+
+**Code**: `templates/reader.html` - scroll tracking and `saveProgress()` function
+
+## 5. Database Schema Migration
+
+**Challenge**: Adding `scroll_position` column to existing `reading_progress` table without breaking existing data.
+
+**Solution**:
+- Created migration script that checks if column exists before adding
+- Used `ALTER TABLE ADD COLUMN` with `DEFAULT 0` for backward compatibility
+- Gracefully handles both new installations and existing databases
+
+**Code**: `migrate_progress.py`
+
+## 6. AI Prompt Engineering for Reading Context
+
+**Challenge**: Generic AI prompts weren't providing useful reading assistance. Needed different types of help for different reading scenarios.
+
+**Solution**:
+- Split into two distinct functions:
+ - **解释说明 (Explanation)**: Quick lookups for terms, people, events, concepts
+ - **深入讨论 (Discussion)**: Academic analysis with theoretical frameworks and critical thinking
+- Structured prompts with clear dimensions (论点解析, 理论视角, 批判思考, 启发问题)
+- Removed context parameter from fact-check to keep it focused and fast
+
+**Code**: `ai_service.py` - `fact_check()` and `discuss()` methods
+
+## 7. Dark Mode Implementation
+
+**Challenge**: Implementing comprehensive dark mode across all pages with proper contrast and readability.
+
+**Solution**:
+- Used CSS class toggle (`body.dark-mode`) instead of media queries for user control
+- Defined dark mode colors for every UI element including highlights, progress bars, modals
+- Persisted theme preference in localStorage
+- Synchronized theme across all pages (library, reader, highlights)
+- Used `!important` for highlight colors to override inline styles
+
+**Code**: All template files - CSS dark mode sections
+
+## 8. TOC Auto-Scroll to Active Item
+
+**Challenge**: When opening a book mid-way through, the TOC sidebar didn't show the current chapter, requiring manual scrolling.
+
+**Solution**:
+- Calculate active TOC item position using `offsetTop`
+- Scroll sidebar to center the active item in viewport
+- Execute after DOM load to ensure elements are rendered
+
+**Code**: `templates/reader.html` - TOC auto-scroll in DOMContentLoaded
+
+## 9. Book Detection Without Naming Convention
+
+**Challenge**: Initially required `_data` suffix in folder names, limiting flexibility and creating ugly folder names.
+
+**Solution**:
+- Changed detection from filename pattern matching to presence of `book.pkl` file
+- Updated library scanning to check for file existence instead of name patterns
+- Maintained backward compatibility with old `_data` folders
+
+**Code**: `server.py` - `library_view()` function
+
+## 10. Whitespace-Tolerant Text Matching
+
+**Challenge**: Saved highlights couldn't be found when text spanned multiple paragraphs due to whitespace differences (newlines, multiple spaces).
+
+**Solution**:
+- Created regex pattern that replaces each whitespace run in the search text with a `\s+` pattern
+- Allows flexible matching of any whitespace sequence
+- Escapes special regex characters in user text before pattern creation
+- Falls back to exact match first for performance
+
+**Code**: `templates/reader.html` - `findTextRange()` function
+
+---
+
+## Key Technologies Used
+
+- **FastAPI**: Async web framework with automatic API documentation
+- **SQLite**: Lightweight database for highlights and progress
+- **ebooklib**: EPUB parsing and extraction
+- **BeautifulSoup**: HTML processing and cleaning
+- **MathJax**: Mathematical equation rendering
+- **Marked.js**: Markdown rendering for AI responses
+- **Jinja2**: Server-side templating
+- **Vanilla JavaScript**: No framework dependencies for frontend
+
+## Architecture Decisions
+
+1. **Server-side rendering** for initial page load (SEO-friendly, fast first paint)
+2. **Client-side interactivity** for highlights and AI features (responsive UX)
+3. **SQLite for data** (simple, portable, no separate database server)
+4. **Pickle for book data** (fast serialization, preserves Python objects)
+5. **localStorage for preferences** (theme, font settings persist across sessions)
+6. **Event-driven progress saving** (reliable, doesn't interfere with reading)
+
+## Performance Optimizations
+
+- **LRU cache** for book loading (avoid repeated disk reads)
+- **Lazy AI service initialization** (only load when needed)
+- **Async/await** throughout (non-blocking I/O)
+- **keepalive flag** on fetch requests (ensures completion on page unload)
+- **Debounced scroll tracking** (via event listener, not polling)
+
+---
+
+*This document serves as a reference for understanding the technical depth and problem-solving approaches used in this project.*
diff --git a/ai_service.py b/ai_service.py
new file mode 100644
index 0000000..4cb6ca2
--- /dev/null
+++ b/ai_service.py
@@ -0,0 +1,102 @@
+"""
+AI service for fact-checking and discussion.
+"""
+import os
+import httpx
+from typing import Optional
+
+
class AIService:
    """Thin client for an OpenAI-compatible chat-completions endpoint.

    Credentials and model selection come from constructor arguments or the
    OPENAI_API_KEY / OPENAI_BASE_URL / OPENAI_MODEL environment variables.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Resolve configuration, preferring explicit arguments over env vars.

        Raises:
            ValueError: if no API key is supplied or found in the environment.
        """
        resolved_key = api_key or os.getenv("OPENAI_API_KEY")
        if not resolved_key:
            raise ValueError("API key not provided. Set OPENAI_API_KEY environment variable.")
        self.api_key = resolved_key
        self.base_url = base_url or os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
        self.model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

    async def fact_check(self, text: str, context: str = "") -> str:
        """Explain and fact-check the selected text (definitions, people, events).

        `context` is accepted for interface compatibility but is currently
        unused — TODO confirm whether callers still pass meaningful context.
        """
        prompt = f"""请帮我理解以下内容:

{text}

请根据内容类型提供相应的解释:

**如果是专有名词/概念**:给出清晰的定义和解释
**如果是人物**:介绍其身份、背景和重要性
**如果是历史事件**:说明事件经过、时间、影响
**如果是地点**:介绍其地理位置、特点、相关背景
**如果是数据/事实陈述**:验证准确性,提供来源或背景

要求:
- 简洁明了,重点突出
- 如有错误或争议,明确指出
- 如果内容不完整或无法判断,说明需要更多上下文"""

        return await self._call_api(prompt)

    async def discuss(self, text: str, context: str = "") -> str:
        """Produce an academic, multi-angle discussion of the selected text.

        `context` is accepted for interface compatibility but is currently
        unused — TODO confirm whether callers still pass meaningful context.
        """
        prompt = f"""请对以下文本进行深入的学术性分析和讨论:

{text}

请从以下几个维度展开分析:

**1. 核心论点解析**
- 作者的主要观点是什么?
- 论证逻辑和结构如何?
- 使用了哪些论证方法(举例、类比、引用等)?

**2. 理论与学术视角**
- 这段文本涉及哪些学术领域或理论框架?
- 与哪些经典理论、学派或学者的观点相关?
- 在学术史或思想史上的位置如何?

**3. 批判性思考**
- 论证是否充分?有无逻辑漏洞?
- 是否存在隐含的假设或前提?
- 可能的反驳观点是什么?

**4. 启发性问题**
- 这段文本引发了哪些值得深入思考的问题?
- 如何将这些观点应用到其他领域或情境?
- 对当代有什么启示意义?

要求:
- 保持学术严谨性,但避免过于晦涩
- 提出具有启发性的问题,引导深入思考
- 如涉及专业术语,简要解释
- 鼓励多角度、批判性的思考"""

        return await self._call_api(prompt)

    async def _call_api(self, prompt: str) -> str:
        """POST a single-message chat completion and return the reply text.

        API and processing errors are returned as human-readable strings
        rather than raised, so callers can surface them directly in the UI.
        """
        endpoint = f"{self.base_url}/chat/completions"
        request_headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        request_body = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7,
        }
        # A fresh client per call keeps the service stateless; the 60s budget
        # covers slow model responses.
        async with httpx.AsyncClient(timeout=60.0) as http:
            try:
                reply = await http.post(endpoint, headers=request_headers, json=request_body)
                reply.raise_for_status()
                return reply.json()["choices"][0]["message"]["content"]
            except httpx.HTTPError as e:
                return f"API调用失败: {str(e)}"
            except Exception as e:
                return f"处理失败: {str(e)}"
diff --git a/backup.bat b/backup.bat
new file mode 100644
index 0000000..4677b45
--- /dev/null
+++ b/backup.bat
@@ -0,0 +1,28 @@
@echo off
echo ========================================
echo 备份 Reader3 数据库
echo ========================================
echo.

REM Create the backups folder on first run
if not exist backups mkdir backups

REM Build a timestamped file name (date + time slices depend on the system
REM locale's date format); the second line pads single-digit hours with 0
set datetime=%date:~0,4%%date:~5,2%%date:~8,2%_%time:~0,2%%time:~3,2%%time:~6,2%
set datetime=%datetime: =0%

REM Copy the SQLite database into the backups folder
copy reader_data.db backups\reader_data_%datetime%.db

echo.
echo ✓ 备份完成!
echo 文件: backups\reader_data_%datetime%.db
echo.

REM Show all existing backups
echo 现有备份:
dir /b backups\*.db

echo.
echo ========================================
pause
diff --git a/books/.gitkeep b/books/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/check_book.py b/check_book.py
new file mode 100644
index 0000000..5b2a0a0
--- /dev/null
+++ b/check_book.py
@@ -0,0 +1,12 @@
"""Quick inspection script: print a processed book's cover path and first spine items."""
import pickle
import sys
# NOTE(review): these names appear unused but are presumably imported so the
# pickled Book object's classes resolve on load — confirm before removing.
from reader3 import Book, BookMetadata, ChapterContent, TOCEntry

# Take the pickle path from the command line, defaulting to the sample book.
book_path = sys.argv[1] if len(sys.argv) > 1 else 'books/Evicted/book.pkl'

with open(book_path, 'rb') as handle:
    book = pickle.load(handle)

print(f"Cover image: {book.cover_image}")
print(f"\nFirst few spine items:")
for index, spine_item in enumerate(book.spine[:3]):
    print(f" {index}: {spine_item.href}")
diff --git a/check_database.py b/check_database.py
new file mode 100644
index 0000000..cb60f06
--- /dev/null
+++ b/check_database.py
@@ -0,0 +1,94 @@
"""Inspect the contents of the reader database (highlights, AI analyses, stats)."""
import sqlite3
from datetime import datetime  # NOTE(review): currently unused — confirm before removing

# Fixed path: the server's default database file in the working directory.
db_path = "reader_data.db"

print("=" * 60)
print("数据库内容检查")
print("=" * 60)
print(f"\n数据库位置: {db_path}")
print()

conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# --- highlights table: total count plus the five most recent rows ---
print("📚 Highlights (高亮) 表:")
print("-" * 60)
cursor.execute("SELECT COUNT(*) FROM highlights")
count = cursor.fetchone()[0]
print(f"总记录数: {count}")

if count > 0:
    # Truncate selected_text to 50 chars so long highlights stay readable.
    cursor.execute("""
        SELECT id, book_id, chapter_index,
               substr(selected_text, 1, 50) as text_preview,
               created_at
        FROM highlights
        ORDER BY created_at DESC
        LIMIT 5
    """)

    print("\n最近的5条记录:")
    for row in cursor.fetchall():
        print(f"\nID: {row[0]}")
        print(f" 书籍: {row[1]}")
        print(f" 章节: {row[2]}")
        print(f" 文本: {row[3]}...")
        print(f" 时间: {row[4]}")

print("\n" + "=" * 60)

# --- ai_analyses table: total count plus the five most recent rows ---
print("🤖 AI Analyses (AI分析) 表:")
print("-" * 60)
cursor.execute("SELECT COUNT(*) FROM ai_analyses")
count = cursor.fetchone()[0]
print(f"总记录数: {count}")

if count > 0:
    # Preview both the prompt (50 chars) and the response (100 chars).
    cursor.execute("""
        SELECT id, highlight_id, analysis_type,
               substr(prompt, 1, 50) as prompt_preview,
               substr(response, 1, 100) as response_preview,
               created_at
        FROM ai_analyses
        ORDER BY created_at DESC
        LIMIT 5
    """)

    print("\n最近的5条记录:")
    for row in cursor.fetchall():
        print(f"\nID: {row[0]}")
        print(f" 关联高亮ID: {row[1]}")
        print(f" 分析类型: {row[2]}")
        print(f" 提示: {row[3]}...")
        print(f" 响应: {row[4]}...")
        print(f" 时间: {row[5]}")

print("\n" + "=" * 60)

# --- summary: analysis counts grouped by type ---
print("📊 统计信息:")
print("-" * 60)

cursor.execute("""
    SELECT analysis_type, COUNT(*)
    FROM ai_analyses
    GROUP BY analysis_type
""")
stats = cursor.fetchall()

if stats:
    print("\n按分析类型统计:")
    for row in stats:
        print(f" {row[0]}: {row[1]} 条")
else:
    print(" 暂无数据")

conn.close()

print("\n" + "=" * 60)
print("✓ 检查完成")
print("=" * 60)
diff --git a/database.py b/database.py
new file mode 100644
index 0000000..5f52c21
--- /dev/null
+++ b/database.py
@@ -0,0 +1,251 @@
+"""
+Database models for storing highlights and AI interactions.
+"""
+import sqlite3
+import json
+from datetime import datetime
+from typing import List, Dict, Optional
+from dataclasses import dataclass, asdict
+
+
@dataclass
class Highlight:
    """A user-selected text span within a chapter, plus surrounding context."""
    id: Optional[int] = None        # database row id; None until saved
    book_id: str = ""               # identifier of the book the span belongs to
    chapter_index: int = 0          # index of the chapter containing the span
    selected_text: str = ""         # the highlighted text itself
    context_before: str = ""        # text immediately preceding the selection
    context_after: str = ""         # text immediately following the selection
    created_at: str = ""            # ISO-8601 timestamp; filled in on save if empty
+
+
@dataclass
class AIAnalysis:
    """An AI analysis (or user comment) attached to a saved highlight."""
    id: Optional[int] = None     # database row id; None until saved
    highlight_id: int = 0        # foreign key into the highlights table
    analysis_type: str = ""      # 'fact_check' or 'discussion'
    prompt: str = ""             # prompt text sent to the model
    response: str = ""           # model response (or edited comment text)
    created_at: str = ""         # ISO-8601 timestamp; filled in on save if empty
+
+
class Database:
    """Simple SQLite persistence layer for highlights, AI analyses and progress.

    Every public method opens its own short-lived connection, so one instance
    can be shared across request handlers without holding connection state.
    Fix over the original: each connection is now closed in a ``finally``
    block, so a failing SQL statement no longer leaks the connection.
    """

    def __init__(self, db_path: str = "reader_data.db"):
        # Path of the SQLite file; sqlite3 creates it on first connect.
        self.db_path = db_path
        self.init_db()

    def _connect(self) -> sqlite3.Connection:
        """Open a fresh connection to the backing database file."""
        return sqlite3.connect(self.db_path)

    def init_db(self):
        """Create tables if they don't exist (idempotent, safe to re-run)."""
        conn = self._connect()
        try:
            cursor = conn.cursor()

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS highlights (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    book_id TEXT NOT NULL,
                    chapter_index INTEGER NOT NULL,
                    selected_text TEXT NOT NULL,
                    context_before TEXT,
                    context_after TEXT,
                    created_at TEXT NOT NULL
                )
            """)

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS ai_analyses (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    highlight_id INTEGER NOT NULL,
                    analysis_type TEXT NOT NULL,
                    prompt TEXT NOT NULL,
                    response TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    FOREIGN KEY (highlight_id) REFERENCES highlights (id)
                )
            """)

            # One row per book (book_id UNIQUE) enables upsert in save_progress.
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS reading_progress (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    book_id TEXT NOT NULL UNIQUE,
                    chapter_index INTEGER NOT NULL,
                    scroll_position INTEGER DEFAULT 0,
                    last_read_at TEXT NOT NULL
                )
            """)

            conn.commit()
        finally:
            conn.close()

    def save_highlight(self, highlight: "Highlight") -> int:
        """Insert a highlight and return its new row ID.

        Uses the current time when ``highlight.created_at`` is empty.
        """
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute("""
                INSERT INTO highlights (book_id, chapter_index, selected_text,
                                        context_before, context_after, created_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (
                highlight.book_id,
                highlight.chapter_index,
                highlight.selected_text,
                highlight.context_before,
                highlight.context_after,
                highlight.created_at or datetime.now().isoformat(),
            ))
            highlight_id = cursor.lastrowid
            conn.commit()
            return highlight_id
        finally:
            conn.close()

    def save_analysis(self, analysis: "AIAnalysis") -> int:
        """Insert an AI analysis and return its new row ID.

        Uses the current time when ``analysis.created_at`` is empty.
        """
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute("""
                INSERT INTO ai_analyses (highlight_id, analysis_type, prompt, response, created_at)
                VALUES (?, ?, ?, ?, ?)
            """, (
                analysis.highlight_id,
                analysis.analysis_type,
                analysis.prompt,
                analysis.response,
                analysis.created_at or datetime.now().isoformat(),
            ))
            analysis_id = cursor.lastrowid
            conn.commit()
            return analysis_id
        finally:
            conn.close()

    def get_highlights_for_chapter(self, book_id: str, chapter_index: int) -> List[Dict]:
        """Return all highlights for one chapter, newest first, as plain dicts."""
        conn = self._connect()
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute("""
                SELECT * FROM highlights
                WHERE book_id = ? AND chapter_index = ?
                ORDER BY created_at DESC
            """, (book_id, chapter_index))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()

    def get_all_highlights_for_book(self, book_id: str) -> List[Dict]:
        """Return all highlights for a book across every chapter, newest first."""
        conn = self._connect()
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute("""
                SELECT * FROM highlights
                WHERE book_id = ?
                ORDER BY created_at DESC
            """, (book_id,))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()

    def get_analyses_for_highlight(self, highlight_id: int) -> List[Dict]:
        """Return all AI analyses attached to a highlight, newest first."""
        conn = self._connect()
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute("""
                SELECT * FROM ai_analyses
                WHERE highlight_id = ?
                ORDER BY created_at DESC
            """, (highlight_id,))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()

    def update_analysis(self, analysis_id: int, response: str):
        """Replace an analysis' response text (used for editing comments)."""
        conn = self._connect()
        try:
            conn.execute("""
                UPDATE ai_analyses
                SET response = ?
                WHERE id = ?
            """, (response, analysis_id))
            conn.commit()
        finally:
            conn.close()

    def delete_analysis(self, analysis_id: int):
        """Delete an analysis; also delete its highlight if it was the last one."""
        conn = self._connect()
        try:
            cursor = conn.cursor()

            # Look up the owning highlight before the row disappears.
            cursor.execute("SELECT highlight_id FROM ai_analyses WHERE id = ?", (analysis_id,))
            result = cursor.fetchone()

            if result:
                highlight_id = result[0]

                cursor.execute("DELETE FROM ai_analyses WHERE id = ?", (analysis_id,))

                # Garbage-collect the highlight when no analyses reference it.
                cursor.execute(
                    "SELECT COUNT(*) FROM ai_analyses WHERE highlight_id = ?",
                    (highlight_id,),
                )
                if cursor.fetchone()[0] == 0:
                    cursor.execute("DELETE FROM highlights WHERE id = ?", (highlight_id,))

            conn.commit()
        finally:
            conn.close()

    def save_progress(self, book_id: str, chapter_index: int, scroll_position: int = 0):
        """Insert or update the single reading-progress row for a book."""
        conn = self._connect()
        try:
            conn.execute("""
                INSERT INTO reading_progress (book_id, chapter_index, scroll_position, last_read_at)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(book_id) DO UPDATE SET
                    chapter_index = excluded.chapter_index,
                    scroll_position = excluded.scroll_position,
                    last_read_at = excluded.last_read_at
            """, (book_id, chapter_index, scroll_position, datetime.now().isoformat()))
            conn.commit()
        finally:
            conn.close()

    def get_progress(self, book_id: str) -> Optional[Dict]:
        """Return {'chapter_index', 'scroll_position'} for a book, or None."""
        conn = self._connect()
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute("""
                SELECT chapter_index, scroll_position FROM reading_progress
                WHERE book_id = ?
            """, (book_id,))
            result = cursor.fetchone()
            return dict(result) if result else None
        finally:
            conn.close()
diff --git a/list_epub_images.py b/list_epub_images.py
new file mode 100644
index 0000000..e744c96
--- /dev/null
+++ b/list_epub_images.py
@@ -0,0 +1,35 @@
+import sys
+import ebooklib
+from ebooklib import epub
+
+if len(sys.argv) < 2:
+ print("Usage: python list_epub_images.py No processed books found. Run
.
+ The browser resolves this to /read/{book_id}/images/pic.jpg.
+ """
+ # Security check: prevent path traversal
+ if ".." in book_id or "/" in book_id or "\\" in book_id:
+ raise HTTPException(status_code=400, detail="Invalid book ID")
+ if ".." in image_name or "/" in image_name or "\\" in image_name:
+ raise HTTPException(status_code=400, detail="Invalid image name")
-@app.get("/read/{book_id}/{chapter_index}", response_class=HTMLResponse)
-async def read_chapter(request: Request, book_id: str, chapter_index: int):
- """The main reader interface."""
+ img_path = os.path.join(BOOKS_DIR, book_id, "images", image_name)
+
+ if not os.path.exists(img_path):
+ raise HTTPException(status_code=404, detail="Image not found")
+
+ return FileResponse(img_path)
+
@app.get("/read/{book_id}/{chapter_ref:path}", response_class=HTMLResponse)
async def read_chapter(request: Request, book_id: str, chapter_ref: str):
    """The main reader interface. Accepts either chapter index (0, 1, 2) or filename (part0008.html)."""

    # Try to parse as integer first
    try:
        chapter_index = int(chapter_ref)
    except ValueError:
        # It's a filename, need to find the corresponding chapter index
        book = load_book_cached(book_id)
        # NOTE(review): the numeric branch checks `if not book` below before
        # touching the book, but this branch iterates book.spine immediately.
        # If load_book_cached returns None for an unknown book_id (as the
        # later check implies it can), this raises AttributeError (-> 500)
        # instead of the intended 404 — confirm and guard here too.
        chapter_index = None

        # Search through spine to find matching filename
        for idx, item in enumerate(book.spine):
            # endswith() tolerates hrefs carrying a directory prefix
            # (e.g. "text/part0008.html" matching ref "part0008.html").
            if item.href == chapter_ref or item.href.endswith(chapter_ref):
                chapter_index = idx
                break

        if chapter_index is None:
            raise HTTPException(status_code=404, detail=f"Chapter file '{chapter_ref}' not found")

    # Now proceed with the chapter_index
    book = load_book_cached(book_id)  # cached, so the re-load in the filename branch is cheap
    if not book:
        raise HTTPException(status_code=404, detail="Book not found")

    # ... diff hunk boundary (@@ -76,6 +196,12 @@): unchanged interior lines of
    # this function (chapter-content loading etc.) are not shown in this patch ...

    # Previous/next spine indices for the nav bar; None at either end of the book.
    prev_idx = chapter_index - 1 if chapter_index > 0 else None
    next_idx = chapter_index + 1 if chapter_index < len(book.spine) - 1 else None

    # Get saved scroll position if returning to this chapter
    progress_data = db.get_progress(book_id)
    saved_scroll = 0
    if progress_data and progress_data['chapter_index'] == chapter_index:
        saved_scroll = progress_data['scroll_position']

    return templates.TemplateResponse("reader.html", {
        "request": request,
        "book": book,

        # ... diff hunk boundary (@@ -83,28 +209,248 @@): unchanged context keys
        # of this template mapping are not shown in this patch ...

        "chapter_index": chapter_index,
        "book_id": book_id,
        "prev_idx": prev_idx,
        "next_idx": next_idx,
        "saved_scroll": saved_scroll
    })
-@app.get("/read/{book_id}/images/{image_name}")
-async def serve_image(book_id: str, image_name: str):
- """
- Serves images specifically for a book.
- The HTML contains
.
- The browser resolves this to /read/{book_id}/images/pic.jpg.
- """
- # Security check: ensure book_id is clean
- safe_book_id = os.path.basename(book_id)
- safe_image_name = os.path.basename(image_name)
- img_path = os.path.join(BOOKS_DIR, safe_book_id, "images", safe_image_name)
+# AI-related endpoints
- if not os.path.exists(img_path):
- raise HTTPException(status_code=404, detail="Image not found")
@app.post("/api/progress")
async def save_reading_progress(book_id: str, chapter_index: int, scroll_position: int = 0):
    """Persist the reader's position (chapter index + scroll offset) for a book."""
    try:
        db.save_progress(book_id, chapter_index, scroll_position)
    except Exception as exc:
        # Surface storage failures as a plain 500 with the error text.
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "success"}
+
+
@app.post("/api/highlight")
async def create_highlight(req: HighlightRequest):
    """Persist a user highlight and return the id of the new row."""
    new_highlight = Highlight(
        book_id=req.book_id,
        chapter_index=req.chapter_index,
        selected_text=req.selected_text,
        context_before=req.context_before,
        context_after=req.context_after,
        created_at=datetime.now().isoformat(),
    )
    new_id = db.save_highlight(new_highlight)
    return {"highlight_id": new_id, "status": "success"}
+
+
@app.post("/api/ai/analyze")
async def analyze_text(req: AIRequest):
    """Run an AI analysis (fact-check or discussion) and return it without saving."""
    service = get_ai_service()
    if not service:
        raise HTTPException(status_code=500, detail="AI service not configured. Please set OPENAI_API_KEY.")

    # Dispatch table maps the requested analysis type to the service coroutine.
    handlers = {
        "fact_check": service.fact_check,
        "discussion": service.discuss,
    }
    handler = handlers.get(req.analysis_type)
    if handler is None:
        raise HTTPException(status_code=400, detail="Invalid analysis type")

    result = await handler(req.selected_text, req.context)
    return {
        "response": result,
        "status": "success"
    }
+
+
class SaveAnalysisRequest(BaseModel):
    """Request body for POST /api/ai/save — persists a completed AI analysis."""

    highlight_id: int   # id of the highlight this analysis is attached to
    analysis_type: str  # e.g. "fact_check" or "discussion" (matches /api/ai/analyze)
    prompt: str         # prompt text that was sent to the model
    response: str       # model's response text to store
+
+
@app.post("/api/ai/save")
async def save_analysis(req: SaveAnalysisRequest):
    """Store an AI analysis record and return the id of the new row."""
    record = AIAnalysis(
        highlight_id=req.highlight_id,
        analysis_type=req.analysis_type,
        prompt=req.prompt,
        response=req.response,
        created_at=datetime.now().isoformat(),
    )
    new_id = db.save_analysis(record)
    return {
        "analysis_id": new_id,
        "status": "success"
    }
+
+
@app.get("/api/highlights/{book_id}/{chapter_index}")
async def get_highlights(book_id: str, chapter_index: int):
    """Return every highlight in a chapter, each with its attached AI analyses."""
    chapter_highlights = db.get_highlights_for_chapter(book_id, chapter_index)

    # Decorate each highlight dict with its analyses before returning.
    for h in chapter_highlights:
        h["analyses"] = db.get_analyses_for_highlight(h["id"])

    return {"highlights": chapter_highlights}
+
+
@app.get("/highlights/{book_id}")
async def view_highlights(book_id: str, request: Request):
    """Render the highlights page for a book.

    Each (highlight, analysis) pair becomes one row; highlights with no
    analysis still appear as a single row with null analysis fields.
    """
    try:
        rows = []
        for h in db.get_all_highlights_for_book(book_id):
            attached = db.get_analyses_for_highlight(h["id"])
            if not attached:
                # Highlight without analysis
                rows.append({
                    **h,
                    "analysis_type": None,
                    "response": None,
                    "analysis_created_at": None
                })
                continue
            for a in attached:
                rows.append({
                    **h,
                    "analysis_type": a["analysis_type"],
                    "response": a["response"],
                    "analysis_created_at": a["created_at"]
                })

        # Newest highlights first.
        rows.sort(key=lambda r: r["created_at"], reverse=True)

        # Per-type counts for the summary header.
        stats = {"total": len(rows)}
        for kind in ("fact_check", "discussion", "comment"):
            stats[kind] = sum(1 for r in rows if r["analysis_type"] == kind)

        # Derive a display title from the folder name (e.g. "my_book_data").
        display_title = book_id.replace("_data", "").replace("_", " ")

        return templates.TemplateResponse("highlights.html", {
            "request": request,
            "book_id": book_id,
            "book_title": display_title,
            "highlights": rows,
            "stats": stats
        })

    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
+
+
@app.put("/api/ai/update/{analysis_id}")
async def update_analysis(analysis_id: int, req: dict):
    """Overwrite the response text of an analysis (used when editing comments)."""
    try:
        new_text = req.get("response", "")
        db.update_analysis(analysis_id, new_text)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "success"}
+
+
@app.delete("/api/ai/delete/{analysis_id}")
async def delete_analysis(analysis_id: int):
    """Remove an analysis (its highlight is removed too if nothing else references it)."""
    try:
        db.delete_analysis(analysis_id)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "success"}
+
+
@app.delete("/delete/{book_id}")
async def delete_book(book_id: str):
    """Delete a book's folder on disk while keeping its database entries.

    Raises:
        HTTPException 400 if book_id looks like a path-traversal attempt,
        HTTPException 404 if the folder does not exist,
        HTTPException 500 on unexpected filesystem errors.
    """
    # Security check: ensure book_id doesn't contain path traversal
    if ".." in book_id or "/" in book_id or "\\" in book_id:
        raise HTTPException(status_code=400, detail="Invalid book ID")

    book_path = os.path.join(BOOKS_DIR, book_id)

    if not os.path.exists(book_path):
        raise HTTPException(status_code=404, detail="Book not found")

    # BUG FIX: the previous broad `except Exception` wrapped the 400/404
    # HTTPExceptions above into opaque 500s. Only the filesystem work is
    # guarded now, and only OS-level failures become 500s.
    try:
        # Delete the book folder
        shutil.rmtree(book_path)
        # Clear cache for this book
        load_book_cached.cache_clear()
    except OSError as e:
        raise HTTPException(status_code=500, detail=str(e))

    return {
        "message": f"Book deleted. Your highlights and analyses are preserved in the database.",
        "status": "success"
    }
+
+
@app.post("/upload")
async def upload_book(file: UploadFile = File(...)):
    """Upload an EPUB file and process it with `reader3.py`.

    The upload is staged under ./temp, handed to a subprocess for
    conversion, and the staged file is always cleaned up afterwards.

    Raises:
        HTTPException 400 for non-EPUB uploads,
        HTTPException 500 when processing fails, times out, or errors.
    """
    # Validate file type (case-insensitive; also guards a missing filename).
    if not file.filename or not file.filename.lower().endswith('.epub'):
        raise HTTPException(status_code=400, detail="Only EPUB files are supported")

    # SECURITY FIX: strip any client-supplied directory components so a
    # filename like "../../x.epub" cannot escape the temp directory.
    safe_name = os.path.basename(file.filename)

    # Create temp directory if it doesn't exist
    temp_dir = "temp"
    os.makedirs(temp_dir, exist_ok=True)
    temp_file_path = os.path.join(temp_dir, safe_name)

    try:
        # Save uploaded file
        with open(temp_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Process the EPUB file using reader3.py with uv
        result = subprocess.run(
            ["uv", "run", "reader3.py", temp_file_path],
            capture_output=True,
            text=True,
            timeout=60
        )

        if result.returncode != 0:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to process EPUB: {result.stderr}"
            )

        # Extract book title from output
        book_name = os.path.splitext(safe_name)[0]
        return {
            "message": f"Successfully processed '{book_name}'",
            "status": "success"
        }

    except subprocess.TimeoutExpired:
        raise HTTPException(status_code=500, detail="Processing timeout (file too large?)")
    except HTTPException:
        # BUG FIX: previously the broad handler below converted our own
        # HTTPException (failed processing) into a generic 500 with a
        # stringified exception; re-raise it unchanged instead.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # BUG FIX: always remove the staged upload — previously it leaked
        # whenever processing raised (e.g. on timeout).
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
- return FileResponse(img_path)
# Script entry point: run the FastAPI app with uvicorn on port 8123.
if __name__ == "__main__":
    import uvicorn
    # NOTE(review): binding 0.0.0.0 (per the message below) exposes the app —
    # including the unauthenticated /delete and /upload endpoints — to the
    # local network; confirm that is intended before deploying.
    print("Starting server at http://0.0.0.0:8123 (accessible externally if firewall/NAT allow)")
    uvicorn.run(app, host="0.0.0.0", port=8123)
diff --git a/templates/highlights.html b/templates/highlights.html
new file mode 100644
index 0000000..38a332e
--- /dev/null
+++ b/templates/highlights.html
@@ -0,0 +1,362 @@
+
+
+
+
+
+ {{ book_title }}
+
+ Library
+ My Reader with AI
- {% if not books %}
- reader3.py on an epub first.