Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
762abe5
refactor(backend): fix async blocking, double commits, and exception …
sylvanding Mar 17, 2026
d3e041f
refactor(backend): unify config, centralize prompts, and optimize RAG…
sylvanding Mar 17, 2026
91bd9e1
test(backend): add comprehensive API endpoint tests covering 141 new …
sylvanding Mar 17, 2026
6c29e7c
docs(backend): add API endpoint catalog, brainstorms, plans, and rese…
sylvanding Mar 17, 2026
0080cc9
test(backend): add E2E live server tests with real LLM (25 passed, 1 …
sylvanding Mar 17, 2026
23e9574
docs(backend): mark E2E acceptance criteria as completed
sylvanding Mar 17, 2026
5722af7
feat(backend): enable MinerU PDF parsing + GPU parallel OCR + compreh…
sylvanding Mar 17, 2026
d7cff6f
fix(backend): resolve E2E test skips and CUDA OOM failures
sylvanding Mar 18, 2026
87f65b2
feat(backend): add GPU_MODE preset system for resource scheduling
sylvanding Mar 18, 2026
bb4800a
refactor(backend): comprehensive backend optimization — 21 improvemen…
sylvanding Mar 18, 2026
e4c52e6
refactor(backend): code quality improvements and comprehensive testin…
sylvanding Mar 18, 2026
c0feaff
feat(backend): GPU resource auto-management with TTL and MinerU subpr…
sylvanding Mar 18, 2026
e4be0d0
fix(backend): remove unsupported --no-banner flag from conda run command
sylvanding Mar 18, 2026
70fb96f
fix(backend): P0 bug fixes — pipeline data loss, async blocking, secu…
sylvanding Mar 18, 2026
6110c0b
refactor(backend): data integrity, pipeline persistence, code quality…
sylvanding Mar 18, 2026
72e7ccf
test(backend): add pdf_metadata tests and extend paper API test coverage
sylvanding Mar 18, 2026
7ede569
refactor(backend): P2 improvements — OpenAPI docs, SSE errors, rate l…
sylvanding Mar 18, 2026
17ece0f
fix(backend): update tests for unique constraint, search body, SSRF, …
sylvanding Mar 18, 2026
7d0e37c
docs(backend): mark backend comprehensive review plan as completed
sylvanding Mar 18, 2026
7fc088b
feat(backend): auto-release GPU resources on program exit
sylvanding Mar 18, 2026
c1d449a
docs: update README and .env.example for GPU management and MinerU fe…
sylvanding Mar 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 44 additions & 9 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# --- Application ---
APP_ENV=development
# Set to true for development only. Production MUST use false.
APP_DEBUG=false
APP_DEBUG=true
APP_HOST=0.0.0.0
APP_PORT=8000
# SECURITY: Change this to a random secret key in production!
Expand Down Expand Up @@ -59,31 +59,66 @@ OLLAMA_MODEL=llama3
# --- Embedding ---
# Provider: local (HuggingFace) | api (OpenAI) | mock
EMBEDDING_PROVIDER=local
EMBEDDING_MODEL=BAAI/bge-m3
EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B
EMBEDDING_API_KEY=
RERANKER_MODEL=BAAI/bge-reranker-v2-m3
RERANKER_MODEL=tomaarsen/Qwen3-Reranker-8B-seq-cls

# --- OCR ---
# PaddleOCR language: ch (Chinese+English) | en (English only)
OCR_LANG=ch

# --- PDF Parsing ---
# --- PDF Parsing / MinerU ---
# Parser selection: auto (pdfplumber first, fallback to MinerU) | mineru | pdfplumber
PDF_PARSER=auto
PDF_PARSER=mineru
# MinerU independent API service URL
MINERU_API_URL=http://localhost:8010
# MinerU backend: pipeline | hybrid-auto-engine | vlm-auto-engine
MINERU_BACKEND=pipeline
# Timeout per PDF in seconds
MINERU_TIMEOUT=300
MINERU_TIMEOUT=8000
# Auto start/stop MinerU subprocess (true = Omelette manages MinerU lifecycle)
MINERU_AUTO_MANAGE=true
# Conda environment name for MinerU (used with conda run)
MINERU_CONDA_ENV=mineru
# Stop MinerU after N seconds idle (0 = never auto-stop)
MINERU_TTL_SECONDS=600
# MinerU startup timeout in seconds
MINERU_STARTUP_TIMEOUT=120
# GPU IDs for MinerU (empty = inherit CUDA_VISIBLE_DEVICES)
MINERU_GPU_IDS=

# --- GPU ---
# Comma-separated GPU IDs for OCR/embedding tasks
CUDA_VISIBLE_DEVICES=0,3
CUDA_VISIBLE_DEVICES=

# Auto-unload GPU models after N seconds idle (0 = never auto-unload)
MODEL_TTL_SECONDS=300
# TTL check interval in seconds
MODEL_TTL_CHECK_INTERVAL=30

# GPU preset mode: conservative | balanced | aggressive
# conservative: batch=1, parallel=1, safe for small VRAM / debugging
# balanced: batch=8/16, auto parallel, good default
# aggressive: batch=32/50, parallel=GPU*2, max throughput (32G+ VRAM)
GPU_MODE=balanced

# Per-service overrides (0 = follow GPU_MODE preset)
# EMBED_BATCH_SIZE=0
# RERANK_BATCH_SIZE=0

# Pin models to specific GPU index (-1 = auto-select by free memory)
# EMBED_GPU_ID=-1
# RERANK_GPU_ID=-1

# Comma-separated GPU IDs for OCR workers (empty = use all visible GPUs)
# OCR_GPU_IDS=

# Max parallel OCR tasks. 0=auto (GPU count, or GPU*2 in aggressive mode)
# OCR_PARALLEL_LIMIT=0

# --- Network Proxy ---
HTTP_PROXY=http://127.0.0.1:20171/
HTTPS_PROXY=http://127.0.0.1:20171/
# HTTP_PROXY=http://your-proxy:port
# HTTPS_PROXY=http://your-proxy:port

# --- HuggingFace Mirror ---
# For users in China, set to https://hf-mirror.com to speed up model downloads
Expand Down
55 changes: 46 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Omelette automates the full research literature pipeline — from keyword manage
Multi-channel download via Unpaywall, arXiv, and direct URL fallback strategies.

**📝 OCR Processing**
Native text extraction with PaddleOCR GPU fallback for scanned documents.
Native text extraction via MinerU (auto-managed subprocess) or PaddleOCR GPU fallback.

**🧠 RAG Knowledge Base**
LlamaIndex engine with ChromaDB, GPU-aware embeddings, hybrid retrieval, and cited answers.
Expand All @@ -69,7 +69,10 @@ Omelette automates the full research literature pipeline — from keyword manage
Summarization, citation generation (GB/T 7714, APA, MLA), review outlines, and gap analysis.

**🔄 LangGraph Pipeline**
Pipeline orchestration with human-in-the-loop interrupt and resume.
Pipeline orchestration with HITL interrupt/resume and persistent checkpointing.

**⚡ GPU Resource Management**
TTL-based auto-unload for GPU models, MinerU subprocess auto-management, monitoring API, and exit cleanup watchdog.

**🔗 MCP Integration**
Model Context Protocol server for AI IDE clients (Cursor, Claude Code, etc.).
Expand Down Expand Up @@ -103,7 +106,7 @@ Keywords ─→ Search ─→ Dedup ─→ Crawler ─→ OCR ─→ RAG ─→
| **RAG** | LlamaIndex with GPU-aware embeddings |
| **LLM** | LangChain (OpenAI, Anthropic, Aliyun, Volcengine, Ollama) |
| **Orchestration** | LangGraph with HITL interrupt/resume |
| **OCR** | pdfplumber (native) + PaddleOCR (scanned, optional) |
| **OCR** | MinerU (auto-managed) + pdfplumber (native) + PaddleOCR (scanned) |
| **MCP** | Model Context Protocol server |
| **Docs** | VitePress (bilingual EN/ZH) |

Expand Down Expand Up @@ -147,6 +150,10 @@ cp .env.example .env
| `ALIYUN_API_KEY` | Aliyun Bailian API key |
| `VOLCENGINE_API_KEY` | Volcengine Doubao API key |
| `SEMANTIC_SCHOLAR_API_KEY` | Optional; increases Semantic Scholar rate limit |
| `GPU_MODE` | GPU preset: `conservative`, `balanced` (default), `aggressive` |
| `MODEL_TTL_SECONDS` | Auto-unload GPU models after N seconds idle (default: 300) |
| `MINERU_AUTO_MANAGE` | Auto start/stop MinerU subprocess (default: true) |
| `PDF_PARSER` | `auto`, `mineru`, or `pdfplumber` |

See [`.env.example`](.env.example) for the full list.

Expand All @@ -156,10 +163,31 @@ See [`.env.example`](.env.example) for the full list.

```bash
cd backend

# Run database migrations
alembic upgrade head

# Start server
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
```

### 4. Start frontend
On startup, the backend automatically:
- Writes a PID file to `DATA_DIR/omelette.pid`
- Starts a GPU model TTL monitor (auto-unloads idle models)
- If `MINERU_AUTO_MANAGE=true`, manages MinerU subprocess lifecycle
- Registers cleanup handlers (`atexit` + `SIGHUP`) so GPU resources are released even if the process exits unexpectedly

### 4. (Optional) GPU watchdog

For extra safety against `kill -9` or crashes, run the external watchdog:

```bash
python backend/scripts/gpu_watchdog.py --daemon
```

The watchdog monitors the Omelette process and cleans up GPU resources if it terminates abnormally.

### 5. Start frontend

```bash
cd frontend
Expand All @@ -169,13 +197,19 @@ npm run dev

Open [http://localhost:3000](http://localhost:3000) in your browser.

### 5. (Optional) OCR & Embeddings
### 6. (Optional) MinerU setup

If using MinerU for PDF parsing (`PDF_PARSER=mineru`):

```bash
cd backend
pip install -e ".[ocr,ml]"
# Create a separate conda env for MinerU
conda create -n mineru python=3.10
conda activate mineru
pip install magic-pdf[full]
```

Set `MINERU_CONDA_ENV=mineru` in `.env`. Omelette will auto-start MinerU when needed.

> **Troubleshooting:** If you get `ModuleNotFoundError: No module named 'fastapi'`, ensure the conda environment is activated: `conda activate omelette`.

## 📂 Project Layout
Expand All @@ -194,7 +228,8 @@ omelette/
│ │ └── main.py # App entry, lifespan, CORS
│ ├── mcp_server.py # MCP (Model Context Protocol) server
│ ├── alembic/ # Database migrations
│ ├── tests/ # pytest-asyncio tests (178 tests)
│ ├── scripts/ # Utilities (gpu_watchdog.py)
│ ├── tests/ # pytest-asyncio tests (526 tests)
│ └── pyproject.toml # Python dependencies
├── frontend/ # React SPA
│ └── src/
Expand Down Expand Up @@ -230,7 +265,7 @@ make dev # Start both backend and frontend
### Running Tests

```bash
# Backend (178 tests)
# Backend (526 tests)
cd backend && pytest tests/ -v

# Frontend unit tests (28 tests — Vitest + Testing Library + MSW)
Expand Down Expand Up @@ -269,6 +304,8 @@ REST APIs under `/api/v1/`:
| `GET/POST /subscriptions` | Subscription management |
| `GET/POST /settings` | Settings and health |
| `GET /settings/health` | Health check |
| `GET /gpu/status` | GPU model and memory status |
| `POST /gpu/unload` | Manually unload GPU models |

MCP server: `/mcp` (WebSocket/SSE for AI IDE clients)

Expand Down
51 changes: 42 additions & 9 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Omelette 覆盖科研文献全流程自动化 — 从关键词管理、多源检
Unpaywall、arXiv、直链多通道下载,智能回退策略。

**📝 OCR 解析**
pdfplumber 原生文本提取,PaddleOCR GPU 加速处理扫描件。
MinerU(自动管理子进程)或 pdfplumber 原生提取,PaddleOCR GPU 加速处理扫描件。

**🧠 RAG 知识库**
LlamaIndex 引擎,ChromaDB 向量存储,GPU 感知嵌入,混合检索,带引用回答。
Expand All @@ -69,7 +69,10 @@ Omelette 覆盖科研文献全流程自动化 — 从关键词管理、多源检
论文摘要、引用生成(GB/T 7714、APA、MLA)、综述提纲、缺口分析。

**🔄 LangGraph 流水线**
流水线编排,支持人机协同中断与恢复。
流水线编排,支持人机协同中断/恢复与持久化检查点。

**⚡ GPU 资源管理**
TTL 自动卸载 GPU 模型、MinerU 子进程自动管理、监控 API、退出清理看门狗。

**🔗 MCP 集成**
Model Context Protocol 服务端,面向 AI IDE 客户端(Cursor、Claude Code 等)。
Expand Down Expand Up @@ -103,7 +106,7 @@ Keywords ─→ Search ─→ Dedup ─→ Crawler ─→ OCR ─→ RAG ─→
| **RAG** | LlamaIndex,GPU 感知嵌入 |
| **LLM** | LangChain(OpenAI、Anthropic、阿里云、火山引擎、Ollama) |
| **编排** | LangGraph,支持人机协同中断与恢复 |
| **OCR** | pdfplumber(原生)+ PaddleOCR(扫描件,可选) |
| **OCR** | MinerU(自动管理)+ pdfplumber(原生)+ PaddleOCR(扫描件) |
| **MCP** | Model Context Protocol 服务端 |
| **文档** | VitePress(中英双语) |

Expand Down Expand Up @@ -156,10 +159,31 @@ cp .env.example .env

```bash
cd backend

# 执行数据库迁移
alembic upgrade head

# 启动服务
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
```

### 4. 启动前端
启动时后端自动完成以下操作:
- 写入 PID 文件到 `DATA_DIR/omelette.pid`
- 启动 GPU 模型 TTL 监控(自动卸载空闲模型)
- 若 `MINERU_AUTO_MANAGE=true`,自动管理 MinerU 子进程生命周期
- 注册清理钩子(`atexit` + `SIGHUP`),即使进程意外退出也会释放 GPU 资源

### 4.(可选)GPU 看门狗

为防止 `kill -9` 或崩溃导致资源泄漏,可运行外部看门狗:

```bash
python backend/scripts/gpu_watchdog.py --daemon
```

看门狗会监控 Omelette 进程,在其异常终止后自动清理 GPU 资源。

### 5. 启动前端

```bash
cd frontend
Expand All @@ -169,13 +193,19 @@ npm run dev

在浏览器中打开 [http://localhost:3000](http://localhost:3000)。

### 5.(可选)OCR 与嵌入
### 6.(可选)MinerU 配置

若使用 MinerU 解析 PDF(`PDF_PARSER=mineru`):

```bash
cd backend
pip install -e ".[ocr,ml]"
# 为 MinerU 创建独立 conda 环境
conda create -n mineru python=3.10
conda activate mineru
pip install magic-pdf[full]
```

在 `.env` 中设置 `MINERU_CONDA_ENV=mineru`,Omelette 将在需要时自动启动 MinerU。

> **常见问题:** 若出现 `ModuleNotFoundError: No module named 'fastapi'`,请确认已激活 conda 环境:`conda activate omelette`。

## 📂 项目结构
Expand All @@ -194,7 +224,8 @@ omelette/
│ │ └── main.py # App entry, lifespan, CORS
│ ├── mcp_server.py # MCP (Model Context Protocol) server
│ ├── alembic/ # Database migrations
│ ├── tests/ # pytest-asyncio 测试(178 个)
│ ├── scripts/ # 工具脚本(gpu_watchdog.py)
│ ├── tests/ # pytest-asyncio 测试(526 个)
│ └── pyproject.toml # Python dependencies
├── frontend/ # React SPA
│ └── src/
Expand Down Expand Up @@ -230,7 +261,7 @@ make dev # Start both backend and frontend
### 运行测试

```bash
# 后端(178 个测试)
# 后端(526 个测试)
cd backend && pytest tests/ -v

# 前端单元测试(28 个测试 — Vitest + Testing Library + MSW)
Expand Down Expand Up @@ -266,6 +297,8 @@ REST API 位于 `/api/v1/` 下:
| `GET/POST /subscriptions` | 订阅管理 |
| `GET/POST /settings` | 设置与健康状态 |
| `GET /settings/health` | 健康检查 |
| `GET /gpu/status` | GPU 模型与显存状态 |
| `POST /gpu/unload` | 手动卸载 GPU 模型 |

MCP 服务端:`/mcp`(WebSocket/SSE,面向 AI IDE 客户端)

Expand Down
26 changes: 26 additions & 0 deletions backend/alembic/versions/a1b2c3d4e5f6_add_composite_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""add composite indexes for paper and task tables

Revision ID: a1b2c3d4e5f6
Revises: f2bee250c39f
Create Date: 2026-03-18 10:00:00.000000

"""

from collections.abc import Sequence

from alembic import op

# Revision identifiers, used by Alembic.
revision: str = "a1b2c3d4e5f6"
# NOTE(review): the sibling migration cb8130e58f92 annotates down_revision as
# str | Sequence[str] | None; widened here for consistency across migrations.
down_revision: str | Sequence[str] | None = "f2bee250c39f"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Create composite (project_id, status) indexes on papers and tasks.

    Both tables are filtered by project and status together in list/query
    endpoints, so a composite index serves those lookups in one pass.
    """
    # Creation order matches the original migration: papers first, then tasks.
    for index_name, table_name in (
        ("ix_paper_project_status", "papers"),
        ("ix_task_project_status", "tasks"),
    ):
        op.create_index(index_name, table_name, ["project_id", "status"])


def downgrade() -> None:
    """Drop the composite indexes, reversing the order used in upgrade()."""
    for index_name, table_name in (
        ("ix_task_project_status", "tasks"),
        ("ix_paper_project_status", "papers"),
    ):
        op.drop_index(index_name, table_name=table_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""add paper project_doi unique constraint

Revision ID: cb8130e58f92
Revises: a1b2c3d4e5f6
Create Date: 2026-03-18 22:54:13.519198

"""

from collections.abc import Sequence

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "cb8130e58f92"
# Chains onto the composite-index migration.
down_revision: str | Sequence[str] | None = "a1b2c3d4e5f6"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Add a unique constraint so each DOI appears at most once per project."""
    # NOTE(review): batch_alter_table is presumably used for SQLite's limited
    # ALTER TABLE support — confirm the deployment target.
    with op.batch_alter_table("papers", schema=None) as batch:
        batch.create_unique_constraint("uq_paper_project_doi", ["project_id", "doi"])


def downgrade() -> None:
    """Remove the per-project DOI uniqueness constraint from papers."""
    with op.batch_alter_table("papers", schema=None) as batch:
        batch.drop_constraint("uq_paper_project_doi", type_="unique")
Loading
Loading