diff --git a/backend/app/api/v1/rewrite.py b/backend/app/api/v1/rewrite.py
index 19fa391..5b40fd2 100644
--- a/backend/app/api/v1/rewrite.py
+++ b/backend/app/api/v1/rewrite.py
@@ -13,7 +13,7 @@
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.api.deps import get_db
-from app.services.llm.client import LLMClient, get_llm_client
+from app.services.llm.client import get_llm_client
 from app.services.user_settings_service import UserSettingsService
 
 logger = logging.getLogger(__name__)
@@ -85,12 +85,10 @@ async def _stream_rewrite(request: RewriteRequest, db: AsyncSession):
 
             full_text = ""
             try:
-                async for token in asyncio.wait_for(
-                    _collect_stream(llm, messages),
-                    timeout=REWRITE_TIMEOUT,
-                ):
-                    full_text += token
-                    yield _sse("rewrite_delta", {"delta": token})
+                async with asyncio.timeout(REWRITE_TIMEOUT):
+                    async for token in llm.chat_stream(messages, temperature=0.3, task_type="rewrite"):
+                        full_text += token
+                        yield _sse("rewrite_delta", {"delta": token})
             except TimeoutError:
                 yield _sse("error", {"code": "timeout", "message": "Rewrite timed out after 30s"})
                 return
@@ -105,12 +103,6 @@ async def _stream_rewrite(request: RewriteRequest, db: AsyncSession):
         yield _sse("error", {"code": "rewrite_error", "message": str(e)})
 
 
-async def _collect_stream(llm: LLMClient, messages: list[dict[str, str]]):
-    """Wrap the async iterator so asyncio.wait_for can timeout the whole stream."""
-    async for token in llm.chat_stream(messages, temperature=0.3, task_type="rewrite"):
-        yield token
-
-
 @router.post("/rewrite")
 async def rewrite_stream(
     request: RewriteRequest,
diff --git a/backend/app/services/search_service.py b/backend/app/services/search_service.py
index 5001031..f8f1df9 100644
--- a/backend/app/services/search_service.py
+++ b/backend/app/services/search_service.py
@@ -219,7 +219,7 @@ def _affiliation(auth: dict) -> str:
 class ArXivProvider(SearchProvider):
     """arXiv API — Atom XML feed."""
 
-    BASE = "http://export.arxiv.org/api/query"
+    BASE = "https://export.arxiv.org/api/query"
 
     @property
     def name(self) -> str:
diff --git a/backend/app/services/subscription_service.py b/backend/app/services/subscription_service.py
index 4420de9..11ce2e5 100644
--- a/backend/app/services/subscription_service.py
+++ b/backend/app/services/subscription_service.py
@@ -91,10 +91,10 @@ def get_common_feeds() -> list[dict]:
         return [
             {
                 "name": "arXiv - Physics Optics",
-                "url": "http://export.arxiv.org/rss/physics.optics",
+                "url": "https://export.arxiv.org/rss/physics.optics",
                 "category": "preprint",
             },
-            {"name": "arXiv - Quantum Physics", "url": "http://export.arxiv.org/rss/quant-ph", "category": "preprint"},
+            {"name": "arXiv - Quantum Physics", "url": "https://export.arxiv.org/rss/quant-ph", "category": "preprint"},
             {"name": "Nature Photonics", "url": "https://www.nature.com/nphoton.rss", "category": "journal"},
             {
                 "name": "Science - Latest",
diff --git a/docs/api/chat.md b/docs/api/chat.md
new file mode 100644
index 0000000..fc2d867
--- /dev/null
+++ b/docs/api/chat.md
@@ -0,0 +1,121 @@
+# Chat API
+
+Chat 模块提供基于 SSE 的流式对话与文本改写接口，支持知识库 RAG 检索、多工具模式及实时流式输出。
+
+**Base path:** `/api/v1/chat`
+
+---
+
+## 1. 流式对话
+
+### POST /api/v1/chat/stream
+
+基于 SSE 的流式对话接口，支持知识库检索、引用标注及多轮对话上下文。
+
+#### 请求体 (ChatStreamRequest)
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `conversation_id` | int | 否 | 对话 ID，续写时传入以保持上下文 |
+| `message` | str | 是 | 用户消息内容（至少 1 字符） |
+| `knowledge_base_ids` | list[int] | 否 | 知识库（项目）ID 列表，用于 RAG 检索 |
+| `model` | str | 否 | 模型标识，空则使用用户设置 |
+| `tool_mode` | str | 否 | 工具模式，默认 `"qa"` |
+
+**tool_mode 可选值：**
+
+| 值 | 说明 |
+|----|------|
+| `qa` | 问答模式：基于上下文回答问题，使用 [1]、[2] 等引用格式 |
+| `citation_lookup` | 引用查找：识别并列出与文本最相关的参考文献 |
+| `review_outline` | 综述提纲：生成结构化文献综述提纲 |
+| `gap_analysis` | 研究缺口分析：识别研究空白与未来方向 |
+
+#### 对话响应格式
+
+SSE 流式响应，`Content-Type: text/event-stream`。
+
+#### 对话 SSE 事件类型
+
+| 事件 | 说明 | data 字段 |
+|------|------|-----------|
+| `message_start` | 消息开始 | `{ message_id }` |
+| `citation` | 引用信息（每个来源一条） | `{ index, paper_id, paper_title, page_number, excerpt, relevance_score, chunk_type, authors, year, doi }` |
+| `text_delta` | 文本增量 | `{ delta }` |
+| `message_end` | 消息结束 | `{ message_id, conversation_id, finish_reason }` |
+| `error` | 错误 | `{ code, message }` |
+
+#### 对话示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/chat/stream" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "什么是注意力机制？",
+    "knowledge_base_ids": [1, 2],
+    "tool_mode": "qa"
+  }'
+```
+
+#### 对话错误码
+
+| code | 说明 |
+|------|------|
+| `stream_error` | 流式处理异常 |
+
+---
+
+## 2. 文本改写
+
+### POST /api/v1/chat/rewrite
+
+基于 SSE 的流式文本改写接口，支持多种风格与自定义提示。
+
+#### 请求体 (RewriteRequest)
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `excerpt` | str | 是 | 待改写文本，**最多 2000 字符** |
+| `style` | str | 是 | 改写风格 |
+| `custom_prompt` | str | 否 | 自定义提示，`style=custom` 时必填 |
+| `source_language` | str | 否 | 源语言，默认 `"auto"` |
+
+**style 可选值：**
+
+| 值 | 说明 |
+|----|------|
+| `simplify` | 通俗化：将学术文本改写为易懂语言 |
+| `academic` | 学术化：改写为正式学术风格 |
+| `translate_en` | 英译：翻译为英文 |
+| `translate_zh` | 中译：翻译为中文 |
+| `custom` | 自定义：使用 `custom_prompt` 作为系统提示 |
+
+#### 改写响应格式
+
+SSE 流式响应，`Content-Type: text/event-stream`。
+
+#### 改写 SSE 事件类型
+
+| 事件 | 说明 | data 字段 |
+|------|------|-----------|
+| `rewrite_delta` | 改写文本增量 | `{ delta }` |
+| `rewrite_end` | 改写完成 | `{ full_text }` |
+| `error` | 错误 | `{ code, message }` |
+
+#### 改写示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/chat/rewrite" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "excerpt": "The attention mechanism allows the model to focus on different parts of the input.",
+    "style": "translate_zh"
+  }'
+```
+
+#### 改写错误码
+
+| code | 说明 |
+|------|------|
+| `timeout` | 改写超时（30 秒） |
+| `rewrite_error` | 改写处理异常 |
diff --git a/docs/api/conversations.md b/docs/api/conversations.md
new file mode 100644
index 0000000..3d5e2dd
--- /dev/null
+++ b/docs/api/conversations.md
@@ -0,0 +1,232 @@
+# Conversations API
+
+Conversations 模块提供对话的 CRUD 接口，支持分页列表、按知识库筛选及消息详情查询。
+
+**Base path:** `/api/v1/conversations`
+
+---
+
+## 端点总览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/conversations` | 分页列表 |
+| POST | `/conversations` | 创建对话 |
+| GET | `/conversations/{id}` | 获取详情（含消息） |
+| PUT | `/conversations/{id}` | 更新对话 |
+| DELETE | `/conversations/{id}` | 删除对话 |
+
+---
+
+## GET /conversations — 获取对话列表
+
+分页获取对话列表，按更新时间倒序，支持按知识库 ID 筛选。
+
+### 查询参数
+
+| 参数 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `page` | int | 否 | 页码，默认 1 |
+| `page_size` | int | 否 | 每页条数，默认 20 |
+| `knowledge_base_id` | int | 否 | 仅返回包含该知识库的对话 |
+
+### 列表响应格式
+
+`ApiResponse[PaginatedData[ConversationListSchema]]`
+
+**ConversationListSchema 字段：**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+| `title` | str | 标题 |
+| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 |
+| `model` | str | 模型标识 |
+| `tool_mode` | str | 工具模式，默认 `"qa"` |
+| `created_at` | datetime | 创建时间 |
+| `updated_at` | datetime | 更新时间 |
+| `message_count` | int | 消息数量 |
+| `last_message_preview` | str | 最后一条消息预览（最多 100 字符） |
+
+**PaginatedData 结构：**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "items": [...],
+    "total": 42,
+    "page": 1,
+    "page_size": 20,
+    "total_pages": 3
+  }
+}
+```
+
+### 列表示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/conversations?page=1&page_size=20"
+curl -X GET "http://localhost:8000/api/v1/conversations?knowledge_base_id=1"
+```
+
+---
+
+## POST /conversations — 创建对话
+
+创建新对话。
+
+### 创建请求体
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `title` | str | 否 | 标题，默认 `"新对话"` |
+| `knowledge_base_ids` | list[int] | 否 | 知识库 ID 列表 |
+| `model` | str | 否 | 模型标识 |
+| `tool_mode` | str | 否 | 工具模式，默认 `"qa"` |
+
+### 创建响应格式
+
+`ApiResponse[ConversationSchema]`，包含完整对话及空 `messages` 数组。
+
+### 创建示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/conversations" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "title": "文献综述讨论",
+    "knowledge_base_ids": [1, 2],
+    "tool_mode": "review_outline"
+  }'
+```
+
+---
+
+## GET /conversations/{id} — 获取对话详情
+
+获取单个对话及其全部消息。
+
+### 详情路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 详情响应格式
+
+`ApiResponse[ConversationSchema]`
+
+**ConversationSchema 字段：**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+| `title` | str | 标题 |
+| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 |
+| `model` | str | 模型标识 |
+| `tool_mode` | str | 工具模式 |
+| `created_at` | datetime | 创建时间 |
+| `updated_at` | datetime | 更新时间 |
+| `messages` | list[MessageSchema] | 消息列表 |
+
+**MessageSchema 字段：**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 消息 ID |
+| `conversation_id` | int | 对话 ID |
+| `role` | str | 角色：`user` / `assistant` |
+| `content` | str | 内容 |
+| `citations` | list[dict] \| null | 引用列表（assistant 消息） |
+| `created_at` | datetime | 创建时间 |
+
+### 详情示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/conversations/1"
+```
+
+### 详情错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
+
+---
+
+## PUT /conversations/{id} — 更新对话
+
+更新对话标题或设置。
+
+### 更新路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 更新请求体
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `title` | str | 否 | 新标题 |
+| `model` | str | 否 | 新模型 |
+| `tool_mode` | str | 否 | 新工具模式 |
+
+仅传入需要更新的字段。
+
+### 更新响应格式
+
+`ApiResponse[ConversationSchema]`，包含更新后的完整对话及消息。
+
+### 更新示例
+
+```bash
+curl -X PUT "http://localhost:8000/api/v1/conversations/1" \
+  -H "Content-Type: application/json" \
+  -d '{"title": "新标题"}'
+```
+
+### 更新错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
+
+---
+
+## DELETE /conversations/{id} — 删除对话
+
+删除对话及其全部消息（级联删除）。
+
+### 删除路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 删除响应格式
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "deleted": true,
+    "id": 1
+  }
+}
+```
+
+### 删除示例
+
+```bash
+curl -X DELETE "http://localhost:8000/api/v1/conversations/1"
+```
+
+### 删除错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
diff --git a/docs/api/crawler.md b/docs/api/crawler.md
new file mode 100644
index 0000000..d7d37dd
--- /dev/null
+++ b/docs/api/crawler.md
@@ -0,0 +1,98 @@
+# Crawler API
+
+爬虫模块 API，用于为待下载文献执行 PDF 下载（Unpaywall 等多源回退）。
+
+**Base path:** `/api/v1/projects/{project_id}/crawl`
+
+---
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|--------------|
+| POST | `/start` | 启动 PDF 下载任务 |
+| GET | `/stats` | 获取下载统计 |
+
+---
+
+## POST /start
+
+对项目内待下载文献启动 PDF 下载。仅处理状态为 `pending` 或 `metadata_only` 的文献。
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `priority` | string | `"high"` | 优先级：`high` 按引用数排序，`low` 按创建时间排序 |
+| `max_papers` | int | 50 | 单次处理最大文献数 |
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "total": 10,
+    "success": 8,
+    "failed": 2,
+    "details": [
+      {
+        "paper_id": 1,
+        "success": true,
+        "file_path": "/path/to/pdfs/1.pdf"
+      }
+    ]
+  }
+}
+```
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/crawl/start?priority=high&max_papers=50"
+```
+
+---
+
+## GET /stats
+
+返回项目内下载相关统计。
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "pending": 20,
+    "metadata_only": 5,
+    "pdf_downloaded": 80,
+    "ocr_complete": 60,
+    "indexed": 50,
+    "error": 3,
+    "storage": {
+      "total_mb": 1024,
+      "used_mb": 512
+    }
+  }
+}
+```
+
+- 各状态字段：文献数量
+- `storage`：存储统计（可选，由 CrawlerService 提供）
+
+**Example**
+
+```bash
+curl "http://localhost:8000/api/v1/projects/1/crawl/stats"
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 404 | 项目不存在 |
diff --git a/docs/api/dedup.md b/docs/api/dedup.md
new file mode 100644
index 0000000..a5ca83b
--- /dev/null
+++ b/docs/api/dedup.md
@@ -0,0 +1,229 @@
+# Dedup API
+
+Deduplication module API: DOI exact dedup, title similarity dedup, and LLM-assisted verification.
+
+**Base path:** `/api/v1/projects/{project_id}/dedup`
+
+---
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/run` | Run deduplication pipeline |
+| GET | `/candidates` | List candidate duplicate pairs for manual review |
+| POST | `/verify` | LLM-verify if two papers are duplicates |
+| POST | `/resolve` | Resolve a single upload conflict (keep_old / keep_new / merge / skip) |
+| POST | `/auto-resolve` | AI auto-suggest conflict resolution |
+
+---
+
+## POST /run
+
+Run the deduplication pipeline.
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `strategy` | string | `"full"` | Strategy: `doi_only` \| `title_only` \| `full` |
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "stage1_doi_removed": 0,
+    "stage2_title_removed": 0,
+    "stage3_candidates": 5,
+    "total_remaining": 120,
+    "details": {
+      "doi_duplicates": [],
+      "title_duplicates": [],
+      "llm_candidates": []
+    }
+  }
+}
+```
+
+- `strategy=doi_only`: DOI exact dedup only
+- `strategy=title_only`: Title similarity dedup only
+- `strategy=full`: Full 3-stage (DOI → title → LLM candidates)
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/run?strategy=full"
+```
+
+---
+
+## GET /candidates
+
+List candidate duplicate pairs for manual review (high title similarity, need LLM or human confirmation).
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "paper_a_id": 10,
+      "paper_b_id": 11,
+      "similarity": 0.92,
+      "paper_a": { "id": 10, "title": "...", "doi": "..." },
+      "paper_b": { "id": 11, "title": "...", "doi": "..." }
+    }
+  ]
+}
+```
+
+**Example**
+
+```bash
+curl "http://localhost:8000/api/v1/projects/1/dedup/candidates"
+```
+
+---
+
+## POST /verify
+
+Use LLM to determine if two papers are duplicates.
+
+**Query Parameters**
+
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `paper_a_id` | int | Yes | Paper A ID |
+| `paper_b_id` | int | Yes | Paper B ID |
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "is_duplicate": true,
+    "reason": "Same paper, different sources"
+  }
+}
+```
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/verify?paper_a_id=10&paper_b_id=11"
+```
+
+---
+
+## POST /resolve
+
+Resolve a single upload conflict. `conflict_id` format: `{old_paper_id}:{saved_filename}`, provided by the upload endpoint's `conflicts` array.
+
+**Request Body**
+
+```json
+{
+  "conflict_id": "123:uploaded.pdf",
+  "action": "keep_old",
+  "merged_paper": null
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `conflict_id` | string | Yes | Conflict ID, format `old_paper_id:saved_filename` |
+| `action` | string | Yes | `keep_old` \| `keep_new` \| `merge` \| `skip` |
+| `merged_paper` | object | No | Merged metadata; required when `action=merge` |
+
+**Actions**
+
+- `keep_old`: Keep existing paper, discard upload
+- `keep_new`: Use new upload, create new paper
+- `merge`: Merge metadata, create new paper (provide `merged_paper`)
+- `skip`: Use new upload, create new paper (same as keep_new)
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "action": "keep_new",
+    "paper_id": 124,
+    "message": "Created new paper"
+  }
+}
+```
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/resolve" \
+  -H "Content-Type: application/json" \
+  -d '{"conflict_id":"123:paper.pdf","action":"keep_new"}'
+```
+
+---
+
+## POST /auto-resolve
+
+Use LLM to batch-suggest conflict resolution.
+
+**Request Body**
+
+```json
+{
+  "conflict_ids": ["123:file1.pdf", "124:file2.pdf"]
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `conflict_ids` | list[string] | No | List of conflict IDs; an empty list returns an empty result list |
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "conflict_id": "123:file1.pdf",
+      "action": "keep_new",
+      "reason": "New version has more complete metadata"
+    },
+    {
+      "conflict_id": "124:file2.pdf",
+      "error": "Paper not found"
+    }
+  ]
+}
+```
+
+Each element is either `{conflict_id, action, reason}` or `{conflict_id, error}`.
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/auto-resolve" \
+  -H "Content-Type: application/json" \
+  -d '{"conflict_ids":["123:paper.pdf"]}'
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 400 | Invalid `conflict_id` format, `action`, or request body |
+| 404 | Paper not found or PDF file not found |
diff --git a/docs/api/index.md b/docs/api/index.md
index 05345bb..86818bc 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -54,6 +54,14 @@ GET /api/v1/tasks/{task_id}
 | [Papers](/api/papers) | `/projects/{id}/papers` |
 | [Keywords](/api/keywords) | `/projects/{id}/keywords` |
 | [Search](/api/search) | `/projects/{id}/search` |
+| [Dedup](/api/dedup) | `/projects/{id}/dedup` |
+| [OCR](/api/ocr) | `/projects/{id}/ocr` |
+| [Crawler](/api/crawler) | `/projects/{id}/crawl` |
+| [Subscription](/api/subscription) | `/projects/{id}/subscriptions` |
 | [RAG](/api/rag) | `/projects/{id}/rag` |
 | [Writing](/api/writing) | `/projects/{id}/writing` |
-| Tasks | `/tasks` |
+| [Chat](/api/chat) | `/chat` |
+| [Conversations](/api/conversations) | `/conversations` |
+| [Settings](/api/settings) | `/settings` |
+| [Tasks](/api/tasks) | `/tasks` |
+| [Pipelines](/api/pipelines) | `/pipelines` |
diff --git a/docs/api/keywords.md b/docs/api/keywords.md
index 6a01c0c..f8ae072 100644
--- a/docs/api/keywords.md
+++ b/docs/api/keywords.md
@@ -12,7 +12,7 @@ Base path: `/api/v1/projects/{project_id}/keywords`
 | PUT | `/projects/{id}/keywords/{kw_id}` | Update keyword |
 | DELETE | `/projects/{id}/keywords/{kw_id}` | Delete keyword |
 | POST | `/projects/{id}/keywords/expand` | LLM expand |
-| GET | `/projects/{id}/keywords/search-formula` | Generate formula |
+| GET | `/projects/{id}/keywords/search-formula` | Generate search formula |
 
 ## Query Parameters (List)
 
@@ -31,6 +31,14 @@ Base path: `/api/v1/projects/{project_id}/keywords`
 }
 ```
 
+## Bulk Create
+
+`POST /projects/{id}/keywords/bulk` — Create multiple keywords at once.
+
+**Request body:** Array of `KeywordCreate` objects.
+
+**Response:** `{ created }` — Number of keywords created.
+
 ## Expand Request
 
 ```json
@@ -41,6 +49,30 @@ Base path: `/api/v1/projects/{project_id}/keywords`
 }
 ```
 
+## Expand Response
+
+Returns `expanded_terms` as a list of objects:
+
+```json
+{
+  "expanded_terms": [
+    {"term": "self-attention", "term_zh": "自注意力", "relation": "synonym"},
+    {"term": "BERT", "term_zh": "", "relation": "abbreviation"}
+  ],
+  "source": "llm:openai"
+}
+```
+
+- `term` — Expanded term (English)
+- `term_zh` — Chinese translation (optional)
+- `relation` — `synonym`, `abbreviation`, or `related`
+
 ## Search Formula
 
-Query param: `database` — `wos`, `scopus`, or `pubmed`
+`GET /projects/{id}/keywords/search-formula?database=wos` — Generate a boolean search formula from project keywords for a specific database.
+
+**Query parameters:**
+
+- `database` — Target database: `wos`, `scopus`, or `pubmed` (default: `wos`)
+
+**Response:** `{ formula, database, keyword_count }`
diff --git a/docs/api/ocr.md b/docs/api/ocr.md
new file mode 100644
index 0000000..e4b0938
--- /dev/null
+++ b/docs/api/ocr.md
@@ -0,0 +1,34 @@
+# OCR API
+
+Base path: `/api/v1/projects/{project_id}/ocr`
+
+## Overview
+
+OCR and text extraction for PDF papers. Uses pdfplumber for native PDFs and PaddleOCR for scanned documents.
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/projects/{id}/ocr/process` | Run OCR on papers |
+| GET | `/projects/{id}/ocr/stats` | OCR statistics |
+
+## Process
+
+`POST /projects/{id}/ocr/process` — Extract text from PDFs via OCR.
+
+**Query parameters:**
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `paper_ids` | list[int] | Optional. Specific paper IDs. If omitted, all `pdf_downloaded` papers are processed. |
+| `force_ocr` | bool | Re-run OCR even if already processed (default: false) |
+| `use_gpu` | bool | Use GPU for PaddleOCR (default: true) |
+
+**Response:** `{ processed, failed, total, message? }`
+
+## Stats
+
+`GET /projects/{id}/ocr/stats` — Return paper counts by status and total chunk count.
+
+**Response:** `{ metadata_only: n, pdf_downloaded: n, ocr_complete: n, indexed: n, error: n, total_chunks: n }`
diff --git a/docs/api/papers.md b/docs/api/papers.md
index 0c52078..ab9288e 100644
--- a/docs/api/papers.md
+++ b/docs/api/papers.md
@@ -9,6 +9,8 @@ Base path: `/api/v1/projects/{project_id}/papers`
 | GET | `/projects/{id}/papers` | List papers (paginated) |
 | POST | `/projects/{id}/papers` | Create paper |
 | POST | `/projects/{id}/papers/bulk` | Bulk import |
+| POST | `/projects/{id}/papers/upload` | Multipart file upload (PDFs) |
+| POST | `/projects/{id}/papers/process` | Trigger processing for papers |
 | GET | `/projects/{id}/papers/{paper_id}` | Get paper |
 | PUT | `/projects/{id}/papers/{paper_id}` | Update paper |
 | DELETE | `/projects/{id}/papers/{paper_id}` | Delete paper |
@@ -39,3 +41,31 @@ Base path: `/api/v1/projects/{project_id}/papers`
   "status": "metadata_only"
 }
 ```
+
+## Upload (Multipart)
+
+`POST /projects/{id}/papers/upload` — Upload PDF files. Accepts `multipart/form-data` with `files` (one or more PDFs). Extracts metadata, runs dedup check, and queues processing for new papers.
+
+**Response:** `{ papers, conflicts, total_uploaded }`
+
+- `papers` — List of newly created paper metadata
+- `conflicts` — Dedup conflicts (DOI or title similarity)
+- `total_uploaded` — Count of files successfully uploaded
+
+## Process
+
+`POST /projects/{id}/papers/process` — Trigger OCR + RAG indexing for papers.
+
+**Query parameters:**
+
+- `paper_ids` — Optional list of paper IDs. If omitted, all unprocessed papers in the project are queued.
+
+**Response:** `{ queued, message }`
+
+## Bulk Import Response
+
+`POST /projects/{id}/papers/bulk` returns `{ created, skipped, total }`:
+
+- `created` — Number of papers imported
+- `skipped` — Number skipped (duplicate DOI)
+- `total` — Total papers in request
diff --git a/docs/api/pipelines.md b/docs/api/pipelines.md
new file mode 100644
index 0000000..6a71438
--- /dev/null
+++ b/docs/api/pipelines.md
@@ -0,0 +1,71 @@
+# Pipelines API
+
+Base path: `/api/v1/pipelines`
+
+## Overview
+
+LangGraph pipeline orchestration for search and upload workflows. Pipelines run asynchronously and support HITL (human-in-the-loop) interrupt for conflict resolution.
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/pipelines/search` | Start keyword-search pipeline |
+| POST | `/pipelines/upload` | Start PDF-upload pipeline |
+| GET | `/pipelines/{thread_id}/status` | Get pipeline status |
+| POST | `/pipelines/{thread_id}/resume` | Resume interrupted pipeline |
+| POST | `/pipelines/{thread_id}/cancel` | Cancel running pipeline |
+
+## Search Pipeline
+
+`POST /pipelines/search` — Start search → dedup → crawl → OCR → index pipeline.
+
+**Request body:**
+
+```json
+{
+  "project_id": 1,
+  "query": "transformer attention",
+  "sources": ["semantic_scholar", "openalex"],
+  "max_results": 50
+}
+```
+
+**Response:** `{ thread_id, status, project_id }`
+
+## Upload Pipeline
+
+`POST /pipelines/upload` — Start extract → dedup → OCR → index pipeline for local PDF paths.
+
+**Request body:**
+
+```json
+{
+  "project_id": 1,
+  "pdf_paths": ["/path/to/paper1.pdf", "/path/to/paper2.pdf"]
+}
+```
+
+Paths must be within the configured `PDF_DIR` (see settings).
+
+**Response:** `{ thread_id, status, project_id }`
+
+## Status
+
+`GET /pipelines/{thread_id}/status` — Returns `status` (`running`, `interrupted`, `completed`, `failed`, `cancelled`). When `interrupted`, includes `conflicts` for HITL resolution.
+
+## Resume
+
+`POST /pipelines/{thread_id}/resume` — Resume interrupted pipeline with resolved conflicts.
+
+**Request body:**
+
+```json
+{
+  "resolved_conflicts": []
+}
+```
+
+## Cancel
+
+`POST /pipelines/{thread_id}/cancel` — Cancel a running pipeline.
diff --git a/docs/api/projects.md b/docs/api/projects.md
index e049ab5..7d7b9a1 100644
--- a/docs/api/projects.md
+++ b/docs/api/projects.md
@@ -11,6 +11,8 @@ Base path: `/api/v1/projects`
 | GET | `/projects/{id}` | Get project |
 | PUT | `/projects/{id}` | Update project |
 | DELETE | `/projects/{id}` | Delete project |
+| POST | `/projects/{id}/pipeline/run` | Run full pipeline (crawl → OCR → index) for all pending papers |
+| POST | `/projects/{id}/pipeline/paper/{paper_id}` | Run pipeline for a single paper |
 
 ## Query Parameters (List)
 
diff --git a/docs/api/rag.md b/docs/api/rag.md
index 1187587..5c36b69 100644
--- a/docs/api/rag.md
+++ b/docs/api/rag.md
@@ -8,6 +8,7 @@ Base path: `/api/v1/projects/{project_id}/rag`
 |--------|----------|-------------|
 | POST | `/projects/{id}/rag/query` | Query knowledge base |
 | POST | `/projects/{id}/rag/index` | Build/rebuild index |
+| POST | `/projects/{id}/rag/index/stream` | Build index (SSE streaming progress) |
 | GET | `/projects/{id}/rag/stats` | Index statistics |
 | DELETE | `/projects/{id}/rag/index` | Delete index |
 
@@ -15,12 +16,18 @@ Base path: `/api/v1/projects/{project_id}/rag`
 
 ```json
 {
-  "query": "What is attention mechanism?",
+  "question": "What is attention mechanism?",
   "top_k": 10,
-  "use_reranker": true
+  "use_reranker": true,
+  "include_sources": true
 }
 ```
 
+- `question` — The question to answer (required)
+- `top_k` — Number of chunks to retrieve (default: 10)
+- `use_reranker` — Apply reranker for relevance (default: true)
+- `include_sources` — Include source chunks in response (default: true)
+
 ## Query Response
 
 ```json
@@ -28,6 +35,25 @@ Base path: `/api/v1/projects/{project_id}/rag`
   "answer": "LLM-generated answer with citations",
   "sources": [
     {"paper_id": 1, "chunk_id": "...", "score": 0.9}
-  ]
+  ],
+  "confidence": 0.0
 }
 ```
+
+## Index Stream (SSE)
+
+`POST /projects/{id}/rag/index/stream` — Rebuild the vector index with Server-Sent Events for progress updates.
+
+**Response:** `text/event-stream`
+
+**Event types:**
+
+| Event | Description | data |
+|-------|-------------|------|
+| `progress` | Indexing progress | `{ stage, percent, message? }` |
+| `complete` | Indexing finished | `{ indexed, collection, papers_updated }` |
+| `error` | Error occurred | `{ message }` |
+
+## Delete Index
+
+`DELETE /projects/{id}/rag/index` — Delete the vector index for the project. Returns `ApiResponse[dict]` with deletion result.
diff --git a/docs/api/settings.md b/docs/api/settings.md
new file mode 100644
index 0000000..fee9a33
--- /dev/null
+++ b/docs/api/settings.md
@@ -0,0 +1,261 @@
+# Settings API
+
+Base path: `/api/v1/settings`
+
+## Overview
+
+The Settings API manages application configuration: LLM provider selection, model parameters, API keys for various providers (OpenAI, Anthropic, Aliyun, Volcengine, Ollama), embedding/reranker models, and other system settings. Values are merged from environment variables with DB overrides; API keys are masked in responses.
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/settings` | Get all settings |
+| PUT | `/settings` | Update settings (partial) |
+| GET | `/settings/models` | List available models per provider |
+| POST | `/settings/test-connection` | Test LLM provider connection |
+| GET | `/settings/health` | Health check |
+
+---
+
+## GET /api/v1/settings
+
+**Description:** Return merged settings (DB overrides .env). API keys are masked (e.g. `sk-12***abcd`).
+
+**Response:** `ApiResponse[SettingsSchema]`
+
+### SettingsSchema
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `llm_provider` | string | Default LLM provider (`openai`, `anthropic`, `aliyun`, `volcengine`, `ollama`, `mock`) |
+| `llm_model` | string | Default model (overrides provider default) |
+| `llm_temperature` | float | Temperature (0.0–2.0) |
+| `llm_max_tokens` | int | Max tokens |
+| `openai_api_key` | string | OpenAI API key (masked) |
+| `openai_model` | string | OpenAI model |
+| `anthropic_api_key` | string | Anthropic API key (masked) |
+| `anthropic_model` | string | Anthropic model |
+| `aliyun_api_key` | string | Aliyun API key (masked) |
+| `aliyun_base_url` | string | Aliyun base URL |
+| `aliyun_model` | string | Aliyun model |
+| `volcengine_api_key` | string | Volcengine API key (masked) |
+| `volcengine_base_url` | string | Volcengine base URL |
+| `volcengine_model` | string | Volcengine model |
+| `ollama_base_url` | string | Ollama base URL |
+| `ollama_model` | string | Ollama model |
+| `embedding_model` | string | Embedding model name |
+| `reranker_model` | string | Reranker model name |
+| `data_dir` | string | Data directory path |
+| `cuda_visible_devices` | string | CUDA device IDs |
+| `semantic_scholar_api_key` | string | Semantic Scholar API key (masked) |
+| `unpaywall_email` | string | Unpaywall email |
+
+### Get Settings Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "llm_provider": "openai",
+    "llm_model": "gpt-4o-mini",
+    "llm_temperature": 0.7,
+    "llm_max_tokens": 4096,
+    "openai_api_key": "sk-12***abcd",
+    "openai_model": "gpt-4o-mini",
+    "anthropic_api_key": "",
+    "anthropic_model": "",
+    "aliyun_api_key": "",
+    "aliyun_base_url": "",
+    "aliyun_model": "",
+    "volcengine_api_key": "",
+    "volcengine_base_url": "",
+    "volcengine_model": "",
+    "ollama_base_url": "http://localhost:11434",
+    "ollama_model": "",
+    "embedding_model": "BAAI/bge-m3",
+    "reranker_model": "",
+    "data_dir": "/data0/djx/omelette",
+    "cuda_visible_devices": "",
+    "semantic_scholar_api_key": "",
+    "unpaywall_email": ""
+  }
+}
+```
+
+---
+
+## PUT /api/v1/settings
+
+**Description:** Update user-configurable settings. Only non-null fields are applied. Masked API keys (containing `***`) are skipped to avoid overwriting secrets.
+
+**Request:** `SettingsUpdateSchema` (partial, all fields optional)
+
+| Field | Type | Constraints |
+|-------|------|-------------|
+| `llm_provider` | string | — |
+| `llm_model` | string | — |
+| `llm_temperature` | float | 0.0–2.0 |
+| `llm_max_tokens` | int | 1–128000 |
+| `openai_api_key` | string | — |
+| `openai_model` | string | — |
+| `anthropic_api_key` | string | — |
+| `anthropic_model` | string | — |
+| `aliyun_api_key` | string | — |
+| `aliyun_base_url` | string | — |
+| `aliyun_model` | string | — |
+| `volcengine_api_key` | string | — |
+| `volcengine_base_url` | string | — |
+| `volcengine_model` | string | — |
+| `ollama_base_url` | string | — |
+| `ollama_model` | string | — |
+
+**Response:** `ApiResponse[SettingsSchema]` (updated merged settings)
+
+### Update Settings Example
+
+```bash
+curl -X PUT "http://localhost:8000/api/v1/settings" \
+  -H "Content-Type: application/json" \
+  -d '{"llm_provider": "openai", "llm_model": "gpt-4o-mini"}'
+```
+
+---
+
+## GET /api/v1/settings/models
+
+**Description:** Return available LLM providers and their model lists.
+
+**Response:** `ApiResponse[list[ProviderModelInfo]]`
+
+### ProviderModelInfo
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `provider` | string | Provider ID |
+| `display_name` | string | Display name |
+| `models` | string[] | List of model IDs |
+| `requires_api_key` | bool | Whether API key is required |
+| `requires_base_url` | bool | Whether base URL is configurable |
+| `default_base_url` | string | Default base URL if applicable |
+
+### List Models Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings/models"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "provider": "openai",
+      "display_name": "OpenAI",
+      "models": ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3-mini"],
+      "requires_api_key": true,
+      "requires_base_url": false,
+      "default_base_url": ""
+    },
+    {
+      "provider": "ollama",
+      "display_name": "Ollama (本地)",
+      "models": ["llama3", "llama3.1", "mistral", "qwen2", "deepseek-r1"],
+      "requires_api_key": false,
+      "requires_base_url": true,
+      "default_base_url": "http://localhost:11434"
+    }
+  ]
+}
+```
+
+---
+
+## POST /api/v1/settings/test-connection
+
+**Description:** Test the current LLM configuration by sending a simple prompt. Uses the current merged settings (`.env` values with DB overrides applied); the request takes no body.
+
+**Response:** `ApiResponse[dict]`
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `success` | bool | Whether the test succeeded |
+| `response` | string | First 200 chars of LLM response (on success) |
+| `error` | string | Error message (on failure) |
+
+### Test Connection Example
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/settings/test-connection"
+```
+
+**Success:**
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "success": true,
+    "response": "OK."
+  }
+}
+```
+
+**Failure:**
+```json
+{
+  "code": 500,
+  "message": "Connection test failed",
+  "data": {
+    "success": false,
+    "error": "Invalid API key"
+  }
+}
+```
+
+---
+
+## GET /api/v1/settings/health
+
+**Description:** Simple health check endpoint.
+
+**Response:** `ApiResponse[dict]`
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `status` | string | `"healthy"` |
+| `version` | string | Application version |
+
+### Health Check Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings/health"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "status": "healthy",
+    "version": "0.1.0"
+  }
+}
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 200 | Success |
+| 400 | Bad request (e.g. invalid temperature range) |
+| 422 | Validation error (invalid request body) |
+| 500 | Server error (e.g. connection test failure) |
diff --git a/docs/api/subscription.md b/docs/api/subscription.md
new file mode 100644
index 0000000..57e6e35
--- /dev/null
+++ b/docs/api/subscription.md
@@ -0,0 +1,256 @@
+# Subscription API
+
+Subscription module API for managing incremental literature updates (RSS / API search).
+
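+**Base path:** `/api/v1/projects/{project_id}/subscriptions` — paths in the endpoint table are relative to this base path; section headings written as `/subscriptions…` refer to the base path itself.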
+
+---
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/feeds` | Get common academic RSS feed templates |
+| GET | `/` | List project subscriptions |
+| POST | `/` | Create subscription |
+| GET | `/{sub_id}` | Get single subscription |
+| PUT | `/{sub_id}` | Update subscription |
+| DELETE | `/{sub_id}` | Delete subscription |
+| POST | `/{sub_id}/trigger` | Manually trigger subscription update |
+| POST | `/check-rss` | Check RSS feed |
+| POST | `/check-updates` | Check API for updates |
+
+---
+
+## GET /feeds
+
+Return common academic RSS feed templates. The `project_id` in the path is not used by the handler logic.
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "name": "arXiv CS",
+      "url": "https://...",
+      "description": "..."
+    }
+  ]
+}
+```
+
+---
+
+## GET /subscriptions
+
+List all subscriptions for the project.
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "id": 1,
+      "project_id": 1,
+      "name": "arXiv CS.AI",
+      "query": "machine learning",
+      "sources": ["arxiv"],
+      "frequency": "weekly",
+      "max_results": 50,
+      "is_active": true,
+      "last_run_at": "2025-03-10T12:00:00",
+      "total_found": 120,
+      "created_at": "2025-01-01T00:00:00",
+      "updated_at": "2025-03-10T12:00:00"
+    }
+  ]
+}
+```
+
+---
+
+## POST /subscriptions
+
+Create a new subscription.
+
+**Request Body**
+
+```json
+{
+  "name": "arXiv CS.AI",
+  "query": "machine learning",
+  "sources": ["arxiv", "semantic_scholar"],
+  "frequency": "weekly",
+  "max_results": 50
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | Yes | Subscription name |
+| `query` | string | No | Search query, default `""` |
+| `sources` | list[string] | No | Data sources, default `[]` |
+| `frequency` | string | No | `daily` \| `weekly` \| `monthly`, default `weekly` |
+| `max_results` | int | No | Max results per run 1–200, default 50 |
+
+**Response**
+
+```json
+{
+  "code": 201,
+  "message": "Subscription created",
+  "data": {
+    "id": 1,
+    "project_id": 1,
+    "name": "arXiv CS.AI",
+    "query": "machine learning",
+    "sources": ["arxiv"],
+    "frequency": "weekly",
+    "max_results": 50,
+    "is_active": true,
+    "last_run_at": null,
+    "total_found": 0,
+    "created_at": "2025-03-12T00:00:00",
+    "updated_at": "2025-03-12T00:00:00"
+  }
+}
+```
+
+---
+
+## PUT /subscriptions/{sub_id}
+
+Update a subscription.
+
+**Request Body**
+
+```json
+{
+  "name": "arXiv CS.AI (updated)",
+  "query": "deep learning",
+  "is_active": false
+}
+```
+
+All fields are optional; include only the fields you want to update.
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": { ... }
+}
+```
+
+---
+
+## DELETE /subscriptions/{sub_id}
+
+Delete a subscription.
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "Subscription deleted",
+  "data": null
+}
+```
+
+---
+
+## POST /subscriptions/{sub_id}/trigger
+
+Manually trigger a subscription update (checks the API for new papers).
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `since_days` | int | 7 | Query last N days, 1–365 |
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "new_papers": 5,
+    "total_checked": 120,
+    "sources_searched": ["arxiv", "semantic_scholar"]
+  }
+}
+```
+
+---
+
+## POST /check-rss
+
+Check an RSS feed (does not require a saved subscription).
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `feed_url` | string | — | RSS/Atom feed URL |
+| `since_days` | int | 7 | Query last N days, 1–365 |
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "entries": [...],
+    "count": 10
+  }
+}
+```
+
+---
+
+## POST /check-updates
+
+Check for new papers via API search (does not require a saved subscription).
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `query` | string | `""` | Search query |
+| `sources` | list[string] | null | Data sources |
+| `since_days` | int | 7 | Query last N days, 1–365 |
+| `max_results` | int | 50 | Max results 1–200 |
+
+**Response**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "new_papers": [...],
+    "total_found": 50,
+    "sources_checked": { "arxiv": 30, "semantic_scholar": 20 }
+  }
+}
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 404 | Subscription not found |
diff --git a/docs/api/tasks.md b/docs/api/tasks.md
new file mode 100644
index 0000000..f217a5d
--- /dev/null
+++ b/docs/api/tasks.md
@@ -0,0 +1,163 @@
+# Tasks API
+
+Base path: `/api/v1/tasks`
+
+## Overview
+
+The Tasks API manages background processing jobs: search, dedup, crawl, OCR, index, keyword expansion. Tasks are created by pipelines and other services; this API provides listing, detail retrieval, and cancellation.
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/tasks` | List tasks |
+| GET | `/tasks/{id}` | Get task detail |
+| POST | `/tasks/{id}/cancel` | Cancel a pending or running task |
+
+---
+
+## GET /api/v1/tasks
+
+**Description:** List tasks with optional filters. Results are ordered by `created_at` descending.
+
+**Query Parameters**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `project_id` | int | No | Filter by project ID |
+| `status` | string | No | Filter by status: `pending`, `running`, `completed`, `failed`, `cancelled` |
+| `limit` | int | No | Max results (default: 50) |
+
+**Response:** `ApiResponse[list[TaskSchema]]`
+
+### TaskSchema (list view)
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `id` | int | Task ID |
+| `project_id` | int | Project ID |
+| `task_type` | string | `search`, `dedup`, `crawl`, `ocr`, `index`, `keyword_expand` |
+| `status` | string | `pending`, `running`, `completed`, `failed`, `cancelled` |
+| `progress` | int | Current progress |
+| `total` | int | Total steps |
+| `created_at` | string | ISO 8601 datetime |
+
+### List Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/tasks?project_id=1&status=running&limit=20"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "id": 42,
+      "project_id": 1,
+      "task_type": "search",
+      "status": "running",
+      "progress": 30,
+      "total": 100,
+      "created_at": "2025-03-12T10:00:00"
+    }
+  ]
+}
+```
+
+---
+
+## GET /api/v1/tasks/{id}
+
+**Description:** Get full task detail including params, result, and error message.
+
+**Path Parameters**
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `id` | int | Task ID |
+
+**Response:** `ApiResponse[TaskDetailSchema]`
+
+### TaskDetailSchema
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `id` | int | Task ID |
+| `project_id` | int | Project ID |
+| `task_type` | string | Task type |
+| `status` | string | Task status |
+| `progress` | int | Current progress |
+| `total` | int | Total steps |
+| `params` | object | Input parameters |
+| `result` | object | Output result (when completed) |
+| `error_message` | string | Error message (when failed) |
+| `created_at` | string | ISO 8601 datetime |
+| `started_at` | string | ISO 8601 datetime (nullable) |
+| `completed_at` | string | ISO 8601 datetime (nullable) |
+
+### Detail Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/tasks/42"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "id": 42,
+    "project_id": 1,
+    "task_type": "search",
+    "status": "completed",
+    "progress": 100,
+    "total": 100,
+    "params": {"query": "machine learning", "sources": ["semantic_scholar"]},
+    "result": {"papers_found": 15, "imported": 10},
+    "error_message": "",
+    "created_at": "2025-03-12T10:00:00",
+    "started_at": "2025-03-12T10:00:01",
+    "completed_at": "2025-03-12T10:02:30"
+  }
+}
+```
+
+---
+
+## POST /api/v1/tasks/{id}/cancel
+
+**Description:** Cancel a running or pending task. Tasks in `completed`, `failed`, or `cancelled` state cannot be cancelled.
+
+**Path Parameters**
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `id` | int | Task ID |
+
+**Response:** `ApiResponse` (no data)
+
+### Cancel Example
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/tasks/42/cancel"
+```
+
+```json
+{
+  "code": 200,
+  "message": "Task cancelled",
+  "data": null
+}
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 200 | Success |
+| 400 | Cannot cancel task (already completed/failed/cancelled) |
+| 404 | Task not found |
diff --git a/docs/api/writing.md b/docs/api/writing.md
index 8c47c57..50409bf 100644
--- a/docs/api/writing.md
+++ b/docs/api/writing.md
@@ -6,12 +6,37 @@ Base path: `/api/v1/projects/{project_id}/writing`
 
 | Method | Endpoint | Description |
 |--------|----------|-------------|
-| POST | `/projects/{id}/writing/assist` | General assistance |
+| POST | `/projects/{id}/writing/assist` | General writing assistance |
 | POST | `/projects/{id}/writing/summarize` | Summarize papers |
 | POST | `/projects/{id}/writing/citations` | Generate citations |
 | POST | `/projects/{id}/writing/review-outline` | Review outline |
 | POST | `/projects/{id}/writing/gap-analysis` | Gap analysis |
 
+## Assist (General)
+
+`POST /projects/{id}/writing/assist` — AI-powered writing assistance: summarization, citation generation, outline review, or gap analysis, selected by the `task` field.
+
+**Request body:**
+
+```json
+{
+  "task": "summarize",
+  "text": "",
+  "paper_ids": [1, 2],
+  "topic": "Literature Review",
+  "style": "gb_t_7714",
+  "language": "en"
+}
+```
+
+- `task` — `summarize`, `cite`, `review_outline`, or `gap_analysis`
+- `paper_ids` — Paper IDs (for summarize/cite)
+- `topic` — Topic for outline/gap analysis
+- `style` — Citation style (for cite task)
+- `language` — Output language (default: `en`)
+
+**Response:** `{ content, citations, suggestions }`
+
 ## Summarize Request
 
 ```json
@@ -25,8 +50,8 @@ Base path: `/api/v1/projects/{project_id}/writing`
 ```json
 {
   "paper_ids": [1, 2],
-  "style": "gb7714"
+  "style": "gb_t_7714"
 }
 ```
 
-Styles: `gb7714`, `apa`, `mla`
+**Citation styles:** `gb_t_7714`, `apa`, `mla`
diff --git a/docs/zh/api/chat.md b/docs/zh/api/chat.md
new file mode 100644
index 0000000..fc2d867
--- /dev/null
+++ b/docs/zh/api/chat.md
@@ -0,0 +1,121 @@
+# Chat API
+
+Chat 模块提供基于 SSE 的流式对话与文本改写接口，支持知识库 RAG 检索、多工具模式及实时流式输出。
+
+**Base path:** `/api/v1/chat`
+
+---
+
+## 1. 流式对话
+
+### POST /api/v1/chat/stream
+
+基于 SSE 的流式对话接口，支持知识库检索、引用标注及多轮对话上下文。
+
+#### 请求体 (ChatStreamRequest)
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `conversation_id` | int | 否 | 对话 ID，续写时传入以保持上下文 |
+| `message` | str | 是 | 用户消息内容（至少 1 字符） |
+| `knowledge_base_ids` | list[int] | 否 | 知识库（项目）ID 列表，用于 RAG 检索 |
+| `model` | str | 否 | 模型标识，空则使用用户设置 |
+| `tool_mode` | str | 否 | 工具模式，默认 `"qa"` |
+
+**tool_mode 可选值：**
+
+| 值 | 说明 |
+|----|------|
+| `qa` | 问答模式：基于上下文回答问题，使用 [1]、[2] 等引用格式 |
+| `citation_lookup` | 引用查找：识别并列出与文本最相关的参考文献 |
+| `review_outline` | 综述提纲：生成结构化文献综述提纲 |
+| `gap_analysis` | 研究缺口分析：识别研究空白与未来方向 |
+
+#### 对话响应格式
+
+SSE 流式响应，`Content-Type: text/event-stream`。
+
+#### 对话 SSE 事件类型
+
+| 事件 | 说明 | data 字段 |
+|------|------|-----------|
+| `message_start` | 消息开始 | `{ message_id }` |
+| `citation` | 引用信息（每个来源一条） | `{ index, paper_id, paper_title, page_number, excerpt, relevance_score, chunk_type, authors, year, doi }` |
+| `text_delta` | 文本增量 | `{ delta }` |
+| `message_end` | 消息结束 | `{ message_id, conversation_id, finish_reason }` |
+| `error` | 错误 | `{ code, message }` |
+
+#### 对话示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/chat/stream" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "什么是注意力机制？",
+    "knowledge_base_ids": [1, 2],
+    "tool_mode": "qa"
+  }'
+```
+
+#### 对话错误码
+
+| code | 说明 |
+|------|------|
+| `stream_error` | 流式处理异常 |
+
+---
+
+## 2. 文本改写
+
+### POST /api/v1/chat/rewrite
+
+基于 SSE 的流式文本改写接口，支持多种风格与自定义提示。
+
+#### 请求体 (RewriteRequest)
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `excerpt` | str | 是 | 待改写文本，**最多 2000 字符** |
+| `style` | str | 是 | 改写风格 |
+| `custom_prompt` | str | 否 | 自定义提示，`style=custom` 时必填 |
+| `source_language` | str | 否 | 源语言，默认 `"auto"` |
+
+**style 可选值：**
+
+| 值 | 说明 |
+|----|------|
+| `simplify` | 通俗化：将学术文本改写为易懂语言 |
+| `academic` | 学术化：改写为正式学术风格 |
+| `translate_en` | 英译：翻译为英文 |
+| `translate_zh` | 中译：翻译为中文 |
+| `custom` | 自定义：使用 `custom_prompt` 作为系统提示 |
+
+#### 改写响应格式
+
+SSE 流式响应，`Content-Type: text/event-stream`。
+
+#### 改写 SSE 事件类型
+
+| 事件 | 说明 | data 字段 |
+|------|------|-----------|
+| `rewrite_delta` | 改写文本增量 | `{ delta }` |
+| `rewrite_end` | 改写完成 | `{ full_text }` |
+| `error` | 错误 | `{ code, message }` |
+
+#### 改写示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/chat/rewrite" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "excerpt": "The attention mechanism allows the model to focus on different parts of the input.",
+    "style": "translate_zh"
+  }'
+```
+
+#### 改写错误码
+
+| code | 说明 |
+|------|------|
+| `timeout` | 改写超时（30 秒） |
+| `rewrite_error` | 改写处理异常 |
diff --git a/docs/zh/api/conversations.md b/docs/zh/api/conversations.md
new file mode 100644
index 0000000..3d5e2dd
--- /dev/null
+++ b/docs/zh/api/conversations.md
@@ -0,0 +1,232 @@
+# Conversations API
+
+Conversations 模块提供对话的 CRUD 接口，支持分页列表、按知识库筛选及消息详情查询。
+
+**Base path:** `/api/v1/conversations`
+
+---
+
+## 端点总览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/conversations` | 分页列表 |
+| POST | `/conversations` | 创建对话 |
+| GET | `/conversations/{id}` | 获取详情（含消息） |
+| PUT | `/conversations/{id}` | 更新对话 |
+| DELETE | `/conversations/{id}` | 删除对话 |
+
+---
+
+## GET /conversations — 列表对话
+
+分页获取对话列表，按更新时间倒序，支持按知识库 ID 筛选。
+
+### 查询参数
+
+| 参数 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `page` | int | 否 | 页码，默认 1 |
+| `page_size` | int | 否 | 每页条数，默认 20 |
+| `knowledge_base_id` | int | 否 | 仅返回包含该知识库的对话 |
+
+### 列表响应格式
+
+`ApiResponse[PaginatedData[ConversationListSchema]]`
+
+**ConversationListSchema 字段：**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+| `title` | str | 标题 |
+| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 |
+| `model` | str | 模型标识 |
+| `tool_mode` | str | 工具模式，默认 `"qa"` |
+| `created_at` | datetime | 创建时间 |
+| `updated_at` | datetime | 更新时间 |
+| `message_count` | int | 消息数量 |
+| `last_message_preview` | str | 最后一条消息预览（最多 100 字符） |
+
+**PaginatedData 结构：**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "items": [...],
+    "total": 42,
+    "page": 1,
+    "page_size": 20,
+    "total_pages": 3
+  }
+}
+```
+
+### 列表示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/conversations?page=1&page_size=20"
+curl -X GET "http://localhost:8000/api/v1/conversations?knowledge_base_id=1"
+```
+
+---
+
+## POST /conversations — 创建对话
+
+创建新对话。
+
+### 创建请求体
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `title` | str | 否 | 标题，默认 `"新对话"` |
+| `knowledge_base_ids` | list[int] | 否 | 知识库 ID 列表 |
+| `model` | str | 否 | 模型标识 |
+| `tool_mode` | str | 否 | 工具模式，默认 `"qa"` |
+
+### 创建响应格式
+
+`ApiResponse[ConversationSchema]`，包含完整对话及空 `messages` 数组。
+
+### 创建示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/conversations" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "title": "文献综述讨论",
+    "knowledge_base_ids": [1, 2],
+    "tool_mode": "review_outline"
+  }'
+```
+
+---
+
+## GET /conversations/{id} — 获取对话详情
+
+获取单个对话及其全部消息。
+
+### 详情路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 详情响应格式
+
+`ApiResponse[ConversationSchema]`
+
+**ConversationSchema 字段：**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+| `title` | str | 标题 |
+| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 |
+| `model` | str | 模型标识 |
+| `tool_mode` | str | 工具模式 |
+| `created_at` | datetime | 创建时间 |
+| `updated_at` | datetime | 更新时间 |
+| `messages` | list[MessageSchema] | 消息列表 |
+
+**MessageSchema 字段：**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 消息 ID |
+| `conversation_id` | int | 对话 ID |
+| `role` | str | 角色：`user` / `assistant` |
+| `content` | str | 内容 |
+| `citations` | list[dict] \| null | 引用列表（assistant 消息） |
+| `created_at` | datetime | 创建时间 |
+
+### 详情示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/conversations/1"
+```
+
+### 详情错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
+
+---
+
+## PUT /conversations/{id} — 更新对话
+
+更新对话标题或设置。
+
+### 更新路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 更新请求体
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `title` | str | 否 | 新标题 |
+| `model` | str | 否 | 新模型 |
+| `tool_mode` | str | 否 | 新工具模式 |
+
+仅传入需要更新的字段。
+
+### 更新响应格式
+
+`ApiResponse[ConversationSchema]`，包含更新后的完整对话及消息。
+
+### 更新示例
+
+```bash
+curl -X PUT "http://localhost:8000/api/v1/conversations/1" \
+  -H "Content-Type: application/json" \
+  -d '{"title": "新标题"}'
+```
+
+### 更新错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
+
+---
+
+## DELETE /conversations/{id} — 删除对话
+
+删除对话及其全部消息（级联删除）。
+
+### 删除路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 删除响应格式
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "deleted": true,
+    "id": 1
+  }
+}
+```
+
+### 删除示例
+
+```bash
+curl -X DELETE "http://localhost:8000/api/v1/conversations/1"
+```
+
+### 删除错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
diff --git a/docs/zh/api/crawler.md b/docs/zh/api/crawler.md
new file mode 100644
index 0000000..2273361
--- /dev/null
+++ b/docs/zh/api/crawler.md
@@ -0,0 +1,20 @@
+# Crawler API
+
+路径：`/api/v1/projects/{project_id}/crawl`
+
+## 端点
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | /projects/{id}/crawl/start | 启动 PDF 下载 |
+| GET | /projects/{id}/crawl/stats | 下载统计 |
+
+## POST /start
+
+对项目内待下载文献启动 PDF 下载（Unpaywall 等多源回退）。仅处理 `pending` 或 `metadata_only` 状态文献。
+
+**查询参数：** `priority`（high/low）、`max_papers`（默认 50）
+
+## GET /stats
+
+返回项目内各状态文献数量及存储统计。
diff --git a/docs/zh/api/dedup.md b/docs/zh/api/dedup.md
new file mode 100644
index 0000000..b3a2d27
--- /dev/null
+++ b/docs/zh/api/dedup.md
@@ -0,0 +1,229 @@
+# 去重 API
+
+去重模块 API，支持 DOI 精确去重、标题相似度去重及 LLM 辅助验证。
+
+**基础路径：** `/api/v1/projects/{project_id}/dedup`
+
+---
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | `/run` | 执行去重流程 |
+| GET | `/candidates` | 列出待人工审核的候选重复对 |
+| POST | `/verify` | 使用 LLM 验证两个文献是否为重复 |
+| POST | `/resolve` | 解决单条上传冲突（keep_old / keep_new / merge / skip） |
+| POST | `/auto-resolve` | AI 自动建议冲突解决方式 |
+
+---
+
+## POST /run
+
+执行去重流水线。
+
+**查询参数**
+
+| 参数名 | 类型 | 默认值 | 说明 |
+|--------|------|--------|------|
+| `strategy` | string | `"full"` | 策略：`doi_only` \| `title_only` \| `full` |
+
+**响应**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "stage1_doi_removed": 0,
+    "stage2_title_removed": 0,
+    "stage3_candidates": 5,
+    "total_remaining": 120,
+    "details": {
+      "doi_duplicates": [],
+      "title_duplicates": [],
+      "llm_candidates": []
+    }
+  }
+}
+```
+
+- `strategy=doi_only`：仅 DOI 精确去重
+- `strategy=title_only`：仅标题相似度去重
+- `strategy=full`：完整三阶段（DOI → 标题 → LLM 候选）
+
+**示例**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/run?strategy=full"
+```
+
+---
+
+## GET /candidates
+
+列出待人工审核的候选重复对（标题相似度较高，需 LLM 或人工确认）。
+
+**响应**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "paper_a_id": 10,
+      "paper_b_id": 11,
+      "similarity": 0.92,
+      "paper_a": { "id": 10, "title": "...", "doi": "..." },
+      "paper_b": { "id": 11, "title": "...", "doi": "..." }
+    }
+  ]
+}
+```
+
+**示例**
+
+```bash
+curl "http://localhost:8000/api/v1/projects/1/dedup/candidates"
+```
+
+---
+
+## POST /verify
+
+使用 LLM 判断两个文献是否为重复。
+
+**查询参数**
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `paper_a_id` | int | 是 | 文献 A ID |
+| `paper_b_id` | int | 是 | 文献 B ID |
+
+**响应**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "is_duplicate": true,
+    "reason": "Same paper, different sources"
+  }
+}
+```
+
+**示例**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/verify?paper_a_id=10&paper_b_id=11"
+```
+
+---
+
+## POST /resolve
+
+解决单条上传冲突。`conflict_id` 格式：`{old_paper_id}:{saved_filename}`，由上传接口返回的 `conflicts` 提供。
+
+**请求体**
+
+```json
+{
+  "conflict_id": "123:uploaded.pdf",
+  "action": "keep_old",
+  "merged_paper": null
+}
+```
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `conflict_id` | string | 是 | 冲突 ID，格式 `old_paper_id:saved_filename` |
+| `action` | string | 是 | `keep_old` \| `keep_new` \| `merge` \| `skip` |
+| `merged_paper` | object | 否 | 仅当 `action=merge` 时提供，合并后的元数据 |
+
+**操作说明**
+
+- `keep_old`：保留现有文献，丢弃上传
+- `keep_new`：以新上传为准，创建新文献
+- `merge`：合并元数据，创建新文献（需提供 `merged_paper`）
+- `skip`：以新上传为准，创建新文献（与 keep_new 行为相同）
+
+**响应**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "action": "keep_new",
+    "paper_id": 124,
+    "message": "Created new paper"
+  }
+}
+```
+
+**示例**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/resolve" \
+  -H "Content-Type: application/json" \
+  -d '{"conflict_id":"123:paper.pdf","action":"keep_new"}'
+```
+
+---
+
+## POST /auto-resolve
+
+使用 LLM 批量建议冲突解决方式。
+
+**请求体**
+
+```json
+{
+  "conflict_ids": ["123:file1.pdf", "124:file2.pdf"]
+}
+```
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `conflict_ids` | list[string] | 否 | 冲突 ID 列表；为空则返回空列表 |
+
+**响应**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "conflict_id": "123:file1.pdf",
+      "action": "keep_new",
+      "reason": "New version has more complete metadata"
+    },
+    {
+      "conflict_id": "124:file2.pdf",
+      "error": "Paper not found"
+    }
+  ]
+}
+```
+
+每个元素为 `{conflict_id, action, reason}` 或 `{conflict_id, error}`。
+
+**示例**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/auto-resolve" \
+  -H "Content-Type: application/json" \
+  -d '{"conflict_ids":["123:paper.pdf"]}'
+```
+
+---
+
+## 错误码
+
+| 状态码 | 说明 |
+|--------|------|
+| 400 | 无效的 `conflict_id` 格式、`action` 或请求体 |
+| 404 | 文献不存在或 PDF 文件不存在 |
diff --git a/docs/zh/api/index.md b/docs/zh/api/index.md
index c79a885..f9cc1bd 100644
--- a/docs/zh/api/index.md
+++ b/docs/zh/api/index.md
@@ -50,6 +50,14 @@ GET /api/v1/tasks/{task_id}
 | [Papers](/zh/api/papers) | /projects/{id}/papers |
 | [Keywords](/zh/api/keywords) | /projects/{id}/keywords |
 | [Search](/zh/api/search) | /projects/{id}/search |
+| [Dedup](/zh/api/dedup) | /projects/{id}/dedup |
+| [OCR](/zh/api/ocr) | /projects/{id}/ocr |
+| [Crawler](/zh/api/crawler) | /projects/{id}/crawl |
+| [Subscription](/zh/api/subscription) | /projects/{id}/subscriptions |
 | [RAG](/zh/api/rag) | /projects/{id}/rag |
 | [Writing](/zh/api/writing) | /projects/{id}/writing |
-| Tasks | /tasks |
+| [Chat](/zh/api/chat) | /chat |
+| [Conversations](/zh/api/conversations) | /conversations |
+| [Settings](/zh/api/settings) | /settings |
+| [Tasks](/zh/api/tasks) | /tasks |
+| [Pipelines](/zh/api/pipelines) | /pipelines |
diff --git a/docs/zh/api/keywords.md b/docs/zh/api/keywords.md
index ae5071b..2375f82 100644
--- a/docs/zh/api/keywords.md
+++ b/docs/zh/api/keywords.md
@@ -14,6 +14,10 @@
 | POST | /projects/{id}/keywords/expand | LLM 扩展 |
 | GET | /projects/{id}/keywords/search-formula | 检索公式 |
 
+## 扩展响应
+
+`expanded_terms` 为对象列表：`{ term, term_zh, relation }`，`relation` 为 `synonym`、`abbreviation` 或 `related`。
+
 ## 检索公式
 
-参数 `database`：wos、scopus、pubmed
+`GET /projects/{id}/keywords/search-formula?database=wos` — 查询参数 `database`：`wos`、`scopus`、`pubmed`（默认 `wos`）。
diff --git a/docs/zh/api/ocr.md b/docs/zh/api/ocr.md
new file mode 100644
index 0000000..da9011e
--- /dev/null
+++ b/docs/zh/api/ocr.md
@@ -0,0 +1,93 @@
+# OCR API
+
+OCR 模块 API，用于对已下载 PDF 进行文本提取与分块。
+
+**基础路径：** `/api/v1/projects/{project_id}/ocr`
+
+---
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | `/process` | 对指定或待处理文献执行 OCR |
+| GET | `/stats` | 获取 OCR 统计信息 |
+
+---
+
+## POST /process
+
+对项目内文献执行 OCR 文本提取。支持 pdfplumber（用于原生文本 PDF）与 PaddleOCR（用于扫描版 PDF）。
+
+**查询参数**
+
+| 参数名 | 类型 | 默认值 | 说明 |
+|--------|------|--------|------|
+| `paper_ids` | list[int] | null | 指定文献 ID 列表；为空则处理所有 `pdf_downloaded` 状态文献 |
+| `force_ocr` | bool | false | 是否强制重新 OCR（覆盖已有结果） |
+| `use_gpu` | bool | true | 是否使用 GPU（PaddleOCR） |
+
+**响应**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "processed": 5,
+    "failed": 0,
+    "total": 5
+  }
+}
+```
+
+**示例**
+
+```bash
+# 处理所有待 OCR 文献
+curl -X POST "http://localhost:8000/api/v1/projects/1/ocr/process"
+
+# 处理指定文献并强制重做
+curl -X POST "http://localhost:8000/api/v1/projects/1/ocr/process?paper_ids=1&paper_ids=2&force_ocr=true"
+```
+
+---
+
+## GET /stats
+
+返回项目内 OCR 相关统计。
+
+**响应**
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "pending": 10,
+    "metadata_only": 5,
+    "pdf_downloaded": 3,
+    "ocr_complete": 80,
+    "indexed": 50,
+    "error": 2,
+    "total_chunks": 1200
+  }
+}
+```
+
+- `pending`、`metadata_only`、`pdf_downloaded`、`ocr_complete`、`indexed`、`error`：处于对应状态的文献数量
+- `total_chunks`：项目内分块总数
+
+**示例**
+
+```bash
+curl "http://localhost:8000/api/v1/projects/1/ocr/stats"
+```
+
+---
+
+## 错误码
+
+| 状态码 | 说明 |
+|--------|------|
+| 404 | 项目不存在 |
diff --git a/docs/zh/api/papers.md b/docs/zh/api/papers.md
index dcd0555..c7602e0 100644
--- a/docs/zh/api/papers.md
+++ b/docs/zh/api/papers.md
@@ -9,6 +9,8 @@
 | GET | /projects/{id}/papers | 列表（分页） |
 | POST | /projects/{id}/papers | 创建 |
 | POST | /projects/{id}/papers/bulk | 批量导入 |
+| POST | /projects/{id}/papers/upload | 多文件上传（PDF） |
+| POST | /projects/{id}/papers/process | 触发论文处理 |
 | GET | /projects/{id}/papers/{paper_id} | 获取 |
 | PUT | /projects/{id}/papers/{paper_id} | 更新 |
 | DELETE | /projects/{id}/papers/{paper_id} | 删除 |
@@ -20,3 +22,15 @@
 - `year` — 年份过滤
 - `q` — 标题/摘要搜索
 - `sort_by`, `order` — 排序
+
+## 上传
+
+`POST /projects/{id}/papers/upload` — 多文件上传 PDF，返回 `{ papers, conflicts, total_uploaded }`。
+
+## 处理
+
+`POST /projects/{id}/papers/process` — 触发 OCR + RAG 索引。可选查询参数 `paper_ids`，省略则处理全部待处理论文。
+
+## 批量导入响应
+
+`POST /projects/{id}/papers/bulk` 返回 `{ created, skipped, total }`。
diff --git a/docs/zh/api/pipelines.md b/docs/zh/api/pipelines.md
new file mode 100644
index 0000000..89c1f35
--- /dev/null
+++ b/docs/zh/api/pipelines.md
@@ -0,0 +1,286 @@
+# 流水线 API
+
+基础路径：`/api/v1/pipelines`
+
+## 简介
+
+流水线 API 用于编排 LangGraph 工作流：关键词检索（search → dedup → crawl → OCR → index）和 PDF 上传（extract → dedup → OCR → index）。流水线异步执行，支持 HITL（人机协同）中断以处理去重冲突。使用 `thread_id` 轮询状态、在 HITL 后恢复或取消流水线。
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | `/pipelines/search` | 运行检索流水线（支持 HITL） |
+| POST | `/pipelines/upload` | 运行上传流水线 |
+| GET | `/pipelines/{thread_id}/status` | 获取流水线状态 |
+| POST | `/pipelines/{thread_id}/resume` | 恢复 HITL 中断的流水线 |
+| POST | `/pipelines/{thread_id}/cancel` | 取消流水线 |
+
+---
+
+## POST /api/v1/pipelines/search
+
+**说明：** 启动关键词检索流水线：search → dedup → crawl → OCR → index。发现去重冲突时可能中断以等待 HITL 处理。
+
+**请求体：** `SearchPipelineRequest`
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `project_id` | int | 是 | 项目 ID |
+| `query` | string | 否 | 检索词（默认：`""`） |
+| `sources` | string[] | 否 | 检索源（如 `["semantic_scholar", "openalex"]`） |
+| `max_results` | int | 否 | 最大结果数（1–200，默认：50） |
+
+**响应：** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID（如 `search_a1b2c3d4e5f6`） |
+| `status` | string | `running` |
+| `project_id` | int | 项目 ID |
+
+### 检索流水线示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/pipelines/search" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": 1,
+    "query": "transformer attention",
+    "sources": ["semantic_scholar"],
+    "max_results": 30
+  }'
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "thread_id": "search_a1b2c3d4e5f6",
+    "status": "running",
+    "project_id": 1
+  }
+}
+```
+
+---
+
+## POST /api/v1/pipelines/upload
+
+**说明：** 启动 PDF 上传流水线：提取元数据 → dedup → OCR → index。接受允许目录内的本地文件路径。
+
+**请求体：** `UploadPipelineRequest`
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `project_id` | int | 是 | 项目 ID |
+| `pdf_paths` | string[] | 是 | PDF 文件的绝对路径（需在配置的 `pdf_dir` 下） |
+
+**响应：** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID（如 `upload_x1y2z3a4b5c6`） |
+| `status` | string | `running` |
+| `project_id` | int | 项目 ID |
+
+### 上传流水线示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/pipelines/upload" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": 1,
+    "pdf_paths": [
+      "/data0/djx/omelette/pdfs/paper1.pdf",
+      "/data0/djx/omelette/pdfs/paper2.pdf"
+    ]
+  }'
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "thread_id": "upload_x1y2z3a4b5c6",
+    "status": "running",
+    "project_id": 1
+  }
+}
+```
+
+---
+
+## GET /api/v1/pipelines/{thread_id}/status
+
+**说明：** 获取流水线执行状态。当 `status` 为 `interrupted` 时，包含 `conflicts` 用于 HITL 处理。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID |
+
+**响应：** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 线程 ID |
+| `status` | string | `running`、`interrupted`、`completed`、`failed`、`cancelled` |
+| `stage` | string | 当前阶段（若可用） |
+| `progress` | int | 进度 0–100 |
+| `conflicts` | object[] | 去重冲突（`interrupted` 时） |
+| `interrupted_at` | string[] | 中断节点 ID（`interrupted` 时） |
+| `result` | object | 最终结果（`completed` 时） |
+| `error` | string | 错误信息（`failed` 时） |
+
+### 状态查询示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/status"
+```
+
+**运行中：**
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "thread_id": "search_a1b2c3d4e5f6",
+    "status": "running"
+  }
+}
+```
+
+**HITL 中断：**
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "thread_id": "search_a1b2c3d4e5f6",
+    "status": "interrupted",
+    "conflicts": [
+      {
+        "existing": {"id": 1, "title": "Paper A", "doi": "10.1234/abc"},
+        "new": {"title": "Paper A (preprint)", "doi": "10.1234/abc"}
+      }
+    ],
+    "stage": "dedup",
+    "progress": 45,
+    "interrupted_at": ["dedup_resolve"]
+  }
+}
+```
+
+**已完成：**
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "thread_id": "search_a1b2c3d4e5f6",
+    "status": "completed",
+    "stage": "completed",
+    "progress": 100,
+    "result": {"papers_imported": 12}
+  }
+}
+```
+
+---
+
+## POST /api/v1/pipelines/{thread_id}/resume
+
+**说明：** 使用已解决的冲突恢复 HITL 中断的流水线。仅在 `status` 为 `interrupted` 时有效。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID |
+
+**请求体：** `ResumeRequest`
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `resolved_conflicts` | object[] | 否 | 已解决的冲突决策（默认：`[]`） |
+
+**响应：** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 线程 ID |
+| `status` | string | `running` |
+
+### 恢复流水线示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/resume" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "resolved_conflicts": [
+      {"conflict_id": 0, "action": "keep_existing"},
+      {"conflict_id": 1, "action": "import_new"}
+    ]
+  }'
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "thread_id": "search_a1b2c3d4e5f6",
+    "status": "running"
+  }
+}
+```
+
+---
+
+## POST /api/v1/pipelines/{thread_id}/cancel
+
+**说明：** 取消运行中或已中断的流水线。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID |
+
+**响应：** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 线程 ID |
+| `status` | string | `cancelled` |
+
+### 取消流水线示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/cancel"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "thread_id": "search_a1b2c3d4e5f6",
+    "status": "cancelled"
+  }
+}
+```
+
+---
+
+## 错误码
+
+| 状态码 | 说明 |
+|--------|------|
+| 200 | 成功 |
+| 400 | 请求错误（如路径不在允许目录内、流水线未处于中断状态） |
+| 404 | 流水线不存在（thread_id 未知或已完成且已清理） |
diff --git a/docs/zh/api/projects.md b/docs/zh/api/projects.md
index 6e579d1..e582700 100644
--- a/docs/zh/api/projects.md
+++ b/docs/zh/api/projects.md
@@ -11,6 +11,8 @@
 | GET | /projects/{id} | 获取 |
 | PUT | /projects/{id} | 更新 |
 | DELETE | /projects/{id} | 删除 |
+| POST | /projects/{id}/pipeline/run | 运行完整流程（爬取→OCR→索引） |
+| POST | /projects/{id}/pipeline/paper/{paper_id} | 对单篇论文运行流程 |
 
 ## 请求体（创建/更新）
 
diff --git a/docs/zh/api/rag.md b/docs/zh/api/rag.md
index 61167cc..3064734 100644
--- a/docs/zh/api/rag.md
+++ b/docs/zh/api/rag.md
@@ -6,7 +6,30 @@
 
 | 方法 | 路径 | 说明 |
 |------|------|------|
-| POST | /projects/{id}/rag/query | 查询 |
-| POST | /projects/{id}/rag/index | 构建索引 |
-| GET | /projects/{id}/rag/stats | 统计 |
+| POST | /projects/{id}/rag/query | 查询知识库 |
+| POST | /projects/{id}/rag/index | 构建/重建索引 |
+| POST | /projects/{id}/rag/index/stream | 构建索引（SSE 流式进度） |
+| GET | /projects/{id}/rag/stats | 索引统计 |
 | DELETE | /projects/{id}/rag/index | 删除索引 |
+
+## 查询请求
+
+```json
+{
+  "question": "什么是注意力机制？",
+  "top_k": 10,
+  "use_reranker": true,
+  "include_sources": true
+}
+```
+
+- `question` — 待回答的问题（必填）
+- `top_k` — 检索块数量（默认：10）
+- `use_reranker` — 是否使用重排序（默认：true）
+- `include_sources` — 是否包含来源（默认：true）
+
+## 索引流式接口
+
+`POST /projects/{id}/rag/index/stream` — 通过 SSE 流式重建向量索引，实时推送进度。
+
+**事件类型：** `progress`、`complete`、`error`
diff --git a/docs/zh/api/settings.md b/docs/zh/api/settings.md
new file mode 100644
index 0000000..e5a5f56
--- /dev/null
+++ b/docs/zh/api/settings.md
@@ -0,0 +1,261 @@
+# 设置 API
+
+基础路径：`/api/v1/settings`
+
+## 简介
+
+设置 API 用于管理应用配置：LLM 提供商选择、模型参数、各提供商（OpenAI、Anthropic、阿里云、火山引擎、Ollama）的 API 密钥、嵌入/重排序模型及其他系统设置。配置值由环境变量与数据库覆盖合并而成；API 密钥在响应中会被脱敏显示。
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/settings` | 获取全部设置 |
+| PUT | `/settings` | 更新设置（部分更新） |
+| GET | `/settings/models` | 按提供商列出可用模型 |
+| POST | `/settings/test-connection` | 测试 LLM 提供商连接 |
+| GET | `/settings/health` | 健康检查 |
+
+---
+
+## GET /api/v1/settings
+
+**说明：** 返回合并后的设置（数据库覆盖 .env）。API 密钥会被脱敏（如 `sk-12***abcd`）。
+
+**响应：** `ApiResponse[SettingsSchema]`
+
+### SettingsSchema 字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `llm_provider` | string | 默认 LLM 提供商（`openai`、`anthropic`、`aliyun`、`volcengine`、`ollama`、`mock`） |
+| `llm_model` | string | 默认模型（覆盖提供商默认值） |
+| `llm_temperature` | float | 温度（0.0–2.0） |
+| `llm_max_tokens` | int | 最大 token 数 |
+| `openai_api_key` | string | OpenAI API 密钥（脱敏） |
+| `openai_model` | string | OpenAI 模型 |
+| `anthropic_api_key` | string | Anthropic API 密钥（脱敏） |
+| `anthropic_model` | string | Anthropic 模型 |
+| `aliyun_api_key` | string | 阿里云 API 密钥（脱敏） |
+| `aliyun_base_url` | string | 阿里云 base URL |
+| `aliyun_model` | string | 阿里云模型 |
+| `volcengine_api_key` | string | 火山引擎 API 密钥（脱敏） |
+| `volcengine_base_url` | string | 火山引擎 base URL |
+| `volcengine_model` | string | 火山引擎模型 |
+| `ollama_base_url` | string | Ollama base URL |
+| `ollama_model` | string | Ollama 模型 |
+| `embedding_model` | string | 嵌入模型名称 |
+| `reranker_model` | string | 重排序模型名称 |
+| `data_dir` | string | 数据目录路径 |
+| `cuda_visible_devices` | string | CUDA 设备 ID |
+| `semantic_scholar_api_key` | string | Semantic Scholar API 密钥（脱敏） |
+| `unpaywall_email` | string | Unpaywall 邮箱 |
+
+### 获取设置示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "llm_provider": "openai",
+    "llm_model": "gpt-4o-mini",
+    "llm_temperature": 0.7,
+    "llm_max_tokens": 4096,
+    "openai_api_key": "sk-12***abcd",
+    "openai_model": "gpt-4o-mini",
+    "anthropic_api_key": "",
+    "anthropic_model": "",
+    "aliyun_api_key": "",
+    "aliyun_base_url": "",
+    "aliyun_model": "",
+    "volcengine_api_key": "",
+    "volcengine_base_url": "",
+    "volcengine_model": "",
+    "ollama_base_url": "http://localhost:11434",
+    "ollama_model": "",
+    "embedding_model": "BAAI/bge-m3",
+    "reranker_model": "",
+    "data_dir": "/data0/djx/omelette",
+    "cuda_visible_devices": "",
+    "semantic_scholar_api_key": "",
+    "unpaywall_email": ""
+  }
+}
+```
+
+---
+
+## PUT /api/v1/settings
+
+**说明：** 更新用户可配置的设置。仅非空字段会被应用。包含 `***` 的脱敏 API 密钥会被跳过，避免覆盖真实密钥。
+
+**请求体：** `SettingsUpdateSchema`（部分更新，所有字段可选）
+
+| 字段 | 类型 | 约束 |
+|------|------|------|
+| `llm_provider` | string | — |
+| `llm_model` | string | — |
+| `llm_temperature` | float | 0.0–2.0 |
+| `llm_max_tokens` | int | 1–128000 |
+| `openai_api_key` | string | — |
+| `openai_model` | string | — |
+| `anthropic_api_key` | string | — |
+| `anthropic_model` | string | — |
+| `aliyun_api_key` | string | — |
+| `aliyun_base_url` | string | — |
+| `aliyun_model` | string | — |
+| `volcengine_api_key` | string | — |
+| `volcengine_base_url` | string | — |
+| `volcengine_model` | string | — |
+| `ollama_base_url` | string | — |
+| `ollama_model` | string | — |
+
+**响应：** `ApiResponse[SettingsSchema]`（更新后的合并设置）
+
+### 更新设置示例
+
+```bash
+curl -X PUT "http://localhost:8000/api/v1/settings" \
+  -H "Content-Type: application/json" \
+  -d '{"llm_provider": "openai", "llm_model": "gpt-4o-mini"}'
+```
+
+---
+
+## GET /api/v1/settings/models
+
+**说明：** 返回可用的 LLM 提供商及其模型列表。
+
+**响应：** `ApiResponse[list[ProviderModelInfo]]`
+
+### ProviderModelInfo 字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `provider` | string | 提供商 ID |
+| `display_name` | string | 显示名称 |
+| `models` | string[] | 模型 ID 列表 |
+| `requires_api_key` | bool | 是否需要 API 密钥 |
+| `requires_base_url` | bool | 是否可配置 base URL |
+| `default_base_url` | string | 默认 base URL（若适用） |
+
+### 模型列表示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings/models"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "provider": "openai",
+      "display_name": "OpenAI",
+      "models": ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3-mini"],
+      "requires_api_key": true,
+      "requires_base_url": false,
+      "default_base_url": ""
+    },
+    {
+      "provider": "ollama",
+      "display_name": "Ollama (本地)",
+      "models": ["llama3", "llama3.1", "mistral", "qwen2", "deepseek-r1"],
+      "requires_api_key": false,
+      "requires_base_url": true,
+      "default_base_url": "http://localhost:11434"
+    }
+  ]
+}
+```
+
+---
+
+## POST /api/v1/settings/test-connection
+
+**说明：** 使用当前 LLM 配置发送一条简单提示以测试连接。配置取自合并后的设置（数据库覆盖 .env），本端点不需要请求体。
+
+**响应：** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `success` | bool | 测试是否成功 |
+| `response` | string | LLM 响应前 200 字符（成功时） |
+| `error` | string | 错误信息（失败时） |
+
+### 连接测试示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/settings/test-connection"
+```
+
+**成功：**
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "success": true,
+    "response": "OK."
+  }
+}
+```
+
+**失败：**
+```json
+{
+  "code": 500,
+  "message": "Connection test failed",
+  "data": {
+    "success": false,
+    "error": "Invalid API key"
+  }
+}
+```
+
+---
+
+## GET /api/v1/settings/health
+
+**说明：** 简单健康检查端点。
+
+**响应：** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `status` | string | `"healthy"` |
+| `version` | string | 应用版本 |
+
+### 健康检查示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings/health"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "status": "healthy",
+    "version": "0.1.0"
+  }
+}
+```
+
+---
+
+## 错误码
+
+| 状态码 | 说明 |
+|--------|------|
+| 200 | 成功 |
+| 400 | 请求错误（如温度范围无效） |
+| 422 | 校验错误（请求体无效） |
+| 500 | 服务端错误（如连接测试失败） |
diff --git a/docs/zh/api/subscription.md b/docs/zh/api/subscription.md
new file mode 100644
index 0000000..80778db
--- /dev/null
+++ b/docs/zh/api/subscription.md
@@ -0,0 +1,21 @@
+# Subscription API
+
+路径：`/api/v1/projects/{project_id}/subscriptions`
+
+## 端点
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | /feeds | 常用学术 RSS 模板 |
+| GET | / | 列表 |
+| POST | / | 创建 |
+| GET | /{sub_id} | 获取 |
+| PUT | /{sub_id} | 更新 |
+| DELETE | /{sub_id} | 删除 |
+| POST | /{sub_id}/trigger | 手动触发更新 |
+| POST | /check-rss | 检查 RSS |
+| POST | /check-updates | 检查 API 更新 |
+
+## 说明
+
+订阅模块用于增量文献更新（RSS / API 检索）。创建订阅后可定期或手动触发，检查新文献并导入项目。
diff --git a/docs/zh/api/tasks.md b/docs/zh/api/tasks.md
new file mode 100644
index 0000000..9403af8
--- /dev/null
+++ b/docs/zh/api/tasks.md
@@ -0,0 +1,163 @@
+# 任务 API
+
+基础路径：`/api/v1/tasks`
+
+## 简介
+
+任务 API 用于管理后台处理任务：search、dedup、crawl、ocr、index、keyword_expand。任务由流水线及其他服务创建；本 API 提供列表、详情查询和取消功能。
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/tasks` | 任务列表 |
+| GET | `/tasks/{id}` | 任务详情 |
+| POST | `/tasks/{id}/cancel` | 取消运行中的任务 |
+
+---
+
+## GET /api/v1/tasks
+
+**说明：** 列出任务，支持可选过滤。结果按 `created_at` 降序排列。
+
+**查询参数**
+
+| 参数 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `project_id` | int | 否 | 按项目 ID 过滤 |
+| `status` | string | 否 | 按状态过滤：`pending`、`running`、`completed`、`failed`、`cancelled` |
+| `limit` | int | 否 | 最大条数（默认：50） |
+
+**响应：** `ApiResponse[list[TaskSchema]]`
+
+### TaskSchema（列表视图）
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 任务 ID |
+| `project_id` | int | 项目 ID |
+| `task_type` | string | `search`、`dedup`、`crawl`、`ocr`、`index`、`keyword_expand` |
+| `status` | string | `pending`、`running`、`completed`、`failed`、`cancelled` |
+| `progress` | int | 当前进度 |
+| `total` | int | 总步数 |
+| `created_at` | string | ISO 8601 时间 |
+
+### 列表示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/tasks?project_id=1&status=running&limit=20"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": [
+    {
+      "id": 42,
+      "project_id": 1,
+      "task_type": "search",
+      "status": "running",
+      "progress": 30,
+      "total": 100,
+      "created_at": "2025-03-12T10:00:00"
+    }
+  ]
+}
+```
+
+---
+
+## GET /api/v1/tasks/{id}
+
+**说明：** 获取任务完整详情，包括 params、result、error_message。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 任务 ID |
+
+**响应：** `ApiResponse[TaskDetailSchema]`
+
+### TaskDetailSchema 字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 任务 ID |
+| `project_id` | int | 项目 ID |
+| `task_type` | string | 任务类型 |
+| `status` | string | 任务状态 |
+| `progress` | int | 当前进度 |
+| `total` | int | 总步数 |
+| `params` | object | 输入参数 |
+| `result` | object | 输出结果（完成时） |
+| `error_message` | string | 错误信息（失败时） |
+| `created_at` | string | ISO 8601 时间 |
+| `started_at` | string | ISO 8601 时间（可为空） |
+| `completed_at` | string | ISO 8601 时间（可为空） |
+
+### 详情示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/tasks/42"
+```
+
+```json
+{
+  "code": 200,
+  "message": "success",
+  "data": {
+    "id": 42,
+    "project_id": 1,
+    "task_type": "search",
+    "status": "completed",
+    "progress": 100,
+    "total": 100,
+    "params": {"query": "machine learning", "sources": ["semantic_scholar"]},
+    "result": {"papers_found": 15, "imported": 10},
+    "error_message": "",
+    "created_at": "2025-03-12T10:00:00",
+    "started_at": "2025-03-12T10:00:01",
+    "completed_at": "2025-03-12T10:02:30"
+  }
+}
+```
+
+---
+
+## POST /api/v1/tasks/{id}/cancel
+
+**说明：** 取消运行中或待处理的任务。处于 `completed`、`failed`、`cancelled` 状态的任务不可取消。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 任务 ID |
+
+**响应：** `ApiResponse`（无 data）
+
+### 取消示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/tasks/42/cancel"
+```
+
+```json
+{
+  "code": 200,
+  "message": "Task cancelled",
+  "data": null
+}
+```
+
+---
+
+## 错误码
+
+| 状态码 | 说明 |
+|--------|------|
+| 200 | 成功 |
+| 400 | 无法取消任务（已处于 completed/failed/cancelled 状态） |
+| 404 | 任务不存在 |
diff --git a/docs/zh/api/writing.md b/docs/zh/api/writing.md
index a102ab8..bc428f8 100644
--- a/docs/zh/api/writing.md
+++ b/docs/zh/api/writing.md
@@ -6,12 +6,16 @@
 
 | 方法 | 路径 | 说明 |
 |------|------|------|
-| POST | /projects/{id}/writing/assist | 通用辅助 |
+| POST | /projects/{id}/writing/assist | 通用写作辅助 |
 | POST | /projects/{id}/writing/summarize | 摘要 |
 | POST | /projects/{id}/writing/citations | 引用生成 |
 | POST | /projects/{id}/writing/review-outline | 综述提纲 |
 | POST | /projects/{id}/writing/gap-analysis | 缺口分析 |
 
+## Assist 请求
+
+`task`：`summarize`、`cite`、`review_outline`、`gap_analysis`；`style` 用于引用样式。
+
 ## 引用样式
 
-gb7714、apa、mla
+`gb_t_7714`、`apa`、`mla`
diff --git a/frontend/src/pages/ChatHistoryPage.tsx b/frontend/src/pages/ChatHistoryPage.tsx
index fdb6562..9fb9a66 100644
--- a/frontend/src/pages/ChatHistoryPage.tsx
+++ b/frontend/src/pages/ChatHistoryPage.tsx
@@ -113,7 +113,7 @@ export default function ChatHistoryPage() {
                           {formatDate(conv.updated_at)}
                         </span>
                         <span className="text-xs text-muted-foreground">
-                          {t('history.messageCount', { count: conv.messages?.length ?? 0 })}
+                          {t('history.messageCount', { count: conv.message_count ?? conv.messages?.length ?? 0 })}
                         </span>
                       </div>
                     </div>
diff --git a/frontend/src/pages/project/KeywordsPage.tsx b/frontend/src/pages/project/KeywordsPage.tsx
index 3dce598..7183eb6 100644
--- a/frontend/src/pages/project/KeywordsPage.tsx
+++ b/frontend/src/pages/project/KeywordsPage.tsx
@@ -68,10 +68,12 @@ export default function KeywordsPage() {
     onSuccess: (res) => {
       const terms = res?.expanded_terms ?? [];
       if (terms.length > 0) {
-        terms.forEach((term: string) => {
+        terms.forEach((item: string | { term: string; term_zh?: string; relation?: string }) => {
+          const termStr = typeof item === 'string' ? item : item.term;
+          const termEn = typeof item === 'string' ? item : item.term;
           createMutation.mutate({
-            term,
-            term_en: term,
+            term: termStr,
+            term_en: termEn,
             level: 1,
           });
         });
diff --git a/frontend/src/pages/project/PapersPage.tsx b/frontend/src/pages/project/PapersPage.tsx
index 9a05f86..4d5405c 100644
--- a/frontend/src/pages/project/PapersPage.tsx
+++ b/frontend/src/pages/project/PapersPage.tsx
@@ -144,7 +144,17 @@ export default function PapersPage() {
 
   const handleResolveConflict = async (conflictId: string, action: string) => {
     try {
-      await kbApi.resolveConflict(pid, conflictId, action === 'keep_existing' ? 'keep_old' : action);
+      const mappedAction = action === 'keep_existing' ? 'keep_old' : action === 'keep_new' ? 'keep_new' : action;
+      if (mappedAction === 'ai_resolve') {
+        const suggestions = await kbApi.autoResolve(pid, [conflictId]);
+        if (Array.isArray(suggestions) && suggestions.length > 0) {
+          await kbApi.resolveConflict(pid, conflictId, suggestions[0].action ?? 'skip');
+        }
+        setConflicts((prev) => prev.filter((c) => c.conflict_id !== conflictId));
+        queryClient.invalidateQueries({ queryKey: ['papers', pid] });
+        return;
+      }
+      await kbApi.resolveConflict(pid, conflictId, mappedAction);
       setConflicts((prev) => prev.filter((c) => c.conflict_id !== conflictId));
       queryClient.invalidateQueries({ queryKey: ['papers', pid] });
     } catch (err) {
@@ -155,7 +165,14 @@ export default function PapersPage() {
   const handleAutoResolveAll = async () => {
     const ids = conflicts.map((c) => c.conflict_id);
     try {
-      await kbApi.autoResolve(pid, ids);
+      const suggestions = await kbApi.autoResolve(pid, ids);
+      if (Array.isArray(suggestions)) {
+        for (const s of suggestions) {
+          if (s.action && !s.error) {
+            await kbApi.resolveConflict(pid, s.conflict_id, s.action);
+          }
+        }
+      }
       setConflicts([]);
       queryClient.invalidateQueries({ queryKey: ['papers', pid] });
     } catch (err) {
diff --git a/frontend/src/pages/project/SearchPage.tsx b/frontend/src/pages/project/SearchPage.tsx
index 657ae72..49cbbce 100644
--- a/frontend/src/pages/project/SearchPage.tsx
+++ b/frontend/src/pages/project/SearchPage.tsx
@@ -84,7 +84,7 @@ export default function SearchPage() {
     errorMessage: t('searchPage.importFailed'),
     invalidateKeys: [['papers', pid], ['project', projectId]],
     onSuccess: (res) => {
-      setImported(res?.imported ?? 0);
+      setImported(res?.created ?? 0);
     },
   });
 
diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts
index 20835ad..3f8f97e 100644
--- a/frontend/src/services/api.ts
+++ b/frontend/src/services/api.ts
@@ -25,7 +25,7 @@ export const paperApi = {
   delete: (projectId: number, paperId: number) =>
     api.delete<null>(`/projects/${projectId}/papers/${paperId}`).then(r => r.data),
   bulkImport: (projectId: number, papers: Partial<Paper>[]) =>
-    api.post<{ imported: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data),
+    api.post<{ created: number; skipped: number; total: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data),
 };
 
 export const keywordApi = {
diff --git a/frontend/src/services/kb-api.ts b/frontend/src/services/kb-api.ts
index b387d96..87e4676 100644
--- a/frontend/src/services/kb-api.ts
+++ b/frontend/src/services/kb-api.ts
@@ -49,7 +49,7 @@ export const kbApi = {
     }).then(r => r.data),
 
   autoResolve: (projectId: number, conflictIds: string[]) =>
-    api.post<{ resolved: number }>(`/projects/${projectId}/dedup/auto-resolve`, {
+    api.post<Array<{ conflict_id: string; action: string; reason: string; error?: string }>>(`/projects/${projectId}/dedup/auto-resolve`, {
       conflict_ids: conflictIds,
     }).then(r => r.data),
 
@@ -69,5 +69,5 @@ export const kbApi = {
     }).then(r => r.data),
 
   bulkImport: (projectId: number, papers: NewPaperData[]) =>
-    api.post<{ imported: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data),
+    api.post<{ created: number; skipped: number; total: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data),
 };
diff --git a/frontend/src/types/chat.ts b/frontend/src/types/chat.ts
index ed374a5..fc1ea66 100644
--- a/frontend/src/types/chat.ts
+++ b/frontend/src/types/chat.ts
@@ -7,6 +7,8 @@ export interface Conversation {
   created_at: string;
   updated_at: string;
   messages: ChatMessage[];
+  message_count?: number;
+  last_message_preview?: string;
 }
 
 export interface ChatMessage {