diff --git a/backend/app/api/v1/rewrite.py b/backend/app/api/v1/rewrite.py
index 19fa391..5b40fd2 100644
--- a/backend/app/api/v1/rewrite.py
+++ b/backend/app/api/v1/rewrite.py
@@ -13,7 +13,7 @@
from sqlalchemy.ext.asyncio import AsyncSession
from app.api.deps import get_db
-from app.services.llm.client import LLMClient, get_llm_client
+from app.services.llm.client import get_llm_client
from app.services.user_settings_service import UserSettingsService
logger = logging.getLogger(__name__)
@@ -85,12 +85,10 @@ async def _stream_rewrite(request: RewriteRequest, db: AsyncSession):
full_text = ""
try:
- async for token in asyncio.wait_for(
- _collect_stream(llm, messages),
- timeout=REWRITE_TIMEOUT,
- ):
- full_text += token
- yield _sse("rewrite_delta", {"delta": token})
+ async with asyncio.timeout(REWRITE_TIMEOUT):
+ async for token in llm.chat_stream(messages, temperature=0.3, task_type="rewrite"):
+ full_text += token
+ yield _sse("rewrite_delta", {"delta": token})
except TimeoutError:
yield _sse("error", {"code": "timeout", "message": "Rewrite timed out after 30s"})
return
@@ -105,12 +103,6 @@ async def _stream_rewrite(request: RewriteRequest, db: AsyncSession):
yield _sse("error", {"code": "rewrite_error", "message": str(e)})
-async def _collect_stream(llm: LLMClient, messages: list[dict[str, str]]):
- """Wrap the async iterator so asyncio.wait_for can timeout the whole stream."""
- async for token in llm.chat_stream(messages, temperature=0.3, task_type="rewrite"):
- yield token
-
-
@router.post("/rewrite")
async def rewrite_stream(
request: RewriteRequest,
diff --git a/backend/app/services/search_service.py b/backend/app/services/search_service.py
index 5001031..f8f1df9 100644
--- a/backend/app/services/search_service.py
+++ b/backend/app/services/search_service.py
@@ -219,7 +219,7 @@ def _affiliation(auth: dict) -> str:
class ArXivProvider(SearchProvider):
"""arXiv API — Atom XML feed."""
- BASE = "http://export.arxiv.org/api/query"
+ BASE = "https://export.arxiv.org/api/query"
@property
def name(self) -> str:
diff --git a/backend/app/services/subscription_service.py b/backend/app/services/subscription_service.py
index 4420de9..11ce2e5 100644
--- a/backend/app/services/subscription_service.py
+++ b/backend/app/services/subscription_service.py
@@ -91,10 +91,10 @@ def get_common_feeds() -> list[dict]:
return [
{
"name": "arXiv - Physics Optics",
- "url": "http://export.arxiv.org/rss/physics.optics",
+ "url": "https://export.arxiv.org/rss/physics.optics",
"category": "preprint",
},
- {"name": "arXiv - Quantum Physics", "url": "http://export.arxiv.org/rss/quant-ph", "category": "preprint"},
+ {"name": "arXiv - Quantum Physics", "url": "https://export.arxiv.org/rss/quant-ph", "category": "preprint"},
{"name": "Nature Photonics", "url": "https://www.nature.com/nphoton.rss", "category": "journal"},
{
"name": "Science - Latest",
diff --git a/docs/api/chat.md b/docs/api/chat.md
new file mode 100644
index 0000000..fc2d867
--- /dev/null
+++ b/docs/api/chat.md
@@ -0,0 +1,121 @@
+# Chat API
+
+Chat 模块提供基于 SSE 的流式对话与文本改写接口,支持知识库 RAG 检索、多工具模式及实时流式输出。
+
+**Base path:** `/api/v1/chat`
+
+---
+
+## 1. 流式对话
+
+### POST /api/v1/chat/stream
+
+基于 SSE 的流式对话接口,支持知识库检索、引用标注及多轮对话上下文。
+
+#### 请求体 (ChatStreamRequest)
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `conversation_id` | int | 否 | 对话 ID,续写时传入以保持上下文 |
+| `message` | str | 是 | 用户消息内容(至少 1 字符) |
+| `knowledge_base_ids` | list[int] | 否 | 知识库(项目)ID 列表,用于 RAG 检索 |
+| `model` | str | 否 | 模型标识,空则使用用户设置 |
+| `tool_mode` | str | 否 | 工具模式,默认 `"qa"` |
+
+**tool_mode 可选值:**
+
+| 值 | 说明 |
+|----|------|
+| `qa` | 问答模式:基于上下文回答问题,使用 [1]、[2] 等引用格式 |
+| `citation_lookup` | 引用查找:识别并列出与文本最相关的参考文献 |
+| `review_outline` | 综述提纲:生成结构化文献综述提纲 |
+| `gap_analysis` | 研究缺口分析:识别研究空白与未来方向 |
+
+#### 对话响应格式
+
+SSE 流式响应,`Content-Type: text/event-stream`。
+
+#### 对话 SSE 事件类型
+
+| 事件 | 说明 | data 字段 |
+|------|------|-----------|
+| `message_start` | 消息开始 | `{ message_id }` |
+| `citation` | 引用信息(每个来源一条) | `{ index, paper_id, paper_title, page_number, excerpt, relevance_score, chunk_type, authors, year, doi }` |
+| `text_delta` | 文本增量 | `{ delta }` |
+| `message_end` | 消息结束 | `{ message_id, conversation_id, finish_reason }` |
+| `error` | 错误 | `{ code, message }` |
+
+#### 对话示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/chat/stream" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "message": "什么是注意力机制?",
+ "knowledge_base_ids": [1, 2],
+ "tool_mode": "qa"
+ }'
+```
+
+#### 对话错误码
+
+| code | 说明 |
+|------|------|
+| `stream_error` | 流式处理异常 |
+
+---
+
+## 2. 文本改写
+
+### POST /api/v1/chat/rewrite
+
+基于 SSE 的流式文本改写接口,支持多种风格与自定义提示。
+
+#### 请求体 (RewriteRequest)
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `excerpt` | str | 是 | 待改写文本,**最多 2000 字符** |
+| `style` | str | 是 | 改写风格 |
+| `custom_prompt` | str | 否 | 自定义提示,`style=custom` 时必填 |
+| `source_language` | str | 否 | 源语言,默认 `"auto"` |
+
+**style 可选值:**
+
+| 值 | 说明 |
+|----|------|
+| `simplify` | 通俗化:将学术文本改写为易懂语言 |
+| `academic` | 学术化:改写为正式学术风格 |
+| `translate_en` | 英译:翻译为英文 |
+| `translate_zh` | 中译:翻译为中文 |
+| `custom` | 自定义:使用 `custom_prompt` 作为系统提示 |
+
+#### 改写响应格式
+
+SSE 流式响应,`Content-Type: text/event-stream`。
+
+#### 改写 SSE 事件类型
+
+| 事件 | 说明 | data 字段 |
+|------|------|-----------|
+| `rewrite_delta` | 改写文本增量 | `{ delta }` |
+| `rewrite_end` | 改写完成 | `{ full_text }` |
+| `error` | 错误 | `{ code, message }` |
+
+#### 改写示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/chat/rewrite" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "excerpt": "The attention mechanism allows the model to focus on different parts of the input.",
+ "style": "translate_zh"
+ }'
+```
+
+#### 改写错误码
+
+| code | 说明 |
+|------|------|
+| `timeout` | 改写超时(30 秒) |
+| `rewrite_error` | 改写处理异常 |
diff --git a/docs/api/conversations.md b/docs/api/conversations.md
new file mode 100644
index 0000000..3d5e2dd
--- /dev/null
+++ b/docs/api/conversations.md
@@ -0,0 +1,232 @@
+# Conversations API
+
+Conversations 模块提供对话的 CRUD 接口,支持分页列表、按知识库筛选及消息详情查询。
+
+**Base path:** `/api/v1/conversations`
+
+---
+
+## 端点总览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/conversations` | 分页列表 |
+| POST | `/conversations` | 创建对话 |
+| GET | `/conversations/{id}` | 获取详情(含消息) |
+| PUT | `/conversations/{id}` | 更新对话 |
+| DELETE | `/conversations/{id}` | 删除对话 |
+
+---
+
+## GET /conversations — 列表对话
+
+分页获取对话列表,按更新时间倒序,支持按知识库 ID 筛选。
+
+### 查询参数
+
+| 参数 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `page` | int | 否 | 页码,默认 1 |
+| `page_size` | int | 否 | 每页条数,默认 20 |
+| `knowledge_base_id` | int | 否 | 仅返回包含该知识库的对话 |
+
+### 列表响应格式
+
+`ApiResponse[PaginatedData[ConversationListSchema]]`
+
+**ConversationListSchema 字段:**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+| `title` | str | 标题 |
+| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 |
+| `model` | str | 模型标识 |
+| `tool_mode` | str | 工具模式,默认 `"qa"` |
+| `created_at` | datetime | 创建时间 |
+| `updated_at` | datetime | 更新时间 |
+| `message_count` | int | 消息数量 |
+| `last_message_preview` | str | 最后一条消息预览(最多 100 字符) |
+
+**PaginatedData 结构:**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "items": [...],
+ "total": 42,
+ "page": 1,
+ "page_size": 20,
+ "total_pages": 3
+ }
+}
+```
+
+### 列表示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/conversations?page=1&page_size=20"
+curl -X GET "http://localhost:8000/api/v1/conversations?knowledge_base_id=1"
+```
+
+---
+
+## POST /conversations — 创建对话
+
+创建新对话。
+
+### 创建请求体
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `title` | str | 否 | 标题,默认 `"新对话"` |
+| `knowledge_base_ids` | list[int] | 否 | 知识库 ID 列表 |
+| `model` | str | 否 | 模型标识 |
+| `tool_mode` | str | 否 | 工具模式,默认 `"qa"` |
+
+### 创建响应格式
+
+`ApiResponse[ConversationSchema]`,包含完整对话及空 `messages` 数组。
+
+### 创建示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/conversations" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "title": "文献综述讨论",
+ "knowledge_base_ids": [1, 2],
+ "tool_mode": "review_outline"
+ }'
+```
+
+---
+
+## GET /conversations/{id} — 获取对话详情
+
+获取单个对话及其全部消息。
+
+### 详情路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 详情响应格式
+
+`ApiResponse[ConversationSchema]`
+
+**ConversationSchema 字段:**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+| `title` | str | 标题 |
+| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 |
+| `model` | str | 模型标识 |
+| `tool_mode` | str | 工具模式 |
+| `created_at` | datetime | 创建时间 |
+| `updated_at` | datetime | 更新时间 |
+| `messages` | list[MessageSchema] | 消息列表 |
+
+**MessageSchema 字段:**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 消息 ID |
+| `conversation_id` | int | 对话 ID |
+| `role` | str | 角色:`user` / `assistant` |
+| `content` | str | 内容 |
+| `citations` | list[dict] \| null | 引用列表(assistant 消息) |
+| `created_at` | datetime | 创建时间 |
+
+### 详情示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/conversations/1"
+```
+
+### 详情错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
+
+---
+
+## PUT /conversations/{id} — 更新对话
+
+更新对话标题或设置。
+
+### 更新路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 更新请求体
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `title` | str | 否 | 新标题 |
+| `model` | str | 否 | 新模型 |
+| `tool_mode` | str | 否 | 新工具模式 |
+
+仅传入需要更新的字段。
+
+### 更新响应格式
+
+`ApiResponse[ConversationSchema]`,包含更新后的完整对话及消息。
+
+### 更新示例
+
+```bash
+curl -X PUT "http://localhost:8000/api/v1/conversations/1" \
+ -H "Content-Type: application/json" \
+ -d '{"title": "新标题"}'
+```
+
+### 更新错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
+
+---
+
+## DELETE /conversations/{id} — 删除对话
+
+删除对话及其全部消息(级联删除)。
+
+### 删除路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 删除响应格式
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "deleted": true,
+ "id": 1
+ }
+}
+```
+
+### 删除示例
+
+```bash
+curl -X DELETE "http://localhost:8000/api/v1/conversations/1"
+```
+
+### 删除错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
diff --git a/docs/api/crawler.md b/docs/api/crawler.md
new file mode 100644
index 0000000..d7d37dd
--- /dev/null
+++ b/docs/api/crawler.md
@@ -0,0 +1,98 @@
+# Crawler API
+
+爬虫模块 API,用于为待下载文献执行 PDF 下载(Unpaywall 等多源回退)。
+
+**Base path:** `/api/v1/projects/{project_id}/crawl`
+
+---
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|--------------|
+| POST | `/start` | 启动 PDF 下载任务 |
+| GET | `/stats` | 获取下载统计 |
+
+---
+
+## POST /start
+
+对项目内待下载文献启动 PDF 下载。仅处理 `pending` 或 `metadata_only` 状态文献。
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `priority` | string | `"high"` | 优先级:`high` 按引用数排序,`low` 按创建时间排序 |
+| `max_papers` | int | 50 | 单次处理最大文献数 |
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "total": 10,
+ "success": 8,
+ "failed": 2,
+ "details": [
+ {
+ "paper_id": 1,
+ "success": true,
+ "file_path": "/data0/djx/omelette/.../1.pdf"
+ }
+ ]
+ }
+}
+```
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/crawl/start?priority=high&max_papers=50"
+```
+
+---
+
+## GET /stats
+
+返回项目内下载相关统计。
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "pending": 20,
+ "metadata_only": 5,
+ "pdf_downloaded": 80,
+ "ocr_complete": 60,
+ "indexed": 50,
+ "error": 3,
+ "storage": {
+ "total_mb": 1024,
+ "used_mb": 512
+ }
+ }
+}
+```
+
+- 各状态字段:文献数量
+- `storage`:存储统计(可选,由 CrawlerService 提供)
+
+**Example**
+
+```bash
+curl "http://localhost:8000/api/v1/projects/1/crawl/stats"
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 404 | 项目不存在 |
diff --git a/docs/api/dedup.md b/docs/api/dedup.md
new file mode 100644
index 0000000..a5ca83b
--- /dev/null
+++ b/docs/api/dedup.md
@@ -0,0 +1,229 @@
+# Dedup API
+
+Deduplication module API: DOI exact dedup, title similarity dedup, and LLM-assisted verification.
+
+**Base path:** `/api/v1/projects/{project_id}/dedup`
+
+---
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/run` | Run deduplication pipeline |
+| GET | `/candidates` | List candidate duplicate pairs for manual review |
+| POST | `/verify` | LLM-verify if two papers are duplicates |
+| POST | `/resolve` | Resolve single upload conflict (keep_old / keep_new / merge / skip) |
+| POST | `/auto-resolve` | AI auto-suggest conflict resolution |
+
+---
+
+## POST /run
+
+Run the deduplication pipeline.
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `strategy` | string | `"full"` | Strategy: `doi_only` \| `title_only` \| `full` |
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "stage1_doi_removed": 0,
+ "stage2_title_removed": 0,
+ "stage3_candidates": 5,
+ "total_remaining": 120,
+ "details": {
+ "doi_duplicates": [],
+ "title_duplicates": [],
+ "llm_candidates": []
+ }
+ }
+}
+```
+
+- `strategy=doi_only`: DOI exact dedup only
+- `strategy=title_only`: Title similarity dedup only
+- `strategy=full`: Full 3-stage pipeline (DOI → title → LLM candidates)
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/run?strategy=full"
+```
+
+---
+
+## GET /candidates
+
+List candidate duplicate pairs for manual review: pairs with high title similarity that still need LLM or human confirmation.
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "paper_a_id": 10,
+ "paper_b_id": 11,
+ "similarity": 0.92,
+ "paper_a": { "id": 10, "title": "...", "doi": "..." },
+ "paper_b": { "id": 11, "title": "...", "doi": "..." }
+ }
+ ]
+}
+```
+
+**Example**
+
+```bash
+curl "http://localhost:8000/api/v1/projects/1/dedup/candidates"
+```
+
+---
+
+## POST /verify
+
+Use LLM to determine if two papers are duplicates.
+
+**Query Parameters**
+
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `paper_a_id` | int | Yes | Paper A ID |
+| `paper_b_id` | int | Yes | Paper B ID |
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "is_duplicate": true,
+ "reason": "Same paper, different sources"
+ }
+}
+```
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/verify?paper_a_id=10&paper_b_id=11"
+```
+
+---
+
+## POST /resolve
+
+Resolve a single upload conflict. `conflict_id` format: `{old_paper_id}:{saved_filename}`, provided by the upload endpoint's `conflicts` array.
+
+**Request Body**
+
+```json
+{
+ "conflict_id": "123:uploaded.pdf",
+ "action": "keep_old",
+ "merged_paper": null
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `conflict_id` | string | Yes | Conflict ID, format `old_paper_id:saved_filename` |
+| `action` | string | Yes | `keep_old` \| `keep_new` \| `merge` \| `skip` |
+| `merged_paper` | object | No | Required when `action=merge`, merged metadata |
+
+**Actions**
+
+- `keep_old`: Keep existing paper, discard upload
+- `keep_new`: Use new upload, create new paper
+- `merge`: Merge metadata, create new paper (provide `merged_paper`)
+- `skip`: Use new upload, create new paper (same as keep_new)
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "action": "keep_new",
+ "paper_id": 124,
+ "message": "Created new paper"
+ }
+}
+```
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/resolve" \
+ -H "Content-Type: application/json" \
+ -d '{"conflict_id":"123:paper.pdf","action":"keep_new"}'
+```
+
+---
+
+## POST /auto-resolve
+
+Use LLM to batch-suggest conflict resolution.
+
+**Request Body**
+
+```json
+{
+ "conflict_ids": ["123:file1.pdf", "124:file2.pdf"]
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `conflict_ids` | list[string] | No | List of conflict IDs; an empty list returns an empty result |
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "conflict_id": "123:file1.pdf",
+ "action": "keep_new",
+ "reason": "New version has more complete metadata"
+ },
+ {
+ "conflict_id": "124:file2.pdf",
+ "error": "Paper not found"
+ }
+ ]
+}
+```
+
+Each element is either `{conflict_id, action, reason}` or `{conflict_id, error}`.
+
+**Example**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/auto-resolve" \
+ -H "Content-Type: application/json" \
+ -d '{"conflict_ids":["123:paper.pdf"]}'
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 400 | Invalid `conflict_id` format, `action`, or request body |
+| 404 | Paper not found or PDF file not found |
diff --git a/docs/api/index.md b/docs/api/index.md
index 05345bb..86818bc 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -54,6 +54,14 @@ GET /api/v1/tasks/{task_id}
| [Papers](/api/papers) | `/projects/{id}/papers` |
| [Keywords](/api/keywords) | `/projects/{id}/keywords` |
| [Search](/api/search) | `/projects/{id}/search` |
+| [Dedup](/api/dedup) | `/projects/{id}/dedup` |
+| [OCR](/api/ocr) | `/projects/{id}/ocr` |
+| [Crawler](/api/crawler) | `/projects/{id}/crawl` |
+| [Subscription](/api/subscription) | `/projects/{id}/subscriptions` |
| [RAG](/api/rag) | `/projects/{id}/rag` |
| [Writing](/api/writing) | `/projects/{id}/writing` |
-| Tasks | `/tasks` |
+| [Chat](/api/chat) | `/chat` |
+| [Conversations](/api/conversations) | `/conversations` |
+| [Settings](/api/settings) | `/settings` |
+| [Tasks](/api/tasks) | `/tasks` |
+| [Pipelines](/api/pipelines) | `/pipelines` |
diff --git a/docs/api/keywords.md b/docs/api/keywords.md
index 6a01c0c..f8ae072 100644
--- a/docs/api/keywords.md
+++ b/docs/api/keywords.md
@@ -12,7 +12,7 @@ Base path: `/api/v1/projects/{project_id}/keywords`
| PUT | `/projects/{id}/keywords/{kw_id}` | Update keyword |
| DELETE | `/projects/{id}/keywords/{kw_id}` | Delete keyword |
| POST | `/projects/{id}/keywords/expand` | LLM expand |
-| GET | `/projects/{id}/keywords/search-formula` | Generate formula |
+| GET | `/projects/{id}/keywords/search-formula` | Generate search formula |
## Query Parameters (List)
@@ -31,6 +31,14 @@ Base path: `/api/v1/projects/{project_id}/keywords`
}
```
+## Bulk Create
+
+`POST /projects/{id}/keywords/bulk` — Create multiple keywords at once.
+
+**Request body:** Array of `KeywordCreate` objects.
+
+**Response:** `{ created }` — Number of keywords created.
+
## Expand Request
```json
@@ -41,6 +49,30 @@ Base path: `/api/v1/projects/{project_id}/keywords`
}
```
+## Expand Response
+
+Returns `expanded_terms` as a list of objects:
+
+```json
+{
+ "expanded_terms": [
+ {"term": "self-attention", "term_zh": "自注意力", "relation": "synonym"},
+ {"term": "BERT", "term_zh": "", "relation": "abbreviation"}
+ ],
+ "source": "llm:openai"
+}
+```
+
+- `term` — Expanded term (English)
+- `term_zh` — Chinese translation (optional)
+- `relation` — `synonym`, `abbreviation`, or `related`
+
## Search Formula
-Query param: `database` — `wos`, `scopus`, or `pubmed`
+`GET /projects/{id}/keywords/search-formula?database=wos` — Generate a boolean search formula from project keywords for a specific database.
+
+**Query parameters:**
+
+- `database` — Target database: `wos`, `scopus`, or `pubmed` (default: `wos`)
+
+**Response:** `{ formula, database, keyword_count }`
diff --git a/docs/api/ocr.md b/docs/api/ocr.md
new file mode 100644
index 0000000..e4b0938
--- /dev/null
+++ b/docs/api/ocr.md
@@ -0,0 +1,34 @@
+# OCR API
+
+Base path: `/api/v1/projects/{project_id}/ocr`
+
+## Overview
+
+OCR and text extraction for PDF papers. Uses pdfplumber for native PDFs and PaddleOCR for scanned documents.
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/projects/{id}/ocr/process` | Run OCR on papers |
+| GET | `/projects/{id}/ocr/stats` | OCR statistics |
+
+## Process
+
+`POST /projects/{id}/ocr/process` — Extract text from PDFs via OCR.
+
+**Query parameters:**
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `paper_ids` | list[int] | Optional. Specific paper IDs. If omitted, all `pdf_downloaded` papers are processed. |
+| `force_ocr` | bool | Re-run OCR even if already processed (default: false) |
+| `use_gpu` | bool | Use GPU for PaddleOCR (default: true) |
+
+**Response:** `{ processed, failed, total, message? }`
+
+## Stats
+
+`GET /projects/{id}/ocr/stats` — Return paper counts by status and total chunk count.
+
+**Response:** `{ metadata_only: n, pdf_downloaded: n, ocr_complete: n, indexed: n, error: n, total_chunks: n }`
diff --git a/docs/api/papers.md b/docs/api/papers.md
index 0c52078..ab9288e 100644
--- a/docs/api/papers.md
+++ b/docs/api/papers.md
@@ -9,6 +9,8 @@ Base path: `/api/v1/projects/{project_id}/papers`
| GET | `/projects/{id}/papers` | List papers (paginated) |
| POST | `/projects/{id}/papers` | Create paper |
| POST | `/projects/{id}/papers/bulk` | Bulk import |
+| POST | `/projects/{id}/papers/upload` | Multipart file upload (PDFs) |
+| POST | `/projects/{id}/papers/process` | Trigger processing for papers |
| GET | `/projects/{id}/papers/{paper_id}` | Get paper |
| PUT | `/projects/{id}/papers/{paper_id}` | Update paper |
| DELETE | `/projects/{id}/papers/{paper_id}` | Delete paper |
@@ -39,3 +41,31 @@ Base path: `/api/v1/projects/{project_id}/papers`
"status": "metadata_only"
}
```
+
+## Upload (Multipart)
+
+`POST /projects/{id}/papers/upload` — Upload PDF files. Accepts `multipart/form-data` with `files` (one or more PDFs). Extracts metadata, runs dedup check, and queues processing for new papers.
+
+**Response:** `{ papers, conflicts, total_uploaded }`
+
+- `papers` — List of newly created paper metadata
+- `conflicts` — Dedup conflicts (DOI or title similarity)
+- `total_uploaded` — Count of files successfully uploaded
+
+## Process
+
+`POST /projects/{id}/papers/process` — Trigger OCR + RAG indexing for papers.
+
+**Query parameters:**
+
+- `paper_ids` — Optional list of paper IDs. If omitted, all unprocessed papers in the project are queued.
+
+**Response:** `{ queued, message }`
+
+## Bulk Import Response
+
+`POST /projects/{id}/papers/bulk` returns `{ created, skipped, total }`:
+
+- `created` — Number of papers imported
+- `skipped` — Number skipped (duplicate DOI)
+- `total` — Total papers in request
diff --git a/docs/api/pipelines.md b/docs/api/pipelines.md
new file mode 100644
index 0000000..6a71438
--- /dev/null
+++ b/docs/api/pipelines.md
@@ -0,0 +1,71 @@
+# Pipelines API
+
+Base path: `/api/v1/pipelines`
+
+## Overview
+
+LangGraph pipeline orchestration for search and upload workflows. Pipelines run asynchronously and support HITL (human-in-the-loop) interrupts for conflict resolution.
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/pipelines/search` | Start keyword-search pipeline |
+| POST | `/pipelines/upload` | Start PDF-upload pipeline |
+| GET | `/pipelines/{thread_id}/status` | Get pipeline status |
+| POST | `/pipelines/{thread_id}/resume` | Resume interrupted pipeline |
+| POST | `/pipelines/{thread_id}/cancel` | Cancel running pipeline |
+
+## Search Pipeline
+
+`POST /pipelines/search` — Start search → dedup → crawl → OCR → index pipeline.
+
+**Request body:**
+
+```json
+{
+ "project_id": 1,
+ "query": "transformer attention",
+ "sources": ["semantic_scholar", "openalex"],
+ "max_results": 50
+}
+```
+
+**Response:** `{ thread_id, status, project_id }`
+
+## Upload Pipeline
+
+`POST /pipelines/upload` — Start extract → dedup → OCR → index pipeline for local PDF paths.
+
+**Request body:**
+
+```json
+{
+ "project_id": 1,
+ "pdf_paths": ["/path/to/paper1.pdf", "/path/to/paper2.pdf"]
+}
+```
+
+Paths must be within the configured `PDF_DIR` (see settings).
+
+**Response:** `{ thread_id, status, project_id }`
+
+## Status
+
+`GET /pipelines/{thread_id}/status` — Returns `status` (`running`, `interrupted`, `completed`, `failed`, `cancelled`). When `interrupted`, includes `conflicts` for HITL resolution.
+
+## Resume
+
+`POST /pipelines/{thread_id}/resume` — Resume interrupted pipeline with resolved conflicts.
+
+**Request body:**
+
+```json
+{
+ "resolved_conflicts": []
+}
+```
+
+## Cancel
+
+`POST /pipelines/{thread_id}/cancel` — Cancel a running pipeline.
diff --git a/docs/api/projects.md b/docs/api/projects.md
index e049ab5..7d7b9a1 100644
--- a/docs/api/projects.md
+++ b/docs/api/projects.md
@@ -11,6 +11,8 @@ Base path: `/api/v1/projects`
| GET | `/projects/{id}` | Get project |
| PUT | `/projects/{id}` | Update project |
| DELETE | `/projects/{id}` | Delete project |
+| POST | `/projects/{id}/pipeline/run` | Run full pipeline (crawl → OCR → index) for all pending papers |
+| POST | `/projects/{id}/pipeline/paper/{paper_id}` | Run pipeline for a single paper |
## Query Parameters (List)
diff --git a/docs/api/rag.md b/docs/api/rag.md
index 1187587..5c36b69 100644
--- a/docs/api/rag.md
+++ b/docs/api/rag.md
@@ -8,6 +8,7 @@ Base path: `/api/v1/projects/{project_id}/rag`
|--------|----------|-------------|
| POST | `/projects/{id}/rag/query` | Query knowledge base |
| POST | `/projects/{id}/rag/index` | Build/rebuild index |
+| POST | `/projects/{id}/rag/index/stream` | Build index (SSE streaming progress) |
| GET | `/projects/{id}/rag/stats` | Index statistics |
| DELETE | `/projects/{id}/rag/index` | Delete index |
@@ -15,12 +16,18 @@ Base path: `/api/v1/projects/{project_id}/rag`
```json
{
- "query": "What is attention mechanism?",
+ "question": "What is attention mechanism?",
"top_k": 10,
- "use_reranker": true
+ "use_reranker": true,
+ "include_sources": true
}
```
+- `question` — The question to answer (required)
+- `top_k` — Number of chunks to retrieve (default: 10)
+- `use_reranker` — Apply reranker for relevance (default: true)
+- `include_sources` — Include source chunks in response (default: true)
+
## Query Response
```json
@@ -28,6 +35,25 @@ Base path: `/api/v1/projects/{project_id}/rag`
"answer": "LLM-generated answer with citations",
"sources": [
{"paper_id": 1, "chunk_id": "...", "score": 0.9}
- ]
+ ],
+ "confidence": 0.0
}
```
+
+## Index Stream (SSE)
+
+`POST /projects/{id}/rag/index/stream` — Rebuild the vector index with Server-Sent Events for progress updates.
+
+**Response:** `text/event-stream`
+
+**Event types:**
+
+| Event | Description | data |
+|-------|-------------|------|
+| `progress` | Indexing progress | `{ stage, percent, message? }` |
+| `complete` | Indexing finished | `{ indexed, collection, papers_updated }` |
+| `error` | Error occurred | `{ message }` |
+
+## Delete Index
+
+`DELETE /projects/{id}/rag/index` — Delete the vector index for the project. Returns `ApiResponse[dict]` with deletion result.
diff --git a/docs/api/settings.md b/docs/api/settings.md
new file mode 100644
index 0000000..fee9a33
--- /dev/null
+++ b/docs/api/settings.md
@@ -0,0 +1,261 @@
+# Settings API
+
+Base path: `/api/v1/settings`
+
+## Overview
+
+The Settings API manages application configuration: LLM provider selection, model parameters, API keys for various providers (OpenAI, Anthropic, Aliyun, Volcengine, Ollama), embedding/reranker models, and other system settings. Values are merged from environment variables with DB overrides; API keys are masked in responses.
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/settings` | Get all settings |
+| PUT | `/settings` | Update settings (partial) |
+| GET | `/settings/models` | List available models per provider |
+| POST | `/settings/test-connection` | Test LLM provider connection |
+| GET | `/settings/health` | Health check |
+
+---
+
+## GET /api/v1/settings
+
+**Description:** Return merged settings (DB values override `.env`). API keys are masked (e.g. `sk-12***abcd`).
+
+**Response:** `ApiResponse[SettingsSchema]`
+
+### SettingsSchema
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `llm_provider` | string | Default LLM provider (`openai`, `anthropic`, `aliyun`, `volcengine`, `ollama`, `mock`) |
+| `llm_model` | string | Default model (overrides provider default) |
+| `llm_temperature` | float | Temperature (0.0–2.0) |
+| `llm_max_tokens` | int | Max tokens |
+| `openai_api_key` | string | OpenAI API key (masked) |
+| `openai_model` | string | OpenAI model |
+| `anthropic_api_key` | string | Anthropic API key (masked) |
+| `anthropic_model` | string | Anthropic model |
+| `aliyun_api_key` | string | Aliyun API key (masked) |
+| `aliyun_base_url` | string | Aliyun base URL |
+| `aliyun_model` | string | Aliyun model |
+| `volcengine_api_key` | string | Volcengine API key (masked) |
+| `volcengine_base_url` | string | Volcengine base URL |
+| `volcengine_model` | string | Volcengine model |
+| `ollama_base_url` | string | Ollama base URL |
+| `ollama_model` | string | Ollama model |
+| `embedding_model` | string | Embedding model name |
+| `reranker_model` | string | Reranker model name |
+| `data_dir` | string | Data directory path |
+| `cuda_visible_devices` | string | CUDA device IDs |
+| `semantic_scholar_api_key` | string | Semantic Scholar API key (masked) |
+| `unpaywall_email` | string | Unpaywall email |
+
+### Get Settings Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "llm_provider": "openai",
+ "llm_model": "gpt-4o-mini",
+ "llm_temperature": 0.7,
+ "llm_max_tokens": 4096,
+ "openai_api_key": "sk-12***abcd",
+ "openai_model": "gpt-4o-mini",
+ "anthropic_api_key": "",
+ "anthropic_model": "",
+ "aliyun_api_key": "",
+ "aliyun_base_url": "",
+ "aliyun_model": "",
+ "volcengine_api_key": "",
+ "volcengine_base_url": "",
+ "volcengine_model": "",
+ "ollama_base_url": "http://localhost:11434",
+ "ollama_model": "",
+ "embedding_model": "BAAI/bge-m3",
+ "reranker_model": "",
+ "data_dir": "/data0/djx/omelette",
+ "cuda_visible_devices": "",
+ "semantic_scholar_api_key": "",
+ "unpaywall_email": ""
+ }
+}
+```
+
+---
+
+## PUT /api/v1/settings
+
+**Description:** Update user-configurable settings. Only non-null fields are applied. Masked API keys (containing `***`) are skipped to avoid overwriting secrets.
+
+**Request:** `SettingsUpdateSchema` (partial, all fields optional)
+
+| Field | Type | Constraints |
+|-------|------|-------------|
+| `llm_provider` | string | — |
+| `llm_model` | string | — |
+| `llm_temperature` | float | 0.0–2.0 |
+| `llm_max_tokens` | int | 1–128000 |
+| `openai_api_key` | string | — |
+| `openai_model` | string | — |
+| `anthropic_api_key` | string | — |
+| `anthropic_model` | string | — |
+| `aliyun_api_key` | string | — |
+| `aliyun_base_url` | string | — |
+| `aliyun_model` | string | — |
+| `volcengine_api_key` | string | — |
+| `volcengine_base_url` | string | — |
+| `volcengine_model` | string | — |
+| `ollama_base_url` | string | — |
+| `ollama_model` | string | — |
+
+**Response:** `ApiResponse[SettingsSchema]` (updated merged settings)
+
+### Update Settings Example
+
+```bash
+curl -X PUT "http://localhost:8000/api/v1/settings" \
+ -H "Content-Type: application/json" \
+ -d '{"llm_provider": "openai", "llm_model": "gpt-4o-mini"}'
+```
+
+---
+
+## GET /api/v1/settings/models
+
+**Description:** Return available LLM providers and their model lists.
+
+**Response:** `ApiResponse[list[ProviderModelInfo]]`
+
+### ProviderModelInfo
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `provider` | string | Provider ID |
+| `display_name` | string | Display name |
+| `models` | string[] | List of model IDs |
+| `requires_api_key` | bool | Whether API key is required |
+| `requires_base_url` | bool | Whether base URL is configurable |
+| `default_base_url` | string | Default base URL if applicable |
+
+### List Models Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings/models"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "provider": "openai",
+ "display_name": "OpenAI",
+ "models": ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3-mini"],
+ "requires_api_key": true,
+ "requires_base_url": false,
+ "default_base_url": ""
+ },
+ {
+ "provider": "ollama",
+ "display_name": "Ollama (本地)",
+ "models": ["llama3", "llama3.1", "mistral", "qwen2", "deepseek-r1"],
+ "requires_api_key": false,
+ "requires_base_url": true,
+ "default_base_url": "http://localhost:11434"
+ }
+ ]
+}
+```
+
+---
+
+## POST /api/v1/settings/test-connection
+
+**Description:** Test the current LLM configuration by sending a simple prompt. Uses merged settings from DB (no request body).
+
+**Response:** `ApiResponse[dict]`
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `success` | bool | Whether the test succeeded |
+| `response` | string | First 200 chars of LLM response (on success) |
+| `error` | string | Error message (on failure) |
+
+### Test Connection Example
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/settings/test-connection"
+```
+
+**Success:**
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "success": true,
+ "response": "OK."
+ }
+}
+```
+
+**Failure:**
+```json
+{
+ "code": 500,
+ "message": "Connection test failed",
+ "data": {
+ "success": false,
+ "error": "Invalid API key"
+ }
+}
+```
+
+---
+
+## GET /api/v1/settings/health
+
+**Description:** Simple health check endpoint.
+
+**Response:** `ApiResponse[dict]`
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `status` | string | `"healthy"` |
+| `version` | string | Application version |
+
+### Health Check Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings/health"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "status": "healthy",
+ "version": "0.1.0"
+ }
+}
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 200 | Success |
+| 400 | Bad request (e.g. invalid temperature range) |
+| 422 | Validation error (invalid request body) |
+| 500 | Server error (e.g. connection test failure) |
diff --git a/docs/api/subscription.md b/docs/api/subscription.md
new file mode 100644
index 0000000..57e6e35
--- /dev/null
+++ b/docs/api/subscription.md
@@ -0,0 +1,256 @@
+# Subscription API
+
+Subscription module API for managing incremental literature updates (RSS / API search).
+
+**Base path:** `/api/v1/projects/{project_id}/subscriptions`
+
+---
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/feeds` | Get common academic RSS feed templates |
+| GET | `/` | List project subscriptions |
+| POST | `/` | Create subscription |
+| GET | `/{sub_id}` | Get single subscription |
+| PUT | `/{sub_id}` | Update subscription |
+| DELETE | `/{sub_id}` | Delete subscription |
+| POST | `/{sub_id}/trigger` | Manually trigger subscription update |
+| POST | `/check-rss` | Check RSS feed |
+| POST | `/check-updates` | Check API for updates |
+
+---
+
+## GET /feeds
+
+Return common academic RSS feed templates. The `project_id` path segment is part of the route but is not used by this endpoint.
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "name": "arXiv CS",
+ "url": "https://...",
+ "description": "..."
+ }
+ ]
+}
+```
+
+---
+
+## GET /subscriptions
+
+List all subscriptions for the project.
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "id": 1,
+ "project_id": 1,
+ "name": "arXiv CS.AI",
+ "query": "machine learning",
+ "sources": ["arxiv"],
+ "frequency": "weekly",
+ "max_results": 50,
+ "is_active": true,
+ "last_run_at": "2025-03-10T12:00:00",
+ "total_found": 120,
+ "created_at": "2025-01-01T00:00:00",
+ "updated_at": "2025-03-10T12:00:00"
+ }
+ ]
+}
+```
+
+---
+
+## POST /subscriptions
+
+Create a new subscription.
+
+**Request Body**
+
+```json
+{
+ "name": "arXiv CS.AI",
+ "query": "machine learning",
+ "sources": ["arxiv", "semantic_scholar"],
+ "frequency": "weekly",
+ "max_results": 50
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | Yes | Subscription name |
+| `query` | string | No | Search query, default `""` |
+| `sources` | list[string] | No | Data sources, default `[]` |
+| `frequency` | string | No | `daily` \| `weekly` \| `monthly`, default `weekly` |
+| `max_results` | int | No | Max results per run 1–200, default 50 |
+
+**Response**
+
+```json
+{
+ "code": 201,
+ "message": "Subscription created",
+ "data": {
+ "id": 1,
+ "project_id": 1,
+ "name": "arXiv CS.AI",
+ "query": "machine learning",
+ "sources": ["arxiv", "semantic_scholar"],
+ "frequency": "weekly",
+ "max_results": 50,
+ "is_active": true,
+ "last_run_at": null,
+ "total_found": 0,
+ "created_at": "2025-03-12T00:00:00",
+ "updated_at": "2025-03-12T00:00:00"
+ }
+}
+```
+
+---
+
+## PUT /subscriptions/{sub_id}
+
+Update a subscription.
+
+**Request Body**
+
+```json
+{
+ "name": "arXiv CS.AI (updated)",
+ "query": "deep learning",
+ "is_active": false
+}
+```
+
+All fields optional; only include fields to update.
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": { ... }
+}
+```
+
+---
+
+## DELETE /subscriptions/{sub_id}
+
+Delete a subscription.
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "Subscription deleted",
+ "data": null
+}
+```
+
+---
+
+## POST /subscriptions/{sub_id}/trigger
+
+Manually trigger subscription update (check API for new papers).
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `since_days` | int | 7 | Query last N days, 1–365 |
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "new_papers": 5,
+ "total_checked": 120,
+ "sources_searched": ["arxiv", "semantic_scholar"]
+ }
+}
+```
+
+---
+
+## POST /check-rss
+
+Check an RSS feed (does not require a saved subscription).
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `feed_url` | string | — | RSS/Atom feed URL |
+| `since_days` | int | 7 | Query last N days, 1–365 |
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "entries": [...],
+ "count": 10
+ }
+}
+```
+
+---
+
+## POST /check-updates
+
+Check for new papers via API search (does not require a saved subscription).
+
+**Query Parameters**
+
+| Name | Type | Default | Description |
+|------|------|---------|-------------|
+| `query` | string | `""` | Search query |
+| `sources` | list[string] | null | Data sources |
+| `since_days` | int | 7 | Query last N days, 1–365 |
+| `max_results` | int | 50 | Max results 1–200 |
+
+**Response**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "new_papers": [...],
+ "total_found": 50,
+ "sources_checked": { "arxiv": 30, "semantic_scholar": 20 }
+ }
+}
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 404 | Subscription not found |
diff --git a/docs/api/tasks.md b/docs/api/tasks.md
new file mode 100644
index 0000000..f217a5d
--- /dev/null
+++ b/docs/api/tasks.md
@@ -0,0 +1,163 @@
+# Tasks API
+
+Base path: `/api/v1/tasks`
+
+## Overview
+
+The Tasks API manages background processing jobs: search, dedup, crawl, OCR, index, keyword expansion. Tasks are created by pipelines and other services; this API provides listing, detail retrieval, and cancellation.
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/tasks` | List tasks |
+| GET | `/tasks/{id}` | Get task detail |
+| POST | `/tasks/{id}/cancel` | Cancel a pending or running task |
+
+---
+
+## GET /api/v1/tasks
+
+**Description:** List tasks with optional filters. Results are ordered by `created_at` descending.
+
+**Query Parameters**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `project_id` | int | No | Filter by project ID |
+| `status` | string | No | Filter by status: `pending`, `running`, `completed`, `failed`, `cancelled` |
+| `limit` | int | No | Max results (default: 50) |
+
+**Response:** `ApiResponse[list[TaskSchema]]`
+
+### TaskSchema (list view)
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `id` | int | Task ID |
+| `project_id` | int | Project ID |
+| `task_type` | string | `search`, `dedup`, `crawl`, `ocr`, `index`, `keyword_expand` |
+| `status` | string | `pending`, `running`, `completed`, `failed`, `cancelled` |
+| `progress` | int | Current progress |
+| `total` | int | Total steps |
+| `created_at` | string | ISO 8601 datetime |
+
+### List Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/tasks?project_id=1&status=running&limit=20"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "id": 42,
+ "project_id": 1,
+ "task_type": "search",
+ "status": "running",
+ "progress": 30,
+ "total": 100,
+ "created_at": "2025-03-12T10:00:00"
+ }
+ ]
+}
+```
+
+---
+
+## GET /api/v1/tasks/{id}
+
+**Description:** Get full task detail including params, result, and error message.
+
+**Path Parameters**
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `id` | int | Task ID |
+
+**Response:** `ApiResponse[TaskDetailSchema]`
+
+### TaskDetailSchema
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `id` | int | Task ID |
+| `project_id` | int | Project ID |
+| `task_type` | string | Task type |
+| `status` | string | Task status |
+| `progress` | int | Current progress |
+| `total` | int | Total steps |
+| `params` | object | Input parameters |
+| `result` | object | Output result (when completed) |
+| `error_message` | string | Error message (when failed) |
+| `created_at` | string | ISO 8601 datetime |
+| `started_at` | string | ISO 8601 datetime (nullable) |
+| `completed_at` | string | ISO 8601 datetime (nullable) |
+
+### Detail Example
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/tasks/42"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "id": 42,
+ "project_id": 1,
+ "task_type": "search",
+ "status": "completed",
+ "progress": 100,
+ "total": 100,
+ "params": {"query": "machine learning", "sources": ["semantic_scholar"]},
+ "result": {"papers_found": 15, "imported": 10},
+ "error_message": "",
+ "created_at": "2025-03-12T10:00:00",
+ "started_at": "2025-03-12T10:00:01",
+ "completed_at": "2025-03-12T10:02:30"
+ }
+}
+```
+
+---
+
+## POST /api/v1/tasks/{id}/cancel
+
+**Description:** Cancel a running or pending task. Tasks in `completed`, `failed`, or `cancelled` state cannot be cancelled.
+
+**Path Parameters**
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `id` | int | Task ID |
+
+**Response:** `ApiResponse` (no data)
+
+### Cancel Example
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/tasks/42/cancel"
+```
+
+```json
+{
+ "code": 200,
+ "message": "Task cancelled",
+ "data": null
+}
+```
+
+---
+
+## Error Codes
+
+| Code | Description |
+|------|-------------|
+| 200 | Success |
+| 400 | Cannot cancel task (already completed/failed/cancelled) |
+| 404 | Task not found |
diff --git a/docs/api/writing.md b/docs/api/writing.md
index 8c47c57..50409bf 100644
--- a/docs/api/writing.md
+++ b/docs/api/writing.md
@@ -6,12 +6,37 @@ Base path: `/api/v1/projects/{project_id}/writing`
| Method | Endpoint | Description |
|--------|----------|-------------|
-| POST | `/projects/{id}/writing/assist` | General assistance |
+| POST | `/projects/{id}/writing/assist` | General writing assistance |
| POST | `/projects/{id}/writing/summarize` | Summarize papers |
| POST | `/projects/{id}/writing/citations` | Generate citations |
| POST | `/projects/{id}/writing/review-outline` | Review outline |
| POST | `/projects/{id}/writing/gap-analysis` | Gap analysis |
+## Assist (General)
+
+`POST /projects/{id}/writing/assist` — AI-powered writing assistance covering summarization, citation generation, outline review, and gap analysis, selected via the `task` field.
+
+**Request body:**
+
+```json
+{
+ "task": "summarize",
+ "text": "",
+ "paper_ids": [1, 2],
+ "topic": "Literature Review",
+ "style": "gb_t_7714",
+ "language": "en"
+}
+```
+
+- `task` — `summarize`, `cite`, `review_outline`, or `gap_analysis`
+- `paper_ids` — Paper IDs (for summarize/cite)
+- `topic` — Topic for outline/gap analysis
+- `style` — Citation style (for cite task)
+- `language` — Output language (default: `en`)
+
+**Response:** `{ content, citations, suggestions }`
+
## Summarize Request
```json
@@ -25,8 +50,8 @@ Base path: `/api/v1/projects/{project_id}/writing`
```json
{
"paper_ids": [1, 2],
- "style": "gb7714"
+ "style": "gb_t_7714"
}
```
-Styles: `gb7714`, `apa`, `mla`
+**Citation styles:** `gb_t_7714`, `apa`, `mla`
diff --git a/docs/zh/api/chat.md b/docs/zh/api/chat.md
new file mode 100644
index 0000000..fc2d867
--- /dev/null
+++ b/docs/zh/api/chat.md
@@ -0,0 +1,121 @@
+# Chat API
+
+Chat 模块提供基于 SSE 的流式对话与文本改写接口,支持知识库 RAG 检索、多工具模式及实时流式输出。
+
+**Base path:** `/api/v1/chat`
+
+---
+
+## 1. 流式对话
+
+### POST /api/v1/chat/stream
+
+基于 SSE 的流式对话接口,支持知识库检索、引用标注及多轮对话上下文。
+
+#### 请求体 (ChatStreamRequest)
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `conversation_id` | int | 否 | 对话 ID,续写时传入以保持上下文 |
+| `message` | str | 是 | 用户消息内容(至少 1 字符) |
+| `knowledge_base_ids` | list[int] | 否 | 知识库(项目)ID 列表,用于 RAG 检索 |
+| `model` | str | 否 | 模型标识,空则使用用户设置 |
+| `tool_mode` | str | 否 | 工具模式,默认 `"qa"` |
+
+**tool_mode 可选值:**
+
+| 值 | 说明 |
+|----|------|
+| `qa` | 问答模式:基于上下文回答问题,使用 [1]、[2] 等引用格式 |
+| `citation_lookup` | 引用查找:识别并列出与文本最相关的参考文献 |
+| `review_outline` | 综述提纲:生成结构化文献综述提纲 |
+| `gap_analysis` | 研究缺口分析:识别研究空白与未来方向 |
+
+#### 对话响应格式
+
+SSE 流式响应,`Content-Type: text/event-stream`。
+
+#### 对话 SSE 事件类型
+
+| 事件 | 说明 | data 字段 |
+|------|------|-----------|
+| `message_start` | 消息开始 | `{ message_id }` |
+| `citation` | 引用信息(每个来源一条) | `{ index, paper_id, paper_title, page_number, excerpt, relevance_score, chunk_type, authors, year, doi }` |
+| `text_delta` | 文本增量 | `{ delta }` |
+| `message_end` | 消息结束 | `{ message_id, conversation_id, finish_reason }` |
+| `error` | 错误 | `{ code, message }` |
+
+#### 对话示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/chat/stream" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "message": "什么是注意力机制?",
+ "knowledge_base_ids": [1, 2],
+ "tool_mode": "qa"
+ }'
+```
+
+#### 对话错误码
+
+| code | 说明 |
+|------|------|
+| `stream_error` | 流式处理异常 |
+
+---
+
+## 2. 文本改写
+
+### POST /api/v1/chat/rewrite
+
+基于 SSE 的流式文本改写接口,支持多种风格与自定义提示。
+
+#### 请求体 (RewriteRequest)
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `excerpt` | str | 是 | 待改写文本,**最多 2000 字符** |
+| `style` | str | 是 | 改写风格 |
+| `custom_prompt` | str | 否 | 自定义提示,`style=custom` 时必填 |
+| `source_language` | str | 否 | 源语言,默认 `"auto"` |
+
+**style 可选值:**
+
+| 值 | 说明 |
+|----|------|
+| `simplify` | 通俗化:将学术文本改写为易懂语言 |
+| `academic` | 学术化:改写为正式学术风格 |
+| `translate_en` | 英译:翻译为英文 |
+| `translate_zh` | 中译:翻译为中文 |
+| `custom` | 自定义:使用 `custom_prompt` 作为系统提示 |
+
+#### 改写响应格式
+
+SSE 流式响应,`Content-Type: text/event-stream`。
+
+#### 改写 SSE 事件类型
+
+| 事件 | 说明 | data 字段 |
+|------|------|-----------|
+| `rewrite_delta` | 改写文本增量 | `{ delta }` |
+| `rewrite_end` | 改写完成 | `{ full_text }` |
+| `error` | 错误 | `{ code, message }` |
+
+#### 改写示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/chat/rewrite" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "excerpt": "The attention mechanism allows the model to focus on different parts of the input.",
+ "style": "translate_zh"
+ }'
+```
+
+#### 改写错误码
+
+| code | 说明 |
+|------|------|
+| `timeout` | 改写超时(30 秒) |
+| `rewrite_error` | 改写处理异常 |
diff --git a/docs/zh/api/conversations.md b/docs/zh/api/conversations.md
new file mode 100644
index 0000000..3d5e2dd
--- /dev/null
+++ b/docs/zh/api/conversations.md
@@ -0,0 +1,232 @@
+# Conversations API
+
+Conversations 模块提供对话的 CRUD 接口,支持分页列表、按知识库筛选及消息详情查询。
+
+**Base path:** `/api/v1/conversations`
+
+---
+
+## 端点总览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/conversations` | 分页列表 |
+| POST | `/conversations` | 创建对话 |
+| GET | `/conversations/{id}` | 获取详情(含消息) |
+| PUT | `/conversations/{id}` | 更新对话 |
+| DELETE | `/conversations/{id}` | 删除对话 |
+
+---
+
+## GET /conversations — 列出对话
+
+分页获取对话列表,按更新时间倒序,支持按知识库 ID 筛选。
+
+### 查询参数
+
+| 参数 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `page` | int | 否 | 页码,默认 1 |
+| `page_size` | int | 否 | 每页条数,默认 20 |
+| `knowledge_base_id` | int | 否 | 仅返回包含该知识库的对话 |
+
+### 列表响应格式
+
+`ApiResponse[PaginatedData[ConversationListSchema]]`
+
+**ConversationListSchema 字段:**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+| `title` | str | 标题 |
+| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 |
+| `model` | str | 模型标识 |
+| `tool_mode` | str | 工具模式,默认 `"qa"` |
+| `created_at` | datetime | 创建时间 |
+| `updated_at` | datetime | 更新时间 |
+| `message_count` | int | 消息数量 |
+| `last_message_preview` | str | 最后一条消息预览(最多 100 字符) |
+
+**PaginatedData 结构:**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "items": [...],
+ "total": 42,
+ "page": 1,
+ "page_size": 20,
+ "total_pages": 3
+ }
+}
+```
+
+### 列表示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/conversations?page=1&page_size=20"
+curl -X GET "http://localhost:8000/api/v1/conversations?knowledge_base_id=1"
+```
+
+---
+
+## POST /conversations — 创建对话
+
+创建新对话。
+
+### 创建请求体
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `title` | str | 否 | 标题,默认 `"新对话"` |
+| `knowledge_base_ids` | list[int] | 否 | 知识库 ID 列表 |
+| `model` | str | 否 | 模型标识 |
+| `tool_mode` | str | 否 | 工具模式,默认 `"qa"` |
+
+### 创建响应格式
+
+`ApiResponse[ConversationSchema]`,包含完整对话及空 `messages` 数组。
+
+### 创建示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/conversations" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "title": "文献综述讨论",
+ "knowledge_base_ids": [1, 2],
+ "tool_mode": "review_outline"
+ }'
+```
+
+---
+
+## GET /conversations/{id} — 获取对话详情
+
+获取单个对话及其全部消息。
+
+### 详情路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 详情响应格式
+
+`ApiResponse[ConversationSchema]`
+
+**ConversationSchema 字段:**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+| `title` | str | 标题 |
+| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 |
+| `model` | str | 模型标识 |
+| `tool_mode` | str | 工具模式 |
+| `created_at` | datetime | 创建时间 |
+| `updated_at` | datetime | 更新时间 |
+| `messages` | list[MessageSchema] | 消息列表 |
+
+**MessageSchema 字段:**
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 消息 ID |
+| `conversation_id` | int | 对话 ID |
+| `role` | str | 角色:`user` / `assistant` |
+| `content` | str | 内容 |
+| `citations` | list[dict] \| null | 引用列表(assistant 消息) |
+| `created_at` | datetime | 创建时间 |
+
+### 详情示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/conversations/1"
+```
+
+### 详情错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
+
+---
+
+## PUT /conversations/{id} — 更新对话
+
+更新对话标题或设置。
+
+### 更新路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 更新请求体
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `title` | str | 否 | 新标题 |
+| `model` | str | 否 | 新模型 |
+| `tool_mode` | str | 否 | 新工具模式 |
+
+仅传入需要更新的字段。
+
+### 更新响应格式
+
+`ApiResponse[ConversationSchema]`,包含更新后的完整对话及消息。
+
+### 更新示例
+
+```bash
+curl -X PUT "http://localhost:8000/api/v1/conversations/1" \
+ -H "Content-Type: application/json" \
+ -d '{"title": "新标题"}'
+```
+
+### 更新错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
+
+---
+
+## DELETE /conversations/{id} — 删除对话
+
+删除对话及其全部消息(级联删除)。
+
+### 删除路径参数
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 对话 ID |
+
+### 删除响应格式
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "deleted": true,
+ "id": 1
+ }
+}
+```
+
+### 删除示例
+
+```bash
+curl -X DELETE "http://localhost:8000/api/v1/conversations/1"
+```
+
+### 删除错误码
+
+| HTTP 状态 | 说明 |
+|-----------|------|
+| 404 | 对话不存在 |
diff --git a/docs/zh/api/crawler.md b/docs/zh/api/crawler.md
new file mode 100644
index 0000000..2273361
--- /dev/null
+++ b/docs/zh/api/crawler.md
@@ -0,0 +1,20 @@
+# Crawler API
+
+路径:`/api/v1/projects/{project_id}/crawl`
+
+## 端点
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | /projects/{id}/crawl/start | 启动 PDF 下载 |
+| GET | /projects/{id}/crawl/stats | 下载统计 |
+
+## POST /start
+
+对项目内待下载文献启动 PDF 下载(Unpaywall 等多源回退)。仅处理 `pending` 或 `metadata_only` 状态文献。
+
+**查询参数:** `priority`(high/low)、`max_papers`(默认 50)
+
+## GET /stats
+
+返回项目内各状态文献数量及存储统计。
diff --git a/docs/zh/api/dedup.md b/docs/zh/api/dedup.md
new file mode 100644
index 0000000..b3a2d27
--- /dev/null
+++ b/docs/zh/api/dedup.md
@@ -0,0 +1,229 @@
+# 去重 API
+
+去重模块 API,支持 DOI 精确去重、标题相似度去重及 LLM 辅助验证。
+
+**基础路径:** `/api/v1/projects/{project_id}/dedup`
+
+---
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | `/run` | 执行去重流程 |
+| GET | `/candidates` | 列出待人工审核的候选重复对 |
+| POST | `/verify` | 使用 LLM 验证两个文献是否为重复 |
+| POST | `/resolve` | 解决单条上传冲突(keep_old / keep_new / merge / skip) |
+| POST | `/auto-resolve` | AI 自动建议冲突解决方式 |
+
+---
+
+## POST /run
+
+执行去重流水线。
+
+**查询参数**
+
+| 参数名 | 类型 | 默认值 | 说明 |
+|--------|------|--------|------|
+| `strategy` | string | `"full"` | 策略:`doi_only` \| `title_only` \| `full` |
+
+**响应**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "stage1_doi_removed": 0,
+ "stage2_title_removed": 0,
+ "stage3_candidates": 5,
+ "total_remaining": 120,
+ "details": {
+ "doi_duplicates": [],
+ "title_duplicates": [],
+ "llm_candidates": []
+ }
+ }
+}
+```
+
+- `strategy=doi_only`:仅 DOI 精确去重
+- `strategy=title_only`:仅标题相似度去重
+- `strategy=full`:完整三阶段(DOI → 标题 → LLM 候选)
+
+**示例**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/run?strategy=full"
+```
+
+---
+
+## GET /candidates
+
+列出待人工审核的候选重复对(标题相似度较高,需 LLM 或人工确认)。
+
+**响应**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "paper_a_id": 10,
+ "paper_b_id": 11,
+ "similarity": 0.92,
+ "paper_a": { "id": 10, "title": "...", "doi": "..." },
+ "paper_b": { "id": 11, "title": "...", "doi": "..." }
+ }
+ ]
+}
+```
+
+**示例**
+
+```bash
+curl "http://localhost:8000/api/v1/projects/1/dedup/candidates"
+```
+
+---
+
+## POST /verify
+
+使用 LLM 判断两个文献是否为重复。
+
+**查询参数**
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `paper_a_id` | int | 是 | 文献 A ID |
+| `paper_b_id` | int | 是 | 文献 B ID |
+
+**响应**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "is_duplicate": true,
+ "reason": "Same paper, different sources"
+ }
+}
+```
+
+**示例**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/verify?paper_a_id=10&paper_b_id=11"
+```
+
+---
+
+## POST /resolve
+
+解决单条上传冲突。`conflict_id` 格式:`{old_paper_id}:{saved_filename}`,由上传接口返回的 `conflicts` 提供。
+
+**请求体**
+
+```json
+{
+ "conflict_id": "123:uploaded.pdf",
+ "action": "keep_old",
+ "merged_paper": null
+}
+```
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `conflict_id` | string | 是 | 冲突 ID,格式 `old_paper_id:saved_filename` |
+| `action` | string | 是 | `keep_old` \| `keep_new` \| `merge` \| `skip` |
+| `merged_paper` | object | 否 | 仅当 `action=merge` 时提供,合并后的元数据 |
+
+**操作说明**
+
+- `keep_old`:保留现有文献,丢弃上传
+- `keep_new`:以新上传为准,创建新文献
+- `merge`:合并元数据,创建新文献(需提供 `merged_paper`)
+- `skip`:以新上传为准,创建新文献(与 keep_new 行为相同)
+
+**响应**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "action": "keep_new",
+ "paper_id": 124,
+ "message": "Created new paper"
+ }
+}
+```
+
+**示例**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/resolve" \
+ -H "Content-Type: application/json" \
+ -d '{"conflict_id":"123:paper.pdf","action":"keep_new"}'
+```
+
+---
+
+## POST /auto-resolve
+
+使用 LLM 批量建议冲突解决方式。
+
+**请求体**
+
+```json
+{
+ "conflict_ids": ["123:file1.pdf", "124:file2.pdf"]
+}
+```
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `conflict_ids` | list[string] | 否 | 冲突 ID 列表;为空则返回空列表 |
+
+**响应**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "conflict_id": "123:file1.pdf",
+ "action": "keep_new",
+ "reason": "New version has more complete metadata"
+ },
+ {
+ "conflict_id": "124:file2.pdf",
+ "error": "Paper not found"
+ }
+ ]
+}
+```
+
+每个元素为 `{conflict_id, action, reason}` 或 `{conflict_id, error}`。
+
+**示例**
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/auto-resolve" \
+ -H "Content-Type: application/json" \
+ -d '{"conflict_ids":["123:paper.pdf"]}'
+```
+
+---
+
+## 错误码
+
+| 状态码 | 说明 |
+|--------|------|
+| 400 | 无效的 `conflict_id` 格式、`action` 或请求体 |
+| 404 | 文献不存在或 PDF 文件不存在 |
diff --git a/docs/zh/api/index.md b/docs/zh/api/index.md
index c79a885..f9cc1bd 100644
--- a/docs/zh/api/index.md
+++ b/docs/zh/api/index.md
@@ -50,6 +50,14 @@ GET /api/v1/tasks/{task_id}
| [Papers](/zh/api/papers) | /projects/{id}/papers |
| [Keywords](/zh/api/keywords) | /projects/{id}/keywords |
| [Search](/zh/api/search) | /projects/{id}/search |
+| [Dedup](/zh/api/dedup) | /projects/{id}/dedup |
+| [OCR](/zh/api/ocr) | /projects/{id}/ocr |
+| [Crawler](/zh/api/crawler) | /projects/{id}/crawl |
+| [Subscription](/zh/api/subscription) | /projects/{id}/subscriptions |
| [RAG](/zh/api/rag) | /projects/{id}/rag |
| [Writing](/zh/api/writing) | /projects/{id}/writing |
-| Tasks | /tasks |
+| [Chat](/zh/api/chat) | /chat |
+| [Conversations](/zh/api/conversations) | /conversations |
+| [Settings](/zh/api/settings) | /settings |
+| [Tasks](/zh/api/tasks) | /tasks |
+| [Pipelines](/zh/api/pipelines) | /pipelines |
diff --git a/docs/zh/api/keywords.md b/docs/zh/api/keywords.md
index ae5071b..2375f82 100644
--- a/docs/zh/api/keywords.md
+++ b/docs/zh/api/keywords.md
@@ -14,6 +14,10 @@
| POST | /projects/{id}/keywords/expand | LLM 扩展 |
| GET | /projects/{id}/keywords/search-formula | 检索公式 |
+## 扩展响应
+
+`expanded_terms` 为对象列表:`{ term, term_zh, relation }`,`relation` 为 `synonym`、`abbreviation` 或 `related`。
+
## 检索公式
-参数 `database`:wos、scopus、pubmed
+`GET /projects/{id}/keywords/search-formula?database=wos` — 查询参数 `database`:`wos`、`scopus`、`pubmed`(默认 `wos`)。
diff --git a/docs/zh/api/ocr.md b/docs/zh/api/ocr.md
new file mode 100644
index 0000000..da9011e
--- /dev/null
+++ b/docs/zh/api/ocr.md
@@ -0,0 +1,93 @@
+# OCR API
+
+OCR 模块 API,用于对已下载 PDF 进行文本提取与分块。
+
+**基础路径:** `/api/v1/projects/{project_id}/ocr`
+
+---
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | `/process` | 对指定或待处理文献执行 OCR |
+| GET | `/stats` | 获取 OCR 统计信息 |
+
+---
+
+## POST /process
+
+对项目内文献执行 OCR 文本提取。支持 pdfplumber(原生)与 PaddleOCR(扫描版)。
+
+**查询参数**
+
+| 参数名 | 类型 | 默认值 | 说明 |
+|--------|------|--------|------|
+| `paper_ids` | list[int] | null | 指定文献 ID 列表;为空则处理所有 `pdf_downloaded` 状态文献 |
+| `force_ocr` | bool | false | 是否强制重新 OCR(覆盖已有结果) |
+| `use_gpu` | bool | true | 是否使用 GPU(PaddleOCR) |
+
+**响应**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "processed": 5,
+ "failed": 0,
+ "total": 5
+ }
+}
+```
+
+**示例**
+
+```bash
+# 处理所有待 OCR 文献
+curl -X POST "http://localhost:8000/api/v1/projects/1/ocr/process"
+
+# 处理指定文献并强制重做
+curl -X POST "http://localhost:8000/api/v1/projects/1/ocr/process?paper_ids=1&paper_ids=2&force_ocr=true"
+```
+
+---
+
+## GET /stats
+
+返回项目内 OCR 相关统计。
+
+**响应**
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "pending": 10,
+ "metadata_only": 5,
+ "pdf_downloaded": 3,
+ "ocr_complete": 80,
+ "indexed": 50,
+ "error": 2,
+ "total_chunks": 1200
+ }
+}
+```
+
+- `pending`, `metadata_only`, `pdf_downloaded`, `ocr_complete`, `indexed`, `error`:各状态文献数量
+- `total_chunks`:项目内分块总数
+
+**示例**
+
+```bash
+curl "http://localhost:8000/api/v1/projects/1/ocr/stats"
+```
+
+---
+
+## 错误码
+
+| 状态码 | 说明 |
+|--------|------|
+| 404 | 项目不存在 |
diff --git a/docs/zh/api/papers.md b/docs/zh/api/papers.md
index dcd0555..c7602e0 100644
--- a/docs/zh/api/papers.md
+++ b/docs/zh/api/papers.md
@@ -9,6 +9,8 @@
| GET | /projects/{id}/papers | 列表(分页) |
| POST | /projects/{id}/papers | 创建 |
| POST | /projects/{id}/papers/bulk | 批量导入 |
+| POST | /projects/{id}/papers/upload | 多文件上传(PDF) |
+| POST | /projects/{id}/papers/process | 触发论文处理 |
| GET | /projects/{id}/papers/{paper_id} | 获取 |
| PUT | /projects/{id}/papers/{paper_id} | 更新 |
| DELETE | /projects/{id}/papers/{paper_id} | 删除 |
@@ -20,3 +22,15 @@
- `year` — 年份过滤
- `q` — 标题/摘要搜索
- `sort_by`, `order` — 排序
+
+## 上传
+
+`POST /projects/{id}/papers/upload` — 多文件上传 PDF,返回 `{ papers, conflicts, total_uploaded }`。
+
+## 处理
+
+`POST /projects/{id}/papers/process` — 触发 OCR + RAG 索引。可选查询参数 `paper_ids`,省略则处理全部待处理论文。
+
+## 批量导入响应
+
+`POST /projects/{id}/papers/bulk` 返回 `{ created, skipped, total }`。
diff --git a/docs/zh/api/pipelines.md b/docs/zh/api/pipelines.md
new file mode 100644
index 0000000..89c1f35
--- /dev/null
+++ b/docs/zh/api/pipelines.md
@@ -0,0 +1,286 @@
+# 流水线 API
+
+基础路径:`/api/v1/pipelines`
+
+## 简介
+
+流水线 API 用于编排 LangGraph 工作流:关键词检索(search → dedup → crawl → OCR → index)和 PDF 上传(extract → dedup → OCR → index)。流水线异步执行,支持 HITL(人机协同)中断以处理去重冲突。使用 `thread_id` 轮询状态、在 HITL 后恢复或取消流水线。
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | `/pipelines/search` | 运行检索流水线(支持 HITL) |
+| POST | `/pipelines/upload` | 运行上传流水线 |
+| GET | `/pipelines/{thread_id}/status` | 获取流水线状态 |
+| POST | `/pipelines/{thread_id}/resume` | 恢复 HITL 中断的流水线 |
+| POST | `/pipelines/{thread_id}/cancel` | 取消流水线 |
+
+---
+
+## POST /api/v1/pipelines/search
+
+**说明:** 启动关键词检索流水线:search → dedup → crawl → OCR → index。发现去重冲突时可能中断以等待 HITL 处理。
+
+**请求体:** `SearchPipelineRequest`
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `project_id` | int | 是 | 项目 ID |
+| `query` | string | 否 | 检索词(默认:`""`) |
+| `sources` | string[] | 否 | 检索源(如 `["semantic_scholar", "openalex"]`) |
+| `max_results` | int | 否 | 最大结果数(1–200,默认:50) |
+
+**响应:** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID(如 `search_a1b2c3d4e5f6`) |
+| `status` | string | `running` |
+| `project_id` | int | 项目 ID |
+
+### 检索流水线示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/pipelines/search" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "project_id": 1,
+ "query": "transformer attention",
+ "sources": ["semantic_scholar"],
+ "max_results": 30
+ }'
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "thread_id": "search_a1b2c3d4e5f6",
+ "status": "running",
+ "project_id": 1
+ }
+}
+```
+
+---
+
+## POST /api/v1/pipelines/upload
+
+**说明:** 启动 PDF 上传流水线:提取元数据 → dedup → OCR → index。接受允许目录内的本地文件路径。
+
+**请求体:** `UploadPipelineRequest`
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `project_id` | int | 是 | 项目 ID |
+| `pdf_paths` | string[] | 是 | PDF 文件的绝对路径(需在配置的 `pdf_dir` 下) |
+
+**响应:** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID(如 `upload_x1y2z3a4b5c6`) |
+| `status` | string | `running` |
+| `project_id` | int | 项目 ID |
+
+### 上传流水线示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/pipelines/upload" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "project_id": 1,
+ "pdf_paths": [
+ "/data0/djx/omelette/pdfs/paper1.pdf",
+ "/data0/djx/omelette/pdfs/paper2.pdf"
+ ]
+ }'
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "thread_id": "upload_x1y2z3a4b5c6",
+ "status": "running",
+ "project_id": 1
+ }
+}
+```
+
+---
+
+## GET /api/v1/pipelines/{thread_id}/status
+
+**说明:** 获取流水线执行状态。当 `status` 为 `interrupted` 时,包含 `conflicts` 用于 HITL 处理。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID |
+
+**响应:** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 线程 ID |
+| `status` | string | `running`、`interrupted`、`completed`、`failed`、`cancelled` |
+| `stage` | string | 当前阶段(若可用) |
+| `progress` | int | 进度 0–100 |
+| `conflicts` | object[] | 去重冲突(`interrupted` 时) |
+| `interrupted_at` | string[] | 中断节点 ID(`interrupted` 时) |
+| `result` | object | 最终结果(`completed` 时) |
+| `error` | string | 错误信息(`failed` 时) |
+
+### 状态查询示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/status"
+```
+
+**运行中:**
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "thread_id": "search_a1b2c3d4e5f6",
+ "status": "running"
+ }
+}
+```
+
+**HITL 中断:**
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "thread_id": "search_a1b2c3d4e5f6",
+ "status": "interrupted",
+ "conflicts": [
+ {
+ "existing": {"id": 1, "title": "Paper A", "doi": "10.1234/abc"},
+ "new": {"title": "Paper A (preprint)", "doi": "10.1234/abc"}
+ }
+ ],
+ "stage": "dedup",
+ "progress": 45,
+ "interrupted_at": ["dedup_resolve"]
+ }
+}
+```
+
+**已完成:**
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "thread_id": "search_a1b2c3d4e5f6",
+ "status": "completed",
+ "stage": "completed",
+ "progress": 100,
+ "result": {"papers_imported": 12}
+ }
+}
+```
+
+---
+
+## POST /api/v1/pipelines/{thread_id}/resume
+
+**说明:** 使用已解决的冲突恢复 HITL 中断的流水线。仅在 `status` 为 `interrupted` 时有效。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID |
+
+**请求体:** `ResumeRequest`
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `resolved_conflicts` | object[] | 否 | 已解决的冲突决策(默认:`[]`) |
+
+**响应:** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 线程 ID |
+| `status` | string | `running` |
+
+### 恢复流水线示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/resume" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "resolved_conflicts": [
+ {"conflict_id": 0, "action": "keep_existing"},
+ {"conflict_id": 1, "action": "import_new"}
+ ]
+ }'
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "thread_id": "search_a1b2c3d4e5f6",
+ "status": "running"
+ }
+}
+```
+
+---
+
+## POST /api/v1/pipelines/{thread_id}/cancel
+
+**说明:** 取消运行中或已中断的流水线。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 流水线线程 ID |
+
+**响应:** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `thread_id` | string | 线程 ID |
+| `status` | string | `cancelled` |
+
+### 取消流水线示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/cancel"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "thread_id": "search_a1b2c3d4e5f6",
+ "status": "cancelled"
+ }
+}
+```
+
+---
+
+## 错误码
+
+| 错误码 | 说明 |
+|--------|------|
+| 200 | 成功 |
+| 400 | 请求错误(如路径不在允许目录内、流水线未处于中断状态) |
+| 404 | 流水线不存在(thread_id 未知或已完成且已清理) |
diff --git a/docs/zh/api/projects.md b/docs/zh/api/projects.md
index 6e579d1..e582700 100644
--- a/docs/zh/api/projects.md
+++ b/docs/zh/api/projects.md
@@ -11,6 +11,8 @@
| GET | /projects/{id} | 获取 |
| PUT | /projects/{id} | 更新 |
| DELETE | /projects/{id} | 删除 |
+| POST | /projects/{id}/pipeline/run | 运行完整流程(爬取→OCR→索引) |
+| POST | /projects/{id}/pipeline/paper/{paper_id} | 对单篇论文运行流程 |
## 请求体(创建/更新)
diff --git a/docs/zh/api/rag.md b/docs/zh/api/rag.md
index 61167cc..3064734 100644
--- a/docs/zh/api/rag.md
+++ b/docs/zh/api/rag.md
@@ -6,7 +6,30 @@
| 方法 | 路径 | 说明 |
|------|------|------|
-| POST | /projects/{id}/rag/query | 查询 |
-| POST | /projects/{id}/rag/index | 构建索引 |
-| GET | /projects/{id}/rag/stats | 统计 |
+| POST | /projects/{id}/rag/query | 查询知识库 |
+| POST | /projects/{id}/rag/index | 构建/重建索引 |
+| POST | /projects/{id}/rag/index/stream | 构建索引(SSE 流式进度) |
+| GET | /projects/{id}/rag/stats | 索引统计 |
| DELETE | /projects/{id}/rag/index | 删除索引 |
+
+## 查询请求
+
+```json
+{
+ "question": "什么是注意力机制?",
+ "top_k": 10,
+ "use_reranker": true,
+ "include_sources": true
+}
+```
+
+- `question` — 待回答的问题(必填)
+- `top_k` — 检索块数量(默认:10)
+- `use_reranker` — 是否使用重排序(默认:true)
+- `include_sources` — 是否包含来源(默认:true)
+
+## 索引流式接口
+
+`POST /projects/{id}/rag/index/stream` — 通过 SSE 流式重建向量索引,实时推送进度。
+
+**事件类型:** `progress`、`complete`、`error`
diff --git a/docs/zh/api/settings.md b/docs/zh/api/settings.md
new file mode 100644
index 0000000..e5a5f56
--- /dev/null
+++ b/docs/zh/api/settings.md
@@ -0,0 +1,261 @@
+# 设置 API
+
+基础路径:`/api/v1/settings`
+
+## 简介
+
+设置 API 用于管理应用配置,包括 LLM 提供商选择、模型参数、各提供商(OpenAI、Anthropic、阿里云、火山引擎、Ollama)的 API 密钥、嵌入/重排序模型以及其他系统设置。配置值由环境变量与数据库中的覆盖值合并而成(数据库优先);API 密钥在响应中会被脱敏显示。
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/settings` | 获取全部设置 |
+| PUT | `/settings` | 更新设置(部分更新) |
+| GET | `/settings/models` | 按提供商列出可用模型 |
+| POST | `/settings/test-connection` | 测试 LLM 提供商连接 |
+| GET | `/settings/health` | 健康检查 |
+
+---
+
+## GET /api/v1/settings
+
+**说明:** 返回合并后的设置(数据库覆盖 .env)。API 密钥会被脱敏(如 `sk-12***abcd`)。
+
+**响应:** `ApiResponse[SettingsSchema]`
+
+### SettingsSchema 字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `llm_provider` | string | 默认 LLM 提供商(`openai`、`anthropic`、`aliyun`、`volcengine`、`ollama`、`mock`) |
+| `llm_model` | string | 默认模型(覆盖提供商默认值) |
+| `llm_temperature` | float | 温度(0.0–2.0) |
+| `llm_max_tokens` | int | 最大 token 数 |
+| `openai_api_key` | string | OpenAI API 密钥(脱敏) |
+| `openai_model` | string | OpenAI 模型 |
+| `anthropic_api_key` | string | Anthropic API 密钥(脱敏) |
+| `anthropic_model` | string | Anthropic 模型 |
+| `aliyun_api_key` | string | 阿里云 API 密钥(脱敏) |
+| `aliyun_base_url` | string | 阿里云 base URL |
+| `aliyun_model` | string | 阿里云模型 |
+| `volcengine_api_key` | string | 火山引擎 API 密钥(脱敏) |
+| `volcengine_base_url` | string | 火山引擎 base URL |
+| `volcengine_model` | string | 火山引擎模型 |
+| `ollama_base_url` | string | Ollama base URL |
+| `ollama_model` | string | Ollama 模型 |
+| `embedding_model` | string | 嵌入模型名称 |
+| `reranker_model` | string | 重排序模型名称 |
+| `data_dir` | string | 数据目录路径 |
+| `cuda_visible_devices` | string | CUDA 设备 ID |
+| `semantic_scholar_api_key` | string | Semantic Scholar API 密钥(脱敏) |
+| `unpaywall_email` | string | Unpaywall 邮箱 |
+
+### 获取设置示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "llm_provider": "openai",
+ "llm_model": "gpt-4o-mini",
+ "llm_temperature": 0.7,
+ "llm_max_tokens": 4096,
+ "openai_api_key": "sk-12***abcd",
+ "openai_model": "gpt-4o-mini",
+ "anthropic_api_key": "",
+ "anthropic_model": "",
+ "aliyun_api_key": "",
+ "aliyun_base_url": "",
+ "aliyun_model": "",
+ "volcengine_api_key": "",
+ "volcengine_base_url": "",
+ "volcengine_model": "",
+ "ollama_base_url": "http://localhost:11434",
+ "ollama_model": "",
+ "embedding_model": "BAAI/bge-m3",
+ "reranker_model": "",
+ "data_dir": "/data0/djx/omelette",
+ "cuda_visible_devices": "",
+ "semantic_scholar_api_key": "",
+ "unpaywall_email": ""
+ }
+}
+```
+
+---
+
+## PUT /api/v1/settings
+
+**说明:** 更新用户可配置的设置。仅非空字段会被应用。包含 `***` 的脱敏 API 密钥会被跳过,避免覆盖真实密钥。
+
+**请求体:** `SettingsUpdateSchema`(部分更新,所有字段可选)
+
+| 字段 | 类型 | 约束 |
+|------|------|------|
+| `llm_provider` | string | — |
+| `llm_model` | string | — |
+| `llm_temperature` | float | 0.0–2.0 |
+| `llm_max_tokens` | int | 1–128000 |
+| `openai_api_key` | string | — |
+| `openai_model` | string | — |
+| `anthropic_api_key` | string | — |
+| `anthropic_model` | string | — |
+| `aliyun_api_key` | string | — |
+| `aliyun_base_url` | string | — |
+| `aliyun_model` | string | — |
+| `volcengine_api_key` | string | — |
+| `volcengine_base_url` | string | — |
+| `volcengine_model` | string | — |
+| `ollama_base_url` | string | — |
+| `ollama_model` | string | — |
+
+**响应:** `ApiResponse[SettingsSchema]`(更新后的合并设置)
+
+### 更新设置示例
+
+```bash
+curl -X PUT "http://localhost:8000/api/v1/settings" \
+ -H "Content-Type: application/json" \
+ -d '{"llm_provider": "openai", "llm_model": "gpt-4o-mini"}'
+```
+
+---
+
+## GET /api/v1/settings/models
+
+**说明:** 返回可用的 LLM 提供商及其模型列表。
+
+**响应:** `ApiResponse[list[ProviderModelInfo]]`
+
+### ProviderModelInfo 字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `provider` | string | 提供商 ID |
+| `display_name` | string | 显示名称 |
+| `models` | string[] | 模型 ID 列表 |
+| `requires_api_key` | bool | 是否需要 API 密钥 |
+| `requires_base_url` | bool | 是否可配置 base URL |
+| `default_base_url` | string | 默认 base URL(若适用) |
+
+### 模型列表示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings/models"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "provider": "openai",
+ "display_name": "OpenAI",
+ "models": ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3-mini"],
+ "requires_api_key": true,
+ "requires_base_url": false,
+ "default_base_url": ""
+ },
+ {
+ "provider": "ollama",
+ "display_name": "Ollama (本地)",
+ "models": ["llama3", "llama3.1", "mistral", "qwen2", "deepseek-r1"],
+ "requires_api_key": false,
+ "requires_base_url": true,
+ "default_base_url": "http://localhost:11434"
+ }
+ ]
+}
+```
+
+---
+
+## POST /api/v1/settings/test-connection
+
+**说明:** 使用当前 LLM 配置向提供商发送一条简单提示,以测试连接是否可用。该接口无请求体,使用的是合并后的配置(数据库覆盖 .env)。
+
+**响应:** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `success` | bool | 测试是否成功 |
+| `response` | string | LLM 响应的前 200 个字符(仅成功时返回) |
+| `error` | string | 错误信息(仅失败时返回) |
+
+### 连接测试示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/settings/test-connection"
+```
+
+**成功:**
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "success": true,
+ "response": "OK."
+ }
+}
+```
+
+**失败:**
+```json
+{
+ "code": 500,
+ "message": "Connection test failed",
+ "data": {
+ "success": false,
+ "error": "Invalid API key"
+ }
+}
+```
+
+---
+
+## GET /api/v1/settings/health
+
+**说明:** 简单健康检查端点。
+
+**响应:** `ApiResponse[dict]`
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `status` | string | `"healthy"` |
+| `version` | string | 应用版本 |
+
+### 健康检查示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/settings/health"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "status": "healthy",
+ "version": "0.1.0"
+ }
+}
+```
+
+---
+
+## 错误码
+
+| 错误码 | 说明 |
+|--------|------|
+| 200 | 成功 |
+| 400 | 请求错误(如温度范围无效) |
+| 422 | 校验错误(请求体无效) |
+| 500 | 服务端错误(如连接测试失败) |
diff --git a/docs/zh/api/subscription.md b/docs/zh/api/subscription.md
new file mode 100644
index 0000000..80778db
--- /dev/null
+++ b/docs/zh/api/subscription.md
@@ -0,0 +1,21 @@
+# 订阅 API
+
+路径:`/api/v1/projects/{project_id}/subscriptions`
+
+## 端点
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | /feeds | 常用学术 RSS 模板 |
+| GET | / | 列表 |
+| POST | / | 创建 |
+| GET | /{sub_id} | 获取 |
+| PUT | /{sub_id} | 更新 |
+| DELETE | /{sub_id} | 删除 |
+| POST | /{sub_id}/trigger | 手动触发更新 |
+| POST | /check-rss | 检查 RSS |
+| POST | /check-updates | 检查 API 更新 |
+
+## 说明
+
+订阅模块用于增量文献更新(RSS / API 检索)。创建订阅后可定期或手动触发,检查新文献并导入项目。
diff --git a/docs/zh/api/tasks.md b/docs/zh/api/tasks.md
new file mode 100644
index 0000000..9403af8
--- /dev/null
+++ b/docs/zh/api/tasks.md
@@ -0,0 +1,163 @@
+# 任务 API
+
+基础路径:`/api/v1/tasks`
+
+## 简介
+
+任务 API 用于管理后台处理任务,任务类型包括 `search`、`dedup`、`crawl`、`ocr`、`index`、`keyword_expand`。任务由流水线及其他服务创建;本 API 提供列表查询、详情查询和取消功能。
+
+## 端点概览
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/tasks` | 任务列表 |
+| GET | `/tasks/{id}` | 任务详情 |
+| POST | `/tasks/{id}/cancel` | 取消运行中的任务 |
+
+---
+
+## GET /api/v1/tasks
+
+**说明:** 列出任务,支持可选过滤。结果按 `created_at` 降序排列。
+
+**查询参数**
+
+| 参数 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `project_id` | int | 否 | 按项目 ID 过滤 |
+| `status` | string | 否 | 按状态过滤:`pending`、`running`、`completed`、`failed`、`cancelled` |
+| `limit` | int | 否 | 最大条数(默认:50) |
+
+**响应:** `ApiResponse[list[TaskSchema]]`
+
+### TaskSchema(列表视图)
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 任务 ID |
+| `project_id` | int | 项目 ID |
+| `task_type` | string | `search`、`dedup`、`crawl`、`ocr`、`index`、`keyword_expand` |
+| `status` | string | `pending`、`running`、`completed`、`failed`、`cancelled` |
+| `progress` | int | 当前进度 |
+| `total` | int | 总步数 |
+| `created_at` | string | ISO 8601 时间 |
+
+### 列表示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/tasks?project_id=1&status=running&limit=20"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": [
+ {
+ "id": 42,
+ "project_id": 1,
+ "task_type": "search",
+ "status": "running",
+ "progress": 30,
+ "total": 100,
+ "created_at": "2025-03-12T10:00:00"
+ }
+ ]
+}
+```
+
+---
+
+## GET /api/v1/tasks/{id}
+
+**说明:** 获取任务完整详情,包括 params、result、error_message。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 任务 ID |
+
+**响应:** `ApiResponse[TaskDetailSchema]`
+
+### TaskDetailSchema 字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 任务 ID |
+| `project_id` | int | 项目 ID |
+| `task_type` | string | 任务类型 |
+| `status` | string | 任务状态 |
+| `progress` | int | 当前进度 |
+| `total` | int | 总步数 |
+| `params` | object | 输入参数 |
+| `result` | object | 输出结果(完成时) |
+| `error_message` | string | 错误信息(失败时) |
+| `created_at` | string | ISO 8601 时间 |
+| `started_at` | string | ISO 8601 时间(可为空) |
+| `completed_at` | string | ISO 8601 时间(可为空) |
+
+### 详情示例
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/tasks/42"
+```
+
+```json
+{
+ "code": 200,
+ "message": "success",
+ "data": {
+ "id": 42,
+ "project_id": 1,
+ "task_type": "search",
+ "status": "completed",
+ "progress": 100,
+ "total": 100,
+ "params": {"query": "machine learning", "sources": ["semantic_scholar"]},
+ "result": {"papers_found": 15, "imported": 10},
+ "error_message": "",
+ "created_at": "2025-03-12T10:00:00",
+ "started_at": "2025-03-12T10:00:01",
+ "completed_at": "2025-03-12T10:02:30"
+ }
+}
+```
+
+---
+
+## POST /api/v1/tasks/{id}/cancel
+
+**说明:** 取消运行中或待处理的任务。处于 `completed`、`failed`、`cancelled` 状态的任务不可取消。
+
+**路径参数**
+
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `id` | int | 任务 ID |
+
+**响应:** `ApiResponse`(无 data)
+
+### 取消示例
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/tasks/42/cancel"
+```
+
+```json
+{
+ "code": 200,
+ "message": "Task cancelled",
+ "data": null
+}
+```
+
+---
+
+## 错误码
+
+| 错误码 | 说明 |
+|--------|------|
+| 200 | 成功 |
+| 400 | 无法取消任务(已处于 completed/failed/cancelled 状态) |
+| 404 | 任务不存在 |
diff --git a/docs/zh/api/writing.md b/docs/zh/api/writing.md
index a102ab8..bc428f8 100644
--- a/docs/zh/api/writing.md
+++ b/docs/zh/api/writing.md
@@ -6,12 +6,16 @@
| 方法 | 路径 | 说明 |
|------|------|------|
-| POST | /projects/{id}/writing/assist | 通用辅助 |
+| POST | /projects/{id}/writing/assist | 通用写作辅助 |
| POST | /projects/{id}/writing/summarize | 摘要 |
| POST | /projects/{id}/writing/citations | 引用生成 |
| POST | /projects/{id}/writing/review-outline | 综述提纲 |
| POST | /projects/{id}/writing/gap-analysis | 缺口分析 |
+## Assist 请求
+
+`task` 可选值:`summarize`、`cite`、`review_outline`、`gap_analysis`;`style` 用于指定引用样式(见下文“引用样式”)。
+
## 引用样式
-gb7714、apa、mla
+`gb_t_7714`、`apa`、`mla`
diff --git a/frontend/src/pages/ChatHistoryPage.tsx b/frontend/src/pages/ChatHistoryPage.tsx
index fdb6562..9fb9a66 100644
--- a/frontend/src/pages/ChatHistoryPage.tsx
+++ b/frontend/src/pages/ChatHistoryPage.tsx
@@ -113,7 +113,7 @@ export default function ChatHistoryPage() {
{formatDate(conv.updated_at)}
- {t('history.messageCount', { count: conv.messages?.length ?? 0 })}
+ {t('history.messageCount', { count: conv.message_count ?? conv.messages?.length ?? 0 })}
diff --git a/frontend/src/pages/project/KeywordsPage.tsx b/frontend/src/pages/project/KeywordsPage.tsx
index 3dce598..7183eb6 100644
--- a/frontend/src/pages/project/KeywordsPage.tsx
+++ b/frontend/src/pages/project/KeywordsPage.tsx
@@ -68,10 +68,12 @@ export default function KeywordsPage() {
onSuccess: (res) => {
const terms = res?.expanded_terms ?? [];
if (terms.length > 0) {
- terms.forEach((term: string) => {
+ terms.forEach((item: string | { term: string; term_zh?: string; relation?: string }) => {
+ const termStr = typeof item === 'string' ? item : item.term;
+ const termEn = typeof item === 'string' ? item : item.term;
createMutation.mutate({
- term,
- term_en: term,
+ term: termStr,
+ term_en: termEn,
level: 1,
});
});
diff --git a/frontend/src/pages/project/PapersPage.tsx b/frontend/src/pages/project/PapersPage.tsx
index 9a05f86..4d5405c 100644
--- a/frontend/src/pages/project/PapersPage.tsx
+++ b/frontend/src/pages/project/PapersPage.tsx
@@ -144,7 +144,17 @@ export default function PapersPage() {
const handleResolveConflict = async (conflictId: string, action: string) => {
try {
- await kbApi.resolveConflict(pid, conflictId, action === 'keep_existing' ? 'keep_old' : action);
+ const mappedAction = action === 'keep_existing' ? 'keep_old' : action === 'keep_new' ? 'keep_new' : action;
+ if (mappedAction === 'ai_resolve') {
+ const suggestions = await kbApi.autoResolve(pid, [conflictId]);
+ if (Array.isArray(suggestions) && suggestions.length > 0) {
+ await kbApi.resolveConflict(pid, conflictId, suggestions[0].action ?? 'skip');
+ }
+ setConflicts((prev) => prev.filter((c) => c.conflict_id !== conflictId));
+ queryClient.invalidateQueries({ queryKey: ['papers', pid] });
+ return;
+ }
+ await kbApi.resolveConflict(pid, conflictId, mappedAction);
setConflicts((prev) => prev.filter((c) => c.conflict_id !== conflictId));
queryClient.invalidateQueries({ queryKey: ['papers', pid] });
} catch (err) {
@@ -155,7 +165,14 @@ export default function PapersPage() {
const handleAutoResolveAll = async () => {
const ids = conflicts.map((c) => c.conflict_id);
try {
- await kbApi.autoResolve(pid, ids);
+ const suggestions = await kbApi.autoResolve(pid, ids);
+ if (Array.isArray(suggestions)) {
+ for (const s of suggestions) {
+ if (s.action && !s.error) {
+ await kbApi.resolveConflict(pid, s.conflict_id, s.action);
+ }
+ }
+ }
setConflicts([]);
queryClient.invalidateQueries({ queryKey: ['papers', pid] });
} catch (err) {
diff --git a/frontend/src/pages/project/SearchPage.tsx b/frontend/src/pages/project/SearchPage.tsx
index 657ae72..49cbbce 100644
--- a/frontend/src/pages/project/SearchPage.tsx
+++ b/frontend/src/pages/project/SearchPage.tsx
@@ -84,7 +84,7 @@ export default function SearchPage() {
errorMessage: t('searchPage.importFailed'),
invalidateKeys: [['papers', pid], ['project', projectId]],
onSuccess: (res) => {
- setImported(res?.imported ?? 0);
+ setImported(res?.created ?? 0);
},
});
diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts
index 20835ad..3f8f97e 100644
--- a/frontend/src/services/api.ts
+++ b/frontend/src/services/api.ts
@@ -25,7 +25,7 @@ export const paperApi = {
delete: (projectId: number, paperId: number) =>
api.delete(`/projects/${projectId}/papers/${paperId}`).then(r => r.data),
bulkImport: (projectId: number, papers: Partial[]) =>
- api.post<{ imported: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data),
+ api.post<{ created: number; skipped: number; total: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data),
};
export const keywordApi = {
diff --git a/frontend/src/services/kb-api.ts b/frontend/src/services/kb-api.ts
index b387d96..87e4676 100644
--- a/frontend/src/services/kb-api.ts
+++ b/frontend/src/services/kb-api.ts
@@ -49,7 +49,7 @@ export const kbApi = {
}).then(r => r.data),
autoResolve: (projectId: number, conflictIds: string[]) =>
- api.post<{ resolved: number }>(`/projects/${projectId}/dedup/auto-resolve`, {
+ api.post>(`/projects/${projectId}/dedup/auto-resolve`, {
conflict_ids: conflictIds,
}).then(r => r.data),
@@ -69,5 +69,5 @@ export const kbApi = {
}).then(r => r.data),
bulkImport: (projectId: number, papers: NewPaperData[]) =>
- api.post<{ imported: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data),
+ api.post<{ created: number; skipped: number; total: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data),
};
diff --git a/frontend/src/types/chat.ts b/frontend/src/types/chat.ts
index ed374a5..fc1ea66 100644
--- a/frontend/src/types/chat.ts
+++ b/frontend/src/types/chat.ts
@@ -7,6 +7,8 @@ export interface Conversation {
created_at: string;
updated_at: string;
messages: ChatMessage[];
+ message_count?: number;
+ last_message_preview?: string;
}
export interface ChatMessage {