diff --git a/backend/app/api/v1/rewrite.py b/backend/app/api/v1/rewrite.py index 19fa391..5b40fd2 100644 --- a/backend/app/api/v1/rewrite.py +++ b/backend/app/api/v1/rewrite.py @@ -13,7 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.api.deps import get_db -from app.services.llm.client import LLMClient, get_llm_client +from app.services.llm.client import get_llm_client from app.services.user_settings_service import UserSettingsService logger = logging.getLogger(__name__) @@ -85,12 +85,10 @@ async def _stream_rewrite(request: RewriteRequest, db: AsyncSession): full_text = "" try: - async for token in asyncio.wait_for( - _collect_stream(llm, messages), - timeout=REWRITE_TIMEOUT, - ): - full_text += token - yield _sse("rewrite_delta", {"delta": token}) + async with asyncio.timeout(REWRITE_TIMEOUT): + async for token in llm.chat_stream(messages, temperature=0.3, task_type="rewrite"): + full_text += token + yield _sse("rewrite_delta", {"delta": token}) except TimeoutError: yield _sse("error", {"code": "timeout", "message": "Rewrite timed out after 30s"}) return @@ -105,12 +103,6 @@ async def _stream_rewrite(request: RewriteRequest, db: AsyncSession): yield _sse("error", {"code": "rewrite_error", "message": str(e)}) -async def _collect_stream(llm: LLMClient, messages: list[dict[str, str]]): - """Wrap the async iterator so asyncio.wait_for can timeout the whole stream.""" - async for token in llm.chat_stream(messages, temperature=0.3, task_type="rewrite"): - yield token - - @router.post("/rewrite") async def rewrite_stream( request: RewriteRequest, diff --git a/backend/app/services/search_service.py b/backend/app/services/search_service.py index 5001031..f8f1df9 100644 --- a/backend/app/services/search_service.py +++ b/backend/app/services/search_service.py @@ -219,7 +219,7 @@ def _affiliation(auth: dict) -> str: class ArXivProvider(SearchProvider): """arXiv API — Atom XML feed.""" - BASE = "http://export.arxiv.org/api/query" + BASE = 
"https://export.arxiv.org/api/query" @property def name(self) -> str: diff --git a/backend/app/services/subscription_service.py b/backend/app/services/subscription_service.py index 4420de9..11ce2e5 100644 --- a/backend/app/services/subscription_service.py +++ b/backend/app/services/subscription_service.py @@ -91,10 +91,10 @@ def get_common_feeds() -> list[dict]: return [ { "name": "arXiv - Physics Optics", - "url": "http://export.arxiv.org/rss/physics.optics", + "url": "https://export.arxiv.org/rss/physics.optics", "category": "preprint", }, - {"name": "arXiv - Quantum Physics", "url": "http://export.arxiv.org/rss/quant-ph", "category": "preprint"}, + {"name": "arXiv - Quantum Physics", "url": "https://export.arxiv.org/rss/quant-ph", "category": "preprint"}, {"name": "Nature Photonics", "url": "https://www.nature.com/nphoton.rss", "category": "journal"}, { "name": "Science - Latest", diff --git a/docs/api/chat.md b/docs/api/chat.md new file mode 100644 index 0000000..fc2d867 --- /dev/null +++ b/docs/api/chat.md @@ -0,0 +1,121 @@ +# Chat API + +Chat 模块提供基于 SSE 的流式对话与文本改写接口,支持知识库 RAG 检索、多工具模式及实时流式输出。 + +**Base path:** `/api/v1/chat` + +--- + +## 1. 
流式对话 + +### POST /api/v1/chat/stream + +基于 SSE 的流式对话接口,支持知识库检索、引用标注及多轮对话上下文。 + +#### 请求体 (ChatStreamRequest) + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `conversation_id` | int | 否 | 对话 ID,续写时传入以保持上下文 | +| `message` | str | 是 | 用户消息内容(至少 1 字符) | +| `knowledge_base_ids` | list[int] | 否 | 知识库(项目)ID 列表,用于 RAG 检索 | +| `model` | str | 否 | 模型标识,空则使用用户设置 | +| `tool_mode` | str | 否 | 工具模式,默认 `"qa"` | + +**tool_mode 可选值:** + +| 值 | 说明 | +|----|------| +| `qa` | 问答模式:基于上下文回答问题,使用 [1]、[2] 等引用格式 | +| `citation_lookup` | 引用查找:识别并列出与文本最相关的参考文献 | +| `review_outline` | 综述提纲:生成结构化文献综述提纲 | +| `gap_analysis` | 研究缺口分析:识别研究空白与未来方向 | + +#### 对话响应格式 + +SSE 流式响应,`Content-Type: text/event-stream`。 + +#### 对话 SSE 事件类型 + +| 事件 | 说明 | data 字段 | +|------|------|-----------| +| `message_start` | 消息开始 | `{ message_id }` | +| `citation` | 引用信息(每个来源一条) | `{ index, paper_id, paper_title, page_number, excerpt, relevance_score, chunk_type, authors, year, doi }` | +| `text_delta` | 文本增量 | `{ delta }` | +| `message_end` | 消息结束 | `{ message_id, conversation_id, finish_reason }` | +| `error` | 错误 | `{ code, message }` | + +#### 对话示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/chat/stream" \ + -H "Content-Type: application/json" \ + -d '{ + "message": "什么是注意力机制?", + "knowledge_base_ids": [1, 2], + "tool_mode": "qa" + }' +``` + +#### 对话错误码 + +| code | 说明 | +|------|------| +| `stream_error` | 流式处理异常 | + +--- + +## 2. 
文本改写 + +### POST /api/v1/chat/rewrite + +基于 SSE 的流式文本改写接口,支持多种风格与自定义提示。 + +#### 请求体 (RewriteRequest) + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `excerpt` | str | 是 | 待改写文本,**最多 2000 字符** | +| `style` | str | 是 | 改写风格 | +| `custom_prompt` | str | 否 | 自定义提示,`style=custom` 时必填 | +| `source_language` | str | 否 | 源语言,默认 `"auto"` | + +**style 可选值:** + +| 值 | 说明 | +|----|------| +| `simplify` | 通俗化:将学术文本改写为易懂语言 | +| `academic` | 学术化:改写为正式学术风格 | +| `translate_en` | 英译:翻译为英文 | +| `translate_zh` | 中译:翻译为中文 | +| `custom` | 自定义:使用 `custom_prompt` 作为系统提示 | + +#### 改写响应格式 + +SSE 流式响应,`Content-Type: text/event-stream`。 + +#### 改写 SSE 事件类型 + +| 事件 | 说明 | data 字段 | +|------|------|-----------| +| `rewrite_delta` | 改写文本增量 | `{ delta }` | +| `rewrite_end` | 改写完成 | `{ full_text }` | +| `error` | 错误 | `{ code, message }` | + +#### 改写示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/chat/rewrite" \ + -H "Content-Type: application/json" \ + -d '{ + "excerpt": "The attention mechanism allows the model to focus on different parts of the input.", + "style": "translate_zh" + }' +``` + +#### 改写错误码 + +| code | 说明 | +|------|------| +| `timeout` | 改写超时(30 秒) | +| `rewrite_error` | 改写处理异常 | diff --git a/docs/api/conversations.md b/docs/api/conversations.md new file mode 100644 index 0000000..3d5e2dd --- /dev/null +++ b/docs/api/conversations.md @@ -0,0 +1,232 @@ +# Conversations API + +Conversations 模块提供对话的 CRUD 接口,支持分页列表、按知识库筛选及消息详情查询。 + +**Base path:** `/api/v1/conversations` + +--- + +## 端点总览 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/conversations` | 分页列表 | +| POST | `/conversations` | 创建对话 | +| GET | `/conversations/{id}` | 获取详情(含消息) | +| PUT | `/conversations/{id}` | 更新对话 | +| DELETE | `/conversations/{id}` | 删除对话 | + +--- + +## GET /conversations — 列表对话 + +分页获取对话列表,按更新时间倒序,支持按知识库 ID 筛选。 + +### 查询参数 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `page` | int | 否 | 页码,默认 1 | +| `page_size` | int | 否 | 每页条数,默认 20 | +| `knowledge_base_id` | int 
| 否 | 仅返回包含该知识库的对话 | + +### 列表响应格式 + +`ApiResponse[PaginatedData[ConversationListSchema]]` + +**ConversationListSchema 字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | +| `title` | str | 标题 | +| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 | +| `model` | str | 模型标识 | +| `tool_mode` | str | 工具模式,默认 `"qa"` | +| `created_at` | datetime | 创建时间 | +| `updated_at` | datetime | 更新时间 | +| `message_count` | int | 消息数量 | +| `last_message_preview` | str | 最后一条消息预览(最多 100 字符) | + +**PaginatedData 结构:** + +```json +{ + "code": 200, + "message": "success", + "data": { + "items": [...], + "total": 42, + "page": 1, + "page_size": 20, + "total_pages": 3 + } +} +``` + +### 列表示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/conversations?page=1&page_size=20" +curl -X GET "http://localhost:8000/api/v1/conversations?knowledge_base_id=1" +``` + +--- + +## POST /conversations — 创建对话 + +创建新对话。 + +### 创建请求体 + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `title` | str | 否 | 标题,默认 `"新对话"` | +| `knowledge_base_ids` | list[int] | 否 | 知识库 ID 列表 | +| `model` | str | 否 | 模型标识 | +| `tool_mode` | str | 否 | 工具模式,默认 `"qa"` | + +### 创建响应格式 + +`ApiResponse[ConversationSchema]`,包含完整对话及空 `messages` 数组。 + +### 创建示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/conversations" \ + -H "Content-Type: application/json" \ + -d '{ + "title": "文献综述讨论", + "knowledge_base_ids": [1, 2], + "tool_mode": "review_outline" + }' +``` + +--- + +## GET /conversations/{id} — 获取对话详情 + +获取单个对话及其全部消息。 + +### 详情路径参数 + +| 参数 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | + +### 详情响应格式 + +`ApiResponse[ConversationSchema]` + +**ConversationSchema 字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | +| `title` | str | 标题 | +| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 | +| `model` | str | 模型标识 | +| `tool_mode` | str | 工具模式 | +| `created_at` | datetime | 创建时间 | +| `updated_at` | datetime | 更新时间 | +| `messages` | 
list[MessageSchema] | 消息列表 | + +**MessageSchema 字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | int | 消息 ID | +| `conversation_id` | int | 对话 ID | +| `role` | str | 角色:`user` / `assistant` | +| `content` | str | 内容 | +| `citations` | list[dict] \| null | 引用列表(assistant 消息) | +| `created_at` | datetime | 创建时间 | + +### 详情示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/conversations/1" +``` + +### 详情错误码 + +| HTTP 状态 | 说明 | +|-----------|------| +| 404 | 对话不存在 | + +--- + +## PUT /conversations/{id} — 更新对话 + +更新对话标题或设置。 + +### 更新路径参数 + +| 参数 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | + +### 更新请求体 + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `title` | str | 否 | 新标题 | +| `model` | str | 否 | 新模型 | +| `tool_mode` | str | 否 | 新工具模式 | + +仅传入需要更新的字段。 + +### 更新响应格式 + +`ApiResponse[ConversationSchema]`,包含更新后的完整对话及消息。 + +### 更新示例 + +```bash +curl -X PUT "http://localhost:8000/api/v1/conversations/1" \ + -H "Content-Type: application/json" \ + -d '{"title": "新标题"}' +``` + +### 更新错误码 + +| HTTP 状态 | 说明 | +|-----------|------| +| 404 | 对话不存在 | + +--- + +## DELETE /conversations/{id} — 删除对话 + +删除对话及其全部消息(级联删除)。 + +### 删除路径参数 + +| 参数 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | + +### 删除响应格式 + +```json +{ + "code": 200, + "message": "success", + "data": { + "deleted": true, + "id": 1 + } +} +``` + +### 删除示例 + +```bash +curl -X DELETE "http://localhost:8000/api/v1/conversations/1" +``` + +### 删除错误码 + +| HTTP 状态 | 说明 | +|-----------|------| +| 404 | 对话不存在 | diff --git a/docs/api/crawler.md b/docs/api/crawler.md new file mode 100644 index 0000000..d7d37dd --- /dev/null +++ b/docs/api/crawler.md @@ -0,0 +1,98 @@ +# Crawler API + +爬虫模块 API,用于为待下载文献执行 PDF 下载(Unpaywall 等多源回退)。 + +**Base path:** `/api/v1/projects/{project_id}/crawl` + +--- + +## Endpoints + +| Method | Path | Description | +|--------|------|--------------| +| POST | `/start` | 启动 PDF 下载任务 | +| GET | `/stats` | 获取下载统计 | + +--- + +## POST /start + +对项目内待下载文献启动 
PDF 下载。仅处理 `pending` 或 `metadata_only` 状态文献。 + +**Query Parameters** + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `priority` | string | `"high"` | 优先级:`high` 按引用数排序,`low` 按创建时间排序 | +| `max_papers` | int | 50 | 单次处理最大文献数 | + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { + "total": 10, + "success": 8, + "failed": 2, + "details": [ + { + "paper_id": 1, + "success": true, + "file_path": "/data0/djx/omelette/.../1.pdf" + } + ] + } +} +``` + +**Example** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/crawl/start?priority=high&max_papers=50" +``` + +--- + +## GET /stats + +返回项目内下载相关统计。 + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { + "pending": 20, + "metadata_only": 5, + "pdf_downloaded": 80, + "ocr_complete": 60, + "indexed": 50, + "error": 3, + "storage": { + "total_mb": 1024, + "used_mb": 512 + } + } +} +``` + +- 各状态字段:文献数量 +- `storage`:存储统计(可选,由 CrawlerService 提供) + +**Example** + +```bash +curl "http://localhost:8000/api/v1/projects/1/crawl/stats" +``` + +--- + +## Error Codes + +| Code | Description | +|------|-------------| +| 404 | 项目不存在 | diff --git a/docs/api/dedup.md b/docs/api/dedup.md new file mode 100644 index 0000000..a5ca83b --- /dev/null +++ b/docs/api/dedup.md @@ -0,0 +1,229 @@ +# Dedup API + +Deduplication module API: DOI exact dedup, title similarity dedup, and LLM-assisted verification. 
+ +**Base path:** `/api/v1/projects/{project_id}/dedup` + +--- + +## Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/run` | Run deduplication pipeline | +| GET | `/candidates` | List candidate duplicate pairs for manual review | +| POST | `/verify` | LLM-verify if two papers are duplicates | +| POST | `/resolve` | Resolve single upload conflict (keep_old / keep_new / merge / skip) | +| POST | `/auto-resolve` | AI auto-suggest conflict resolution | + +--- + +## POST /run + +Run the deduplication pipeline. + +**Query Parameters** + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `strategy` | string | `"full"` | Strategy: `doi_only` \| `title_only` \| `full` | + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { + "stage1_doi_removed": 0, + "stage2_title_removed": 0, + "stage3_candidates": 5, + "total_remaining": 120, + "details": { + "doi_duplicates": [], + "title_duplicates": [], + "llm_candidates": [] + } + } +} +``` + +- `strategy=doi_only`: DOI exact dedup only +- `strategy=title_only`: Title similarity dedup only +- `strategy=full`: Full 3-stage (DOI → title → LLM candidates) + +**Example** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/run?strategy=full" +``` + +--- + +## GET /candidates + +List candidate duplicate pairs for manual review (high title similarity, need LLM or human confirmation). + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "paper_a_id": 10, + "paper_b_id": 11, + "similarity": 0.92, + "paper_a": { "id": 10, "title": "...", "doi": "..." }, + "paper_b": { "id": 11, "title": "...", "doi": "..." } + } + ] +} +``` + +**Example** + +```bash +curl "http://localhost:8000/api/v1/projects/1/dedup/candidates" +``` + +--- + +## POST /verify + +Use LLM to determine if two papers are duplicates. 
+ +**Query Parameters** + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `paper_a_id` | int | Yes | Paper A ID | +| `paper_b_id` | int | Yes | Paper B ID | + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { + "is_duplicate": true, + "reason": "Same paper, different sources" + } +} +``` + +**Example** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/verify?paper_a_id=10&paper_b_id=11" +``` + +--- + +## POST /resolve + +Resolve a single upload conflict. `conflict_id` format: `{old_paper_id}:{saved_filename}`, provided by the upload endpoint's `conflicts` array. + +**Request Body** + +```json +{ + "conflict_id": "123:uploaded.pdf", + "action": "keep_old", + "merged_paper": null +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `conflict_id` | string | Yes | Conflict ID, format `old_paper_id:saved_filename` | +| `action` | string | Yes | `keep_old` \| `keep_new` \| `merge` \| `skip` | +| `merged_paper` | object | No | Required when `action=merge`, merged metadata | + +**Actions** + +- `keep_old`: Keep existing paper, discard upload +- `keep_new`: Use new upload, create new paper +- `merge`: Merge metadata, create new paper (provide `merged_paper`) +- `skip`: Use new upload, create new paper (same as keep_new) + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { + "action": "keep_new", + "paper_id": 124, + "message": "Created new paper" + } +} +``` + +**Example** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/resolve" \ + -H "Content-Type: application/json" \ + -d '{"conflict_id":"123:paper.pdf","action":"keep_new"}' +``` + +--- + +## POST /auto-resolve + +Use LLM to batch-suggest conflict resolution. 
+ +**Request Body** + +```json +{ + "conflict_ids": ["123:file1.pdf", "124:file2.pdf"] +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `conflict_ids` | list[string] | No | Conflict ID list; empty returns empty list | + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "conflict_id": "123:file1.pdf", + "action": "keep_new", + "reason": "New version has more complete metadata" + }, + { + "conflict_id": "124:file2.pdf", + "error": "Paper not found" + } + ] +} +``` + +Each element is either `{conflict_id, action, reason}` or `{conflict_id, error}`. + +**Example** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/auto-resolve" \ + -H "Content-Type: application/json" \ + -d '{"conflict_ids":["123:paper.pdf"]}' +``` + +--- + +## Error Codes + +| Code | Description | +|------|-------------| +| 400 | Invalid `conflict_id` format, `action`, or request body | +| 404 | Paper not found or PDF file not found | diff --git a/docs/api/index.md b/docs/api/index.md index 05345bb..86818bc 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -54,6 +54,14 @@ GET /api/v1/tasks/{task_id} | [Papers](/api/papers) | `/projects/{id}/papers` | | [Keywords](/api/keywords) | `/projects/{id}/keywords` | | [Search](/api/search) | `/projects/{id}/search` | +| [Dedup](/api/dedup) | `/projects/{id}/dedup` | +| [OCR](/api/ocr) | `/projects/{id}/ocr` | +| [Crawler](/api/crawler) | `/projects/{id}/crawl` | +| [Subscription](/api/subscription) | `/projects/{id}/subscriptions` | | [RAG](/api/rag) | `/projects/{id}/rag` | | [Writing](/api/writing) | `/projects/{id}/writing` | -| Tasks | `/tasks` | +| [Chat](/api/chat) | `/chat` | +| [Conversations](/api/conversations) | `/conversations` | +| [Settings](/api/settings) | `/settings` | +| [Tasks](/api/tasks) | `/tasks` | +| [Pipelines](/api/pipelines) | `/pipelines` | diff --git a/docs/api/keywords.md b/docs/api/keywords.md index 6a01c0c..f8ae072 
100644 --- a/docs/api/keywords.md +++ b/docs/api/keywords.md @@ -12,7 +12,7 @@ Base path: `/api/v1/projects/{project_id}/keywords` | PUT | `/projects/{id}/keywords/{kw_id}` | Update keyword | | DELETE | `/projects/{id}/keywords/{kw_id}` | Delete keyword | | POST | `/projects/{id}/keywords/expand` | LLM expand | -| GET | `/projects/{id}/keywords/search-formula` | Generate formula | +| GET | `/projects/{id}/keywords/search-formula` | Generate search formula | ## Query Parameters (List) @@ -31,6 +31,14 @@ Base path: `/api/v1/projects/{project_id}/keywords` } ``` +## Bulk Create + +`POST /projects/{id}/keywords/bulk` — Create multiple keywords at once. + +**Request body:** Array of `KeywordCreate` objects. + +**Response:** `{ created }` — Number of keywords created. + ## Expand Request ```json @@ -41,6 +49,30 @@ Base path: `/api/v1/projects/{project_id}/keywords` } ``` +## Expand Response + +Returns `expanded_terms` as a list of objects: + +```json +{ + "expanded_terms": [ + {"term": "self-attention", "term_zh": "自注意力", "relation": "synonym"}, + {"term": "BERT", "term_zh": "", "relation": "abbreviation"} + ], + "source": "llm:openai" +} +``` + +- `term` — Expanded term (English) +- `term_zh` — Chinese translation (optional) +- `relation` — `synonym`, `abbreviation`, or `related` + ## Search Formula -Query param: `database` — `wos`, `scopus`, or `pubmed` +`GET /projects/{id}/keywords/search-formula?database=wos` — Generate a boolean search formula from project keywords for a specific database. + +**Query parameters:** + +- `database` — Target database: `wos`, `scopus`, or `pubmed` (default: `wos`) + +**Response:** `{ formula, database, keyword_count }` diff --git a/docs/api/ocr.md b/docs/api/ocr.md new file mode 100644 index 0000000..e4b0938 --- /dev/null +++ b/docs/api/ocr.md @@ -0,0 +1,34 @@ +# OCR API + +Base path: `/api/v1/projects/{project_id}/ocr` + +## Overview + +OCR and text extraction for PDF papers. 
Uses pdfplumber for native PDFs and PaddleOCR for scanned documents. + +## Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/projects/{id}/ocr/process` | Run OCR on papers | +| GET | `/projects/{id}/ocr/stats` | OCR statistics | + +## Process + +`POST /projects/{id}/ocr/process` — Extract text from PDFs via OCR. + +**Query parameters:** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `paper_ids` | list[int] | Optional. Specific paper IDs. If omitted, all `pdf_downloaded` papers are processed. | +| `force_ocr` | bool | Re-run OCR even if already processed (default: false) | +| `use_gpu` | bool | Use GPU for PaddleOCR (default: true) | + +**Response:** `{ processed, failed, total, message? }` + +## Stats + +`GET /projects/{id}/ocr/stats` — Return paper counts by status and total chunk count. + +**Response:** `{ metadata_only: n, pdf_downloaded: n, ocr_complete: n, indexed: n, error: n, total_chunks: n }` diff --git a/docs/api/papers.md b/docs/api/papers.md index 0c52078..ab9288e 100644 --- a/docs/api/papers.md +++ b/docs/api/papers.md @@ -9,6 +9,8 @@ Base path: `/api/v1/projects/{project_id}/papers` | GET | `/projects/{id}/papers` | List papers (paginated) | | POST | `/projects/{id}/papers` | Create paper | | POST | `/projects/{id}/papers/bulk` | Bulk import | +| POST | `/projects/{id}/papers/upload` | Multipart file upload (PDFs) | +| POST | `/projects/{id}/papers/process` | Trigger processing for papers | | GET | `/projects/{id}/papers/{paper_id}` | Get paper | | PUT | `/projects/{id}/papers/{paper_id}` | Update paper | | DELETE | `/projects/{id}/papers/{paper_id}` | Delete paper | @@ -39,3 +41,31 @@ Base path: `/api/v1/projects/{project_id}/papers` "status": "metadata_only" } ``` + +## Upload (Multipart) + +`POST /projects/{id}/papers/upload` — Upload PDF files. Accepts `multipart/form-data` with `files` (one or more PDFs). 
Extracts metadata, runs dedup check, and queues processing for new papers. + +**Response:** `{ papers, conflicts, total_uploaded }` + +- `papers` — List of newly created paper metadata +- `conflicts` — Dedup conflicts (DOI or title similarity) +- `total_uploaded` — Count of files successfully uploaded + +## Process + +`POST /projects/{id}/papers/process` — Trigger OCR + RAG indexing for papers. + +**Query parameters:** + +- `paper_ids` — Optional list of paper IDs. If omitted, all unprocessed papers in the project are queued. + +**Response:** `{ queued, message }` + +## Bulk Import Response + +`POST /projects/{id}/papers/bulk` returns `{ created, skipped, total }`: + +- `created` — Number of papers imported +- `skipped` — Number skipped (duplicate DOI) +- `total` — Total papers in request diff --git a/docs/api/pipelines.md b/docs/api/pipelines.md new file mode 100644 index 0000000..6a71438 --- /dev/null +++ b/docs/api/pipelines.md @@ -0,0 +1,71 @@ +# Pipelines API + +Base path: `/api/v1/pipelines` + +## Overview + +LangGraph pipeline orchestration for search and upload workflows. Pipelines run asynchronously and support HITL (human-in-the-loop) interrupt for conflict resolution. + +## Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/pipelines/search` | Start keyword-search pipeline | +| POST | `/pipelines/upload` | Start PDF-upload pipeline | +| GET | `/pipelines/{thread_id}/status` | Get pipeline status | +| POST | `/pipelines/{thread_id}/resume` | Resume interrupted pipeline | +| POST | `/pipelines/{thread_id}/cancel` | Cancel running pipeline | + +## Search Pipeline + +`POST /pipelines/search` — Start search → dedup → crawl → OCR → index pipeline. 
+ +**Request body:** + +```json +{ + "project_id": 1, + "query": "transformer attention", + "sources": ["semantic_scholar", "openalex"], + "max_results": 50 +} +``` + +**Response:** `{ thread_id, status, project_id }` + +## Upload Pipeline + +`POST /pipelines/upload` — Start extract → dedup → OCR → index pipeline for local PDF paths. + +**Request body:** + +```json +{ + "project_id": 1, + "pdf_paths": ["/path/to/paper1.pdf", "/path/to/paper2.pdf"] +} +``` + +Paths must be within the configured `PDF_DIR` (see settings). + +**Response:** `{ thread_id, status, project_id }` + +## Status + +`GET /pipelines/{thread_id}/status` — Returns `status` (`running`, `interrupted`, `completed`, `failed`, `cancelled`). When `interrupted`, includes `conflicts` for HITL resolution. + +## Resume + +`POST /pipelines/{thread_id}/resume` — Resume interrupted pipeline with resolved conflicts. + +**Request body:** + +```json +{ + "resolved_conflicts": [] +} +``` + +## Cancel + +`POST /pipelines/{thread_id}/cancel` — Cancel a running pipeline. 
diff --git a/docs/api/projects.md b/docs/api/projects.md index e049ab5..7d7b9a1 100644 --- a/docs/api/projects.md +++ b/docs/api/projects.md @@ -11,6 +11,8 @@ Base path: `/api/v1/projects` | GET | `/projects/{id}` | Get project | | PUT | `/projects/{id}` | Update project | | DELETE | `/projects/{id}` | Delete project | +| POST | `/projects/{id}/pipeline/run` | Run full pipeline (crawl → OCR → index) for all pending papers | +| POST | `/projects/{id}/pipeline/paper/{paper_id}` | Run pipeline for a single paper | ## Query Parameters (List) diff --git a/docs/api/rag.md b/docs/api/rag.md index 1187587..5c36b69 100644 --- a/docs/api/rag.md +++ b/docs/api/rag.md @@ -8,6 +8,7 @@ Base path: `/api/v1/projects/{project_id}/rag` |--------|----------|-------------| | POST | `/projects/{id}/rag/query` | Query knowledge base | | POST | `/projects/{id}/rag/index` | Build/rebuild index | +| POST | `/projects/{id}/rag/index/stream` | Build index (SSE streaming progress) | | GET | `/projects/{id}/rag/stats` | Index statistics | | DELETE | `/projects/{id}/rag/index` | Delete index | @@ -15,12 +16,18 @@ Base path: `/api/v1/projects/{project_id}/rag` ```json { - "query": "What is attention mechanism?", + "question": "What is attention mechanism?", "top_k": 10, - "use_reranker": true + "use_reranker": true, + "include_sources": true } ``` +- `question` — The question to answer (required) +- `top_k` — Number of chunks to retrieve (default: 10) +- `use_reranker` — Apply reranker for relevance (default: true) +- `include_sources` — Include source chunks in response (default: true) + ## Query Response ```json @@ -28,6 +35,25 @@ Base path: `/api/v1/projects/{project_id}/rag` "answer": "LLM-generated answer with citations", "sources": [ {"paper_id": 1, "chunk_id": "...", "score": 0.9} - ] + ], + "confidence": 0.0 } ``` + +## Index Stream (SSE) + +`POST /projects/{id}/rag/index/stream` — Rebuild the vector index with Server-Sent Events for progress updates. 
+ +**Response:** `text/event-stream` + +**Event types:** + +| Event | Description | data | +|-------|-------------|------| +| `progress` | Indexing progress | `{ stage, percent, message? }` | +| `complete` | Indexing finished | `{ indexed, collection, papers_updated }` | +| `error` | Error occurred | `{ message }` | + +## Delete Index + +`DELETE /projects/{id}/rag/index` — Delete the vector index for the project. Returns `ApiResponse[dict]` with deletion result. diff --git a/docs/api/settings.md b/docs/api/settings.md new file mode 100644 index 0000000..fee9a33 --- /dev/null +++ b/docs/api/settings.md @@ -0,0 +1,261 @@ +# Settings API + +Base path: `/api/v1/settings` + +## Overview + +The Settings API manages application configuration: LLM provider selection, model parameters, API keys for various providers (OpenAI, Anthropic, Aliyun, Volcengine, Ollama), embedding/reranker models, and other system settings. Values are merged from environment variables with DB overrides; API keys are masked in responses. + +## Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/settings` | Get all settings | +| PUT | `/settings` | Update settings (partial) | +| GET | `/settings/models` | List available models per provider | +| POST | `/settings/test-connection` | Test LLM provider connection | +| GET | `/settings/health` | Health check | + +--- + +## GET /api/v1/settings + +**Description:** Return merged settings (DB overrides .env). API keys are masked (e.g. `sk-12***abcd`). 
+ +**Response:** `ApiResponse[SettingsSchema]` + +### SettingsSchema + +| Field | Type | Description | +|-------|------|-------------| +| `llm_provider` | string | Default LLM provider (`openai`, `anthropic`, `aliyun`, `volcengine`, `ollama`, `mock`) | +| `llm_model` | string | Default model (overrides provider default) | +| `llm_temperature` | float | Temperature (0.0–2.0) | +| `llm_max_tokens` | int | Max tokens | +| `openai_api_key` | string | OpenAI API key (masked) | +| `openai_model` | string | OpenAI model | +| `anthropic_api_key` | string | Anthropic API key (masked) | +| `anthropic_model` | string | Anthropic model | +| `aliyun_api_key` | string | Aliyun API key (masked) | +| `aliyun_base_url` | string | Aliyun base URL | +| `aliyun_model` | string | Aliyun model | +| `volcengine_api_key` | string | Volcengine API key (masked) | +| `volcengine_base_url` | string | Volcengine base URL | +| `volcengine_model` | string | Volcengine model | +| `ollama_base_url` | string | Ollama base URL | +| `ollama_model` | string | Ollama model | +| `embedding_model` | string | Embedding model name | +| `reranker_model` | string | Reranker model name | +| `data_dir` | string | Data directory path | +| `cuda_visible_devices` | string | CUDA device IDs | +| `semantic_scholar_api_key` | string | Semantic Scholar API key (masked) | +| `unpaywall_email` | string | Unpaywall email | + +### Get Settings Example + +```bash +curl -X GET "http://localhost:8000/api/v1/settings" +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "llm_provider": "openai", + "llm_model": "gpt-4o-mini", + "llm_temperature": 0.7, + "llm_max_tokens": 4096, + "openai_api_key": "sk-12***abcd", + "openai_model": "gpt-4o-mini", + "anthropic_api_key": "", + "anthropic_model": "", + "aliyun_api_key": "", + "aliyun_base_url": "", + "aliyun_model": "", + "volcengine_api_key": "", + "volcengine_base_url": "", + "volcengine_model": "", + "ollama_base_url": "http://localhost:11434", + 
"ollama_model": "", + "embedding_model": "BAAI/bge-m3", + "reranker_model": "", + "data_dir": "/data0/djx/omelette", + "cuda_visible_devices": "", + "semantic_scholar_api_key": "", + "unpaywall_email": "" + } +} +``` + +--- + +## PUT /api/v1/settings + +**Description:** Update user-configurable settings. Only non-null fields are applied. Masked API keys (containing `***`) are skipped to avoid overwriting secrets. + +**Request:** `SettingsUpdateSchema` (partial, all fields optional) + +| Field | Type | Constraints | +|-------|------|-------------| +| `llm_provider` | string | — | +| `llm_model` | string | — | +| `llm_temperature` | float | 0.0–2.0 | +| `llm_max_tokens` | int | 1–128000 | +| `openai_api_key` | string | — | +| `openai_model` | string | — | +| `anthropic_api_key` | string | — | +| `anthropic_model` | string | — | +| `aliyun_api_key` | string | — | +| `aliyun_base_url` | string | — | +| `aliyun_model` | string | — | +| `volcengine_api_key` | string | — | +| `volcengine_base_url` | string | — | +| `volcengine_model` | string | — | +| `ollama_base_url` | string | — | +| `ollama_model` | string | — | + +**Response:** `ApiResponse[SettingsSchema]` (updated merged settings) + +### Update Settings Example + +```bash +curl -X PUT "http://localhost:8000/api/v1/settings" \ + -H "Content-Type: application/json" \ + -d '{"llm_provider": "openai", "llm_model": "gpt-4o-mini"}' +``` + +--- + +## GET /api/v1/settings/models + +**Description:** Return available LLM providers and their model lists. 
+ +**Response:** `ApiResponse[list[ProviderModelInfo]]` + +### ProviderModelInfo + +| Field | Type | Description | +|-------|------|-------------| +| `provider` | string | Provider ID | +| `display_name` | string | Display name | +| `models` | string[] | List of model IDs | +| `requires_api_key` | bool | Whether API key is required | +| `requires_base_url` | bool | Whether base URL is configurable | +| `default_base_url` | string | Default base URL if applicable | + +### List Models Example + +```bash +curl -X GET "http://localhost:8000/api/v1/settings/models" +``` + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "provider": "openai", + "display_name": "OpenAI", + "models": ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3-mini"], + "requires_api_key": true, + "requires_base_url": false, + "default_base_url": "" + }, + { + "provider": "ollama", + "display_name": "Ollama (本地)", + "models": ["llama3", "llama3.1", "mistral", "qwen2", "deepseek-r1"], + "requires_api_key": false, + "requires_base_url": true, + "default_base_url": "http://localhost:11434" + } + ] +} +``` + +--- + +## POST /api/v1/settings/test-connection + +**Description:** Test the current LLM configuration by sending a simple prompt. Uses merged settings from DB (no request body). + +**Response:** `ApiResponse[dict]` + +| Field | Type | Description | +|-------|------|-------------| +| `success` | bool | Whether the test succeeded | +| `response` | string | First 200 chars of LLM response (on success) | +| `error` | string | Error message (on failure) | + +### Test Connection Example + +```bash +curl -X POST "http://localhost:8000/api/v1/settings/test-connection" +``` + +**Success:** +```json +{ + "code": 200, + "message": "success", + "data": { + "success": true, + "response": "OK." 
+ } +} +``` + +**Failure:** +```json +{ + "code": 500, + "message": "Connection test failed", + "data": { + "success": false, + "error": "Invalid API key" + } +} +``` + +--- + +## GET /api/v1/settings/health + +**Description:** Simple health check endpoint. + +**Response:** `ApiResponse[dict]` + +| Field | Type | Description | +|-------|------|-------------| +| `status` | string | `"healthy"` | +| `version` | string | Application version | + +### Health Check Example + +```bash +curl -X GET "http://localhost:8000/api/v1/settings/health" +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "status": "healthy", + "version": "0.1.0" + } +} +``` + +--- + +## Error Codes + +| Code | Description | +|------|-------------| +| 200 | Success | +| 400 | Bad request (e.g. invalid temperature range) | +| 422 | Validation error (invalid request body) | +| 500 | Server error (e.g. connection test failure) | diff --git a/docs/api/subscription.md b/docs/api/subscription.md new file mode 100644 index 0000000..57e6e35 --- /dev/null +++ b/docs/api/subscription.md @@ -0,0 +1,256 @@ +# Subscription API + +Subscription module API for managing incremental literature updates (RSS / API search). + +**Base path:** `/api/v1/projects/{project_id}/subscriptions` + +--- + +## Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/feeds` | Get common academic RSS feed templates | +| GET | `/` | List project subscriptions | +| POST | `/` | Create subscription | +| GET | `/{sub_id}` | Get single subscription | +| PUT | `/{sub_id}` | Update subscription | +| DELETE | `/{sub_id}` | Delete subscription | +| POST | `/{sub_id}/trigger` | Manually trigger subscription update | +| POST | `/check-rss` | Check RSS feed | +| POST | `/check-updates` | Check API for updates | + +--- + +## GET /feeds + +Return common academic RSS feed templates. The `project_id` in the path is not used by this endpoint's logic, so the result is the same for every project.
+ +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "name": "arXiv - Physics Optics", + "url": "https://export.arxiv.org/rss/physics.optics", + "category": "preprint" + } + ] +} +``` + +--- + +## GET /subscriptions + +List all subscriptions for the project. + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "id": 1, + "project_id": 1, + "name": "arXiv CS.AI", + "query": "machine learning", + "sources": ["arxiv"], + "frequency": "weekly", + "max_results": 50, + "is_active": true, + "last_run_at": "2025-03-10T12:00:00", + "total_found": 120, + "created_at": "2025-01-01T00:00:00", + "updated_at": "2025-03-10T12:00:00" + } + ] +} +``` + +--- + +## POST /subscriptions + +Create a new subscription. + +**Request Body** + +```json +{ + "name": "arXiv CS.AI", + "query": "machine learning", + "sources": ["arxiv", "semantic_scholar"], + "frequency": "weekly", + "max_results": 50 +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | string | Yes | Subscription name | +| `query` | string | No | Search query, default `""` | +| `sources` | list[string] | No | Data sources, default `[]` | +| `frequency` | string | No | `daily` \| `weekly` \| `monthly`, default `weekly` | +| `max_results` | int | No | Max results per run (1–200), default 50 | + +**Response** + +```json +{ + "code": 201, + "message": "Subscription created", + "data": { + "id": 1, + "project_id": 1, + "name": "arXiv CS.AI", + "query": "machine learning", + "sources": ["arxiv", "semantic_scholar"], + "frequency": "weekly", + "max_results": 50, + "is_active": true, + "last_run_at": null, + "total_found": 0, + "created_at": "2025-03-12T00:00:00", + "updated_at": "2025-03-12T00:00:00" + } +} +``` + +--- + +## PUT /subscriptions/{sub_id} + +Update a subscription. + +**Request Body** + +```json +{ + "name": "arXiv CS.AI (updated)", + "query": "deep learning", + "is_active": false +} +``` + +All fields are optional; include only the fields you want to update.
+ +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { ... } +} +``` + +--- + +## DELETE /subscriptions/{sub_id} + +Delete a subscription. + +**Response** + +```json +{ + "code": 200, + "message": "Subscription deleted", + "data": null +} +``` + +--- + +## POST /subscriptions/{sub_id}/trigger + +Manually trigger subscription update (check API for new papers). + +**Query Parameters** + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `since_days` | int | 7 | Query last N days, 1–365 | + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { + "new_papers": 5, + "total_checked": 120, + "sources_searched": ["arxiv", "semantic_scholar"] + } +} +``` + +--- + +## POST /check-rss + +Check an RSS feed (does not require a saved subscription). + +**Query Parameters** + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `feed_url` | string | — | RSS/Atom feed URL | +| `since_days` | int | 7 | Query last N days, 1–365 | + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { + "entries": [...], + "count": 10 + } +} +``` + +--- + +## POST /check-updates + +Check for new papers via API search (does not require a saved subscription). 
+ +**Query Parameters** + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `query` | string | `""` | Search query | +| `sources` | list[string] | null | Data sources | +| `since_days` | int | 7 | Query last N days, 1–365 | +| `max_results` | int | 50 | Max results 1–200 | + +**Response** + +```json +{ + "code": 200, + "message": "success", + "data": { + "new_papers": [...], + "total_found": 50, + "sources_checked": { "arxiv": 30, "semantic_scholar": 20 } + } +} +``` + +--- + +## Error Codes + +| Code | Description | +|------|-------------| +| 404 | Subscription not found | diff --git a/docs/api/tasks.md b/docs/api/tasks.md new file mode 100644 index 0000000..f217a5d --- /dev/null +++ b/docs/api/tasks.md @@ -0,0 +1,163 @@ +# Tasks API + +Base path: `/api/v1/tasks` + +## Overview + +The Tasks API manages background processing jobs: search, dedup, crawl, OCR, index, keyword expansion. Tasks are created by pipelines and other services; this API provides listing, detail retrieval, and cancellation. + +## Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/tasks` | List tasks | +| GET | `/tasks/{id}` | Get task detail | +| POST | `/tasks/{id}/cancel` | Cancel a running task | + +--- + +## GET /api/v1/tasks + +**Description:** List tasks with optional filters. Results are ordered by `created_at` descending. 
+ +**Query Parameters** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `project_id` | int | No | Filter by project ID | +| `status` | string | No | Filter by status: `pending`, `running`, `completed`, `failed`, `cancelled` | +| `limit` | int | No | Max results (default: 50) | + +**Response:** `ApiResponse[list[TaskSchema]]` + +### TaskSchema (list view) + +| Field | Type | Description | +|-------|------|-------------| +| `id` | int | Task ID | +| `project_id` | int | Project ID | +| `task_type` | string | `search`, `dedup`, `crawl`, `ocr`, `index`, `keyword_expand` | +| `status` | string | `pending`, `running`, `completed`, `failed`, `cancelled` | +| `progress` | int | Current progress | +| `total` | int | Total steps | +| `created_at` | string | ISO 8601 datetime | + +### List Example + +```bash +curl -X GET "http://localhost:8000/api/v1/tasks?project_id=1&status=running&limit=20" +``` + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "id": 42, + "project_id": 1, + "task_type": "search", + "status": "running", + "progress": 30, + "total": 100, + "created_at": "2025-03-12T10:00:00" + } + ] +} +``` + +--- + +## GET /api/v1/tasks/{id} + +**Description:** Get full task detail including params, result, and error message. 
+ +**Path Parameters** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `id` | int | Task ID | + +**Response:** `ApiResponse[TaskDetailSchema]` + +### TaskDetailSchema + +| Field | Type | Description | +|-------|------|-------------| +| `id` | int | Task ID | +| `project_id` | int | Project ID | +| `task_type` | string | Task type | +| `status` | string | Task status | +| `progress` | int | Current progress | +| `total` | int | Total steps | +| `params` | object | Input parameters | +| `result` | object | Output result (when completed) | +| `error_message` | string | Error message (when failed) | +| `created_at` | string | ISO 8601 datetime | +| `started_at` | string | ISO 8601 datetime (nullable) | +| `completed_at` | string | ISO 8601 datetime (nullable) | + +### Detail Example + +```bash +curl -X GET "http://localhost:8000/api/v1/tasks/42" +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "id": 42, + "project_id": 1, + "task_type": "search", + "status": "completed", + "progress": 100, + "total": 100, + "params": {"query": "machine learning", "sources": ["semantic_scholar"]}, + "result": {"papers_found": 15, "imported": 10}, + "error_message": "", + "created_at": "2025-03-12T10:00:00", + "started_at": "2025-03-12T10:00:01", + "completed_at": "2025-03-12T10:02:30" + } +} +``` + +--- + +## POST /api/v1/tasks/{id}/cancel + +**Description:** Cancel a running or pending task. Tasks in `completed`, `failed`, or `cancelled` state cannot be cancelled. 
+ +**Path Parameters** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `id` | int | Task ID | + +**Response:** `ApiResponse` (no data) + +### Cancel Example + +```bash +curl -X POST "http://localhost:8000/api/v1/tasks/42/cancel" +``` + +```json +{ + "code": 200, + "message": "Task cancelled", + "data": null +} +``` + +--- + +## Error Codes + +| Code | Description | +|------|-------------| +| 200 | Success | +| 400 | Cannot cancel task (already completed/failed/cancelled) | +| 404 | Task not found | diff --git a/docs/api/writing.md b/docs/api/writing.md index 8c47c57..50409bf 100644 --- a/docs/api/writing.md +++ b/docs/api/writing.md @@ -6,12 +6,37 @@ Base path: `/api/v1/projects/{project_id}/writing` | Method | Endpoint | Description | |--------|----------|-------------| -| POST | `/projects/{id}/writing/assist` | General assistance | +| POST | `/projects/{id}/writing/assist` | General writing assistance | | POST | `/projects/{id}/writing/summarize` | Summarize papers | | POST | `/projects/{id}/writing/citations` | Generate citations | | POST | `/projects/{id}/writing/review-outline` | Review outline | | POST | `/projects/{id}/writing/gap-analysis` | Gap analysis | +## Assist (General) + +`POST /projects/{id}/writing/assist` — AI-powered writing assistance covering four tasks: summarization, citation generation, outline review, and gap analysis.
+ +**Request body:** + +```json +{ + "task": "summarize", + "text": "", + "paper_ids": [1, 2], + "topic": "Literature Review", + "style": "gb_t_7714", + "language": "en" +} +``` + +- `task` — `summarize`, `cite`, `review_outline`, or `gap_analysis` +- `paper_ids` — Paper IDs (for summarize/cite) +- `topic` — Topic for outline/gap analysis +- `style` — Citation style (for cite task) +- `language` — Output language (default: `en`) + +**Response:** `{ content, citations, suggestions }` + ## Summarize Request ```json @@ -25,8 +50,8 @@ Base path: `/api/v1/projects/{project_id}/writing` ```json { "paper_ids": [1, 2], - "style": "gb7714" + "style": "gb_t_7714" } ``` -Styles: `gb7714`, `apa`, `mla` +**Citation styles:** `gb_t_7714`, `apa`, `mla` diff --git a/docs/zh/api/chat.md b/docs/zh/api/chat.md new file mode 100644 index 0000000..fc2d867 --- /dev/null +++ b/docs/zh/api/chat.md @@ -0,0 +1,121 @@ +# Chat API + +Chat 模块提供基于 SSE 的流式对话与文本改写接口,支持知识库 RAG 检索、多工具模式及实时流式输出。 + +**Base path:** `/api/v1/chat` + +--- + +## 1. 
流式对话 + +### POST /api/v1/chat/stream + +基于 SSE 的流式对话接口,支持知识库检索、引用标注及多轮对话上下文。 + +#### 请求体 (ChatStreamRequest) + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `conversation_id` | int | 否 | 对话 ID,续写时传入以保持上下文 | +| `message` | str | 是 | 用户消息内容(至少 1 字符) | +| `knowledge_base_ids` | list[int] | 否 | 知识库(项目)ID 列表,用于 RAG 检索 | +| `model` | str | 否 | 模型标识,空则使用用户设置 | +| `tool_mode` | str | 否 | 工具模式,默认 `"qa"` | + +**tool_mode 可选值:** + +| 值 | 说明 | +|----|------| +| `qa` | 问答模式:基于上下文回答问题,使用 [1]、[2] 等引用格式 | +| `citation_lookup` | 引用查找:识别并列出与文本最相关的参考文献 | +| `review_outline` | 综述提纲:生成结构化文献综述提纲 | +| `gap_analysis` | 研究缺口分析:识别研究空白与未来方向 | + +#### 对话响应格式 + +SSE 流式响应,`Content-Type: text/event-stream`。 + +#### 对话 SSE 事件类型 + +| 事件 | 说明 | data 字段 | +|------|------|-----------| +| `message_start` | 消息开始 | `{ message_id }` | +| `citation` | 引用信息(每个来源一条) | `{ index, paper_id, paper_title, page_number, excerpt, relevance_score, chunk_type, authors, year, doi }` | +| `text_delta` | 文本增量 | `{ delta }` | +| `message_end` | 消息结束 | `{ message_id, conversation_id, finish_reason }` | +| `error` | 错误 | `{ code, message }` | + +#### 对话示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/chat/stream" \ + -H "Content-Type: application/json" \ + -d '{ + "message": "什么是注意力机制?", + "knowledge_base_ids": [1, 2], + "tool_mode": "qa" + }' +``` + +#### 对话错误码 + +| code | 说明 | +|------|------| +| `stream_error` | 流式处理异常 | + +--- + +## 2. 
文本改写 + +### POST /api/v1/chat/rewrite + +基于 SSE 的流式文本改写接口,支持多种风格与自定义提示。 + +#### 请求体 (RewriteRequest) + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `excerpt` | str | 是 | 待改写文本,**最多 2000 字符** | +| `style` | str | 是 | 改写风格 | +| `custom_prompt` | str | 否 | 自定义提示,`style=custom` 时必填 | +| `source_language` | str | 否 | 源语言,默认 `"auto"` | + +**style 可选值:** + +| 值 | 说明 | +|----|------| +| `simplify` | 通俗化:将学术文本改写为易懂语言 | +| `academic` | 学术化:改写为正式学术风格 | +| `translate_en` | 英译:翻译为英文 | +| `translate_zh` | 中译:翻译为中文 | +| `custom` | 自定义:使用 `custom_prompt` 作为系统提示 | + +#### 改写响应格式 + +SSE 流式响应,`Content-Type: text/event-stream`。 + +#### 改写 SSE 事件类型 + +| 事件 | 说明 | data 字段 | +|------|------|-----------| +| `rewrite_delta` | 改写文本增量 | `{ delta }` | +| `rewrite_end` | 改写完成 | `{ full_text }` | +| `error` | 错误 | `{ code, message }` | + +#### 改写示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/chat/rewrite" \ + -H "Content-Type: application/json" \ + -d '{ + "excerpt": "The attention mechanism allows the model to focus on different parts of the input.", + "style": "translate_zh" + }' +``` + +#### 改写错误码 + +| code | 说明 | +|------|------| +| `timeout` | 改写超时(30 秒) | +| `rewrite_error` | 改写处理异常 | diff --git a/docs/zh/api/conversations.md b/docs/zh/api/conversations.md new file mode 100644 index 0000000..3d5e2dd --- /dev/null +++ b/docs/zh/api/conversations.md @@ -0,0 +1,232 @@ +# Conversations API + +Conversations 模块提供对话的 CRUD 接口,支持分页列表、按知识库筛选及消息详情查询。 + +**Base path:** `/api/v1/conversations` + +--- + +## 端点总览 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/conversations` | 分页列表 | +| POST | `/conversations` | 创建对话 | +| GET | `/conversations/{id}` | 获取详情(含消息) | +| PUT | `/conversations/{id}` | 更新对话 | +| DELETE | `/conversations/{id}` | 删除对话 | + +--- + +## GET /conversations — 列表对话 + +分页获取对话列表,按更新时间倒序,支持按知识库 ID 筛选。 + +### 查询参数 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `page` | int | 否 | 页码,默认 1 | +| `page_size` | int | 否 | 每页条数,默认 20 | +| 
`knowledge_base_id` | int | 否 | 仅返回包含该知识库的对话 | + +### 列表响应格式 + +`ApiResponse[PaginatedData[ConversationListSchema]]` + +**ConversationListSchema 字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | +| `title` | str | 标题 | +| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 | +| `model` | str | 模型标识 | +| `tool_mode` | str | 工具模式,默认 `"qa"` | +| `created_at` | datetime | 创建时间 | +| `updated_at` | datetime | 更新时间 | +| `message_count` | int | 消息数量 | +| `last_message_preview` | str | 最后一条消息预览(最多 100 字符) | + +**PaginatedData 结构:** + +```json +{ + "code": 200, + "message": "success", + "data": { + "items": [...], + "total": 42, + "page": 1, + "page_size": 20, + "total_pages": 3 + } +} +``` + +### 列表示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/conversations?page=1&page_size=20" +curl -X GET "http://localhost:8000/api/v1/conversations?knowledge_base_id=1" +``` + +--- + +## POST /conversations — 创建对话 + +创建新对话。 + +### 创建请求体 + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `title` | str | 否 | 标题,默认 `"新对话"` | +| `knowledge_base_ids` | list[int] | 否 | 知识库 ID 列表 | +| `model` | str | 否 | 模型标识 | +| `tool_mode` | str | 否 | 工具模式,默认 `"qa"` | + +### 创建响应格式 + +`ApiResponse[ConversationSchema]`,包含完整对话及空 `messages` 数组。 + +### 创建示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/conversations" \ + -H "Content-Type: application/json" \ + -d '{ + "title": "文献综述讨论", + "knowledge_base_ids": [1, 2], + "tool_mode": "review_outline" + }' +``` + +--- + +## GET /conversations/{id} — 获取对话详情 + +获取单个对话及其全部消息。 + +### 详情路径参数 + +| 参数 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | + +### 详情响应格式 + +`ApiResponse[ConversationSchema]` + +**ConversationSchema 字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | +| `title` | str | 标题 | +| `knowledge_base_ids` | list[int] \| null | 知识库 ID 列表 | +| `model` | str | 模型标识 | +| `tool_mode` | str | 工具模式 | +| `created_at` | datetime | 创建时间 | +| `updated_at` | datetime | 更新时间 | +| 
`messages` | list[MessageSchema] | 消息列表 | + +**MessageSchema 字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | int | 消息 ID | +| `conversation_id` | int | 对话 ID | +| `role` | str | 角色:`user` / `assistant` | +| `content` | str | 内容 | +| `citations` | list[dict] \| null | 引用列表(assistant 消息) | +| `created_at` | datetime | 创建时间 | + +### 详情示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/conversations/1" +``` + +### 详情错误码 + +| HTTP 状态 | 说明 | +|-----------|------| +| 404 | 对话不存在 | + +--- + +## PUT /conversations/{id} — 更新对话 + +更新对话标题或设置。 + +### 更新路径参数 + +| 参数 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | + +### 更新请求体 + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `title` | str | 否 | 新标题 | +| `model` | str | 否 | 新模型 | +| `tool_mode` | str | 否 | 新工具模式 | + +仅传入需要更新的字段。 + +### 更新响应格式 + +`ApiResponse[ConversationSchema]`,包含更新后的完整对话及消息。 + +### 更新示例 + +```bash +curl -X PUT "http://localhost:8000/api/v1/conversations/1" \ + -H "Content-Type: application/json" \ + -d '{"title": "新标题"}' +``` + +### 更新错误码 + +| HTTP 状态 | 说明 | +|-----------|------| +| 404 | 对话不存在 | + +--- + +## DELETE /conversations/{id} — 删除对话 + +删除对话及其全部消息(级联删除)。 + +### 删除路径参数 + +| 参数 | 类型 | 说明 | +|------|------|------| +| `id` | int | 对话 ID | + +### 删除响应格式 + +```json +{ + "code": 200, + "message": "success", + "data": { + "deleted": true, + "id": 1 + } +} +``` + +### 删除示例 + +```bash +curl -X DELETE "http://localhost:8000/api/v1/conversations/1" +``` + +### 删除错误码 + +| HTTP 状态 | 说明 | +|-----------|------| +| 404 | 对话不存在 | diff --git a/docs/zh/api/crawler.md b/docs/zh/api/crawler.md new file mode 100644 index 0000000..2273361 --- /dev/null +++ b/docs/zh/api/crawler.md @@ -0,0 +1,20 @@ +# Crawler API + +路径:`/api/v1/projects/{project_id}/crawl` + +## 端点 + +| 方法 | 路径 | 说明 | +|------|------|------| +| POST | /projects/{id}/crawl/start | 启动 PDF 下载 | +| GET | /projects/{id}/crawl/stats | 下载统计 | + +## POST /start + +对项目内待下载文献启动 PDF 下载(Unpaywall 等多源回退)。仅处理 `pending` 或 
`metadata_only` 状态文献。 + +**查询参数:** `priority`(high/low)、`max_papers`(默认 50) + +## GET /stats + +返回项目内各状态文献数量及存储统计。 diff --git a/docs/zh/api/dedup.md b/docs/zh/api/dedup.md new file mode 100644 index 0000000..b3a2d27 --- /dev/null +++ b/docs/zh/api/dedup.md @@ -0,0 +1,229 @@ +# 去重 API + +去重模块 API,支持 DOI 精确去重、标题相似度去重及 LLM 辅助验证。 + +**基础路径:** `/api/v1/projects/{project_id}/dedup` + +--- + +## 端点概览 + +| 方法 | 路径 | 说明 | +|------|------|------| +| POST | `/run` | 执行去重流程 | +| GET | `/candidates` | 列出待人工审核的候选重复对 | +| POST | `/verify` | 使用 LLM 验证两个文献是否为重复 | +| POST | `/resolve` | 解决单条上传冲突(keep_old / keep_new / merge / skip) | +| POST | `/auto-resolve` | AI 自动建议冲突解决方式 | + +--- + +## POST /run + +执行去重流水线。 + +**查询参数** + +| 参数名 | 类型 | 默认值 | 说明 | +|--------|------|--------|------| +| `strategy` | string | `"full"` | 策略:`doi_only` \| `title_only` \| `full` | + +**响应** + +```json +{ + "code": 200, + "message": "success", + "data": { + "stage1_doi_removed": 0, + "stage2_title_removed": 0, + "stage3_candidates": 5, + "total_remaining": 120, + "details": { + "doi_duplicates": [], + "title_duplicates": [], + "llm_candidates": [] + } + } +} +``` + +- `strategy=doi_only`:仅 DOI 精确去重 +- `strategy=title_only`:仅标题相似度去重 +- `strategy=full`:完整三阶段(DOI → 标题 → LLM 候选) + +**示例** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/run?strategy=full" +``` + +--- + +## GET /candidates + +列出待人工审核的候选重复对(标题相似度较高,需 LLM 或人工确认)。 + +**响应** + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "paper_a_id": 10, + "paper_b_id": 11, + "similarity": 0.92, + "paper_a": { "id": 10, "title": "...", "doi": "..." }, + "paper_b": { "id": 11, "title": "...", "doi": "..." 
} + } + ] +} +``` + +**示例** + +```bash +curl "http://localhost:8000/api/v1/projects/1/dedup/candidates" +``` + +--- + +## POST /verify + +使用 LLM 判断两个文献是否为重复。 + +**查询参数** + +| 参数名 | 类型 | 必填 | 说明 | +|--------|------|------|------| +| `paper_a_id` | int | 是 | 文献 A ID | +| `paper_b_id` | int | 是 | 文献 B ID | + +**响应** + +```json +{ + "code": 200, + "message": "success", + "data": { + "is_duplicate": true, + "reason": "Same paper, different sources" + } +} +``` + +**示例** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/verify?paper_a_id=10&paper_b_id=11" +``` + +--- + +## POST /resolve + +解决单条上传冲突。`conflict_id` 格式:`{old_paper_id}:{saved_filename}`,由上传接口返回的 `conflicts` 提供。 + +**请求体** + +```json +{ + "conflict_id": "123:uploaded.pdf", + "action": "keep_old", + "merged_paper": null +} +``` + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `conflict_id` | string | 是 | 冲突 ID,格式 `old_paper_id:saved_filename` | +| `action` | string | 是 | `keep_old` \| `keep_new` \| `merge` \| `skip` | +| `merged_paper` | object | 否 | 仅当 `action=merge` 时提供,合并后的元数据 | + +**操作说明** + +- `keep_old`:保留现有文献,丢弃上传 +- `keep_new`:以新上传为准,创建新文献 +- `merge`:合并元数据,创建新文献(需提供 `merged_paper`) +- `skip`:以新上传为准,创建新文献(与 keep_new 行为相同) + +**响应** + +```json +{ + "code": 200, + "message": "success", + "data": { + "action": "keep_new", + "paper_id": 124, + "message": "Created new paper" + } +} +``` + +**示例** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/resolve" \ + -H "Content-Type: application/json" \ + -d '{"conflict_id":"123:paper.pdf","action":"keep_new"}' +``` + +--- + +## POST /auto-resolve + +使用 LLM 批量建议冲突解决方式。 + +**请求体** + +```json +{ + "conflict_ids": ["123:file1.pdf", "124:file2.pdf"] +} +``` + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `conflict_ids` | list[string] | 否 | 冲突 ID 列表;为空则返回空列表 | + +**响应** + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "conflict_id": "123:file1.pdf", + "action": "keep_new", + "reason": 
"New version has more complete metadata" + }, + { + "conflict_id": "124:file2.pdf", + "error": "Paper not found" + } + ] +} +``` + +每个元素为 `{conflict_id, action, reason}` 或 `{conflict_id, error}`。 + +**示例** + +```bash +curl -X POST "http://localhost:8000/api/v1/projects/1/dedup/auto-resolve" \ + -H "Content-Type: application/json" \ + -d '{"conflict_ids":["123:paper.pdf"]}' +``` + +--- + +## 错误码 + +| 状态码 | 说明 | +|--------|------| +| 400 | 无效的 `conflict_id` 格式、`action` 或请求体 | +| 404 | 文献不存在或 PDF 文件不存在 | diff --git a/docs/zh/api/index.md b/docs/zh/api/index.md index c79a885..f9cc1bd 100644 --- a/docs/zh/api/index.md +++ b/docs/zh/api/index.md @@ -50,6 +50,14 @@ GET /api/v1/tasks/{task_id} | [Papers](/zh/api/papers) | /projects/{id}/papers | | [Keywords](/zh/api/keywords) | /projects/{id}/keywords | | [Search](/zh/api/search) | /projects/{id}/search | +| [Dedup](/zh/api/dedup) | /projects/{id}/dedup | +| [OCR](/zh/api/ocr) | /projects/{id}/ocr | +| [Crawler](/zh/api/crawler) | /projects/{id}/crawl | +| [Subscription](/zh/api/subscription) | /projects/{id}/subscriptions | | [RAG](/zh/api/rag) | /projects/{id}/rag | | [Writing](/zh/api/writing) | /projects/{id}/writing | -| Tasks | /tasks | +| [Chat](/zh/api/chat) | /chat | +| [Conversations](/zh/api/conversations) | /conversations | +| [Settings](/zh/api/settings) | /settings | +| [Tasks](/zh/api/tasks) | /tasks | +| [Pipelines](/zh/api/pipelines) | /pipelines | diff --git a/docs/zh/api/keywords.md b/docs/zh/api/keywords.md index ae5071b..2375f82 100644 --- a/docs/zh/api/keywords.md +++ b/docs/zh/api/keywords.md @@ -14,6 +14,10 @@ | POST | /projects/{id}/keywords/expand | LLM 扩展 | | GET | /projects/{id}/keywords/search-formula | 检索公式 | +## 扩展响应 + +`expanded_terms` 为对象列表:`{ term, term_zh, relation }`,`relation` 为 `synonym`、`abbreviation` 或 `related`。 + ## 检索公式 -参数 `database`:wos、scopus、pubmed +`GET /projects/{id}/keywords/search-formula?database=wos` — 查询参数 `database`:`wos`、`scopus`、`pubmed`(默认 `wos`)。 diff --git 
a/docs/zh/api/ocr.md b/docs/zh/api/ocr.md new file mode 100644 index 0000000..da9011e --- /dev/null +++ b/docs/zh/api/ocr.md @@ -0,0 +1,93 @@ +# OCR API + +OCR 模块 API,用于对已下载 PDF 进行文本提取与分块。 + +**基础路径:** `/api/v1/projects/{project_id}/ocr` + +--- + +## 端点概览 + +| 方法 | 路径 | 说明 | +|------|------|------| +| POST | `/process` | 对指定或待处理文献执行 OCR | +| GET | `/stats` | 获取 OCR 统计信息 | + +--- + +## POST /process + +对项目内文献执行 OCR 文本提取。支持 pdfplumber(原生)与 PaddleOCR(扫描版)。 + +**查询参数** + +| 参数名 | 类型 | 默认值 | 说明 | +|--------|------|--------|------| +| `paper_ids` | list[int] | null | 指定文献 ID 列表;为空则处理所有 `pdf_downloaded` 状态文献 | +| `force_ocr` | bool | false | 是否强制重新 OCR(覆盖已有结果) | +| `use_gpu` | bool | true | 是否使用 GPU(PaddleOCR) | + +**响应** + +```json +{ + "code": 200, + "message": "success", + "data": { + "processed": 5, + "failed": 0, + "total": 5 + } +} +``` + +**示例** + +```bash +# 处理所有待 OCR 文献 +curl -X POST "http://localhost:8000/api/v1/projects/1/ocr/process" + +# 处理指定文献并强制重做 +curl -X POST "http://localhost:8000/api/v1/projects/1/ocr/process?paper_ids=1&paper_ids=2&force_ocr=true" +``` + +--- + +## GET /stats + +返回项目内 OCR 相关统计。 + +**响应** + +```json +{ + "code": 200, + "message": "success", + "data": { + "pending": 10, + "metadata_only": 5, + "pdf_downloaded": 3, + "ocr_complete": 80, + "indexed": 50, + "error": 2, + "total_chunks": 1200 + } +} +``` + +- `pending`, `metadata_only`, `pdf_downloaded`, `ocr_complete`, `indexed`, `error`:各状态文献数量 +- `total_chunks`:项目内分块总数 + +**示例** + +```bash +curl "http://localhost:8000/api/v1/projects/1/ocr/stats" +``` + +--- + +## 错误码 + +| 状态码 | 说明 | +|--------|------| +| 404 | 项目不存在 | diff --git a/docs/zh/api/papers.md b/docs/zh/api/papers.md index dcd0555..c7602e0 100644 --- a/docs/zh/api/papers.md +++ b/docs/zh/api/papers.md @@ -9,6 +9,8 @@ | GET | /projects/{id}/papers | 列表(分页) | | POST | /projects/{id}/papers | 创建 | | POST | /projects/{id}/papers/bulk | 批量导入 | +| POST | /projects/{id}/papers/upload | 多文件上传(PDF) | +| POST | 
/projects/{id}/papers/process | 触发论文处理 | | GET | /projects/{id}/papers/{paper_id} | 获取 | | PUT | /projects/{id}/papers/{paper_id} | 更新 | | DELETE | /projects/{id}/papers/{paper_id} | 删除 | @@ -20,3 +22,15 @@ - `year` — 年份过滤 - `q` — 标题/摘要搜索 - `sort_by`, `order` — 排序 + +## 上传 + +`POST /projects/{id}/papers/upload` — 多文件上传 PDF,返回 `{ papers, conflicts, total_uploaded }`。 + +## 处理 + +`POST /projects/{id}/papers/process` — 触发 OCR + RAG 索引。可选查询参数 `paper_ids`,省略则处理全部待处理论文。 + +## 批量导入响应 + +`POST /projects/{id}/papers/bulk` 返回 `{ created, skipped, total }`。 diff --git a/docs/zh/api/pipelines.md b/docs/zh/api/pipelines.md new file mode 100644 index 0000000..89c1f35 --- /dev/null +++ b/docs/zh/api/pipelines.md @@ -0,0 +1,286 @@ +# 流水线 API + +基础路径:`/api/v1/pipelines` + +## 简介 + +流水线 API 用于编排 LangGraph 工作流:关键词检索(search → dedup → crawl → OCR → index)和 PDF 上传(extract → dedup → OCR → index)。流水线异步执行,支持 HITL(人机协同)中断以处理去重冲突。使用 `thread_id` 轮询状态、在 HITL 后恢复或取消流水线。 + +## 端点概览 + +| 方法 | 路径 | 说明 | +|------|------|------| +| POST | `/pipelines/search` | 运行检索流水线(支持 HITL) | +| POST | `/pipelines/upload` | 运行上传流水线 | +| GET | `/pipelines/{thread_id}/status` | 获取流水线状态 | +| POST | `/pipelines/{thread_id}/resume` | 恢复 HITL 中断的流水线 | +| POST | `/pipelines/{thread_id}/cancel` | 取消流水线 | + +--- + +## POST /api/v1/pipelines/search + +**说明:** 启动关键词检索流水线:search → dedup → crawl → OCR → index。发现去重冲突时可能中断以等待 HITL 处理。 + +**请求体:** `SearchPipelineRequest` + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `project_id` | int | 是 | 项目 ID | +| `query` | string | 否 | 检索词(默认:`""`) | +| `sources` | string[] | 否 | 检索源(如 `["semantic_scholar", "openalex"]`) | +| `max_results` | int | 否 | 最大结果数(1–200,默认:50) | + +**响应:** `ApiResponse[dict]` + +| 字段 | 类型 | 说明 | +|------|------|------| +| `thread_id` | string | 流水线线程 ID(如 `search_a1b2c3d4e5f6`) | +| `status` | string | `running` | +| `project_id` | int | 项目 ID | + +### 检索流水线示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/pipelines/search" \ + -H 
"Content-Type: application/json" \ + -d '{ + "project_id": 1, + "query": "transformer attention", + "sources": ["semantic_scholar"], + "max_results": 30 + }' +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "thread_id": "search_a1b2c3d4e5f6", + "status": "running", + "project_id": 1 + } +} +``` + +--- + +## POST /api/v1/pipelines/upload + +**说明:** 启动 PDF 上传流水线:提取元数据 → dedup → OCR → index。接受允许目录内的本地文件路径。 + +**请求体:** `UploadPipelineRequest` + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `project_id` | int | 是 | 项目 ID | +| `pdf_paths` | string[] | 是 | PDF 文件的绝对路径(需在配置的 `pdf_dir` 下) | + +**响应:** `ApiResponse[dict]` + +| 字段 | 类型 | 说明 | +|------|------|------| +| `thread_id` | string | 流水线线程 ID(如 `upload_x1y2z3a4b5c6`) | +| `status` | string | `running` | +| `project_id` | int | 项目 ID | + +### 上传流水线示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/pipelines/upload" \ + -H "Content-Type: application/json" \ + -d '{ + "project_id": 1, + "pdf_paths": [ + "/data0/djx/omelette/pdfs/paper1.pdf", + "/data0/djx/omelette/pdfs/paper2.pdf" + ] + }' +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "thread_id": "upload_x1y2z3a4b5c6", + "status": "running", + "project_id": 1 + } +} +``` + +--- + +## GET /api/v1/pipelines/{thread_id}/status + +**说明:** 获取流水线执行状态。当 `status` 为 `interrupted` 时,包含 `conflicts` 用于 HITL 处理。 + +**路径参数** + +| 参数 | 类型 | 说明 | +|------|------|------| +| `thread_id` | string | 流水线线程 ID | + +**响应:** `ApiResponse[dict]` + +| 字段 | 类型 | 说明 | +|------|------|------| +| `thread_id` | string | 线程 ID | +| `status` | string | `running`、`interrupted`、`completed`、`failed`、`cancelled` | +| `stage` | string | 当前阶段(若可用) | +| `progress` | int | 进度 0–100 | +| `conflicts` | object[] | 去重冲突(`interrupted` 时) | +| `interrupted_at` | string[] | 中断节点 ID(`interrupted` 时) | +| `result` | object | 最终结果(`completed` 时) | +| `error` | string | 错误信息(`failed` 时) | + +### 状态查询示例 + +```bash +curl -X GET 
"http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/status" +``` + +**运行中:** +```json +{ + "code": 200, + "message": "success", + "data": { + "thread_id": "search_a1b2c3d4e5f6", + "status": "running" + } +} +``` + +**HITL 中断:** +```json +{ + "code": 200, + "message": "success", + "data": { + "thread_id": "search_a1b2c3d4e5f6", + "status": "interrupted", + "conflicts": [ + { + "existing": {"id": 1, "title": "Paper A", "doi": "10.1234/abc"}, + "new": {"title": "Paper A (preprint)", "doi": "10.1234/abc"} + } + ], + "stage": "dedup", + "progress": 45, + "interrupted_at": ["dedup_resolve"] + } +} +``` + +**已完成:** +```json +{ + "code": 200, + "message": "success", + "data": { + "thread_id": "search_a1b2c3d4e5f6", + "status": "completed", + "stage": "completed", + "progress": 100, + "result": {"papers_imported": 12} + } +} +``` + +--- + +## POST /api/v1/pipelines/{thread_id}/resume + +**说明:** 使用已解决的冲突恢复 HITL 中断的流水线。仅在 `status` 为 `interrupted` 时有效。 + +**路径参数** + +| 参数 | 类型 | 说明 | +|------|------|------| +| `thread_id` | string | 流水线线程 ID | + +**请求体:** `ResumeRequest` + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `resolved_conflicts` | object[] | 否 | 已解决的冲突决策(默认:`[]`) | + +**响应:** `ApiResponse[dict]` + +| 字段 | 类型 | 说明 | +|------|------|------| +| `thread_id` | string | 线程 ID | +| `status` | string | `running` | + +### 恢复流水线示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/resume" \ + -H "Content-Type: application/json" \ + -d '{ + "resolved_conflicts": [ + {"conflict_id": 0, "action": "keep_existing"}, + {"conflict_id": 1, "action": "import_new"} + ] + }' +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "thread_id": "search_a1b2c3d4e5f6", + "status": "running" + } +} +``` + +--- + +## POST /api/v1/pipelines/{thread_id}/cancel + +**说明:** 取消运行中或已中断的流水线。 + +**路径参数** + +| 参数 | 类型 | 说明 | +|------|------|------| +| `thread_id` | string | 流水线线程 ID | + +**响应:** `ApiResponse[dict]` + +| 字段 | 类型 | 说明 | 
+|------|------|------| +| `thread_id` | string | 线程 ID | +| `status` | string | `cancelled` | + +### 取消流水线示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/pipelines/search_a1b2c3d4e5f6/cancel" +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "thread_id": "search_a1b2c3d4e5f6", + "status": "cancelled" + } +} +``` + +--- + +## 错误码 + +| 错误码 | 说明 | +|--------|------| +| 200 | 成功 | +| 400 | 请求错误(如路径不在允许目录内、流水线未处于中断状态) | +| 404 | 流水线不存在(thread_id 未知或已完成且已清理) | diff --git a/docs/zh/api/projects.md b/docs/zh/api/projects.md index 6e579d1..e582700 100644 --- a/docs/zh/api/projects.md +++ b/docs/zh/api/projects.md @@ -11,6 +11,8 @@ | GET | /projects/{id} | 获取 | | PUT | /projects/{id} | 更新 | | DELETE | /projects/{id} | 删除 | +| POST | /projects/{id}/pipeline/run | 运行完整流程(爬取→OCR→索引) | +| POST | /projects/{id}/pipeline/paper/{paper_id} | 对单篇论文运行流程 | ## 请求体(创建/更新) diff --git a/docs/zh/api/rag.md b/docs/zh/api/rag.md index 61167cc..3064734 100644 --- a/docs/zh/api/rag.md +++ b/docs/zh/api/rag.md @@ -6,7 +6,30 @@ | 方法 | 路径 | 说明 | |------|------|------| -| POST | /projects/{id}/rag/query | 查询 | -| POST | /projects/{id}/rag/index | 构建索引 | -| GET | /projects/{id}/rag/stats | 统计 | +| POST | /projects/{id}/rag/query | 查询知识库 | +| POST | /projects/{id}/rag/index | 构建/重建索引 | +| POST | /projects/{id}/rag/index/stream | 构建索引(SSE 流式进度) | +| GET | /projects/{id}/rag/stats | 索引统计 | | DELETE | /projects/{id}/rag/index | 删除索引 | + +## 查询请求 + +```json +{ + "question": "什么是注意力机制?", + "top_k": 10, + "use_reranker": true, + "include_sources": true +} +``` + +- `question` — 待回答的问题(必填) +- `top_k` — 检索块数量(默认:10) +- `use_reranker` — 是否使用重排序(默认:true) +- `include_sources` — 是否包含来源(默认:true) + +## 索引流式接口 + +`POST /projects/{id}/rag/index/stream` — 通过 SSE 流式重建向量索引,实时推送进度。 + +**事件类型:** `progress`、`complete`、`error` diff --git a/docs/zh/api/settings.md b/docs/zh/api/settings.md new file mode 100644 index 0000000..e5a5f56 --- /dev/null +++ b/docs/zh/api/settings.md @@ -0,0 +1,261 
@@ +# 设置 API + +基础路径:`/api/v1/settings` + +## 简介 + +设置 API 用于管理应用配置:LLM 提供商选择、模型参数、各提供商(OpenAI、Anthropic、阿里云、火山引擎、Ollama)的 API 密钥、嵌入/重排序模型及其他系统设置。配置值由环境变量与数据库覆盖合并而成;API 密钥在响应中会被脱敏显示。 + +## 端点概览 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/settings` | 获取全部设置 | +| PUT | `/settings` | 更新设置(部分更新) | +| GET | `/settings/models` | 按提供商列出可用模型 | +| POST | `/settings/test-connection` | 测试 LLM 提供商连接 | +| GET | `/settings/health` | 健康检查 | + +--- + +## GET /api/v1/settings + +**说明:** 返回合并后的设置(数据库覆盖 .env)。API 密钥会被脱敏(如 `sk-12***abcd`)。 + +**响应:** `ApiResponse[SettingsSchema]` + +### SettingsSchema 字段 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `llm_provider` | string | 默认 LLM 提供商(`openai`、`anthropic`、`aliyun`、`volcengine`、`ollama`、`mock`) | +| `llm_model` | string | 默认模型(覆盖提供商默认值) | +| `llm_temperature` | float | 温度(0.0–2.0) | +| `llm_max_tokens` | int | 最大 token 数 | +| `openai_api_key` | string | OpenAI API 密钥(脱敏) | +| `openai_model` | string | OpenAI 模型 | +| `anthropic_api_key` | string | Anthropic API 密钥(脱敏) | +| `anthropic_model` | string | Anthropic 模型 | +| `aliyun_api_key` | string | 阿里云 API 密钥(脱敏) | +| `aliyun_base_url` | string | 阿里云 base URL | +| `aliyun_model` | string | 阿里云模型 | +| `volcengine_api_key` | string | 火山引擎 API 密钥(脱敏) | +| `volcengine_base_url` | string | 火山引擎 base URL | +| `volcengine_model` | string | 火山引擎模型 | +| `ollama_base_url` | string | Ollama base URL | +| `ollama_model` | string | Ollama 模型 | +| `embedding_model` | string | 嵌入模型名称 | +| `reranker_model` | string | 重排序模型名称 | +| `data_dir` | string | 数据目录路径 | +| `cuda_visible_devices` | string | CUDA 设备 ID | +| `semantic_scholar_api_key` | string | Semantic Scholar API 密钥(脱敏) | +| `unpaywall_email` | string | Unpaywall 邮箱 | + +### 获取设置示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/settings" +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "llm_provider": "openai", + "llm_model": "gpt-4o-mini", + "llm_temperature": 0.7, + "llm_max_tokens": 4096, + 
"openai_api_key": "sk-12***abcd", + "openai_model": "gpt-4o-mini", + "anthropic_api_key": "", + "anthropic_model": "", + "aliyun_api_key": "", + "aliyun_base_url": "", + "aliyun_model": "", + "volcengine_api_key": "", + "volcengine_base_url": "", + "volcengine_model": "", + "ollama_base_url": "http://localhost:11434", + "ollama_model": "", + "embedding_model": "BAAI/bge-m3", + "reranker_model": "", + "data_dir": "/data0/djx/omelette", + "cuda_visible_devices": "", + "semantic_scholar_api_key": "", + "unpaywall_email": "" + } +} +``` + +--- + +## PUT /api/v1/settings + +**说明:** 更新用户可配置的设置。仅非空字段会被应用。包含 `***` 的脱敏 API 密钥会被跳过,避免覆盖真实密钥。 + +**请求体:** `SettingsUpdateSchema`(部分更新,所有字段可选) + +| 字段 | 类型 | 约束 | +|------|------|------| +| `llm_provider` | string | — | +| `llm_model` | string | — | +| `llm_temperature` | float | 0.0–2.0 | +| `llm_max_tokens` | int | 1–128000 | +| `openai_api_key` | string | — | +| `openai_model` | string | — | +| `anthropic_api_key` | string | — | +| `anthropic_model` | string | — | +| `aliyun_api_key` | string | — | +| `aliyun_base_url` | string | — | +| `aliyun_model` | string | — | +| `volcengine_api_key` | string | — | +| `volcengine_base_url` | string | — | +| `volcengine_model` | string | — | +| `ollama_base_url` | string | — | +| `ollama_model` | string | — | + +**响应:** `ApiResponse[SettingsSchema]`(更新后的合并设置) + +### 更新设置示例 + +```bash +curl -X PUT "http://localhost:8000/api/v1/settings" \ + -H "Content-Type: application/json" \ + -d '{"llm_provider": "openai", "llm_model": "gpt-4o-mini"}' +``` + +--- + +## GET /api/v1/settings/models + +**说明:** 返回可用的 LLM 提供商及其模型列表。 + +**响应:** `ApiResponse[list[ProviderModelInfo]]` + +### ProviderModelInfo 字段 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `provider` | string | 提供商 ID | +| `display_name` | string | 显示名称 | +| `models` | string[] | 模型 ID 列表 | +| `requires_api_key` | bool | 是否需要 API 密钥 | +| `requires_base_url` | bool | 是否可配置 base URL | +| `default_base_url` | string | 默认 base URL(若适用) | + +### 
模型列表示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/settings/models" +``` + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "provider": "openai", + "display_name": "OpenAI", + "models": ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3-mini"], + "requires_api_key": true, + "requires_base_url": false, + "default_base_url": "" + }, + { + "provider": "ollama", + "display_name": "Ollama (本地)", + "models": ["llama3", "llama3.1", "mistral", "qwen2", "deepseek-r1"], + "requires_api_key": false, + "requires_base_url": true, + "default_base_url": "http://localhost:11434" + } + ] +} +``` + +--- + +## POST /api/v1/settings/test-connection + +**说明:** 使用当前 LLM 配置发送简单提示进行连接测试。使用数据库中的合并配置(无请求体)。 + +**响应:** `ApiResponse[dict]` + +| 字段 | 类型 | 说明 | +|------|------|------| +| `success` | bool | 测试是否成功 | +| `response` | string | LLM 响应前 200 字符(成功时) | +| `error` | string | 错误信息(失败时) | + +### 连接测试示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/settings/test-connection" +``` + +**成功:** +```json +{ + "code": 200, + "message": "success", + "data": { + "success": true, + "response": "OK." 
+ } +} +``` + +**失败:** +```json +{ + "code": 500, + "message": "Connection test failed", + "data": { + "success": false, + "error": "Invalid API key" + } +} +``` + +--- + +## GET /api/v1/settings/health + +**说明:** 简单健康检查端点。 + +**响应:** `ApiResponse[dict]` + +| 字段 | 类型 | 说明 | +|------|------|------| +| `status` | string | `"healthy"` | +| `version` | string | 应用版本 | + +### 健康检查示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/settings/health" +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "status": "healthy", + "version": "0.1.0" + } +} +``` + +--- + +## 错误码 + +| 错误码 | 说明 | +|--------|------| +| 200 | 成功 | +| 400 | 请求错误(如温度范围无效) | +| 422 | 校验错误(请求体无效) | +| 500 | 服务端错误(如连接测试失败) | diff --git a/docs/zh/api/subscription.md b/docs/zh/api/subscription.md new file mode 100644 index 0000000..80778db --- /dev/null +++ b/docs/zh/api/subscription.md @@ -0,0 +1,21 @@ +# Subscription API + +路径:`/api/v1/projects/{project_id}/subscriptions` + +## 端点 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | /feeds | 常用学术 RSS 模板 | +| GET | / | 列表 | +| POST | / | 创建 | +| GET | /{sub_id} | 获取 | +| PUT | /{sub_id} | 更新 | +| DELETE | /{sub_id} | 删除 | +| POST | /{sub_id}/trigger | 手动触发更新 | +| POST | /check-rss | 检查 RSS | +| POST | /check-updates | 检查 API 更新 | + +## 说明 + +订阅模块用于增量文献更新(RSS / API 检索)。创建订阅后可定期或手动触发,检查新文献并导入项目。 diff --git a/docs/zh/api/tasks.md b/docs/zh/api/tasks.md new file mode 100644 index 0000000..9403af8 --- /dev/null +++ b/docs/zh/api/tasks.md @@ -0,0 +1,163 @@ +# 任务 API + +基础路径:`/api/v1/tasks` + +## 简介 + +任务 API 用于管理后台处理任务:search、dedup、crawl、ocr、index、keyword_expand。任务由流水线及其他服务创建;本 API 提供列表、详情查询和取消功能。 + +## 端点概览 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/tasks` | 任务列表 | +| GET | `/tasks/{id}` | 任务详情 | +| POST | `/tasks/{id}/cancel` | 取消运行中的任务 | + +--- + +## GET /api/v1/tasks + +**说明:** 列出任务,支持可选过滤。结果按 `created_at` 降序排列。 + +**查询参数** + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `project_id` | int | 否 | 按项目 ID 过滤 | +| 
`status` | string | 否 | 按状态过滤:`pending`、`running`、`completed`、`failed`、`cancelled` | +| `limit` | int | 否 | 最大条数(默认:50) | + +**响应:** `ApiResponse[list[TaskSchema]]` + +### TaskSchema(列表视图) + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | int | 任务 ID | +| `project_id` | int | 项目 ID | +| `task_type` | string | `search`、`dedup`、`crawl`、`ocr`、`index`、`keyword_expand` | +| `status` | string | `pending`、`running`、`completed`、`failed`、`cancelled` | +| `progress` | int | 当前进度 | +| `total` | int | 总步数 | +| `created_at` | string | ISO 8601 时间 | + +### 列表示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/tasks?project_id=1&status=running&limit=20" +``` + +```json +{ + "code": 200, + "message": "success", + "data": [ + { + "id": 42, + "project_id": 1, + "task_type": "search", + "status": "running", + "progress": 30, + "total": 100, + "created_at": "2025-03-12T10:00:00" + } + ] +} +``` + +--- + +## GET /api/v1/tasks/{id} + +**说明:** 获取任务完整详情,包括 params、result、error_message。 + +**路径参数** + +| 参数 | 类型 | 说明 | +|------|------|------| +| `id` | int | 任务 ID | + +**响应:** `ApiResponse[TaskDetailSchema]` + +### TaskDetailSchema 字段 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | int | 任务 ID | +| `project_id` | int | 项目 ID | +| `task_type` | string | 任务类型 | +| `status` | string | 任务状态 | +| `progress` | int | 当前进度 | +| `total` | int | 总步数 | +| `params` | object | 输入参数 | +| `result` | object | 输出结果(完成时) | +| `error_message` | string | 错误信息(失败时) | +| `created_at` | string | ISO 8601 时间 | +| `started_at` | string | ISO 8601 时间(可为空) | +| `completed_at` | string | ISO 8601 时间(可为空) | + +### 详情示例 + +```bash +curl -X GET "http://localhost:8000/api/v1/tasks/42" +``` + +```json +{ + "code": 200, + "message": "success", + "data": { + "id": 42, + "project_id": 1, + "task_type": "search", + "status": "completed", + "progress": 100, + "total": 100, + "params": {"query": "machine learning", "sources": ["semantic_scholar"]}, + "result": {"papers_found": 15, "imported": 10}, + 
"error_message": "", + "created_at": "2025-03-12T10:00:00", + "started_at": "2025-03-12T10:00:01", + "completed_at": "2025-03-12T10:02:30" + } +} +``` + +--- + +## POST /api/v1/tasks/{id}/cancel + +**说明:** 取消运行中或待处理的任务。处于 `completed`、`failed`、`cancelled` 状态的任务不可取消。 + +**路径参数** + +| 参数 | 类型 | 说明 | +|------|------|------| +| `id` | int | 任务 ID | + +**响应:** `ApiResponse`(无 data) + +### 取消示例 + +```bash +curl -X POST "http://localhost:8000/api/v1/tasks/42/cancel" +``` + +```json +{ + "code": 200, + "message": "Task cancelled", + "data": null +} +``` + +--- + +## 错误码 + +| 错误码 | 说明 | +|--------|------| +| 200 | 成功 | +| 400 | 无法取消任务(已处于 completed/failed/cancelled 状态) | +| 404 | 任务不存在 | diff --git a/docs/zh/api/writing.md b/docs/zh/api/writing.md index a102ab8..bc428f8 100644 --- a/docs/zh/api/writing.md +++ b/docs/zh/api/writing.md @@ -6,12 +6,16 @@ | 方法 | 路径 | 说明 | |------|------|------| -| POST | /projects/{id}/writing/assist | 通用辅助 | +| POST | /projects/{id}/writing/assist | 通用写作辅助 | | POST | /projects/{id}/writing/summarize | 摘要 | | POST | /projects/{id}/writing/citations | 引用生成 | | POST | /projects/{id}/writing/review-outline | 综述提纲 | | POST | /projects/{id}/writing/gap-analysis | 缺口分析 | +## Assist 请求 + +`task`:`summarize`、`cite`、`review_outline`、`gap_analysis`;`style` 用于引用样式。 + ## 引用样式 -gb7714、apa、mla +`gb_t_7714`、`apa`、`mla` diff --git a/frontend/src/pages/ChatHistoryPage.tsx b/frontend/src/pages/ChatHistoryPage.tsx index fdb6562..9fb9a66 100644 --- a/frontend/src/pages/ChatHistoryPage.tsx +++ b/frontend/src/pages/ChatHistoryPage.tsx @@ -113,7 +113,7 @@ export default function ChatHistoryPage() { {formatDate(conv.updated_at)} - {t('history.messageCount', { count: conv.messages?.length ?? 0 })} + {t('history.messageCount', { count: conv.message_count ?? conv.messages?.length ?? 
0 })} diff --git a/frontend/src/pages/project/KeywordsPage.tsx b/frontend/src/pages/project/KeywordsPage.tsx index 3dce598..7183eb6 100644 --- a/frontend/src/pages/project/KeywordsPage.tsx +++ b/frontend/src/pages/project/KeywordsPage.tsx @@ -68,10 +68,12 @@ export default function KeywordsPage() { onSuccess: (res) => { const terms = res?.expanded_terms ?? []; if (terms.length > 0) { - terms.forEach((term: string) => { + terms.forEach((item: string | { term: string; term_zh?: string; relation?: string }) => { + const termStr = typeof item === 'string' ? item : item.term; + const termEn = typeof item === 'string' ? item : item.term; createMutation.mutate({ - term, - term_en: term, + term: termStr, + term_en: termEn, level: 1, }); }); diff --git a/frontend/src/pages/project/PapersPage.tsx b/frontend/src/pages/project/PapersPage.tsx index 9a05f86..4d5405c 100644 --- a/frontend/src/pages/project/PapersPage.tsx +++ b/frontend/src/pages/project/PapersPage.tsx @@ -144,7 +144,17 @@ export default function PapersPage() { const handleResolveConflict = async (conflictId: string, action: string) => { try { - await kbApi.resolveConflict(pid, conflictId, action === 'keep_existing' ? 'keep_old' : action); + const mappedAction = action === 'keep_existing' ? 'keep_old' : action === 'keep_new' ? 'keep_new' : action; + if (mappedAction === 'ai_resolve') { + const suggestions = await kbApi.autoResolve(pid, [conflictId]); + if (Array.isArray(suggestions) && suggestions.length > 0) { + await kbApi.resolveConflict(pid, conflictId, suggestions[0].action ?? 
'skip'); + } + setConflicts((prev) => prev.filter((c) => c.conflict_id !== conflictId)); + queryClient.invalidateQueries({ queryKey: ['papers', pid] }); + return; + } + await kbApi.resolveConflict(pid, conflictId, mappedAction); setConflicts((prev) => prev.filter((c) => c.conflict_id !== conflictId)); queryClient.invalidateQueries({ queryKey: ['papers', pid] }); } catch (err) { @@ -155,7 +165,14 @@ export default function PapersPage() { const handleAutoResolveAll = async () => { const ids = conflicts.map((c) => c.conflict_id); try { - await kbApi.autoResolve(pid, ids); + const suggestions = await kbApi.autoResolve(pid, ids); + if (Array.isArray(suggestions)) { + for (const s of suggestions) { + if (s.action && !s.error) { + await kbApi.resolveConflict(pid, s.conflict_id, s.action); + } + } + } setConflicts([]); queryClient.invalidateQueries({ queryKey: ['papers', pid] }); } catch (err) { diff --git a/frontend/src/pages/project/SearchPage.tsx b/frontend/src/pages/project/SearchPage.tsx index 657ae72..49cbbce 100644 --- a/frontend/src/pages/project/SearchPage.tsx +++ b/frontend/src/pages/project/SearchPage.tsx @@ -84,7 +84,7 @@ export default function SearchPage() { errorMessage: t('searchPage.importFailed'), invalidateKeys: [['papers', pid], ['project', projectId]], onSuccess: (res) => { - setImported(res?.imported ?? 0); + setImported(res?.created ?? 
0); }, }); diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 20835ad..3f8f97e 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -25,7 +25,7 @@ export const paperApi = { delete: (projectId: number, paperId: number) => api.delete(`/projects/${projectId}/papers/${paperId}`).then(r => r.data), bulkImport: (projectId: number, papers: Partial[]) => - api.post<{ imported: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data), + api.post<{ created: number; skipped: number; total: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data), }; export const keywordApi = { diff --git a/frontend/src/services/kb-api.ts b/frontend/src/services/kb-api.ts index b387d96..87e4676 100644 --- a/frontend/src/services/kb-api.ts +++ b/frontend/src/services/kb-api.ts @@ -49,7 +49,7 @@ export const kbApi = { }).then(r => r.data), autoResolve: (projectId: number, conflictIds: string[]) => - api.post<{ resolved: number }>(`/projects/${projectId}/dedup/auto-resolve`, { + api.post>(`/projects/${projectId}/dedup/auto-resolve`, { conflict_ids: conflictIds, }).then(r => r.data), @@ -69,5 +69,5 @@ export const kbApi = { }).then(r => r.data), bulkImport: (projectId: number, papers: NewPaperData[]) => - api.post<{ imported: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data), + api.post<{ created: number; skipped: number; total: number }>(`/projects/${projectId}/papers/bulk`, { papers }).then(r => r.data), }; diff --git a/frontend/src/types/chat.ts b/frontend/src/types/chat.ts index ed374a5..fc1ea66 100644 --- a/frontend/src/types/chat.ts +++ b/frontend/src/types/chat.ts @@ -7,6 +7,8 @@ export interface Conversation { created_at: string; updated_at: string; messages: ChatMessage[]; + message_count?: number; + last_message_preview?: string; } export interface ChatMessage {