diff --git a/README.md b/README.md index afc03fd..3d01113 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ - 🆕 NanaDraw now supports the GPT Image 2 model as an image generation option - 📝 Paste method description text → auto-generate pipeline diagrams +- 📎 Use one upload entry for images, PDFs, and text files; images become sketch references, while PDFs are parsed with MinerU for quoted-selection prompts - 🎨 Three creation modes: Draft, Generation, and Assembly - 🖼️ Built-in style gallery with 250+ academic paper reference images - 🧰 Asset workshop with Bioicons, reusable personal assets, and AI-generated materials @@ -36,6 +37,15 @@ Upload a hand-drawn sketch and turn it into a high-fidelity editable pipeline di Figure 1 shows the rough hand-drawn sketch. Figure 2 shows the generated high-fidelity editable workflow diagram. +### PDF Parsing and Quoted-Selection Drawing + +The AI Workbench provides one upload entry for images, PDFs, Markdown, and text files. NanaDraw routes each file by type: images become sketch references, PDFs are parsed into Markdown through the MinerU online API, and Markdown/Text files are attached as prompt reference material. + +- File upload is available in Draft, Generation, Assembly, and Auto modes. +- Parsed PDFs appear in a scrollable, collapsible floating panel on the left side of the workbench. +- PDF content is sent only to MinerU for document parsing; NanaDraw does not automatically send the whole paper to the LLM. +- Only the text explicitly quoted by the user is merged into the generation prompt; users can enrich the prompt before generating. + ### Creation Modes | Mode | Description | Steps | Example Screenshot | @@ -164,6 +174,7 @@ After starting, click the ⚙️ gear icon in the top-right corner to configure: - **Image Model**: Default `gemini-3-pro-image-preview` - **Component Model**: Default `gemini-3.1-flash-image-preview` - **NanaSoul**: Custom AI persona for style constraints +- **Document Parsing Token**: MinerU online API token for PDF parsing #### Data Directory (Environment Variable) diff --git a/README_zh-CN.md b/README_zh-CN.md index c45e39f..b0bb96e 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -18,6 +18,7 @@ - 🆕 NanaDraw 上线 GPT Image 2 模型供大家选择 - 📝 粘贴方法描述文本,自动生成流程图 +- 📎 单一入口上传图片、PDF 或文本文件;图片作为草稿参考,PDF 经 MinerU 解析后引用选段绘图 - 🎨 三种创作模式:草稿模式、生成模式、组装模式 - 🖼️ 内置 250+ 学术论文风格参考图 - 🧰 素材工坊集成 Bioicons、个人常用素材和 AI 生成素材 @@ -36,6 +37,15 @@ 图 1 为用户上传的手绘草图,图 2 为系统生成的高保真可编辑流程图。 +### PDF 文档解析与引用选段绘图 + +AI 工作台底部提供统一上传入口,支持图片、PDF、Markdown 和文本文件。NanaDraw 会按文件类型自动分流:图片作为草稿图参考,PDF 调用 MinerU 在线 API 解析为 Markdown,Markdown/Text 作为文本附件进入提示词参考。 + +- 支持在草稿模式、生成模式、组装模式和自动模式中上传文件。 +- PDF 解析结果会显示在工作台左侧可滚动、可收起的浮窗中。 +- PDF 内容只用于 MinerU 文档解析;不会自动把整篇论文发送给 LLM。 +- 只有用户主动引用的选中文本会随提示词进入 NanaDraw 的生成流程;用户可以继续补充绘图要求后再生成。 + ### 多种模式 | 模式 | 说明 | 步骤 | 示例截图 | @@ -164,6 +174,7 @@ python start.py --dev - **图像模型**:默认 `gemini-3-pro-image-preview` - **组件模型**:默认 `gemini-3.1-flash-image-preview` - **NanaSoul**:用于风格约束的自定义 AI 角色 +- **文档解析 Token**:MinerU 在线 API Token,用于 PDF 解析 #### 数据目录(环境变量) diff --git a/backend/app/api/v1/endpoints/documents.py b/backend/app/api/v1/endpoints/documents.py new file mode 100644 index 0000000..8e87154 --- /dev/null +++ b/backend/app/api/v1/endpoints/documents.py @@ -0,0 +1,38 @@ +from fastapi import APIRouter, Depends, File, HTTPException, UploadFile + +from app.dependencies import require_auth +from app.services.mineru_service import MinerUError, parse_pdf_with_mineru +from app.services.settings_service import load_settings + +router = APIRouter(prefix="/documents", tags=["documents"]) + +MAX_PDF_SIZE_BYTES = 200 * 1024 * 1024 + + +@router.post("/parse-pdf") +async def parse_pdf(file: UploadFile = File(...), _user=Depends(require_auth)): + file_name = file.filename or "document.pdf" + if not file_name.lower().endswith(".pdf") and file.content_type != "application/pdf": + raise HTTPException(status_code=400, detail="仅支持上传 PDF 文件") + + content = await file.read() + if not content: + raise HTTPException(status_code=400, detail="PDF 文件为空") + if len(content) > MAX_PDF_SIZE_BYTES: + raise HTTPException(status_code=413, detail="PDF 文件不能超过 200MB") + + token = str(load_settings().get("mineru_api_token", "")).strip() + if not token: + raise HTTPException(status_code=400, detail="请先在设置中配置 MinerU Token") + + try: + return await parse_pdf_with_mineru( + file_name=file_name, + file_bytes=content, + token=token, + ) + except MinerUError as exc: + raise HTTPException(status_code=502, detail=str(exc)) from exc + except Exception as exc: + msg = str(exc).strip() or exc.__class__.__name__ + raise HTTPException(status_code=502, detail=f"PDF 解析失败: {msg}") from exc diff --git a/backend/app/api/v1/endpoints/settings.py b/backend/app/api/v1/endpoints/settings.py index 3885618..8905762 100644 --- a/backend/app/api/v1/endpoints/settings.py +++ b/backend/app/api/v1/endpoints/settings.py @@ -24,9 +24,11 @@ class SettingsResponse(BaseModel): llm_image_model: str = "" llm_component_model: str = "" api_format: str = "auto" + mineru_api_token: str = "" nana_soul: str = "" language: str = "zh" is_configured: bool = False + mineru_is_configured: bool = False class SettingsUpdate(BaseModel): @@ -40,6 +42,7 @@ class SettingsUpdate(BaseModel): llm_image_model: str | None = None llm_component_model: str | None = None api_format: str | None = None + mineru_api_token: str | None = None nana_soul: str | None = Field(default=None, max_length=500) language: str | None = None @@ -102,9 +105,11 @@ def _to_response(data: dict[str, Any]) -> SettingsResponse: llm_image_model=str(data.get("llm_image_model", "") or ""), llm_component_model=str(data.get("llm_component_model", "") or ""), api_format=str(data.get("api_format", "") or "auto"), + mineru_api_token=mask_api_key(str(data.get("mineru_api_token", ""))), nana_soul=str(data.get("nana_soul", "") or ""), language=str(data.get("language", "") or "zh"), is_configured=bool(str(data.get("llm_api_key", "")).strip()), + mineru_is_configured=bool(str(data.get("mineru_api_token", "")).strip()), ) @@ -119,6 +124,13 @@ async def get_settings(): async def update_settings(body: SettingsUpdate): """Update settings. Only non-None fields are updated.""" updates = body.model_dump(exclude_none=True) + mineru_token = updates.get("mineru_api_token") + if isinstance(mineru_token, str): + mineru_token = mineru_token.strip() + if mineru_token: + updates["mineru_api_token"] = mineru_token + else: + updates.pop("mineru_api_token") data = await asyncio.to_thread(apply_settings_updates, updates) return _to_response(data) diff --git a/backend/app/api/v1/router.py b/backend/app/api/v1/router.py index ca0580b..3ad0da3 100644 --- a/backend/app/api/v1/router.py +++ b/backend/app/api/v1/router.py @@ -9,6 +9,7 @@ models, assistant, settings, + documents, ) api_router = APIRouter() @@ -21,3 +22,4 @@ api_router.include_router(models.router) api_router.include_router(assistant.router) api_router.include_router(settings.router) +api_router.include_router(documents.router) diff --git a/backend/app/services/mineru_service.py b/backend/app/services/mineru_service.py new file mode 100644 index 0000000..f7617fb --- /dev/null +++ b/backend/app/services/mineru_service.py @@ -0,0 +1,186 @@ +import asyncio +import io +import time +import uuid +import zipfile +from pathlib import Path +from typing import Any + +import httpx + + +MINERU_API_BASE = "https://mineru.net/api/v4" +POLL_INTERVAL_SECONDS = 5 +POLL_TIMEOUT_SECONDS = 600 +ACTIVE_STATES = {"waiting-file", "uploading", "pending", "running", "converting"} + + +class MinerUError(RuntimeError): + pass + + +def _auth_headers(token: str) -> dict[str, str]: + return { + "Authorization": f"Bearer {token}", + "Accept": "*/*", + } + + +def _json_headers(token: str) -> dict[str, str]: + return { + **_auth_headers(token), + "Content-Type": "application/json", + } + + +def _ensure_success_payload(payload: dict[str, Any], action: str) -> dict[str, Any]: + if payload.get("code") != 0: + msg = str(payload.get("msg") or f"MinerU {action} failed") + raise MinerUError(msg) + data = payload.get("data") + if not isinstance(data, dict): + raise MinerUError(f"MinerU {action} returned invalid data") + return data + + +def _first_matching_result(results: list[Any], data_id: str, file_name: str) -> dict[str, Any] | None: + for item in results: + if isinstance(item, dict) and item.get("data_id") == data_id: + return item + for item in results: + if isinstance(item, dict) and item.get("file_name") == file_name: + return item + for item in results: + if isinstance(item, dict): + return item + return None + + +def _extract_result(data: dict[str, Any], data_id: str, file_name: str) -> dict[str, Any]: + result = data.get("extract_result") + if isinstance(result, dict): + return result + if isinstance(result, list): + matched = _first_matching_result(result, data_id, file_name) + if matched: + return matched + + results = data.get("extract_results") + if isinstance(results, list): + matched = _first_matching_result(results, data_id, file_name) + if matched: + return matched + + raise MinerUError("MinerU result payload is missing extract_result") + + +async def _download_full_markdown(client: httpx.AsyncClient, full_zip_url: str) -> str: + response = await client.get(full_zip_url) + response.raise_for_status() + + try: + with zipfile.ZipFile(io.BytesIO(response.content)) as archive: + names = archive.namelist() + full_md_name = next( + (name for name in names if Path(name).name == "full.md"), + None, + ) + if not full_md_name: + raise MinerUError("MinerU result zip does not contain full.md") + with archive.open(full_md_name) as f: + return f.read().decode("utf-8", errors="replace") + except zipfile.BadZipFile as exc: + raise MinerUError("MinerU result is not a valid zip file") from exc + + +async def parse_pdf_with_mineru( + *, + file_name: str, + file_bytes: bytes, + token: str, + client_factory: Any | None = None, +) -> dict[str, str]: + data_id = f"nanadraw-{uuid.uuid4().hex}" + timeout = httpx.Timeout(connect=15.0, read=120.0, write=120.0, pool=15.0) + make_client = client_factory or httpx.AsyncClient + client_kwargs: dict[str, Any] = {"timeout": timeout} + if client_factory is None: + # Avoid inheriting broken/unsupported proxy envs in local runs. + client_kwargs["trust_env"] = False + + try: + async with make_client(**client_kwargs) as client: + apply_body = { + "files": [ + { + "name": file_name, + "data_id": data_id, + "is_ocr": False, + } + ], + "model_version": "vlm", + "language": "ch", + "enable_table": True, + "enable_formula": True, + } + + apply_response = await client.post( + f"{MINERU_API_BASE}/file-urls/batch", + headers=_json_headers(token), + json=apply_body, + ) + apply_response.raise_for_status() + apply_data = _ensure_success_payload(apply_response.json(), "upload URL request") + + batch_id = str(apply_data.get("batch_id") or "") + file_urls = apply_data.get("file_urls") + if not batch_id or not isinstance(file_urls, list) or not file_urls: + raise MinerUError("MinerU upload URL response is missing batch_id or file_urls") + + upload_response = await client.put(str(file_urls[0]), content=file_bytes) + upload_response.raise_for_status() + + deadline = time.monotonic() + POLL_TIMEOUT_SECONDS + result: dict[str, Any] | None = None + while time.monotonic() < deadline: + await asyncio.sleep(POLL_INTERVAL_SECONDS) + poll_response = await client.get( + f"{MINERU_API_BASE}/extract-results/batch/{batch_id}", + headers=_json_headers(token), + ) + poll_response.raise_for_status() + poll_data = _ensure_success_payload(poll_response.json(), "result polling") + result = _extract_result(poll_data, data_id, file_name) + + state = str(result.get("state") or "").lower() + if state == "done": + break + if state == "failed": + raise MinerUError(str(result.get("err_msg") or "MinerU parsing failed")) + if state and state not in ACTIVE_STATES: + raise MinerUError(f"Unexpected MinerU parsing state: {state}") + else: + raise MinerUError("MinerU parsing timed out") + + if not result: + raise MinerUError("MinerU did not return a parsing result") + + full_zip_url = str(result.get("full_zip_url") or "") + if not full_zip_url: + raise MinerUError("MinerU result is missing full_zip_url") + + markdown = await _download_full_markdown(client, full_zip_url) + if not markdown.strip(): + raise MinerUError("MinerU returned empty Markdown") + + return { + "file_name": file_name, + "markdown": markdown, + "batch_id": batch_id, + "data_id": str(result.get("data_id") or data_id), + "source": "mineru", + } + except httpx.ConnectError as exc: + raise MinerUError("无法连接 MinerU 服务,请检查网络、DNS 或代理设置") from exc + except httpx.TimeoutException as exc: + raise MinerUError("连接 MinerU 超时,请稍后重试") from exc diff --git a/backend/app/services/settings_service.py b/backend/app/services/settings_service.py index 0fe5102..9a4b188 100644 --- a/backend/app/services/settings_service.py +++ b/backend/app/services/settings_service.py @@ -18,6 +18,7 @@ "llm_image_model": "gemini-3-pro-image-preview", "llm_component_model": "gemini-3.1-flash-image-preview", "api_format": "auto", + "mineru_api_token": "", "nana_soul": "", "language": "zh", } @@ -103,6 +104,7 @@ def _persist_unlocked(merged: dict[str, Any]) -> dict[str, Any]: "llm_api_key": mask_api_key(str(to_store.get("llm_api_key", ""))), "image_api_key": mask_api_key(str(to_store.get("image_api_key", ""))), "vision_api_key": mask_api_key(str(to_store.get("vision_api_key", ""))), + "mineru_api_token": mask_api_key(str(to_store.get("mineru_api_token", ""))), } logger.info("Settings saved: %s", log_payload) return dict(to_store) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 0000000..c4d884b --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,7 @@ +import sys +from pathlib import Path + + +BACKEND_ROOT = Path(__file__).resolve().parents[1] +if str(BACKEND_ROOT) not in sys.path: + sys.path.insert(0, str(BACKEND_ROOT)) diff --git a/backend/tests/test_documents.py b/backend/tests/test_documents.py new file mode 100644 index 0000000..9b8ce1b --- /dev/null +++ b/backend/tests/test_documents.py @@ -0,0 +1,81 @@ +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from app.api.v1.endpoints import documents + + +def _client() -> TestClient: + app = FastAPI() + app.include_router(documents.router) + return TestClient(app) + + +def test_parse_pdf_rejects_non_pdf(): + res = _client().post( + "/documents/parse-pdf", + files={"file": ("notes.txt", b"hello", "text/plain")}, + ) + + assert res.status_code == 400 + assert "PDF" in res.json()["detail"] + + +def test_parse_pdf_rejects_empty_pdf(): + res = _client().post( + "/documents/parse-pdf", + files={"file": ("paper.pdf", b"", "application/pdf")}, + ) + + assert res.status_code == 400 + assert "空" in res.json()["detail"] + + +def test_parse_pdf_rejects_oversized_pdf(monkeypatch): + monkeypatch.setattr(documents, "MAX_PDF_SIZE_BYTES", 4) + + res = _client().post( + "/documents/parse-pdf", + files={"file": ("paper.pdf", b"12345", "application/pdf")}, + ) + + assert res.status_code == 413 + assert "200MB" in res.json()["detail"] + + +def test_parse_pdf_requires_mineru_token(monkeypatch): + monkeypatch.setattr(documents, "load_settings", lambda: {"mineru_api_token": ""}) + + res = _client().post( + "/documents/parse-pdf", + files={"file": ("paper.pdf", b"%PDF", "application/pdf")}, + ) + + assert res.status_code == 400 + assert "MinerU Token" in res.json()["detail"] + + +def test_parse_pdf_returns_mineru_markdown(monkeypatch): + monkeypatch.setattr(documents, "load_settings", lambda: {"mineru_api_token": "secret-token"}) + + async def fake_parse_pdf_with_mineru(*, file_name: str, file_bytes: bytes, token: str): + assert file_name == "paper.pdf" + assert file_bytes == b"%PDF" + assert token == "secret-token" + return { + "file_name": file_name, + "markdown": "# Parsed", + "batch_id": "batch-1", + "data_id": "data-1", + "source": "mineru", + } + + monkeypatch.setattr(documents, "parse_pdf_with_mineru", fake_parse_pdf_with_mineru) + + res = _client().post( + "/documents/parse-pdf", + files={"file": ("paper.pdf", b"%PDF", "application/pdf")}, + ) + + assert res.status_code == 200 + assert res.json()["markdown"] == "# Parsed" + assert res.json()["source"] == "mineru" diff --git a/backend/tests/test_mineru_service.py b/backend/tests/test_mineru_service.py new file mode 100644 index 0000000..e5f6cfd --- /dev/null +++ b/backend/tests/test_mineru_service.py @@ -0,0 +1,150 @@ +import io +import zipfile + +import pytest + +from app.services import mineru_service +from app.services.mineru_service import MinerUError, parse_pdf_with_mineru + + +class FakeResponse: + def __init__(self, json_data=None, content: bytes = b""): + self._json_data = json_data + self.content = content + + def json(self): + return self._json_data + + def raise_for_status(self): + return None + + +def _zip_with(entries: dict[str, str]) -> bytes: + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as archive: + for name, text in entries.items(): + archive.writestr(name, text) + return buf.getvalue() + + +class FakeMinerUClient: + def __init__(self, *, apply_payload=None, poll_payload=None, zip_bytes=None, **_kwargs): + self.apply_payload = apply_payload or { + "code": 0, + "data": {"batch_id": "batch-1", "file_urls": ["https://upload.example"]}, + } + self.poll_payload = poll_payload or { + "code": 0, + "data": { + "extract_result": { + "data_id": "data-1", + "file_name": "paper.pdf", + "state": "done", + "full_zip_url": "https://download.example/result.zip", + } + }, + } + self.zip_bytes = zip_bytes or _zip_with({"nested/full.md": "# Parsed"}) + self.uploaded = b"" + + async def __aenter__(self): + return self + + async def __aexit__(self, *_args): + return None + + async def post(self, *_args, **_kwargs): + return FakeResponse(self.apply_payload) + + async def put(self, _url, content: bytes): + self.uploaded = content + return FakeResponse({}) + + async def get(self, url, **_kwargs): + if str(url).endswith(".zip"): + return FakeResponse(content=self.zip_bytes) + return FakeResponse(self.poll_payload) + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_success(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_INTERVAL_SECONDS", 0) + + result = await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=FakeMinerUClient, + ) + + assert result["file_name"] == "paper.pdf" + assert result["markdown"] == "# Parsed" + assert result["batch_id"] == "batch-1" + assert result["source"] == "mineru" + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_rejects_error_payload(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_INTERVAL_SECONDS", 0) + + def factory(**kwargs): + return FakeMinerUClient(apply_payload={"code": 7, "msg": "bad token"}, **kwargs) + + with pytest.raises(MinerUError, match="bad token"): + await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=factory, + ) + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_reports_failed_state(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_INTERVAL_SECONDS", 0) + + def factory(**kwargs): + return FakeMinerUClient( + poll_payload={ + "code": 0, + "data": {"extract_result": {"state": "failed", "err_msg": "parse failed"}}, + }, + **kwargs, + ) + + with pytest.raises(MinerUError, match="parse failed"): + await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=factory, + ) + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_requires_full_markdown(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_INTERVAL_SECONDS", 0) + + def factory(**kwargs): + return FakeMinerUClient(zip_bytes=_zip_with({"other.md": "No full file"}), **kwargs) + + with pytest.raises(MinerUError, match="full.md"): + await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=factory, + ) + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_times_out(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_TIMEOUT_SECONDS", 0) + + with pytest.raises(MinerUError, match="timed out"): + await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=FakeMinerUClient, + ) diff --git a/backend/tests/test_settings.py b/backend/tests/test_settings.py new file mode 100644 index 0000000..d973bc4 --- /dev/null +++ b/backend/tests/test_settings.py @@ -0,0 +1,58 @@ +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from app.api.v1.endpoints import settings + + +def _client() -> TestClient: + app = FastAPI() + app.include_router(settings.router) + return TestClient(app) + + +def test_settings_masks_mineru_token(monkeypatch): + monkeypatch.setattr( + settings, + "load_settings", + lambda: { + "llm_api_key": "llm-secret", + "llm_base_url": "", + "llm_model": "text-model", + "llm_image_model": "image-model", + "llm_component_model": "component-model", + "mineru_api_token": "mineru-secret", + "nana_soul": "", + "language": "zh", + }, + ) + + res = _client().get("/settings") + + assert res.status_code == 200 + assert res.json()["mineru_api_token"] == "****cret" + assert res.json()["mineru_is_configured"] is True + + +def test_settings_ignores_blank_mineru_token_update(monkeypatch): + captured = {} + + def fake_update_settings(updates): + captured.update(updates) + return { + "llm_api_key": "", + "llm_base_url": "", + "llm_model": "", + "llm_image_model": "", + "llm_component_model": "", + "mineru_api_token": "existing-token", + "nana_soul": "", + "language": "zh", + } + + monkeypatch.setattr(settings, "apply_settings_updates", fake_update_settings) + + res = _client().put("/settings", json={"mineru_api_token": " "}) + + assert res.status_code == 200 + assert "mineru_api_token" not in captured + assert res.json()["mineru_is_configured"] is True diff --git a/docs/releases/v0.17.0-mineru-pdf.md b/docs/releases/v0.17.0-mineru-pdf.md new file mode 100644 index 0000000..00c3b0b --- /dev/null +++ b/docs/releases/v0.17.0-mineru-pdf.md @@ -0,0 +1,48 @@ +# NanaDraw v0.17.0-dev: MinerU PDF Parsing and Quoted Selection + +## 变更说明(提交到 dev 分支) + +- 新增 MinerU PDF 解析能力:后端支持申请上传 URL、上传 PDF、轮询解析结果、下载 zip 并提取 `full.md`。 +- 新增 `POST /api/v1/documents/parse-pdf`,校验 PDF 类型、空文件、200MB 大小上限和 MinerU Token 配置状态。 +- 设置页新增“文档解析”配置项,支持保存 MinerU Token;接口仅返回掩码 Token 与是否已配置状态。 +- AI 工作台上传入口扩展到所有创作模式,支持 PDF、Markdown 和文本文件。 +- 合并草稿图上传和文档上传为单一入口,自动按文件类型分流:图片作为草稿参考,PDF 触发 MinerU 解析,Markdown/Text 作为文本附件。 +- PDF 解析结果从对话框迁移到 AI 工作台左侧独立浮窗,支持滚动、收起和关闭,样式对齐平台暖色视觉体系。 +- 选中文本后的动作调整为“引用选中内容”:先把选段作为参考材料附加到输入区,用户可以继续丰富提示词,再手动触发现有绘图流程。 +- 补充前后端测试,覆盖设置保存、PDF 校验、MinerU 成功/失败流程、前端解析接口、文件类型分流和引用逻辑。 + +## PR 描述 + +### Summary + +This PR adds the first version of PDF-assisted academic drawing to NanaDraw and streamlines the AI Workbench upload experience. Users can upload images, PDFs, Markdown, or text files from a single entry point. Images become sketch references, PDFs are parsed through MinerU, and quoted PDF selections can be enriched in the prompt composer before continuing with the existing NanaDraw generation workflow. + +### What Changed + +- Added secure MinerU token settings with masked responses and configured-state reporting. +- Added a backend document parsing endpoint and MinerU service wrapper. +- Added PDF parsing UI states and a left-side floating Markdown viewer in the AI Workbench. +- Enabled a single upload entry across Draft, Generation, Assembly, and Auto modes. +- Routed uploaded files by type: image files become sketch references, PDFs trigger MinerU parsing, and Markdown/Text files remain prompt attachments. +- Changed selected-text handling from immediate generation to quote-and-enrich prompt composition. +- Added Chinese and English i18n strings for document parsing, upload, selection, and quote states. +- Added backend and frontend tests for the new parsing, API, and upload-routing behavior. + +### Testing + +- `COREPACK_ENABLE_AUTO_PIN=0 pnpm type-check` +- `COREPACK_ENABLE_AUTO_PIN=0 pnpm test -- --runInBand` +- `COREPACK_ENABLE_AUTO_PIN=0 pnpm lint` +- `./.venv/bin/python -m pytest backend/tests -q` +- `COREPACK_ENABLE_AUTO_PIN=0 pnpm build:react` +- Browser validation confirmed the workbench shows one upload entry, the old image/document split buttons are gone, the accepted file types include image/PDF/Markdown/Text, parsed PDFs render in the left floating panel, and quoting selected text does not trigger generation until the user submits a prompt. + +### Notes + +- A real end-to-end MinerU parsing check still requires a valid MinerU API token. +- v1 does not persist PDFs, parsed Markdown, selections, or parsing history. +- v1 sends only the user-quoted selection into the LLM prompt; the full PDF is not automatically sent to the generation pipeline. + +## 项目展示简介 + +NanaDraw is an academic figure creation workspace that turns paper methods, sketches, and structured prompts into editable research diagrams. With the unified upload entry and MinerU PDF parsing, researchers can attach a rough sketch, upload a paper, extract its Markdown structure, quote the exact passage they want to visualize, and refine the drawing prompt before generating a figure. The workflow keeps the researcher in control: files are routed by intent, the selected evidence becomes prompt context, and NanaDraw's existing multi-mode generation pipeline handles the final visual composition. diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 58d240c..cdec39b 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -14,6 +14,9 @@ function App() { } /> } /> } /> + } /> + } /> + } /> diff --git a/frontend/src/components/ChatPanel.test.ts b/frontend/src/components/ChatPanel.test.ts new file mode 100644 index 0000000..3cd2b80 --- /dev/null +++ b/frontend/src/components/ChatPanel.test.ts @@ -0,0 +1,24 @@ +import { describe, expect, it } from "vitest"; +import { classifyUploadFile } from "./chatUpload"; + +describe("classifyUploadFile", () => { + it("routes images to the sketch reference flow", () => { + expect(classifyUploadFile({ name: "sketch.png", type: "image/png" })).toBe("image"); + }); + + it("routes PDFs to the MinerU parsing flow", () => { + expect(classifyUploadFile({ name: "paper.pdf", type: "" })).toBe("pdf"); + expect(classifyUploadFile({ name: "paper", type: "application/pdf" })).toBe("pdf"); + }); + + it("routes Markdown and text files to the text attachment flow", () => { + expect(classifyUploadFile({ name: "notes.md", type: "" })).toBe("markdown"); + expect(classifyUploadFile({ name: "notes.markdown", type: "" })).toBe("markdown"); + expect(classifyUploadFile({ name: "prompt.txt", type: "" })).toBe("text"); + expect(classifyUploadFile({ name: "prompt", type: "text/plain" })).toBe("text"); + }); + + it("rejects unsupported upload types", () => { + expect(classifyUploadFile({ name: "archive.zip", type: "application/zip" })).toBe("unsupported"); + }); +}); diff --git a/frontend/src/components/ChatPanel.tsx b/frontend/src/components/ChatPanel.tsx index 0d5b58d..5f937e1 100644 --- a/frontend/src/components/ChatPanel.tsx +++ b/frontend/src/components/ChatPanel.tsx @@ -28,6 +28,7 @@ import { } from "lucide-react"; import clsx from "clsx"; import ReactMarkdown from "react-markdown"; +import { parsePdfDocument, type ParsedPdfResult } from "../services/api"; import { extractImages, stripComponentDescriptions } from "../services/projectApi"; import { ASSISTANT_AVATAR_URL } from "../lib/avatarUrl"; import { useLanguage, useT } from "../contexts/LanguageContext"; @@ -41,6 +42,7 @@ import type { StyleReference, } from "../types/paper"; import { GalleryModal } from "./GalleryModal"; +import { ACCEPTED_UPLOAD_TYPES, classifyUploadFile } from "./chatUpload"; // ── Constants ── @@ -76,6 +78,12 @@ const MODE_ICONS: Record = { }; const DRAWIO_MODES: AssistantMode[] = ["auto", "fast", "image_only", "full_gen", "free", "text_edit"]; +const MAX_PDF_SIZE_BYTES = 200 * 1024 * 1024; + +type PdfParseState = + | { status: "parsing"; fileName: string } + | { status: "done"; result: ParsedPdfResult; selectedText: string } + | { status: "error"; fileName: string; error: string }; // ── Props ── @@ -193,7 +201,10 @@ export function ChatPanel({ const bodyRef = useRef(null); const textareaRef = useRef(null); + const pdfTextRef = useRef(null); const [attachedFile, setAttachedFile] = useState<{ name: string; type: string; content: string } | null>(null); + const [pdfParse, setPdfParse] = useState(null); + const [pdfPanelCollapsed, setPdfPanelCollapsed] = useState(false); const bridgedXmlRef = useRef(null); const bridgedImageRef = useRef(null); // Bridge assistant state to DrawPage (dedup via refs to prevent @@ -378,7 +389,15 @@ export function ChatPanel({ if (!trimmed && !attachedFile) return; - const text = trimmed || (attachedFile ? t("chat.genFromAttachedFile", { name: attachedFile.name }) : ""); + const text = attachedFile + ? [ + t("chat.referenceMaterial", { name: attachedFile.name }), + attachedFile.content, + trimmed + ? t("chat.userPromptWithReference", { prompt: trimmed }) + : t("chat.genFromAttachedFile", { name: attachedFile.name }), + ].join("\n\n") + : trimmed; const sketch = sketchImage; setInput(""); @@ -411,6 +430,55 @@ export function ChatPanel({ sendMessage(text, opts); }, [input, isLoading, isGenerating, mode, sendMessage, sketchImage, selectedStyleId, textModel, imageModel, componentGenModel, attachedFile, editorRef, canvasRegenRequest, activeRegenContext, t]); + const handlePdfTextSelection = useCallback(() => { + const selection = window.getSelection(); + const container = pdfTextRef.current; + if (!selection || !container || !selection.anchorNode || !selection.focusNode) return; + if (!container.contains(selection.anchorNode) || !container.contains(selection.focusNode)) return; + const selected = selection.toString().trim(); + if (!selected) return; + setPdfParse((prev) => ( + prev?.status === "done" + ? { ...prev, selectedText: selected } + : prev + )); + }, []); + + const handleUsePdfSelection = useCallback(() => { + if (isLoading || isGenerating || pdfParse?.status !== "done") return; + const selected = pdfParse.selectedText.trim(); + if (!selected) return; + + setAttachedFile({ + name: pdfParse.result.file_name, + type: "pdf-selection", + content: selected, + }); + setPdfPanelCollapsed(true); + textareaRef.current?.focus(); + }, [isLoading, isGenerating, pdfParse]); + + const handleSelectAllPdfText = useCallback(() => { + if (pdfParse?.status !== "done") return; + const allText = pdfParse.result.markdown.trim(); + if (!allText) return; + + setPdfParse((prev) => ( + prev?.status === "done" + ? { ...prev, selectedText: allText } + : prev + )); + + const container = pdfTextRef.current; + if (!container) return; + const selection = window.getSelection(); + if (!selection) return; + const range = document.createRange(); + range.selectNodeContents(container); + selection.removeAllRanges(); + selection.addRange(range); + }, [pdfParse]); + const handleKeyDown = useCallback( (e: React.KeyboardEvent) => { if (e.key === "Enter" && !e.shiftKey) { @@ -433,37 +501,55 @@ export function ChatPanel({ reader.readAsDataURL(file); }, []); - const readDocFile = useCallback((file: File) => { - const ext = file.name.split(".").pop()?.toLowerCase(); - const isPdf = ext === "pdf" || file.type === "application/pdf"; - const isMd = ext === "md" || ext === "markdown" || file.type === "text/markdown"; - const isTxt = ext === "txt" || file.type === "text/plain"; + const handleUploadFile = useCallback(async (file: File) => { + if (pdfParse?.status === "parsing") return; - if (isPdf) { - const reader = new FileReader(); - reader.onload = () => { - const dataUrl = reader.result as string; - const b64 = dataUrl.split(",")[1]; - if (b64) setAttachedFile({ name: file.name, type: "pdf", content: b64 }); - }; - reader.readAsDataURL(file); - } else if (isMd || isTxt) { + const fileKind = classifyUploadFile(file); + + if (fileKind === "image") { + readFileAsB64(file); + return; + } + + if (fileKind === "pdf") { + if (file.size > MAX_PDF_SIZE_BYTES) { + setAttachedFile(null); + setPdfParse({ status: "error", fileName: file.name, error: t("chat.pdfTooLarge") }); + return; + } + setAttachedFile(null); + setPdfPanelCollapsed(false); + setPdfParse({ status: "parsing", fileName: file.name }); + try { + const result = await parsePdfDocument(file); + setPdfParse({ status: "done", result, selectedText: "" }); + } catch (err) { + setPdfParse({ + status: "error", + fileName: file.name, + error: err instanceof Error ? err.message : t("chat.pdfParseFailed"), + }); + } + } else if (fileKind === "markdown" || fileKind === "text") { + setPdfParse(null); const reader = new FileReader(); reader.onload = () => { const text = reader.result as string; - setAttachedFile({ name: file.name, type: isMd ? "markdown" : "text", content: text }); + setAttachedFile({ name: file.name, type: fileKind, content: text }); }; reader.readAsText(file); + } else { + setPdfParse({ status: "error", fileName: file.name, error: t("chat.unsupportedUploadFile") }); } - }, []); + }, [pdfParse, readFileAsB64, t]); - const handleDocFileChange = useCallback( + const handleUploadFileChange = useCallback( (e: React.ChangeEvent) => { const file = e.target.files?.[0]; - if (file) readDocFile(file); + if (file) handleUploadFile(file); e.target.value = ""; }, - [readDocFile], + [handleUploadFile], ); const handlePaste = useCallback( @@ -482,15 +568,6 @@ export function ChatPanel({ [readFileAsB64], ); - const handleFileChange = useCallback( - (e: React.ChangeEvent) => { - const file = e.target.files?.[0]; - if (file) readFileAsB64(file); - e.target.value = ""; - }, - [readFileAsB64], - ); - // ── Track generation completion to add assistant messages ── const prevGenRef = useRef(false); useEffect(() => { @@ -528,7 +605,8 @@ export function ChatPanel({ } const isActive = isGenerating || isLoading; - const canAct = (input.trim().length > 0 || !!attachedFile) && !isActive; + const isPdfParsing = pdfParse?.status === "parsing"; + const canAct = (input.trim().length > 0 || !!attachedFile) && !isActive && !isPdfParsing; const actionLabel = mode === "auto" ? t("chat.send") : t("chat.generate"); const rawSteps = isGenerating @@ -538,7 +616,120 @@ export function ChatPanel({ const visibleSteps = (isActive || hasActiveStep) ? rawSteps : []; return ( -
+
+ {pdfParse && ( +
+ {pdfPanelCollapsed ? ( + + ) : ( +
+
+ {pdfParse.status === "parsing" ? ( + + ) : pdfParse.status === "error" ? ( + + ) : ( + + )} +
+
+ {pdfParse.status === "done" ? pdfParse.result.file_name : pdfParse.fileName} +
+
+ {pdfParse.status === "done" + ? (pdfParse.selectedText + ? t("chat.pdfSelectedChars", { count: String(pdfParse.selectedText.length) }) + : t("chat.pdfNoSelection")) + : pdfParse.status === "parsing" + ? t("chat.pdfParsing") + : t("chat.pdfParseFailed")} +
+
+ + +
+ +
+ {pdfParse.status === "parsing" && ( +
+ {t("chat.pdfParsing")} +
+ )} + {pdfParse.status === "error" && ( +
+ {pdfParse.error} +
+ )} + {pdfParse.status === "done" && ( + <> +

{t("chat.pdfSelectHint")}

+
+                      {pdfParse.result.markdown}
+                    
+
+ + {pdfParse.selectedText + ? t("chat.pdfSelectedChars", { count: String(pdfParse.selectedText.length) }) + : t("chat.pdfNoSelection")} + + + +
+ + )} +
+
+ )} +
+ )} + {/* ── Header ── */}
@@ -699,23 +890,26 @@ export function ChatPanel({ - {/* Attached file preview */} {attachedFile && ( -
+
- {attachedFile.name} +
+ {attachedFile.name} + + {attachedFile.type === "pdf-selection" + ? t("chat.pdfSelectionAttached") + : attachedFile.type === "markdown" + ? t("chat.mdAttached") + : t("chat.txtAttached")} + +
- {mode === "auto" && ( - - )} - {/* Mode */}
setMineruApiToken(e.target.value)} + className="w-full rounded-xl border border-stone-200 bg-stone-50/50 px-3 py-2.5 text-sm text-stone-800 outline-none transition focus:border-amber-300 focus:bg-white focus:ring-2 focus:ring-amber-100" + placeholder={isMineruConfigured ? t("settings.apiKeyPlaceholder") : ""} + /> + +
) : (

{t("settings.nanaSoulHelper")}

diff --git a/frontend/src/components/chatUpload.ts b/frontend/src/components/chatUpload.ts new file mode 100644 index 0000000..3cb3c6c --- /dev/null +++ b/frontend/src/components/chatUpload.ts @@ -0,0 +1,12 @@ +export const ACCEPTED_UPLOAD_TYPES = "image/*,.pdf,application/pdf,.md,.txt,.markdown,text/plain,text/markdown"; + +export type UploadFileKind = "image" | "pdf" | "markdown" | "text" | "unsupported"; + +export function classifyUploadFile(file: Pick): UploadFileKind { + const ext = file.name.split(".").pop()?.toLowerCase(); + if (file.type.startsWith("image/")) return "image"; + if (ext === "pdf" || file.type === "application/pdf") return "pdf"; + if (ext === "md" || ext === "markdown" || file.type === "text/markdown") return "markdown"; + if (ext === "txt" || file.type === "text/plain") return "text"; + return "unsupported"; +} diff --git a/frontend/src/i18n/en.ts b/frontend/src/i18n/en.ts index 64b11fe..d3897bc 100644 --- a/frontend/src/i18n/en.ts +++ b/frontend/src/i18n/en.ts @@ -97,6 +97,7 @@ const en: Record = { "chat.placeholder.generate": "Enter method description text and click Generate (can paste sketch images)...", "chat.uploadSketch": "Upload Sketch Image", "chat.uploadFile": "Upload File (PDF/Markdown/Text)", + "chat.uploadAttachment": "Upload image, PDF, or text file", "chat.drawMode": "Drawing Mode", "chat.textModel": "Text Model", "chat.imageModel": "Image Model", @@ -123,8 +124,20 @@ const en: Record = { "chat.feedback.submitted": "Thanks for your feedback!", "chat.feedback.title": "How was this generation?", "chat.pdfAttached": "PDF file attached", + "chat.pdfSelectionAttached": "PDF selection quoted", "chat.mdAttached": "Markdown file attached", "chat.txtAttached": "Text file attached", + "chat.pdfParsing": "Parsing PDF with MinerU. This may take a few minutes...", + "chat.pdfParseFailed": "PDF parsing failed", + "chat.pdfTooLarge": "PDF file cannot exceed 200MB", + "chat.unsupportedUploadFile": "Unsupported file type. Upload an image, PDF, Markdown, or text file.", + "chat.pdfSelectHint": "Parsing complete. Select a text snippet below, then use it to generate a diagram.", + "chat.pdfNoSelection": "No text snippet selected yet", + "chat.pdfSelectedChars": "{count} characters selected", + "chat.pdfSelectAll": "Select all", + "chat.pdfUseSelection": "Quote selection", + "chat.referenceMaterial": "Reference material ({name}):", + "chat.userPromptWithReference": "User prompt:\n{prompt}", "chat.assistantAlt": "Assistant", "chat.isThinking": "Thinking...", "chat.foundStyles": "Found {count} style references", @@ -151,7 +164,7 @@ const en: Record = { "chat.regenBatchAssistant": "Wow, you selected {count} components to regenerate: {labels} 🎨 Tell me how you'd like to change them — the more detail, the better ✨", "chat.chartType": "chart", "chat.tableType": "table", - "chat.genFromAttachedFile": "Continue with the attached file {name}", + "chat.genFromAttachedFile": "Continue drawing from the referenced material", "chat.queueInfo": "Queuing · Position {pos}/{total}", "chat.genAssetsCount": "Generated Assets ({count})", "chat.componentAltCount": "Component Alternatives ({count})", @@ -166,6 +179,7 @@ const en: Record = { // ── Settings panel ── "settings.title": "Settings", "settings.apiConfig": "API Configuration", + "settings.documents": "Document Parsing", "settings.nanaSoul": "NanaSoul Persona", "settings.apiKey": "API Key", "settings.baseUrl": "API Base URL", @@ -175,10 +189,13 @@ const en: Record = { "settings.textModel": "Text Model", "settings.imageModel": "Image Model", "settings.componentModel": "Component Model", + "settings.mineruToken": "MinerU Token", + "settings.mineruTokenHelper": "Used for PDF parsing. It is stored only in local backend settings and is never exposed to the frontend for third-party calls.", "settings.nanaSoulHelper": "Set your unique style constraints, applied automatically during generation", "settings.save": "Save", "settings.saved": "Settings saved", "settings.notConfigured": "Please configure API Key first", + "settings.mineruNotConfigured": "Please configure MinerU Token first", "settings.saveFailed": "Save failed", "settings.loadFailed": "Failed to load settings", @@ -436,6 +453,7 @@ const en: Record = { // ── services error/default ── "svc.genFailed": "Generation failed", "svc.stylizeFailed": "Stylization failed", + "svc.pdfParseFailed": "PDF parsing failed", "svc.cancelled": "Cancelled", "svc.defaultProjectName": "Untitled Project", diff --git a/frontend/src/i18n/zh.ts b/frontend/src/i18n/zh.ts index 0c9aa18..3650af1 100644 --- a/frontend/src/i18n/zh.ts +++ b/frontend/src/i18n/zh.ts @@ -95,6 +95,7 @@ const zh = { "chat.placeholder.generate": "输入方法描述文本后点击生成(可粘贴草稿图)...", "chat.uploadSketch": "上传草稿图", "chat.uploadFile": "上传文件(PDF/Markdown/文本)", + "chat.uploadAttachment": "上传图片、PDF 或文本文件", "chat.drawMode": "绘制模式", "chat.textModel": "语言模型", "chat.imageModel": "生图模型", @@ -121,8 +122,20 @@ const zh = { "chat.feedback.submitted": "感谢反馈!", "chat.feedback.title": "这次生成效果如何?", "chat.pdfAttached": "PDF 文件已附加", + "chat.pdfSelectionAttached": "PDF 选段已引用", "chat.mdAttached": "Markdown 文件已附加", "chat.txtAttached": "文本文件已附加", + "chat.pdfParsing": "正在调用 MinerU 解析 PDF,可能需要几分钟,请稍候...", + "chat.pdfParseFailed": "PDF 解析失败", + "chat.pdfTooLarge": "PDF 文件不能超过 200MB", + "chat.unsupportedUploadFile": "暂不支持该文件类型,请上传图片、PDF、Markdown 或文本文件", + "chat.pdfSelectHint": "解析完成。请在下方文本中选择一段内容,再用于生成图。", + "chat.pdfNoSelection": "尚未选择文本片段", + "chat.pdfSelectedChars": "已选择 {count} 个字符", + "chat.pdfSelectAll": "全选", + "chat.pdfUseSelection": "引用选中内容", + "chat.referenceMaterial": "引用材料({name}):", + "chat.userPromptWithReference": "用户补充要求:\n{prompt}", "chat.assistantAlt": "助手", "chat.isThinking": "正在思考...", "chat.foundStyles": "找到 {count} 个风格参考", @@ -149,7 +162,7 @@ const zh = { "chat.regenBatchAssistant": "哇~ 你一次选了 {count} 个组件要重新生成呀:{labels} 🎨 告诉香蕉宝宝你想怎么改它们呢?描述越详细越好哦~ ✨", "chat.chartType": "图表", "chat.tableType": "表格", - "chat.genFromAttachedFile": "请根据上传的附件 {name} 继续", + "chat.genFromAttachedFile": "请根据引用材料继续绘图", "chat.queueInfo": "排队中 · 第 {pos}/{total} 位", "chat.genAssetsCount": "生成素材({count})", "chat.componentAltCount": "组件备选方案({count})", @@ -164,6 +177,7 @@ const zh = { // ── Settings panel ── "settings.title": "设置", "settings.apiConfig": "API 配置", + "settings.documents": "文档解析", "settings.nanaSoul": "NanaSoul 人设", "settings.apiKey": "API 密钥", "settings.baseUrl": "Base URL 地址", @@ -173,10 +187,13 @@ const zh = { "settings.textModel": "文本模型", "settings.imageModel": "生图模型", "settings.componentModel": "组件模型", + "settings.mineruToken": "MinerU Token", + "settings.mineruTokenHelper": "用于 PDF 文档解析,仅保存在本地后端配置中,不会暴露给前端调用第三方接口。", "settings.nanaSoulHelper": "设置你的专属画风约束,生成时自动应用", "settings.save": "保存", "settings.saved": "设置已保存", "settings.notConfigured": "请先配置 API 密钥", + "settings.mineruNotConfigured": "请先配置 MinerU Token", "settings.saveFailed": "保存失败", "settings.loadFailed": "加载设置失败", @@ -434,6 +451,7 @@ const zh = { // ── services error/default ── "svc.genFailed": "生成失败", "svc.stylizeFailed": "风格化失败", + "svc.pdfParseFailed": "PDF 解析失败", "svc.cancelled": "已取消", "svc.defaultProjectName": "未命名项目", diff --git a/frontend/src/services/api.test.ts b/frontend/src/services/api.test.ts new file mode 100644 index 0000000..6463271 --- /dev/null +++ b/frontend/src/services/api.test.ts @@ -0,0 +1,37 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { parsePdfDocument } from "./api"; + +describe("parsePdfDocument", () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it("posts the PDF to the document parsing endpoint", async () => { + const payload = { + file_name: "paper.pdf", + markdown: "# Parsed", + batch_id: "batch-1", + data_id: "data-1", + source: "mineru", + }; + const fetchMock = vi.fn().mockResolvedValue(new Response(JSON.stringify(payload), { status: 200 })); + vi.stubGlobal("fetch", fetchMock); + + const result = await parsePdfDocument(new File(["%PDF"], "paper.pdf", { type: "application/pdf" })); + + expect(fetchMock).toHaveBeenCalledWith("/api/v1/documents/parse-pdf", { + method: "POST", + body: expect.any(FormData), + }); + expect(result).toEqual(payload); + }); + + it("uses backend detail for failed parsing requests", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue(new Response(JSON.stringify({ detail: "请先在设置中配置 MinerU Token" }), { status: 400 })), + ); + + await expect(parsePdfDocument(new File(["%PDF"], "paper.pdf"))).rejects.toThrow("MinerU Token"); + }); +}); diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 0b8c05f..7c6208a 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -3,6 +3,7 @@ import { tStandalone as t } from "../contexts/LanguageContext"; const PATH_PREFIX = import.meta.env.BASE_URL.replace(/\/$/, ""); const API_BASE = `${PATH_PREFIX}/api/v1`; const SETTINGS_API = `${PATH_PREFIX}/api/v1/settings`; +const FALLBACK_API_BASE = "/api/v1"; export async function fetchNanaSoul(): Promise { const res = await fetch(SETTINGS_API); @@ -247,6 +248,38 @@ export async function fetchAssistantStatus(): Promise<{ enabled: boolean }> { } } +// ── Document Parsing API ── + +export interface ParsedPdfResult { + file_name: string; + markdown: string; + batch_id: string; + data_id: string; + source: "mineru"; +} + +async function fetchDocumentsApi(path: string, init?: RequestInit): Promise { + const primary = await fetch(`${API_BASE}${path}`, init); + const contentType = (primary.headers.get("content-type") || "").toLowerCase(); + const shouldFallback = primary.status === 404 || contentType.includes("text/html"); + if (!shouldFallback) return primary; + return fetch(`${FALLBACK_API_BASE}${path}`, init); +} + +export async function parsePdfDocument(file: File): Promise { + const form = new FormData(); + form.append("file", file); + const res = await fetchDocumentsApi("/documents/parse-pdf", { + method: "POST", + body: form, + }); + if (!res.ok) { + const err = await res.json().catch(() => ({ detail: t("svc.pdfParseFailed") })); + throw new Error(err.detail || `HTTP ${res.status}`); + } + return res.json(); +} + // ── Pipeline SSE ── @@ -670,4 +703,3 @@ export async function assistantChat( } } } - diff --git a/frontend/src/services/settingsApi.ts b/frontend/src/services/settingsApi.ts index 7ebdc40..dde863f 100644 --- a/frontend/src/services/settingsApi.ts +++ b/frontend/src/services/settingsApi.ts @@ -1,5 +1,6 @@ const PATH_PREFIX = import.meta.env.BASE_URL.replace(/\/$/, ""); const API_BASE = `${PATH_PREFIX}/api/v1`; +const FALLBACK_API_BASE = "/api/v1"; export interface Settings { llm_api_key: string; @@ -12,9 +13,11 @@ export interface Settings { llm_image_model: string; llm_component_model: string; api_format: "auto" | "gemini_native" | "openai"; + mineru_api_token: string; nana_soul: string; language: string; is_configured: boolean; + mineru_is_configured: boolean; } export interface LLMPoolDisplay { @@ -31,24 +34,48 @@ export interface LLMConfigResponse { api_format?: "auto" | "gemini_native" | "openai"; } +async function parseErrorMessage(res: Response, fallback: string): Promise { + const contentType = res.headers.get("content-type") || ""; + if (contentType.includes("application/json")) { + const err = await res.json().catch(() => ({} as Record)); + if (err && typeof err === "object" && typeof err.detail === "string" && err.detail.trim()) { + return err.detail; + } + } else { + const text = await res.text().catch(() => ""); + if (text.trim()) return text.trim(); + } + return fallback; +} + +async function fetchWithApiFallback(path: string, init?: RequestInit): Promise { + const primary = await fetch(`${API_BASE}${path}`, init); + const contentType = (primary.headers.get("content-type") || "").toLowerCase(); + const shouldFallback = primary.status === 404 || contentType.includes("text/html"); + if (!shouldFallback) return primary; + return fetch(`${FALLBACK_API_BASE}${path}`, init); +} + export async function getSettings(): Promise { - const res = await fetch(`${API_BASE}/settings`); + const res = await fetchWithApiFallback("/settings"); if (!res.ok) throw new Error("Failed to fetch settings"); return res.json(); } -export async function updateSettings(data: Partial>): Promise { - const res = await fetch(`${API_BASE}/settings`, { +export async function updateSettings( + data: Partial>, +): Promise { + const res = await fetchWithApiFallback("/settings", { method: "PUT", headers: { "Content-Type": "application/json" }, body: JSON.stringify(data), }); - if (!res.ok) throw new Error("Failed to update settings"); + if (!res.ok) throw new Error(await parseErrorMessage(res, "Failed to update settings")); return res.json(); } export async function fetchLLMConfig(): Promise { - const res = await fetch(`${API_BASE}/settings/llm-config`); + const res = await fetchWithApiFallback("/settings/llm-config"); if (!res.ok) throw new Error("Failed to fetch LLM config"); return res.json(); } @@ -64,7 +91,7 @@ export async function updateLLMConfig( ): Promise { const pools = [{ base_url: baseUrl, api_keys: apiKey }]; const imagePools = (imageBaseUrl || imageApiKey) ? [{ base_url: imageBaseUrl || "", api_keys: imageApiKey || "" }] : []; - const res = await fetch(`${API_BASE}/settings/llm-config`, { + const res = await fetchWithApiFallback("/settings/llm-config", { method: "PUT", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ @@ -76,12 +103,11 @@ export async function updateLLMConfig( }), }); if (!res.ok) { - const err = await res.json().catch(() => ({ detail: "Failed to update LLM config" })); - throw new Error(err.detail || "Failed to update LLM config"); + throw new Error(await parseErrorMessage(res, "Failed to update LLM config")); } } export async function clearLLMConfig(): Promise { - const res = await fetch(`${API_BASE}/settings/llm-config`, { method: "DELETE" }); - if (!res.ok) throw new Error("Failed to clear LLM config"); + const res = await fetchWithApiFallback("/settings/llm-config", { method: "DELETE" }); + if (!res.ok) throw new Error(await parseErrorMessage(res, "Failed to clear LLM config")); }