From 9586bd394b246936417b48768fb413803aa60826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=95=86=E5=AF=8C=E5=87=AF?= Date: Mon, 27 Apr 2026 16:16:49 +0800 Subject: [PATCH 1/8] feat: add MinerU PDF parsing workflow --- README.md | 10 + README_zh-CN.md | 10 + backend/app/api/v1/endpoints/documents.py | 37 ++++ backend/app/api/v1/endpoints/settings.py | 12 ++ backend/app/api/v1/router.py | 2 + backend/app/services/mineru_service.py | 177 ++++++++++++++++ backend/app/services/settings_service.py | 2 + backend/tests/conftest.py | 7 + backend/tests/test_documents.py | 81 ++++++++ backend/tests/test_mineru_service.py | 150 ++++++++++++++ backend/tests/test_settings.py | 58 ++++++ docs/releases/v0.17.0-mineru-pdf.md | 45 +++++ frontend/src/components/ChatPanel.tsx | 233 +++++++++++++++++++--- frontend/src/components/SettingsPanel.tsx | 59 +++++- frontend/src/i18n/en.ts | 17 +- frontend/src/i18n/zh.ts | 17 +- frontend/src/services/api.test.ts | 37 ++++ frontend/src/services/api.ts | 25 ++- frontend/src/services/settingsApi.ts | 6 +- 19 files changed, 947 insertions(+), 38 deletions(-) create mode 100644 backend/app/api/v1/endpoints/documents.py create mode 100644 backend/app/services/mineru_service.py create mode 100644 backend/tests/conftest.py create mode 100644 backend/tests/test_documents.py create mode 100644 backend/tests/test_mineru_service.py create mode 100644 backend/tests/test_settings.py create mode 100644 docs/releases/v0.17.0-mineru-pdf.md create mode 100644 frontend/src/services/api.test.ts diff --git a/README.md b/README.md index c30c6f4..b7a93eb 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ ## Features - 📝 Paste method description text → auto-generate pipeline diagrams +- 📄 Upload paper PDFs, parse them with MinerU, and quote selected text for drawing prompts - 🎨 Three creation modes: Draft, Generation, and Assembly - 🖼️ Built-in style gallery with 250+ academic paper reference images - 🧰 Asset workshop with Bioicons, reusable personal assets, and AI-generated materials @@ -36,6 +37,14 @@ Upload a hand-drawn sketch and turn it into a high-fidelity editable pipeline di Figure 1 shows the rough hand-drawn sketch. Figure 2 shows the generated high-fidelity editable workflow diagram. +### PDF Parsing and Quoted-Selection Drawing + +Upload a PDF in the AI Workbench and NanaDraw will call the MinerU online API to parse the document into Markdown. The parsed result appears in a scrollable, collapsible floating panel on the left side of the workbench. You can select a method paragraph, experiment flow, or paper-structure passage, click "Quote selection", enrich the prompt in your own words, and then continue through NanaDraw's existing generation flow. + +- PDF upload is available in Draft, Generation, Assembly, and Auto modes. +- PDF content is sent only to MinerU for document parsing; NanaDraw does not automatically send the whole paper to the LLM. +- Only the text explicitly quoted by the user is merged into the generation prompt. + ### Creation Modes | Mode | Description | Steps | Example Screenshot | @@ -164,6 +173,7 @@ After starting, click the ⚙️ gear icon in the top-right corner to configure: - **Image Model**: Default `gemini-3-pro-image-preview` - **Component Model**: Default `gemini-3.1-flash-image-preview` - **NanaSoul**: Custom AI persona for style constraints +- **Document Parsing Token**: MinerU online API token for PDF parsing #### Data Directory (Environment Variable) diff --git a/README_zh-CN.md b/README_zh-CN.md index ddf6c59..785631b 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -17,6 +17,7 @@ ## 功能特性 - 📝 粘贴方法描述文本,自动生成流程图 +- 📄 上传论文 PDF,经 MinerU 解析后引用选中文本继续绘图 - 🎨 三种创作模式:草稿模式、生成模式、组装模式 - 🖼️ 内置 250+ 学术论文风格参考图 - 🧰 素材工坊集成 Bioicons、个人常用素材和 AI 生成素材 @@ -35,6 +36,14 @@ 图 1 为用户上传的手绘草图,图 2 为系统生成的高保真可编辑流程图。 +### PDF 文档解析与引用选段绘图 + +在 AI 工作台中上传 PDF 后,NanaDraw 会调用 MinerU 在线 API 将文档解析为 Markdown,并在工作台左侧显示可滚动、可收起的解析结果浮窗。用户可以手动选择方法段落、实验流程或论文结构说明,点击“引用选中内容”后继续补充自己的绘图提示词,再进入原有生成流程。 + +- 支持在草稿模式、生成模式、组装模式和自动模式中上传 PDF。 +- PDF 内容只用于 MinerU 文档解析;不会自动把整篇论文发送给 LLM。 +- 只有用户主动引用的选中文本会随提示词进入 NanaDraw 的生成流程。 + ### 多种模式 | 模式 | 说明 | 步骤 | 示例截图 | @@ -163,6 +172,7 @@ python start.py --dev - **图像模型**:默认 `gemini-3-pro-image-preview` - **组件模型**:默认 `gemini-3.1-flash-image-preview` - **NanaSoul**:用于风格约束的自定义 AI 角色 +- **文档解析 Token**:MinerU 在线 API Token,用于 PDF 解析 #### 数据目录(环境变量) diff --git a/backend/app/api/v1/endpoints/documents.py b/backend/app/api/v1/endpoints/documents.py new file mode 100644 index 0000000..bf28945 --- /dev/null +++ b/backend/app/api/v1/endpoints/documents.py @@ -0,0 +1,37 @@ +from fastapi import APIRouter, Depends, File, HTTPException, UploadFile + +from app.dependencies import require_auth +from app.services.mineru_service import MinerUError, parse_pdf_with_mineru +from app.services.settings_service import load_settings + +router = APIRouter(prefix="/documents", tags=["documents"]) + +MAX_PDF_SIZE_BYTES = 200 * 1024 * 1024 + + +@router.post("/parse-pdf") +async def parse_pdf(file: UploadFile = File(...), _user=Depends(require_auth)): + file_name = file.filename or "document.pdf" + if not file_name.lower().endswith(".pdf") and file.content_type != "application/pdf": + raise HTTPException(status_code=400, detail="仅支持上传 PDF 文件") + + content = await file.read() + if not content: + raise HTTPException(status_code=400, detail="PDF 文件为空") + if len(content) > MAX_PDF_SIZE_BYTES: + raise HTTPException(status_code=413, detail="PDF 文件不能超过 200MB") + + token = str(load_settings().get("mineru_api_token", "")).strip() + if not token: + raise HTTPException(status_code=400, detail="请先在设置中配置 MinerU Token") + + try: + return await parse_pdf_with_mineru( + file_name=file_name, + file_bytes=content, + token=token, + ) + except MinerUError as exc: + raise HTTPException(status_code=502, detail=str(exc)) from exc + except Exception as exc: + raise HTTPException(status_code=502, detail=f"PDF 解析失败: {exc}") from exc diff --git a/backend/app/api/v1/endpoints/settings.py b/backend/app/api/v1/endpoints/settings.py index 0ce3da2..4828ff3 100644 --- a/backend/app/api/v1/endpoints/settings.py +++ b/backend/app/api/v1/endpoints/settings.py @@ -19,9 +19,11 @@ class SettingsResponse(BaseModel): llm_model: str = "" llm_image_model: str = "" llm_component_model: str = "" + mineru_api_token: str = "" nana_soul: str = "" language: str = "zh" is_configured: bool = False + mineru_is_configured: bool = False class SettingsUpdate(BaseModel): @@ -30,6 +32,7 @@ class SettingsUpdate(BaseModel): llm_model: str | None = None llm_image_model: str | None = None llm_component_model: str | None = None + mineru_api_token: str | None = None nana_soul: str | None = Field(default=None, max_length=500) language: str | None = None @@ -50,9 +53,11 @@ def _to_response(data: dict[str, Any]) -> SettingsResponse: llm_model=str(data.get("llm_model", "") or ""), llm_image_model=str(data.get("llm_image_model", "") or ""), llm_component_model=str(data.get("llm_component_model", "") or ""), + mineru_api_token=mask_api_key(str(data.get("mineru_api_token", ""))), nana_soul=str(data.get("nana_soul", "") or ""), language=str(data.get("language", "") or "zh"), is_configured=bool(str(data.get("llm_api_key", "")).strip()), + mineru_is_configured=bool(str(data.get("mineru_api_token", "")).strip()), ) @@ -67,5 +72,12 @@ async def get_settings(): async def update_settings(body: SettingsUpdate): """Update settings. Only non-None fields are updated.""" updates = body.model_dump(exclude_none=True) + mineru_token = updates.get("mineru_api_token") + if isinstance(mineru_token, str): + mineru_token = mineru_token.strip() + if mineru_token: + updates["mineru_api_token"] = mineru_token + else: + updates.pop("mineru_api_token") data = await asyncio.to_thread(apply_settings_updates, updates) return _to_response(data) diff --git a/backend/app/api/v1/router.py b/backend/app/api/v1/router.py index ca0580b..3ad0da3 100644 --- a/backend/app/api/v1/router.py +++ b/backend/app/api/v1/router.py @@ -9,6 +9,7 @@ models, assistant, settings, + documents, ) api_router = APIRouter() @@ -21,3 +22,4 @@ api_router.include_router(models.router) api_router.include_router(assistant.router) api_router.include_router(settings.router) +api_router.include_router(documents.router) diff --git a/backend/app/services/mineru_service.py b/backend/app/services/mineru_service.py new file mode 100644 index 0000000..ec84e95 --- /dev/null +++ b/backend/app/services/mineru_service.py @@ -0,0 +1,177 @@ +import asyncio +import io +import time +import uuid +import zipfile +from pathlib import Path +from typing import Any + +import httpx + + +MINERU_API_BASE = "https://mineru.net/api/v4" +POLL_INTERVAL_SECONDS = 5 +POLL_TIMEOUT_SECONDS = 600 +ACTIVE_STATES = {"waiting-file", "uploading", "pending", "running", "converting"} + + +class MinerUError(RuntimeError): + pass + + +def _auth_headers(token: str) -> dict[str, str]: + return { + "Authorization": f"Bearer {token}", + "Accept": "*/*", + } + + +def _json_headers(token: str) -> dict[str, str]: + return { + **_auth_headers(token), + "Content-Type": "application/json", + } + + +def _ensure_success_payload(payload: dict[str, Any], action: str) -> dict[str, Any]: + if payload.get("code") != 0: + msg = str(payload.get("msg") or f"MinerU {action} failed") + raise MinerUError(msg) + data = payload.get("data") + if not isinstance(data, dict): + raise MinerUError(f"MinerU {action} returned invalid data") + return data + + +def _first_matching_result(results: list[Any], data_id: str, file_name: str) -> dict[str, Any] | None: + for item in results: + if isinstance(item, dict) and item.get("data_id") == data_id: + return item + for item in results: + if isinstance(item, dict) and item.get("file_name") == file_name: + return item + for item in results: + if isinstance(item, dict): + return item + return None + + +def _extract_result(data: dict[str, Any], data_id: str, file_name: str) -> dict[str, Any]: + result = data.get("extract_result") + if isinstance(result, dict): + return result + if isinstance(result, list): + matched = _first_matching_result(result, data_id, file_name) + if matched: + return matched + + results = data.get("extract_results") + if isinstance(results, list): + matched = _first_matching_result(results, data_id, file_name) + if matched: + return matched + + raise MinerUError("MinerU result payload is missing extract_result") + + +async def _download_full_markdown(client: httpx.AsyncClient, full_zip_url: str) -> str: + response = await client.get(full_zip_url) + response.raise_for_status() + + try: + with zipfile.ZipFile(io.BytesIO(response.content)) as archive: + names = archive.namelist() + full_md_name = next( + (name for name in names if Path(name).name == "full.md"), + None, + ) + if not full_md_name: + raise MinerUError("MinerU result zip does not contain full.md") + with archive.open(full_md_name) as f: + return f.read().decode("utf-8", errors="replace") + except zipfile.BadZipFile as exc: + raise MinerUError("MinerU result is not a valid zip file") from exc + + +async def parse_pdf_with_mineru( + *, + file_name: str, + file_bytes: bytes, + token: str, + client_factory: Any | None = None, +) -> dict[str, str]: + data_id = f"nanadraw-{uuid.uuid4().hex}" + timeout = httpx.Timeout(connect=15.0, read=120.0, write=120.0, pool=15.0) + make_client = client_factory or httpx.AsyncClient + + async with make_client(timeout=timeout) as client: + apply_body = { + "files": [ + { + "name": file_name, + "data_id": data_id, + "is_ocr": False, + } + ], + "model_version": "vlm", + "language": "ch", + "enable_table": True, + "enable_formula": True, + } + + apply_response = await client.post( + f"{MINERU_API_BASE}/file-urls/batch", + headers=_json_headers(token), + json=apply_body, + ) + apply_response.raise_for_status() + apply_data = _ensure_success_payload(apply_response.json(), "upload URL request") + + batch_id = str(apply_data.get("batch_id") or "") + file_urls = apply_data.get("file_urls") + if not batch_id or not isinstance(file_urls, list) or not file_urls: + raise MinerUError("MinerU upload URL response is missing batch_id or file_urls") + + upload_response = await client.put(str(file_urls[0]), content=file_bytes) + upload_response.raise_for_status() + + deadline = time.monotonic() + POLL_TIMEOUT_SECONDS + result: dict[str, Any] | None = None + while time.monotonic() < deadline: + await asyncio.sleep(POLL_INTERVAL_SECONDS) + poll_response = await client.get( + f"{MINERU_API_BASE}/extract-results/batch/{batch_id}", + headers=_json_headers(token), + ) + poll_response.raise_for_status() + poll_data = _ensure_success_payload(poll_response.json(), "result polling") + result = _extract_result(poll_data, data_id, file_name) + + state = str(result.get("state") or "").lower() + if state == "done": + break + if state == "failed": + raise MinerUError(str(result.get("err_msg") or "MinerU parsing failed")) + if state and state not in ACTIVE_STATES: + raise MinerUError(f"Unexpected MinerU parsing state: {state}") + else: + raise MinerUError("MinerU parsing timed out") + + if not result: + raise MinerUError("MinerU did not return a parsing result") + + full_zip_url = str(result.get("full_zip_url") or "") + if not full_zip_url: + raise MinerUError("MinerU result is missing full_zip_url") + + markdown = await _download_full_markdown(client, full_zip_url) + if not markdown.strip(): + raise MinerUError("MinerU returned empty Markdown") + + return { + "file_name": file_name, + "markdown": markdown, + "batch_id": batch_id, + "data_id": str(result.get("data_id") or data_id), + "source": "mineru", + } diff --git a/backend/app/services/settings_service.py b/backend/app/services/settings_service.py index 0fcaa25..652543b 100644 --- a/backend/app/services/settings_service.py +++ b/backend/app/services/settings_service.py @@ -13,6 +13,7 @@ "llm_model": "gemini-3.1-pro-preview", "llm_image_model": "gemini-3-pro-image-preview", "llm_component_model": "gemini-3.1-flash-image-preview", + "mineru_api_token": "", "nana_soul": "", "language": "zh", } @@ -96,6 +97,7 @@ def _persist_unlocked(merged: dict[str, Any]) -> dict[str, Any]: log_payload = { **to_store, "llm_api_key": mask_api_key(str(to_store.get("llm_api_key", ""))), + "mineru_api_token": mask_api_key(str(to_store.get("mineru_api_token", ""))), } logger.info("Settings saved: %s", log_payload) return dict(to_store) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 0000000..c4d884b --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,7 @@ +import sys +from pathlib import Path + + +BACKEND_ROOT = Path(__file__).resolve().parents[1] +if str(BACKEND_ROOT) not in sys.path: + sys.path.insert(0, str(BACKEND_ROOT)) diff --git a/backend/tests/test_documents.py b/backend/tests/test_documents.py new file mode 100644 index 0000000..9b8ce1b --- /dev/null +++ b/backend/tests/test_documents.py @@ -0,0 +1,81 @@ +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from app.api.v1.endpoints import documents + + +def _client() -> TestClient: + app = FastAPI() + app.include_router(documents.router) + return TestClient(app) + + +def test_parse_pdf_rejects_non_pdf(): + res = _client().post( + "/documents/parse-pdf", + files={"file": ("notes.txt", b"hello", "text/plain")}, + ) + + assert res.status_code == 400 + assert "PDF" in res.json()["detail"] + + +def test_parse_pdf_rejects_empty_pdf(): + res = _client().post( + "/documents/parse-pdf", + files={"file": ("paper.pdf", b"", "application/pdf")}, + ) + + assert res.status_code == 400 + assert "空" in res.json()["detail"] + + +def test_parse_pdf_rejects_oversized_pdf(monkeypatch): + monkeypatch.setattr(documents, "MAX_PDF_SIZE_BYTES", 4) + + res = _client().post( + "/documents/parse-pdf", + files={"file": ("paper.pdf", b"12345", "application/pdf")}, + ) + + assert res.status_code == 413 + assert "200MB" in res.json()["detail"] + + +def test_parse_pdf_requires_mineru_token(monkeypatch): + monkeypatch.setattr(documents, "load_settings", lambda: {"mineru_api_token": ""}) + + res = _client().post( + "/documents/parse-pdf", + files={"file": ("paper.pdf", b"%PDF", "application/pdf")}, + ) + + assert res.status_code == 400 + assert "MinerU Token" in res.json()["detail"] + + +def test_parse_pdf_returns_mineru_markdown(monkeypatch): + monkeypatch.setattr(documents, "load_settings", lambda: {"mineru_api_token": "secret-token"}) + + async def fake_parse_pdf_with_mineru(*, file_name: str, file_bytes: bytes, token: str): + assert file_name == "paper.pdf" + assert file_bytes == b"%PDF" + assert token == "secret-token" + return { + "file_name": file_name, + "markdown": "# Parsed", + "batch_id": "batch-1", + "data_id": "data-1", + "source": "mineru", + } + + monkeypatch.setattr(documents, "parse_pdf_with_mineru", fake_parse_pdf_with_mineru) + + res = _client().post( + "/documents/parse-pdf", + files={"file": ("paper.pdf", b"%PDF", "application/pdf")}, + ) + + assert res.status_code == 200 + assert res.json()["markdown"] == "# Parsed" + assert res.json()["source"] == "mineru" diff --git a/backend/tests/test_mineru_service.py b/backend/tests/test_mineru_service.py new file mode 100644 index 0000000..e5f6cfd --- /dev/null +++ b/backend/tests/test_mineru_service.py @@ -0,0 +1,150 @@ +import io +import zipfile + +import pytest + +from app.services import mineru_service +from app.services.mineru_service import MinerUError, parse_pdf_with_mineru + + +class FakeResponse: + def __init__(self, json_data=None, content: bytes = b""): + self._json_data = json_data + self.content = content + + def json(self): + return self._json_data + + def raise_for_status(self): + return None + + +def _zip_with(entries: dict[str, str]) -> bytes: + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as archive: + for name, text in entries.items(): + archive.writestr(name, text) + return buf.getvalue() + + +class FakeMinerUClient: + def __init__(self, *, apply_payload=None, poll_payload=None, zip_bytes=None, **_kwargs): + self.apply_payload = apply_payload or { + "code": 0, + "data": {"batch_id": "batch-1", "file_urls": ["https://upload.example"]}, + } + self.poll_payload = poll_payload or { + "code": 0, + "data": { + "extract_result": { + "data_id": "data-1", + "file_name": "paper.pdf", + "state": "done", + "full_zip_url": "https://download.example/result.zip", + } + }, + } + self.zip_bytes = zip_bytes or _zip_with({"nested/full.md": "# Parsed"}) + self.uploaded = b"" + + async def __aenter__(self): + return self + + async def __aexit__(self, *_args): + return None + + async def post(self, *_args, **_kwargs): + return FakeResponse(self.apply_payload) + + async def put(self, _url, content: bytes): + self.uploaded = content + return FakeResponse({}) + + async def get(self, url, **_kwargs): + if str(url).endswith(".zip"): + return FakeResponse(content=self.zip_bytes) + return FakeResponse(self.poll_payload) + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_success(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_INTERVAL_SECONDS", 0) + + result = await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=FakeMinerUClient, + ) + + assert result["file_name"] == "paper.pdf" + assert result["markdown"] == "# Parsed" + assert result["batch_id"] == "batch-1" + assert result["source"] == "mineru" + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_rejects_error_payload(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_INTERVAL_SECONDS", 0) + + def factory(**kwargs): + return FakeMinerUClient(apply_payload={"code": 7, "msg": "bad token"}, **kwargs) + + with pytest.raises(MinerUError, match="bad token"): + await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=factory, + ) + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_reports_failed_state(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_INTERVAL_SECONDS", 0) + + def factory(**kwargs): + return FakeMinerUClient( + poll_payload={ + "code": 0, + "data": {"extract_result": {"state": "failed", "err_msg": "parse failed"}}, + }, + **kwargs, + ) + + with pytest.raises(MinerUError, match="parse failed"): + await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=factory, + ) + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_requires_full_markdown(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_INTERVAL_SECONDS", 0) + + def factory(**kwargs): + return FakeMinerUClient(zip_bytes=_zip_with({"other.md": "No full file"}), **kwargs) + + with pytest.raises(MinerUError, match="full.md"): + await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=factory, + ) + + +@pytest.mark.asyncio +async def test_parse_pdf_with_mineru_times_out(monkeypatch): + monkeypatch.setattr(mineru_service, "POLL_TIMEOUT_SECONDS", 0) + + with pytest.raises(MinerUError, match="timed out"): + await parse_pdf_with_mineru( + file_name="paper.pdf", + file_bytes=b"%PDF", + token="secret", + client_factory=FakeMinerUClient, + ) diff --git a/backend/tests/test_settings.py b/backend/tests/test_settings.py new file mode 100644 index 0000000..d973bc4 --- /dev/null +++ b/backend/tests/test_settings.py @@ -0,0 +1,58 @@ +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from app.api.v1.endpoints import settings + + +def _client() -> TestClient: + app = FastAPI() + app.include_router(settings.router) + return TestClient(app) + + +def test_settings_masks_mineru_token(monkeypatch): + monkeypatch.setattr( + settings, + "load_settings", + lambda: { + "llm_api_key": "llm-secret", + "llm_base_url": "", + "llm_model": "text-model", + "llm_image_model": "image-model", + "llm_component_model": "component-model", + "mineru_api_token": "mineru-secret", + "nana_soul": "", + "language": "zh", + }, + ) + + res = _client().get("/settings") + + assert res.status_code == 200 + assert res.json()["mineru_api_token"] == "****cret" + assert res.json()["mineru_is_configured"] is True + + +def test_settings_ignores_blank_mineru_token_update(monkeypatch): + captured = {} + + def fake_update_settings(updates): + captured.update(updates) + return { + "llm_api_key": "", + "llm_base_url": "", + "llm_model": "", + "llm_image_model": "", + "llm_component_model": "", + "mineru_api_token": "existing-token", + "nana_soul": "", + "language": "zh", + } + + monkeypatch.setattr(settings, "apply_settings_updates", fake_update_settings) + + res = _client().put("/settings", json={"mineru_api_token": " "}) + + assert res.status_code == 200 + assert "mineru_api_token" not in captured + assert res.json()["mineru_is_configured"] is True diff --git a/docs/releases/v0.17.0-mineru-pdf.md b/docs/releases/v0.17.0-mineru-pdf.md new file mode 100644 index 0000000..b260b3b --- /dev/null +++ b/docs/releases/v0.17.0-mineru-pdf.md @@ -0,0 +1,45 @@ +# NanaDraw v0.17.0-dev: MinerU PDF Parsing and Quoted Selection + +## 变更说明(提交到 dev 分支) + +- 新增 MinerU PDF 解析能力:后端支持申请上传 URL、上传 PDF、轮询解析结果、下载 zip 并提取 `full.md`。 +- 新增 `POST /api/v1/documents/parse-pdf`,校验 PDF 类型、空文件、200MB 大小上限和 MinerU Token 配置状态。 +- 设置页新增“文档解析”配置项,支持保存 MinerU Token;接口仅返回掩码 Token 与是否已配置状态。 +- AI 工作台上传入口扩展到所有创作模式,支持 PDF、Markdown 和文本文件。 +- PDF 解析结果从对话框迁移到 AI 工作台左侧独立浮窗,支持滚动、收起和关闭,样式对齐平台暖色视觉体系。 +- 选中文本后的动作调整为“引用选中内容”:先把选段作为参考材料附加到输入区,用户可以继续丰富提示词,再手动触发现有绘图流程。 +- 补充前后端测试,覆盖设置保存、PDF 校验、MinerU 成功/失败流程、前端解析接口和引用逻辑。 + +## PR 描述 + +### Summary + +This PR adds the first version of PDF-assisted academic drawing to NanaDraw. Users can upload a paper PDF from the AI Workbench, parse it through MinerU, review the Markdown result in a dedicated floating panel, select a relevant passage, quote it into the prompt composer, and continue with the existing NanaDraw generation workflow. + +### What Changed + +- Added secure MinerU token settings with masked responses and configured-state reporting. +- Added a backend document parsing endpoint and MinerU service wrapper. +- Added PDF parsing UI states and a left-side floating Markdown viewer in the AI Workbench. +- Enabled document upload across Draft, Generation, Assembly, and Auto modes. +- Changed selected-text handling from immediate generation to quote-and-enrich prompt composition. +- Added Chinese and English i18n strings for document parsing, upload, selection, and quote states. +- Added backend and frontend tests for the new parsing and API behavior. + +### Testing + +- `COREPACK_ENABLE_AUTO_PIN=0 pnpm type-check` +- `COREPACK_ENABLE_AUTO_PIN=0 pnpm test -- --runInBand` +- `./.venv/bin/python -m pytest backend/tests -q` +- `COREPACK_ENABLE_AUTO_PIN=0 pnpm build:react` +- Browser validation with mocked MinerU responses confirmed all modes show the upload entry, parsed PDFs render in the left floating panel, and quoting selected text does not trigger generation until the user submits a prompt. + +### Notes + +- A real end-to-end MinerU parsing check still requires a valid MinerU API token. +- v1 does not persist PDFs, parsed Markdown, selections, or parsing history. +- v1 sends only the user-quoted selection into the LLM prompt; the full PDF is not automatically sent to the generation pipeline. + +## 项目展示简介 + +NanaDraw is an academic figure creation workspace that turns paper methods, sketches, and structured prompts into editable research diagrams. With MinerU PDF parsing, researchers can now upload a paper, extract its Markdown structure, quote the exact passage they want to visualize, and refine the drawing prompt before generating a figure. The workflow keeps the researcher in control: the paper is parsed for reading, the selected evidence becomes prompt context, and NanaDraw's existing multi-mode generation pipeline handles the final visual composition. diff --git a/frontend/src/components/ChatPanel.tsx b/frontend/src/components/ChatPanel.tsx index 1089dc8..861d461 100644 --- a/frontend/src/components/ChatPanel.tsx +++ b/frontend/src/components/ChatPanel.tsx @@ -26,6 +26,7 @@ import { } from "lucide-react"; import clsx from "clsx"; import ReactMarkdown from "react-markdown"; +import { parsePdfDocument, type ParsedPdfResult } from "../services/api"; import { extractImages, stripComponentDescriptions } from "../services/projectApi"; import { ASSISTANT_AVATAR_URL } from "../lib/avatarUrl"; import { useT } from "../contexts/LanguageContext"; @@ -65,6 +66,12 @@ const MODE_ICONS: Record = { }; const DRAWIO_MODES: AssistantMode[] = ["auto", "fast", "image_only", "full_gen"]; +const MAX_PDF_SIZE_BYTES = 200 * 1024 * 1024; + +type PdfParseState = + | { status: "parsing"; fileName: string } + | { status: "done"; result: ParsedPdfResult; selectedText: string } + | { status: "error"; fileName: string; error: string }; // ── Props ── @@ -180,7 +187,10 @@ export function ChatPanel({ const bodyRef = useRef(null); const textareaRef = useRef(null); + const pdfTextRef = useRef(null); const [attachedFile, setAttachedFile] = useState<{ name: string; type: string; content: string } | null>(null); + const [pdfParse, setPdfParse] = useState(null); + const [pdfPanelCollapsed, setPdfPanelCollapsed] = useState(false); const bridgedXmlRef = useRef(null); const bridgedImageRef = useRef(null); // Bridge assistant state to DrawPage (dedup via refs to prevent @@ -365,7 +375,15 @@ export function ChatPanel({ if (!trimmed && !attachedFile) return; - const text = trimmed || (attachedFile ? t("chat.genFromAttachedFile", { name: attachedFile.name }) : ""); + const text = attachedFile + ? [ + t("chat.referenceMaterial", { name: attachedFile.name }), + attachedFile.content, + trimmed + ? t("chat.userPromptWithReference", { prompt: trimmed }) + : t("chat.genFromAttachedFile", { name: attachedFile.name }), + ].join("\n\n") + : trimmed; const sketch = sketchImage; setInput(""); @@ -398,6 +416,34 @@ export function ChatPanel({ sendMessage(text, opts); }, [input, isLoading, isGenerating, mode, sendMessage, sketchImage, selectedStyleId, textModel, imageModel, componentGenModel, attachedFile, editorRef, canvasRegenRequest, activeRegenContext, t]); + const handlePdfTextSelection = useCallback(() => { + const selection = window.getSelection(); + const container = pdfTextRef.current; + if (!selection || !container || !selection.anchorNode || !selection.focusNode) return; + if (!container.contains(selection.anchorNode) || !container.contains(selection.focusNode)) return; + const selected = selection.toString().trim(); + if (!selected) return; + setPdfParse((prev) => ( + prev?.status === "done" + ? { ...prev, selectedText: selected } + : prev + )); + }, []); + + const handleUsePdfSelection = useCallback(() => { + if (isLoading || isGenerating || pdfParse?.status !== "done") return; + const selected = pdfParse.selectedText.trim(); + if (!selected) return; + + setAttachedFile({ + name: pdfParse.result.file_name, + type: "pdf-selection", + content: selected, + }); + setPdfPanelCollapsed(true); + textareaRef.current?.focus(); + }, [isLoading, isGenerating, pdfParse]); + const handleKeyDown = useCallback( (e: React.KeyboardEvent) => { if (e.key === "Enter" && !e.shiftKey) { @@ -420,21 +466,35 @@ export function ChatPanel({ reader.readAsDataURL(file); }, []); - const readDocFile = useCallback((file: File) => { + const readDocFile = useCallback(async (file: File) => { + if (pdfParse?.status === "parsing") return; + const ext = file.name.split(".").pop()?.toLowerCase(); const isPdf = ext === "pdf" || file.type === "application/pdf"; const isMd = ext === "md" || ext === "markdown" || file.type === "text/markdown"; const isTxt = ext === "txt" || file.type === "text/plain"; if (isPdf) { - const reader = new FileReader(); - reader.onload = () => { - const dataUrl = reader.result as string; - const b64 = dataUrl.split(",")[1]; - if (b64) setAttachedFile({ name: file.name, type: "pdf", content: b64 }); - }; - reader.readAsDataURL(file); + if (file.size > MAX_PDF_SIZE_BYTES) { + setAttachedFile(null); + setPdfParse({ status: "error", fileName: file.name, error: t("chat.pdfTooLarge") }); + return; + } + setAttachedFile(null); + setPdfPanelCollapsed(false); + setPdfParse({ status: "parsing", fileName: file.name }); + try { + const result = await parsePdfDocument(file); + setPdfParse({ status: "done", result, selectedText: "" }); + } catch (err) { + setPdfParse({ + status: "error", + fileName: file.name, + error: err instanceof Error ? err.message : t("chat.pdfParseFailed"), + }); + } } else if (isMd || isTxt) { + setPdfParse(null); const reader = new FileReader(); reader.onload = () => { const text = reader.result as string; @@ -442,7 +502,7 @@ export function ChatPanel({ }; reader.readAsText(file); } - }, []); + }, [pdfParse, t]); const handleDocFileChange = useCallback( (e: React.ChangeEvent) => { @@ -515,7 +575,8 @@ export function ChatPanel({ } const isActive = isGenerating || isLoading; - const canAct = (input.trim().length > 0 || !!attachedFile) && !isActive; + const isPdfParsing = pdfParse?.status === "parsing"; + const canAct = (input.trim().length > 0 || !!attachedFile) && !isActive && !isPdfParsing; const actionLabel = mode === "auto" ? t("chat.send") : t("chat.generate"); const rawSteps = isGenerating @@ -525,7 +586,112 @@ export function ChatPanel({ const visibleSteps = (isActive || hasActiveStep) ? rawSteps : []; return ( -
+
+ {pdfParse && ( +
+ {pdfPanelCollapsed ? ( + + ) : ( +
+
+ {pdfParse.status === "parsing" ? ( + + ) : pdfParse.status === "error" ? ( + + ) : ( + + )} +
+
+ {pdfParse.status === "done" ? pdfParse.result.file_name : pdfParse.fileName} +
+
+ {pdfParse.status === "done" + ? (pdfParse.selectedText + ? t("chat.pdfSelectedChars", { count: String(pdfParse.selectedText.length) }) + : t("chat.pdfNoSelection")) + : pdfParse.status === "parsing" + ? t("chat.pdfParsing") + : t("chat.pdfParseFailed")} +
+
+ + +
+ +
+ {pdfParse.status === "parsing" && ( +
+ {t("chat.pdfParsing")} +
+ )} + {pdfParse.status === "error" && ( +
+ {pdfParse.error} +
+ )} + {pdfParse.status === "done" && ( + <> +

{t("chat.pdfSelectHint")}

+
+                      {pdfParse.result.markdown}
+                    
+
+ + {pdfParse.selectedText + ? t("chat.pdfSelectedChars", { count: String(pdfParse.selectedText.length) }) + : t("chat.pdfNoSelection")} + + +
+ + )} +
+
+ )} +
+ )} + {/* ── Header ── */}
@@ -699,9 +865,18 @@ export function ChatPanel({ {/* Attached file preview */} {attachedFile && ( -
+
- {attachedFile.name} +
+ {attachedFile.name} + + {attachedFile.type === "pdf-selection" + ? t("chat.pdfSelectionAttached") + : attachedFile.type === "markdown" + ? t("chat.mdAttached") + : t("chat.txtAttached")} + +
- {mode === "auto" && ( - - )} + {/* Mode */}
@@ -958,7 +1131,7 @@ function MessageBubble({
{msg.attachedFile.name} - {msg.attachedFile.type === "pdf" ? t("chat.pdfAttached") : msg.attachedFile.type === "markdown" ? t("chat.mdAttached") : t("chat.txtAttached")} + {msg.attachedFile.type === "pdf-selection" ? t("chat.pdfSelectionAttached") : msg.attachedFile.type === "pdf" ? t("chat.pdfAttached") : msg.attachedFile.type === "markdown" ? t("chat.mdAttached") : t("chat.txtAttached")}
diff --git a/frontend/src/components/SettingsPanel.tsx b/frontend/src/components/SettingsPanel.tsx index 5ea85a7..eddabfd 100644 --- a/frontend/src/components/SettingsPanel.tsx +++ b/frontend/src/components/SettingsPanel.tsx @@ -11,7 +11,7 @@ const DEFAULT_IMAGE_MODEL = "gemini-3-pro-image-preview"; const DEFAULT_COMPONENT_MODEL = "gemini-3.1-flash-image-preview"; const NANA_SOUL_MAX = 500; -type TabId = "api" | "nana"; +type TabId = "api" | "documents" | "nana"; export interface SettingsPanelProps { open: boolean; @@ -32,6 +32,8 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) { const [llmModel, setLlmModel] = useState(DEFAULT_TEXT_MODEL); const [llmImageModel, setLlmImageModel] = useState(DEFAULT_IMAGE_MODEL); const [llmComponentModel, setLlmComponentModel] = useState(DEFAULT_COMPONENT_MODEL); + const [mineruApiToken, setMineruApiToken] = useState(""); + const [isMineruConfigured, setIsMineruConfigured] = useState(false); const [nanaSoul, setNanaSoul] = useState(""); useEffect(() => { @@ -49,6 +51,8 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) { setLlmModel(data.llm_model || DEFAULT_TEXT_MODEL); setLlmImageModel(data.llm_image_model || DEFAULT_IMAGE_MODEL); setLlmComponentModel(data.llm_component_model || DEFAULT_COMPONENT_MODEL); + setMineruApiToken(""); + setIsMineruConfigured(data.mineru_is_configured); setNanaSoul(data.nana_soul || ""); }) .catch(() => { @@ -59,6 +63,8 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) { setLlmModel(DEFAULT_TEXT_MODEL); setLlmImageModel(DEFAULT_IMAGE_MODEL); setLlmComponentModel(DEFAULT_COMPONENT_MODEL); + setMineruApiToken(""); + setIsMineruConfigured(false); setNanaSoul(""); setToast({ type: "error", text: t("settings.loadFailed") }); } @@ -90,10 +96,15 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) { const handleSave = useCallback(async () => { const hasNewKey = llmApiKey.trim().length > 0; - if (!hasNewKey && !isKeyConfigured) { + const hasNewMineruToken = mineruApiToken.trim().length > 0; + if (tab === "api" && !hasNewKey && !isKeyConfigured) { setToast({ type: "error", text: t("settings.notConfigured") }); return; } + if (tab === "documents" && !hasNewMineruToken && !isMineruConfigured) { + setToast({ type: "error", text: t("settings.mineruNotConfigured") }); + return; + } setSaving(true); try { const payload: Record = { @@ -107,17 +118,23 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) { if (hasNewKey) { payload.llm_api_key = llmApiKey.trim(); } - await updateSettings(payload as Partial>); + if (hasNewMineruToken) { + payload.mineru_api_token = mineruApiToken.trim(); + } + await updateSettings(payload as Partial>); if (hasNewKey) { setIsKeyConfigured(true); } + if (hasNewMineruToken) { + setIsMineruConfigured(true); + } onClose(); } catch { setToast({ type: "error", text: t("settings.saveFailed") }); } finally { setSaving(false); } - }, [llmApiKey, isKeyConfigured, llmBaseUrl, llmModel, llmImageModel, llmComponentModel, nanaSoul, t]); + }, [tab, llmApiKey, isKeyConfigured, llmBaseUrl, llmModel, llmImageModel, llmComponentModel, mineruApiToken, isMineruConfigured, nanaSoul, t]); if (!open) return null; @@ -164,6 +181,18 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) { > {t("settings.apiConfig")} +
+ ) : tab === "documents" ? ( +
+

{t("settings.mineruTokenHelper")}

+ +
) : (

{t("settings.nanaSoulHelper")}

diff --git a/frontend/src/i18n/en.ts b/frontend/src/i18n/en.ts index a37da05..51fa281 100644 --- a/frontend/src/i18n/en.ts +++ b/frontend/src/i18n/en.ts @@ -117,8 +117,18 @@ const en: Record = { "chat.feedback.submitted": "Thanks for your feedback!", "chat.feedback.title": "How was this generation?", "chat.pdfAttached": "PDF file attached", + "chat.pdfSelectionAttached": "PDF selection quoted", "chat.mdAttached": "Markdown file attached", "chat.txtAttached": "Text file attached", + "chat.pdfParsing": "Parsing PDF with MinerU. This may take a few minutes...", + "chat.pdfParseFailed": "PDF parsing failed", + "chat.pdfTooLarge": "PDF file cannot exceed 200MB", + "chat.pdfSelectHint": "Parsing complete. Select a text snippet below, then use it to generate a diagram.", + "chat.pdfNoSelection": "No text snippet selected yet", + "chat.pdfSelectedChars": "{count} characters selected", + "chat.pdfUseSelection": "Quote selection", + "chat.referenceMaterial": "Reference material ({name}):", + "chat.userPromptWithReference": "User prompt:\n{prompt}", "chat.assistantAlt": "Assistant", "chat.isThinking": "Thinking...", "chat.foundStyles": "Found {count} style references", @@ -145,7 +155,7 @@ const en: Record = { "chat.regenBatchAssistant": "Wow, you selected {count} components to regenerate: {labels} 🎨 Tell me how you'd like to change them — the more detail, the better ✨", "chat.chartType": "chart", "chat.tableType": "table", - "chat.genFromAttachedFile": "Continue with the attached file {name}", + "chat.genFromAttachedFile": "Continue drawing from the referenced material", "chat.queueInfo": "Queuing · Position {pos}/{total}", "chat.genAssetsCount": "Generated Assets ({count})", "chat.componentAltCount": "Component Alternatives ({count})", @@ -160,6 +170,7 @@ const en: Record = { // ── Settings panel ── "settings.title": "Settings", "settings.apiConfig": "API Configuration", + "settings.documents": "Document Parsing", "settings.nanaSoul": "NanaSoul Persona", "settings.apiKey": "API Key", "settings.baseUrl": "API Base URL", @@ -169,10 +180,13 @@ const en: Record = { "settings.textModel": "Text Model", "settings.imageModel": "Image Model", "settings.componentModel": "Component Model", + "settings.mineruToken": "MinerU Token", + "settings.mineruTokenHelper": "Used for PDF parsing. It is stored only in local backend settings and is never exposed to the frontend for third-party calls.", "settings.nanaSoulHelper": "Set your unique style constraints, applied automatically during generation", "settings.save": "Save", "settings.saved": "Settings saved", "settings.notConfigured": "Please configure API Key first", + "settings.mineruNotConfigured": "Please configure MinerU Token first", "settings.saveFailed": "Save failed", "settings.loadFailed": "Failed to load settings", @@ -430,6 +444,7 @@ const en: Record = { // ── services error/default ── "svc.genFailed": "Generation failed", "svc.stylizeFailed": "Stylization failed", + "svc.pdfParseFailed": "PDF parsing failed", "svc.cancelled": "Cancelled", "svc.defaultProjectName": "Untitled Project", diff --git a/frontend/src/i18n/zh.ts b/frontend/src/i18n/zh.ts index 281ce2b..f7b55b0 100644 --- a/frontend/src/i18n/zh.ts +++ b/frontend/src/i18n/zh.ts @@ -115,8 +115,18 @@ const zh = { "chat.feedback.submitted": "感谢反馈!", "chat.feedback.title": "这次生成效果如何?", "chat.pdfAttached": "PDF 文件已附加", + "chat.pdfSelectionAttached": "PDF 选段已引用", "chat.mdAttached": "Markdown 文件已附加", "chat.txtAttached": "文本文件已附加", + "chat.pdfParsing": "正在调用 MinerU 解析 PDF,可能需要几分钟,请稍候...", + "chat.pdfParseFailed": "PDF 解析失败", + "chat.pdfTooLarge": "PDF 文件不能超过 200MB", + "chat.pdfSelectHint": "解析完成。请在下方文本中选择一段内容,再用于生成图。", + "chat.pdfNoSelection": "尚未选择文本片段", + "chat.pdfSelectedChars": "已选择 {count} 个字符", + "chat.pdfUseSelection": "引用选中内容", + "chat.referenceMaterial": "引用材料({name}):", + "chat.userPromptWithReference": "用户补充要求:\n{prompt}", "chat.assistantAlt": "助手", "chat.isThinking": "正在思考...", "chat.foundStyles": "找到 {count} 个风格参考", @@ -143,7 +153,7 @@ const zh = { "chat.regenBatchAssistant": "哇~ 你一次选了 {count} 个组件要重新生成呀:{labels} 🎨 告诉香蕉宝宝你想怎么改它们呢?描述越详细越好哦~ ✨", "chat.chartType": "图表", "chat.tableType": "表格", - "chat.genFromAttachedFile": "请根据上传的附件 {name} 继续", + "chat.genFromAttachedFile": "请根据引用材料继续绘图", "chat.queueInfo": "排队中 · 第 {pos}/{total} 位", "chat.genAssetsCount": "生成素材({count})", "chat.componentAltCount": "组件备选方案({count})", @@ -158,6 +168,7 @@ const zh = { // ── Settings panel ── "settings.title": "设置", "settings.apiConfig": "API 配置", + "settings.documents": "文档解析", "settings.nanaSoul": "NanaSoul 人设", "settings.apiKey": "API 密钥", "settings.baseUrl": "Base URL 地址", @@ -167,10 +178,13 @@ const zh = { "settings.textModel": "文本模型", "settings.imageModel": "生图模型", "settings.componentModel": "组件模型", + "settings.mineruToken": "MinerU Token", + "settings.mineruTokenHelper": "用于 PDF 文档解析,仅保存在本地后端配置中,不会暴露给前端调用第三方接口。", "settings.nanaSoulHelper": "设置你的专属画风约束,生成时自动应用", "settings.save": "保存", "settings.saved": "设置已保存", "settings.notConfigured": "请先配置 API 密钥", + "settings.mineruNotConfigured": "请先配置 MinerU Token", "settings.saveFailed": "保存失败", "settings.loadFailed": "加载设置失败", @@ -428,6 +442,7 @@ const zh = { // ── services error/default ── "svc.genFailed": "生成失败", "svc.stylizeFailed": "风格化失败", + "svc.pdfParseFailed": "PDF 解析失败", "svc.cancelled": "已取消", "svc.defaultProjectName": "未命名项目", diff --git a/frontend/src/services/api.test.ts b/frontend/src/services/api.test.ts new file mode 100644 index 0000000..6463271 --- /dev/null +++ b/frontend/src/services/api.test.ts @@ -0,0 +1,37 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { parsePdfDocument } from "./api"; + +describe("parsePdfDocument", () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it("posts the PDF to the document parsing endpoint", async () => { + const payload = { + file_name: "paper.pdf", + markdown: "# Parsed", + batch_id: "batch-1", + data_id: "data-1", + source: "mineru", + }; + const fetchMock = vi.fn().mockResolvedValue(new Response(JSON.stringify(payload), { status: 200 })); + vi.stubGlobal("fetch", fetchMock); + + const result = await parsePdfDocument(new File(["%PDF"], "paper.pdf", { type: "application/pdf" })); + + expect(fetchMock).toHaveBeenCalledWith("/api/v1/documents/parse-pdf", { + method: "POST", + body: expect.any(FormData), + }); + expect(result).toEqual(payload); + }); + + it("uses backend detail for failed parsing requests", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue(new Response(JSON.stringify({ detail: "请先在设置中配置 MinerU Token" }), { status: 400 })), + ); + + await expect(parsePdfDocument(new File(["%PDF"], "paper.pdf"))).rejects.toThrow("MinerU Token"); + }); +}); diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 0b8c05f..39355ab 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -247,6 +247,30 @@ export async function fetchAssistantStatus(): Promise<{ enabled: boolean }> { } } +// ── Document Parsing API ── + +export interface ParsedPdfResult { + file_name: string; + markdown: string; + batch_id: string; + data_id: string; + source: "mineru"; +} + +export async function parsePdfDocument(file: File): Promise { + const form = new FormData(); + form.append("file", file); + const res = await fetch(`${API_BASE}/documents/parse-pdf`, { + method: "POST", + body: form, + }); + if (!res.ok) { + const err = await res.json().catch(() => ({ detail: t("svc.pdfParseFailed") })); + throw new Error(err.detail || `HTTP ${res.status}`); + } + return res.json(); +} + // ── Pipeline SSE ── @@ -670,4 +694,3 @@ export async function assistantChat( } } } - diff --git a/frontend/src/services/settingsApi.ts b/frontend/src/services/settingsApi.ts index 1da074d..0f58b7d 100644 --- a/frontend/src/services/settingsApi.ts +++ b/frontend/src/services/settingsApi.ts @@ -7,9 +7,11 @@ export interface Settings { llm_model: string; llm_image_model: string; llm_component_model: string; + mineru_api_token: string; nana_soul: string; language: string; is_configured: boolean; + mineru_is_configured: boolean; } export async function getSettings(): Promise { @@ -18,7 +20,9 @@ export async function getSettings(): Promise { return res.json(); } -export async function updateSettings(data: Partial>): Promise { +export async function updateSettings( + data: Partial>, +): Promise { const res = await fetch(`${API_BASE}/settings`, { method: "PUT", headers: { "Content-Type": "application/json" }, From 50f9920c17750de983facd6e53cc9352df39fd3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=95=86=E5=AF=8C=E5=87=AF?= Date: Mon, 27 Apr 2026 17:37:25 +0800 Subject: [PATCH 2/8] feat: unify assistant upload entry --- README.md | 9 +-- README_zh-CN.md | 9 +-- docs/releases/v0.17.0-mineru-pdf.md | 15 +++-- frontend/src/components/ChatPanel.test.ts | 24 ++++++++ frontend/src/components/ChatPanel.tsx | 72 ++++++++--------------- frontend/src/components/SettingsPanel.tsx | 2 +- frontend/src/components/chatUpload.ts | 12 ++++ frontend/src/i18n/en.ts | 2 + frontend/src/i18n/zh.ts | 2 + 9 files changed, 84 insertions(+), 63 deletions(-) create mode 100644 frontend/src/components/ChatPanel.test.ts create mode 100644 frontend/src/components/chatUpload.ts diff --git a/README.md b/README.md index b7a93eb..76f4823 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ ## Features - 📝 Paste method description text → auto-generate pipeline diagrams -- 📄 Upload paper PDFs, parse them with MinerU, and quote selected text for drawing prompts +- 📎 Use one upload entry for images, PDFs, and text files; images become sketch references, while PDFs are parsed with MinerU for quoted-selection prompts - 🎨 Three creation modes: Draft, Generation, and Assembly - 🖼️ Built-in style gallery with 250+ academic paper reference images - 🧰 Asset workshop with Bioicons, reusable personal assets, and AI-generated materials @@ -39,11 +39,12 @@ Figure 1 shows the rough hand-drawn sketch. Figure 2 shows the generated high-fi ### PDF Parsing and Quoted-Selection Drawing -Upload a PDF in the AI Workbench and NanaDraw will call the MinerU online API to parse the document into Markdown. The parsed result appears in a scrollable, collapsible floating panel on the left side of the workbench. You can select a method paragraph, experiment flow, or paper-structure passage, click "Quote selection", enrich the prompt in your own words, and then continue through NanaDraw's existing generation flow. +The AI Workbench provides one upload entry for images, PDFs, Markdown, and text files. NanaDraw routes each file by type: images become sketch references, PDFs are parsed into Markdown through the MinerU online API, and Markdown/Text files are attached as prompt reference material. -- PDF upload is available in Draft, Generation, Assembly, and Auto modes. +- File upload is available in Draft, Generation, Assembly, and Auto modes. +- Parsed PDFs appear in a scrollable, collapsible floating panel on the left side of the workbench. - PDF content is sent only to MinerU for document parsing; NanaDraw does not automatically send the whole paper to the LLM. -- Only the text explicitly quoted by the user is merged into the generation prompt. +- Only the text explicitly quoted by the user is merged into the generation prompt; users can enrich the prompt before generating. ### Creation Modes diff --git a/README_zh-CN.md b/README_zh-CN.md index 785631b..fcff505 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -17,7 +17,7 @@ ## 功能特性 - 📝 粘贴方法描述文本,自动生成流程图 -- 📄 上传论文 PDF,经 MinerU 解析后引用选中文本继续绘图 +- 📎 单一入口上传图片、PDF 或文本文件;图片作为草稿参考,PDF 经 MinerU 解析后引用选段绘图 - 🎨 三种创作模式:草稿模式、生成模式、组装模式 - 🖼️ 内置 250+ 学术论文风格参考图 - 🧰 素材工坊集成 Bioicons、个人常用素材和 AI 生成素材 @@ -38,11 +38,12 @@ ### PDF 文档解析与引用选段绘图 -在 AI 工作台中上传 PDF 后,NanaDraw 会调用 MinerU 在线 API 将文档解析为 Markdown,并在工作台左侧显示可滚动、可收起的解析结果浮窗。用户可以手动选择方法段落、实验流程或论文结构说明,点击“引用选中内容”后继续补充自己的绘图提示词,再进入原有生成流程。 +AI 工作台底部提供统一上传入口,支持图片、PDF、Markdown 和文本文件。NanaDraw 会按文件类型自动分流:图片作为草稿图参考,PDF 调用 MinerU 在线 API 解析为 Markdown,Markdown/Text 作为文本附件进入提示词参考。 -- 支持在草稿模式、生成模式、组装模式和自动模式中上传 PDF。 +- 支持在草稿模式、生成模式、组装模式和自动模式中上传文件。 +- PDF 解析结果会显示在工作台左侧可滚动、可收起的浮窗中。 - PDF 内容只用于 MinerU 文档解析;不会自动把整篇论文发送给 LLM。 -- 只有用户主动引用的选中文本会随提示词进入 NanaDraw 的生成流程。 +- 只有用户主动引用的选中文本会随提示词进入 NanaDraw 的生成流程;用户可以继续补充绘图要求后再生成。 ### 多种模式 diff --git a/docs/releases/v0.17.0-mineru-pdf.md b/docs/releases/v0.17.0-mineru-pdf.md index b260b3b..00c3b0b 100644 --- a/docs/releases/v0.17.0-mineru-pdf.md +++ b/docs/releases/v0.17.0-mineru-pdf.md @@ -6,33 +6,36 @@ - 新增 `POST /api/v1/documents/parse-pdf`,校验 PDF 类型、空文件、200MB 大小上限和 MinerU Token 配置状态。 - 设置页新增“文档解析”配置项,支持保存 MinerU Token;接口仅返回掩码 Token 与是否已配置状态。 - AI 工作台上传入口扩展到所有创作模式,支持 PDF、Markdown 和文本文件。 +- 合并草稿图上传和文档上传为单一入口,自动按文件类型分流:图片作为草稿参考,PDF 触发 MinerU 解析,Markdown/Text 作为文本附件。 - PDF 解析结果从对话框迁移到 AI 工作台左侧独立浮窗,支持滚动、收起和关闭,样式对齐平台暖色视觉体系。 - 选中文本后的动作调整为“引用选中内容”:先把选段作为参考材料附加到输入区,用户可以继续丰富提示词,再手动触发现有绘图流程。 -- 补充前后端测试,覆盖设置保存、PDF 校验、MinerU 成功/失败流程、前端解析接口和引用逻辑。 +- 补充前后端测试,覆盖设置保存、PDF 校验、MinerU 成功/失败流程、前端解析接口、文件类型分流和引用逻辑。 ## PR 描述 ### Summary -This PR adds the first version of PDF-assisted academic drawing to NanaDraw. Users can upload a paper PDF from the AI Workbench, parse it through MinerU, review the Markdown result in a dedicated floating panel, select a relevant passage, quote it into the prompt composer, and continue with the existing NanaDraw generation workflow. +This PR adds the first version of PDF-assisted academic drawing to NanaDraw and streamlines the AI Workbench upload experience. Users can upload images, PDFs, Markdown, or text files from a single entry point. Images become sketch references, PDFs are parsed through MinerU, and quoted PDF selections can be enriched in the prompt composer before continuing with the existing NanaDraw generation workflow. ### What Changed - Added secure MinerU token settings with masked responses and configured-state reporting. - Added a backend document parsing endpoint and MinerU service wrapper. - Added PDF parsing UI states and a left-side floating Markdown viewer in the AI Workbench. -- Enabled document upload across Draft, Generation, Assembly, and Auto modes. +- Enabled a single upload entry across Draft, Generation, Assembly, and Auto modes. +- Routed uploaded files by type: image files become sketch references, PDFs trigger MinerU parsing, and Markdown/Text files remain prompt attachments. - Changed selected-text handling from immediate generation to quote-and-enrich prompt composition. - Added Chinese and English i18n strings for document parsing, upload, selection, and quote states. -- Added backend and frontend tests for the new parsing and API behavior. +- Added backend and frontend tests for the new parsing, API, and upload-routing behavior. ### Testing - `COREPACK_ENABLE_AUTO_PIN=0 pnpm type-check` - `COREPACK_ENABLE_AUTO_PIN=0 pnpm test -- --runInBand` +- `COREPACK_ENABLE_AUTO_PIN=0 pnpm lint` - `./.venv/bin/python -m pytest backend/tests -q` - `COREPACK_ENABLE_AUTO_PIN=0 pnpm build:react` -- Browser validation with mocked MinerU responses confirmed all modes show the upload entry, parsed PDFs render in the left floating panel, and quoting selected text does not trigger generation until the user submits a prompt. +- Browser validation confirmed the workbench shows one upload entry, the old image/document split buttons are gone, the accepted file types include image/PDF/Markdown/Text, parsed PDFs render in the left floating panel, and quoting selected text does not trigger generation until the user submits a prompt. ### Notes @@ -42,4 +45,4 @@ This PR adds the first version of PDF-assisted academic drawing to NanaDraw. Use ## 项目展示简介 -NanaDraw is an academic figure creation workspace that turns paper methods, sketches, and structured prompts into editable research diagrams. With MinerU PDF parsing, researchers can now upload a paper, extract its Markdown structure, quote the exact passage they want to visualize, and refine the drawing prompt before generating a figure. The workflow keeps the researcher in control: the paper is parsed for reading, the selected evidence becomes prompt context, and NanaDraw's existing multi-mode generation pipeline handles the final visual composition. +NanaDraw is an academic figure creation workspace that turns paper methods, sketches, and structured prompts into editable research diagrams. With the unified upload entry and MinerU PDF parsing, researchers can attach a rough sketch, upload a paper, extract its Markdown structure, quote the exact passage they want to visualize, and refine the drawing prompt before generating a figure. The workflow keeps the researcher in control: files are routed by intent, the selected evidence becomes prompt context, and NanaDraw's existing multi-mode generation pipeline handles the final visual composition. diff --git a/frontend/src/components/ChatPanel.test.ts b/frontend/src/components/ChatPanel.test.ts new file mode 100644 index 0000000..3cd2b80 --- /dev/null +++ b/frontend/src/components/ChatPanel.test.ts @@ -0,0 +1,24 @@ +import { describe, expect, it } from "vitest"; +import { classifyUploadFile } from "./chatUpload"; + +describe("classifyUploadFile", () => { + it("routes images to the sketch reference flow", () => { + expect(classifyUploadFile({ name: "sketch.png", type: "image/png" })).toBe("image"); + }); + + it("routes PDFs to the MinerU parsing flow", () => { + expect(classifyUploadFile({ name: "paper.pdf", type: "" })).toBe("pdf"); + expect(classifyUploadFile({ name: "paper", type: "application/pdf" })).toBe("pdf"); + }); + + it("routes Markdown and text files to the text attachment flow", () => { + expect(classifyUploadFile({ name: "notes.md", type: "" })).toBe("markdown"); + expect(classifyUploadFile({ name: "notes.markdown", type: "" })).toBe("markdown"); + expect(classifyUploadFile({ name: "prompt.txt", type: "" })).toBe("text"); + expect(classifyUploadFile({ name: "prompt", type: "text/plain" })).toBe("text"); + }); + + it("rejects unsupported upload types", () => { + expect(classifyUploadFile({ name: "archive.zip", type: "application/zip" })).toBe("unsupported"); + }); +}); diff --git a/frontend/src/components/ChatPanel.tsx b/frontend/src/components/ChatPanel.tsx index 861d461..44c10c8 100644 --- a/frontend/src/components/ChatPanel.tsx +++ b/frontend/src/components/ChatPanel.tsx @@ -40,6 +40,7 @@ import type { StyleReference, } from "../types/paper"; import { GalleryModal } from "./GalleryModal"; +import { ACCEPTED_UPLOAD_TYPES, classifyUploadFile } from "./chatUpload"; // ── Constants ── @@ -466,15 +467,17 @@ export function ChatPanel({ reader.readAsDataURL(file); }, []); - const readDocFile = useCallback(async (file: File) => { + const handleUploadFile = useCallback(async (file: File) => { if (pdfParse?.status === "parsing") return; - const ext = file.name.split(".").pop()?.toLowerCase(); - const isPdf = ext === "pdf" || file.type === "application/pdf"; - const isMd = ext === "md" || ext === "markdown" || file.type === "text/markdown"; - const isTxt = ext === "txt" || file.type === "text/plain"; + const fileKind = classifyUploadFile(file); - if (isPdf) { + if (fileKind === "image") { + readFileAsB64(file); + return; + } + + if (fileKind === "pdf") { if (file.size > MAX_PDF_SIZE_BYTES) { setAttachedFile(null); setPdfParse({ status: "error", fileName: file.name, error: t("chat.pdfTooLarge") }); @@ -493,24 +496,26 @@ export function ChatPanel({ error: err instanceof Error ? err.message : t("chat.pdfParseFailed"), }); } - } else if (isMd || isTxt) { + } else if (fileKind === "markdown" || fileKind === "text") { setPdfParse(null); const reader = new FileReader(); reader.onload = () => { const text = reader.result as string; - setAttachedFile({ name: file.name, type: isMd ? "markdown" : "text", content: text }); + setAttachedFile({ name: file.name, type: fileKind, content: text }); }; reader.readAsText(file); + } else { + setPdfParse({ status: "error", fileName: file.name, error: t("chat.unsupportedUploadFile") }); } - }, [pdfParse, t]); + }, [pdfParse, readFileAsB64, t]); - const handleDocFileChange = useCallback( + const handleUploadFileChange = useCallback( (e: React.ChangeEvent) => { const file = e.target.files?.[0]; - if (file) readDocFile(file); + if (file) handleUploadFile(file); e.target.value = ""; }, - [readDocFile], + [handleUploadFile], ); const handlePaste = useCallback( @@ -529,15 +534,6 @@ export function ChatPanel({ [readFileAsB64], ); - const handleFileChange = useCallback( - (e: React.ChangeEvent) => { - const file = e.target.files?.[0]; - if (file) readFileAsB64(file); - e.target.value = ""; - }, - [readFileAsB64], - ); - // ── Track generation completion to add assistant messages ── const prevGenRef = useRef(false); useEffect(() => { @@ -851,16 +847,10 @@ export function ChatPanel({ - {/* Attached file preview */} @@ -888,33 +878,19 @@ export function ChatPanel({ {/* Action bar */}
- {/* Attach sketch */} - - {/* Mode */} diff --git a/frontend/src/components/SettingsPanel.tsx b/frontend/src/components/SettingsPanel.tsx index eddabfd..6aa5bd9 100644 --- a/frontend/src/components/SettingsPanel.tsx +++ b/frontend/src/components/SettingsPanel.tsx @@ -134,7 +134,7 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) { } finally { setSaving(false); } - }, [tab, llmApiKey, isKeyConfigured, llmBaseUrl, llmModel, llmImageModel, llmComponentModel, mineruApiToken, isMineruConfigured, nanaSoul, t]); + }, [tab, llmApiKey, isKeyConfigured, llmBaseUrl, llmModel, llmImageModel, llmComponentModel, mineruApiToken, isMineruConfigured, nanaSoul, onClose, t]); if (!open) return null; diff --git a/frontend/src/components/chatUpload.ts b/frontend/src/components/chatUpload.ts new file mode 100644 index 0000000..3cb3c6c --- /dev/null +++ b/frontend/src/components/chatUpload.ts @@ -0,0 +1,12 @@ +export const ACCEPTED_UPLOAD_TYPES = "image/*,.pdf,application/pdf,.md,.txt,.markdown,text/plain,text/markdown"; + +export type UploadFileKind = "image" | "pdf" | "markdown" | "text" | "unsupported"; + +export function classifyUploadFile(file: Pick): UploadFileKind { + const ext = file.name.split(".").pop()?.toLowerCase(); + if (file.type.startsWith("image/")) return "image"; + if (ext === "pdf" || file.type === "application/pdf") return "pdf"; + if (ext === "md" || ext === "markdown" || file.type === "text/markdown") return "markdown"; + if (ext === "txt" || file.type === "text/plain") return "text"; + return "unsupported"; +} diff --git a/frontend/src/i18n/en.ts b/frontend/src/i18n/en.ts index 51fa281..194cf26 100644 --- a/frontend/src/i18n/en.ts +++ b/frontend/src/i18n/en.ts @@ -91,6 +91,7 @@ const en: Record = { "chat.placeholder.generate": "Enter method description text and click Generate (can paste sketch images)...", "chat.uploadSketch": "Upload Sketch Image", "chat.uploadFile": "Upload File (PDF/Markdown/Text)", + "chat.uploadAttachment": "Upload image, PDF, or text file", "chat.drawMode": "Drawing Mode", "chat.textModel": "Text Model", "chat.imageModel": "Image Model", @@ -123,6 +124,7 @@ const en: Record = { "chat.pdfParsing": "Parsing PDF with MinerU. This may take a few minutes...", "chat.pdfParseFailed": "PDF parsing failed", "chat.pdfTooLarge": "PDF file cannot exceed 200MB", + "chat.unsupportedUploadFile": "Unsupported file type. Upload an image, PDF, Markdown, or text file.", "chat.pdfSelectHint": "Parsing complete. Select a text snippet below, then use it to generate a diagram.", "chat.pdfNoSelection": "No text snippet selected yet", "chat.pdfSelectedChars": "{count} characters selected", diff --git a/frontend/src/i18n/zh.ts b/frontend/src/i18n/zh.ts index f7b55b0..600ece5 100644 --- a/frontend/src/i18n/zh.ts +++ b/frontend/src/i18n/zh.ts @@ -89,6 +89,7 @@ const zh = { "chat.placeholder.generate": "输入方法描述文本后点击生成(可粘贴草稿图)...", "chat.uploadSketch": "上传草稿图", "chat.uploadFile": "上传文件(PDF/Markdown/文本)", + "chat.uploadAttachment": "上传图片、PDF 或文本文件", "chat.drawMode": "绘制模式", "chat.textModel": "语言模型", "chat.imageModel": "生图模型", @@ -121,6 +122,7 @@ const zh = { "chat.pdfParsing": "正在调用 MinerU 解析 PDF,可能需要几分钟,请稍候...", "chat.pdfParseFailed": "PDF 解析失败", "chat.pdfTooLarge": "PDF 文件不能超过 200MB", + "chat.unsupportedUploadFile": "暂不支持该文件类型,请上传图片、PDF、Markdown 或文本文件", "chat.pdfSelectHint": "解析完成。请在下方文本中选择一段内容,再用于生成图。", "chat.pdfNoSelection": "尚未选择文本片段", "chat.pdfSelectedChars": "已选择 {count} 个字符", From 113557e067146d70244bf6614e545ef6a840f91c Mon Sep 17 00:00:00 2001 From: Yutong Leng Date: Tue, 28 Apr 2026 15:59:17 +0800 Subject: [PATCH 3/8] fix(frontend): add /nanadraw route compatibility to prevent blank page --- frontend/src/App.tsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 58d240c..cdec39b 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -14,6 +14,9 @@ function App() { } /> } /> } /> + } /> + } /> + } /> From b1ea3c8ff3535f2529d516279b6b56e3169091b1 Mon Sep 17 00:00:00 2001 From: Yutong Leng Date: Tue, 28 Apr 2026 16:10:51 +0800 Subject: [PATCH 4/8] fix(settings): remove duplicated response kwargs causing llm-config update failure --- backend/app/api/v1/endpoints/settings.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/app/api/v1/endpoints/settings.py b/backend/app/api/v1/endpoints/settings.py index 94d93b9..8905762 100644 --- a/backend/app/api/v1/endpoints/settings.py +++ b/backend/app/api/v1/endpoints/settings.py @@ -106,8 +106,6 @@ def _to_response(data: dict[str, Any]) -> SettingsResponse: llm_component_model=str(data.get("llm_component_model", "") or ""), api_format=str(data.get("api_format", "") or "auto"), mineru_api_token=mask_api_key(str(data.get("mineru_api_token", ""))), - api_format=str(data.get("api_format", "") or "auto"), - mineru_api_token=mask_api_key(str(data.get("mineru_api_token", ""))), nana_soul=str(data.get("nana_soul", "") or ""), language=str(data.get("language", "") or "zh"), is_configured=bool(str(data.get("llm_api_key", "")).strip()), From beacecfe2380ab46acf3b30046b3647694a37f59 Mon Sep 17 00:00:00 2001 From: Yutong Leng Date: Tue, 28 Apr 2026 16:14:23 +0800 Subject: [PATCH 5/8] fix(frontend): fallback to /api/v1 when running under /nanadraw routes --- frontend/src/services/settingsApi.ts | 38 +++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/frontend/src/services/settingsApi.ts b/frontend/src/services/settingsApi.ts index 3b379de..0a1efc3 100644 --- a/frontend/src/services/settingsApi.ts +++ b/frontend/src/services/settingsApi.ts @@ -1,5 +1,6 @@ const PATH_PREFIX = import.meta.env.BASE_URL.replace(/\/$/, ""); const API_BASE = `${PATH_PREFIX}/api/v1`; +const FALLBACK_API_BASE = "/api/v1"; export interface Settings { llm_api_key: string; @@ -33,8 +34,28 @@ export interface LLMConfigResponse { api_format?: "auto" | "gemini_native" | "openai"; } +async function parseErrorMessage(res: Response, fallback: string): Promise { + const contentType = res.headers.get("content-type") || ""; + if (contentType.includes("application/json")) { + const err = await res.json().catch(() => ({} as Record)); + if (err && typeof err === "object" && typeof err.detail === "string" && err.detail.trim()) { + return err.detail; + } + } else { + const text = await res.text().catch(() => ""); + if (text.trim()) return text.trim(); + } + return fallback; +} + +async function fetchWithApiFallback(path: string, init?: RequestInit): Promise { + const primary = await fetch(`${API_BASE}${path}`, init); + if (primary.status !== 404 || API_BASE === FALLBACK_API_BASE) return primary; + return fetch(`${FALLBACK_API_BASE}${path}`, init); +} + export async function getSettings(): Promise { - const res = await fetch(`${API_BASE}/settings`); + const res = await fetchWithApiFallback("/settings"); if (!res.ok) throw new Error("Failed to fetch settings"); return res.json(); } @@ -42,17 +63,17 @@ export async function getSettings(): Promise { export async function updateSettings( data: Partial>, ): Promise { - const res = await fetch(`${API_BASE}/settings`, { + const res = await fetchWithApiFallback("/settings", { method: "PUT", headers: { "Content-Type": "application/json" }, body: JSON.stringify(data), }); - if (!res.ok) throw new Error("Failed to update settings"); + if (!res.ok) throw new Error(await parseErrorMessage(res, "Failed to update settings")); return res.json(); } export async function fetchLLMConfig(): Promise { - const res = await fetch(`${API_BASE}/settings/llm-config`); + const res = await fetchWithApiFallback("/settings/llm-config"); if (!res.ok) throw new Error("Failed to fetch LLM config"); return res.json(); } @@ -68,7 +89,7 @@ export async function updateLLMConfig( ): Promise { const pools = [{ base_url: baseUrl, api_keys: apiKey }]; const imagePools = (imageBaseUrl || imageApiKey) ? [{ base_url: imageBaseUrl || "", api_keys: imageApiKey || "" }] : []; - const res = await fetch(`${API_BASE}/settings/llm-config`, { + const res = await fetchWithApiFallback("/settings/llm-config", { method: "PUT", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ @@ -80,12 +101,11 @@ export async function updateLLMConfig( }), }); if (!res.ok) { - const err = await res.json().catch(() => ({ detail: "Failed to update LLM config" })); - throw new Error(err.detail || "Failed to update LLM config"); + throw new Error(await parseErrorMessage(res, "Failed to update LLM config")); } } export async function clearLLMConfig(): Promise { - const res = await fetch(`${API_BASE}/settings/llm-config`, { method: "DELETE" }); - if (!res.ok) throw new Error("Failed to clear LLM config"); + const res = await fetchWithApiFallback("/settings/llm-config", { method: "DELETE" }); + if (!res.ok) throw new Error(await parseErrorMessage(res, "Failed to clear LLM config")); } From 4d3a604214375353491df0f2c9bae1b005e8b89d Mon Sep 17 00:00:00 2001 From: Yutong Leng Date: Tue, 28 Apr 2026 16:39:38 +0800 Subject: [PATCH 6/8] fix(pdf): stabilize MinerU token flow and improve parse error diagnostics --- backend/app/api/v1/endpoints/documents.py | 3 ++- backend/app/services/settings_service.py | 5 ----- frontend/src/services/api.ts | 11 ++++++++++- frontend/src/services/settingsApi.ts | 4 +++- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/backend/app/api/v1/endpoints/documents.py b/backend/app/api/v1/endpoints/documents.py index bf28945..8e87154 100644 --- a/backend/app/api/v1/endpoints/documents.py +++ b/backend/app/api/v1/endpoints/documents.py @@ -34,4 +34,5 @@ async def parse_pdf(file: UploadFile = File(...), _user=Depends(require_auth)): except MinerUError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc except Exception as exc: - raise HTTPException(status_code=502, detail=f"PDF 解析失败: {exc}") from exc + msg = str(exc).strip() or exc.__class__.__name__ + raise HTTPException(status_code=502, detail=f"PDF 解析失败: {msg}") from exc diff --git a/backend/app/services/settings_service.py b/backend/app/services/settings_service.py index 198d5c7..9a4b188 100644 --- a/backend/app/services/settings_service.py +++ b/backend/app/services/settings_service.py @@ -19,8 +19,6 @@ "llm_component_model": "gemini-3.1-flash-image-preview", "api_format": "auto", "mineru_api_token": "", - "api_format": "auto", - "mineru_api_token": "", "nana_soul": "", "language": "zh", } @@ -107,9 +105,6 @@ def _persist_unlocked(merged: dict[str, Any]) -> dict[str, Any]: "image_api_key": mask_api_key(str(to_store.get("image_api_key", ""))), "vision_api_key": mask_api_key(str(to_store.get("vision_api_key", ""))), "mineru_api_token": mask_api_key(str(to_store.get("mineru_api_token", ""))), - "image_api_key": mask_api_key(str(to_store.get("image_api_key", ""))), - "vision_api_key": mask_api_key(str(to_store.get("vision_api_key", ""))), - "mineru_api_token": mask_api_key(str(to_store.get("mineru_api_token", ""))), } logger.info("Settings saved: %s", log_payload) return dict(to_store) diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 39355ab..7c6208a 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -3,6 +3,7 @@ import { tStandalone as t } from "../contexts/LanguageContext"; const PATH_PREFIX = import.meta.env.BASE_URL.replace(/\/$/, ""); const API_BASE = `${PATH_PREFIX}/api/v1`; const SETTINGS_API = `${PATH_PREFIX}/api/v1/settings`; +const FALLBACK_API_BASE = "/api/v1"; export async function fetchNanaSoul(): Promise { const res = await fetch(SETTINGS_API); @@ -257,10 +258,18 @@ export interface ParsedPdfResult { source: "mineru"; } +async function fetchDocumentsApi(path: string, init?: RequestInit): Promise { + const primary = await fetch(`${API_BASE}${path}`, init); + const contentType = (primary.headers.get("content-type") || "").toLowerCase(); + const shouldFallback = primary.status === 404 || contentType.includes("text/html"); + if (!shouldFallback) return primary; + return fetch(`${FALLBACK_API_BASE}${path}`, init); +} + export async function parsePdfDocument(file: File): Promise { const form = new FormData(); form.append("file", file); - const res = await fetch(`${API_BASE}/documents/parse-pdf`, { + const res = await fetchDocumentsApi("/documents/parse-pdf", { method: "POST", body: form, }); diff --git a/frontend/src/services/settingsApi.ts b/frontend/src/services/settingsApi.ts index 0a1efc3..dde863f 100644 --- a/frontend/src/services/settingsApi.ts +++ b/frontend/src/services/settingsApi.ts @@ -50,7 +50,9 @@ async function parseErrorMessage(res: Response, fallback: string): Promise { const primary = await fetch(`${API_BASE}${path}`, init); - if (primary.status !== 404 || API_BASE === FALLBACK_API_BASE) return primary; + const contentType = (primary.headers.get("content-type") || "").toLowerCase(); + const shouldFallback = primary.status === 404 || contentType.includes("text/html"); + if (!shouldFallback) return primary; return fetch(`${FALLBACK_API_BASE}${path}`, init); } From 5d8d7c4289e59dbcd40496e881c6b6adc2951881 Mon Sep 17 00:00:00 2001 From: Yutong Leng Date: Tue, 28 Apr 2026 16:49:03 +0800 Subject: [PATCH 7/8] fix(mineru): disable env proxy inheritance and clarify connection errors --- backend/app/services/mineru_service.py | 147 +++++++++++++------------ 1 file changed, 78 insertions(+), 69 deletions(-) diff --git a/backend/app/services/mineru_service.py b/backend/app/services/mineru_service.py index ec84e95..f7617fb 100644 --- a/backend/app/services/mineru_service.py +++ b/backend/app/services/mineru_service.py @@ -103,75 +103,84 @@ async def parse_pdf_with_mineru( data_id = f"nanadraw-{uuid.uuid4().hex}" timeout = httpx.Timeout(connect=15.0, read=120.0, write=120.0, pool=15.0) make_client = client_factory or httpx.AsyncClient + client_kwargs: dict[str, Any] = {"timeout": timeout} + if client_factory is None: + # Avoid inheriting broken/unsupported proxy envs in local runs. + client_kwargs["trust_env"] = False - async with make_client(timeout=timeout) as client: - apply_body = { - "files": [ - { - "name": file_name, - "data_id": data_id, - "is_ocr": False, - } - ], - "model_version": "vlm", - "language": "ch", - "enable_table": True, - "enable_formula": True, - } - - apply_response = await client.post( - f"{MINERU_API_BASE}/file-urls/batch", - headers=_json_headers(token), - json=apply_body, - ) - apply_response.raise_for_status() - apply_data = _ensure_success_payload(apply_response.json(), "upload URL request") - - batch_id = str(apply_data.get("batch_id") or "") - file_urls = apply_data.get("file_urls") - if not batch_id or not isinstance(file_urls, list) or not file_urls: - raise MinerUError("MinerU upload URL response is missing batch_id or file_urls") - - upload_response = await client.put(str(file_urls[0]), content=file_bytes) - upload_response.raise_for_status() - - deadline = time.monotonic() + POLL_TIMEOUT_SECONDS - result: dict[str, Any] | None = None - while time.monotonic() < deadline: - await asyncio.sleep(POLL_INTERVAL_SECONDS) - poll_response = await client.get( - f"{MINERU_API_BASE}/extract-results/batch/{batch_id}", + try: + async with make_client(**client_kwargs) as client: + apply_body = { + "files": [ + { + "name": file_name, + "data_id": data_id, + "is_ocr": False, + } + ], + "model_version": "vlm", + "language": "ch", + "enable_table": True, + "enable_formula": True, + } + + apply_response = await client.post( + f"{MINERU_API_BASE}/file-urls/batch", headers=_json_headers(token), + json=apply_body, ) - poll_response.raise_for_status() - poll_data = _ensure_success_payload(poll_response.json(), "result polling") - result = _extract_result(poll_data, data_id, file_name) - - state = str(result.get("state") or "").lower() - if state == "done": - break - if state == "failed": - raise MinerUError(str(result.get("err_msg") or "MinerU parsing failed")) - if state and state not in ACTIVE_STATES: - raise MinerUError(f"Unexpected MinerU parsing state: {state}") - else: - raise MinerUError("MinerU parsing timed out") - - if not result: - raise MinerUError("MinerU did not return a parsing result") - - full_zip_url = str(result.get("full_zip_url") or "") - if not full_zip_url: - raise MinerUError("MinerU result is missing full_zip_url") - - markdown = await _download_full_markdown(client, full_zip_url) - if not markdown.strip(): - raise MinerUError("MinerU returned empty Markdown") - - return { - "file_name": file_name, - "markdown": markdown, - "batch_id": batch_id, - "data_id": str(result.get("data_id") or data_id), - "source": "mineru", - } + apply_response.raise_for_status() + apply_data = _ensure_success_payload(apply_response.json(), "upload URL request") + + batch_id = str(apply_data.get("batch_id") or "") + file_urls = apply_data.get("file_urls") + if not batch_id or not isinstance(file_urls, list) or not file_urls: + raise MinerUError("MinerU upload URL response is missing batch_id or file_urls") + + upload_response = await client.put(str(file_urls[0]), content=file_bytes) + upload_response.raise_for_status() + + deadline = time.monotonic() + POLL_TIMEOUT_SECONDS + result: dict[str, Any] | None = None + while time.monotonic() < deadline: + await asyncio.sleep(POLL_INTERVAL_SECONDS) + poll_response = await client.get( + f"{MINERU_API_BASE}/extract-results/batch/{batch_id}", + headers=_json_headers(token), + ) + poll_response.raise_for_status() + poll_data = _ensure_success_payload(poll_response.json(), "result polling") + result = _extract_result(poll_data, data_id, file_name) + + state = str(result.get("state") or "").lower() + if state == "done": + break + if state == "failed": + raise MinerUError(str(result.get("err_msg") or "MinerU parsing failed")) + if state and state not in ACTIVE_STATES: + raise MinerUError(f"Unexpected MinerU parsing state: {state}") + else: + raise MinerUError("MinerU parsing timed out") + + if not result: + raise MinerUError("MinerU did not return a parsing result") + + full_zip_url = str(result.get("full_zip_url") or "") + if not full_zip_url: + raise MinerUError("MinerU result is missing full_zip_url") + + markdown = await _download_full_markdown(client, full_zip_url) + if not markdown.strip(): + raise MinerUError("MinerU returned empty Markdown") + + return { + "file_name": file_name, + "markdown": markdown, + "batch_id": batch_id, + "data_id": str(result.get("data_id") or data_id), + "source": "mineru", + } + except httpx.ConnectError as exc: + raise MinerUError("无法连接 MinerU 服务,请检查网络、DNS 或代理设置") from exc + except httpx.TimeoutException as exc: + raise MinerUError("连接 MinerU 超时,请稍后重试") from exc From 2cf88446d2e1da8674cde7d7fd2e20cd4f132234 Mon Sep 17 00:00:00 2001 From: Yutong Leng Date: Tue, 28 Apr 2026 16:53:49 +0800 Subject: [PATCH 8/8] feat(pdf): add select-all action for parsed text selection --- frontend/src/components/ChatPanel.tsx | 29 +++++++++++++++++++++++++++ frontend/src/i18n/en.ts | 1 + frontend/src/i18n/zh.ts | 1 + 3 files changed, 31 insertions(+) diff --git a/frontend/src/components/ChatPanel.tsx b/frontend/src/components/ChatPanel.tsx index 1b0fd87..5f937e1 100644 --- a/frontend/src/components/ChatPanel.tsx +++ b/frontend/src/components/ChatPanel.tsx @@ -458,6 +458,27 @@ export function ChatPanel({ textareaRef.current?.focus(); }, [isLoading, isGenerating, pdfParse]); + const handleSelectAllPdfText = useCallback(() => { + if (pdfParse?.status !== "done") return; + const allText = pdfParse.result.markdown.trim(); + if (!allText) return; + + setPdfParse((prev) => ( + prev?.status === "done" + ? { ...prev, selectedText: allText } + : prev + )); + + const container = pdfTextRef.current; + if (!container) return; + const selection = window.getSelection(); + if (!selection) return; + const range = document.createRange(); + range.selectNodeContents(container); + selection.removeAllRanges(); + selection.addRange(range); + }, [pdfParse]); + const handleKeyDown = useCallback( (e: React.KeyboardEvent) => { if (e.key === "Enter" && !e.shiftKey) { @@ -684,6 +705,14 @@ export function ChatPanel({ ? t("chat.pdfSelectedChars", { count: String(pdfParse.selectedText.length) }) : t("chat.pdfNoSelection")} +