Shannon4Science · Enimdisy · Apr 27, 2026 · Apr 27, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/README.md b/README.md
@@ -18,6 +18,7 @@
 
 - 🆕 NanaDraw now supports the GPT Image 2 model as an image generation option
 - 📝 Paste method description text → auto-generate pipeline diagrams
+- 📎 Upload images, PDFs, and text files through a single entry point: images become sketch references, and PDFs are parsed with MinerU so you can quote selected passages in your prompt
 - 🎨 Three creation modes: Draft, Generation, and Assembly
 - 🖼️ Built-in style gallery with 250+ academic paper reference images
 - 🧰 Asset workshop with Bioicons, reusable personal assets, and AI-generated materials
@@ -36,6 +37,15 @@ Upload a hand-drawn sketch and turn it into a high-fidelity editable pipeline di
 
 Figure 1 shows the rough hand-drawn sketch. Figure 2 shows the generated high-fidelity editable workflow diagram.
 
+### PDF Parsing and Quoted-Selection Drawing
+
+The AI Workbench has a single upload entry for images, PDFs, Markdown, and plain-text files. NanaDraw routes each file by type: images become sketch references, PDFs are parsed into Markdown through the MinerU online API, and Markdown/plain-text files are attached as prompt reference material.
+
+- File upload is available in Draft, Generation, Assembly, and Auto modes.
+- Parsed PDFs appear in a scrollable, collapsible floating panel on the left side of the workbench.
+- PDF content is sent only to MinerU for document parsing; NanaDraw does not automatically send the whole paper to the LLM.
+- Only the text explicitly quoted by the user is merged into the generation prompt; users can enrich the prompt before generating.
+
 ### Creation Modes
 
 | Mode | Description | Steps | Example Screenshot |
@@ -164,6 +174,7 @@ After starting, click the ⚙️ gear icon in the top-right corner to configure:
 - **Image Model**: Default `gemini-3-pro-image-preview`
 - **Component Model**: Default `gemini-3.1-flash-image-preview`
 - **NanaSoul**: Custom AI persona for style constraints
+- **Document Parsing Token**: MinerU online API token for PDF parsing
 
 #### Data Directory (Environment Variable)
 

diff --git a/README_zh-CN.md b/README_zh-CN.md
@@ -18,6 +18,7 @@
 
 - 🆕 NanaDraw 上线 GPT Image 2 模型供大家选择
 - 📝 粘贴方法描述文本，自动生成流程图
+- 📎 单一入口上传图片、PDF 或文本文件；图片作为草稿参考，PDF 经 MinerU 解析后引用选段绘图
 - 🎨 三种创作模式：草稿模式、生成模式、组装模式
 - 🖼️ 内置 250+ 学术论文风格参考图
 - 🧰 素材工坊集成 Bioicons、个人常用素材和 AI 生成素材
@@ -36,6 +37,15 @@
 
 图 1 为用户上传的手绘草图，图 2 为系统生成的高保真可编辑流程图。
 
+### PDF 文档解析与引用选段绘图
+
+AI 工作台底部提供统一上传入口，支持图片、PDF、Markdown 和文本文件。NanaDraw 会按文件类型自动分流：图片作为草稿图参考，PDF 调用 MinerU 在线 API 解析为 Markdown，Markdown/Text 作为文本附件进入提示词参考。
+
+- 支持在草稿模式、生成模式、组装模式和自动模式中上传文件。
+- PDF 解析结果会显示在工作台左侧可滚动、可收起的浮窗中。
+- PDF 内容只用于 MinerU 文档解析；不会自动把整篇论文发送给 LLM。
+- 只有用户主动引用的选中文本会随提示词进入 NanaDraw 的生成流程；用户可以继续补充绘图要求后再生成。
+
 ### 多种模式
 
 | 模式 | 说明 | 步骤 | 示例截图 |
@@ -164,6 +174,7 @@ python start.py --dev
 - **图像模型**：默认 `gemini-3-pro-image-preview`
 - **组件模型**：默认 `gemini-3.1-flash-image-preview`
 - **NanaSoul**：用于风格约束的自定义 AI 角色
+- **文档解析 Token**：MinerU 在线 API Token，用于 PDF 解析
 
 #### 数据目录（环境变量）
 

diff --git a/backend/app/api/v1/endpoints/documents.py b/backend/app/api/v1/endpoints/documents.py
@@ -0,0 +1,38 @@
+from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
+
+from app.dependencies import require_auth
+from app.services.mineru_service import MinerUError, parse_pdf_with_mineru
+from app.services.settings_service import load_settings
+
+# Router for this module; every route below is registered under /documents.
+router = APIRouter(prefix="/documents", tags=["documents"])
+
+# Largest upload parse_pdf accepts: 200 MiB, expressed in bytes.
+MAX_PDF_SIZE_BYTES = 200 * 1024 * 1024
+
+
+@router.post("/parse-pdf")
+async def parse_pdf(file: UploadFile = File(...), _user=Depends(require_auth)):
+    """Parse an uploaded PDF with MinerU and return the parsing result.
+
+    The upload is accepted when its file name ends in ``.pdf``
+    (case-insensitive) or its declared content type is ``application/pdf``.
+    The MinerU token is read from the saved settings on every request.
+
+    Args:
+        file: The uploaded document.
+        _user: Result of the ``require_auth`` dependency; unused in the body.
+
+    Returns:
+        Whatever ``parse_pdf_with_mineru`` returns for the file.
+
+    Raises:
+        HTTPException: 400 when the upload is not a PDF, is empty, or no
+            MinerU token is configured; 413 when it exceeds
+            ``MAX_PDF_SIZE_BYTES``; 502 when parsing fails.
+    """
+    file_name = file.filename or "document.pdf"
+    if not file_name.lower().endswith(".pdf") and file.content_type != "application/pdf":
+        raise HTTPException(status_code=400, detail="仅支持上传 PDF 文件")
+
+    # Read at most one byte past the limit: that is enough to detect an
+    # oversized upload without buffering an arbitrarily large body in memory.
+    content = await file.read(MAX_PDF_SIZE_BYTES + 1)
+    if not content:
+        raise HTTPException(status_code=400, detail="PDF 文件为空")
+    if len(content) > MAX_PDF_SIZE_BYTES:
+        raise HTTPException(status_code=413, detail="PDF 文件不能超过 200MB")
+
+    token = str(load_settings().get("mineru_api_token", "")).strip()
+    if not token:
+        raise HTTPException(status_code=400, detail="请先在设置中配置 MinerU Token")
+
+    try:
+        return await parse_pdf_with_mineru(
+            file_name=file_name,
+            file_bytes=content,
+            token=token,
+        )
+    except MinerUError as exc:
+        # Service-level failures already carry a user-facing message.
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
+    except Exception as exc:
+        # Boundary handler: anything else is reported as a generic parse
+        # failure, falling back to the exception class name if it has no text.
+        msg = str(exc).strip() or exc.__class__.__name__
+        raise HTTPException(status_code=502, detail=f"PDF 解析失败: {msg}") from exc
diff --git a/backend/app/api/v1/endpoints/settings.py b/backend/app/api/v1/endpoints/settings.py
@@ -24,9 +24,11 @@ class SettingsResponse(BaseModel):
     llm_image_model: str = ""
     llm_component_model: str = ""
     api_format: str = "auto"
+    mineru_api_token: str = ""
     nana_soul: str = ""
     language: str = "zh"
     is_configured: bool = False
+    mineru_is_configured: bool = False
 
 
 class SettingsUpdate(BaseModel):
@@ -40,6 +42,7 @@ class SettingsUpdate(BaseModel):
     llm_image_model: str | None = None
     llm_component_model: str | None = None
     api_format: str | None = None
+    mineru_api_token: str | None = None
     nana_soul: str | None = Field(default=None, max_length=500)
     language: str | None = None
 
@@ -102,9 +105,11 @@ def _to_response(data: dict[str, Any]) -> SettingsResponse:
         llm_image_model=str(data.get("llm_image_model", "") or ""),
         llm_component_model=str(data.get("llm_component_model", "") or ""),
         api_format=str(data.get("api_format", "") or "auto"),
+        mineru_api_token=mask_api_key(str(data.get("mineru_api_token", ""))),
         nana_soul=str(data.get("nana_soul", "") or ""),
         language=str(data.get("language", "") or "zh"),
         is_configured=bool(str(data.get("llm_api_key", "")).strip()),
+        mineru_is_configured=bool(str(data.get("mineru_api_token", "")).strip()),
     )
 
 
@@ -119,6 +124,13 @@ async def get_settings():
 async def update_settings(body: SettingsUpdate):
     """Update settings. Only non-None fields are updated."""
     updates = body.model_dump(exclude_none=True)
+    # Trim surrounding whitespace from the MinerU token. A blank or
+    # whitespace-only value is removed from the update set rather than being
+    # passed on to apply_settings_updates.
+    # NOTE(review): this means an empty token is never forwarded, so a client
+    # presumably cannot clear a saved token by sending "" — confirm intended.
+    mineru_token = updates.get("mineru_api_token")
+    if isinstance(mineru_token, str):
+        mineru_token = mineru_token.strip()
+        if mineru_token:
+            updates["mineru_api_token"] = mineru_token
+        else:
+            updates.pop("mineru_api_token")
     data = await asyncio.to_thread(apply_settings_updates, updates)
     return _to_response(data)
 

diff --git a/backend/app/api/v1/router.py b/backend/app/api/v1/router.py
@@ -9,6 +9,7 @@
     models,
     assistant,
     settings,
+    documents,
 )
 
 api_router = APIRouter()
@@ -21,3 +22,4 @@
 api_router.include_router(models.router)
 api_router.include_router(assistant.router)
 api_router.include_router(settings.router)
+api_router.include_router(documents.router)
diff --git a/backend/app/services/mineru_service.py b/backend/app/services/mineru_service.py
@@ -0,0 +1,186 @@
+import asyncio
+import io
+import time
+import uuid
+import zipfile
+from pathlib import Path
+from typing import Any
+
+import httpx
+
+
+# Base URL that the MinerU endpoint paths used in this module are appended to.
+MINERU_API_BASE = "https://mineru.net/api/v4"
+# Seconds slept before each poll of the batch result endpoint.
+POLL_INTERVAL_SECONDS = 5
+# Total polling budget in seconds; once exceeded, parsing is reported as timed out.
+POLL_TIMEOUT_SECONDS = 600
+# States that mean "still in progress": polling continues while the reported
+# state is one of these (an empty state is also tolerated by the poll loop).
+ACTIVE_STATES = {"waiting-file", "uploading", "pending", "running", "converting"}
+
+
+class MinerUError(RuntimeError):
+    """Error raised by this module for any MinerU request or result failure."""
+
+
+def _auth_headers(token: str) -> dict[str, str]:
+    """Return the headers that authenticate a request with *token*.
+
+    The token is sent as a Bearer credential and any response media type is
+    accepted.
+    """
+    bearer = f"Bearer {token}"
+    return dict(Authorization=bearer, Accept="*/*")
+
+
+def _json_headers(token: str) -> dict[str, str]:
+    """Return the authenticated headers plus a JSON ``Content-Type``."""
+    headers = _auth_headers(token)
+    headers["Content-Type"] = "application/json"
+    return headers
+
+
+def _ensure_success_payload(payload: dict[str, Any], action: str) -> dict[str, Any]:
+    """Validate a decoded MinerU response and return its ``data`` object.
+
+    Args:
+        payload: The decoded JSON body of a MinerU response.
+        action: Short description of the request, used in error messages.
+
+    Returns:
+        The ``data`` member of *payload*.
+
+    Raises:
+        MinerUError: If *payload* is not a JSON object, its ``code`` is not
+            ``0`` (the service ``msg`` is used when present), or ``data`` is
+            not a JSON object.
+    """
+    # A JSON body may decode to a list, string or null; reject those here
+    # instead of failing with AttributeError on .get below.
+    if not isinstance(payload, dict):
+        raise MinerUError(f"MinerU {action} returned invalid data")
+    if payload.get("code") != 0:
+        msg = str(payload.get("msg") or f"MinerU {action} failed")
+        raise MinerUError(msg)
+    data = payload.get("data")
+    if not isinstance(data, dict):
+        raise MinerUError(f"MinerU {action} returned invalid data")
+    return data
+
+
+def _first_matching_result(results: list[Any], data_id: str, file_name: str) -> dict[str, Any] | None:
+    """Pick the entry of *results* that describes the uploaded file.
+
+    Entries that are not dicts are ignored. Preference order: an entry whose
+    ``data_id`` equals *data_id*, then one whose ``file_name`` equals
+    *file_name*, then the first dict entry of any kind. Returns ``None`` when
+    *results* contains no dict at all.
+    """
+    candidates = [entry for entry in results if isinstance(entry, dict)]
+    for field, wanted in (("data_id", data_id), ("file_name", file_name)):
+        match = next((entry for entry in candidates if entry.get(field) == wanted), None)
+        if match is not None:
+            return match
+    return candidates[0] if candidates else None
+
+
+def _extract_result(data: dict[str, Any], data_id: str, file_name: str) -> dict[str, Any]:
+    """Return the result entry for the uploaded file from a poll response.
+
+    ``extract_result`` is used as-is when it is a dict. Otherwise the list
+    under ``extract_result``, and then the list under ``extract_results``,
+    is searched with ``_first_matching_result``.
+
+    Raises:
+        MinerUError: If neither key yields a non-empty matching entry.
+    """
+    primary = data.get("extract_result")
+    if isinstance(primary, dict):
+        return primary
+
+    # Only list values are searched; the plural key is never returned as a dict.
+    for candidates in (primary, data.get("extract_results")):
+        if not isinstance(candidates, list):
+            continue
+        hit = _first_matching_result(candidates, data_id, file_name)
+        if hit:
+            return hit
+
+    raise MinerUError("MinerU result payload is missing extract_result")
+
+
+async def _download_full_markdown(client: httpx.AsyncClient, full_zip_url: str) -> str:
+    """Download the result archive and return the text of its ``full.md``.
+
+    The first archive member whose base name is ``full.md`` is decoded as
+    UTF-8, with undecodable bytes replaced.
+
+    Raises:
+        MinerUError: If the download is not a valid zip file or contains no
+            ``full.md`` member.
+    """
+    response = await client.get(full_zip_url)
+    response.raise_for_status()
+
+    try:
+        with zipfile.ZipFile(io.BytesIO(response.content)) as bundle:
+            for member in bundle.namelist():
+                if Path(member).name == "full.md":
+                    raw = bundle.read(member)
+                    return raw.decode("utf-8", errors="replace")
+            raise MinerUError("MinerU result zip does not contain full.md")
+    except zipfile.BadZipFile as exc:
+        raise MinerUError("MinerU result is not a valid zip file") from exc
+
+
+async def parse_pdf_with_mineru(
+    *,
+    file_name: str,
+    file_bytes: bytes,
+    token: str,
+    client_factory: Any | None = None,
+) -> dict[str, str]:
+    """Convert a PDF to Markdown through the MinerU batch API.
+
+    Steps: request an upload URL, PUT the file bytes to it, poll the batch
+    result until the file is reported as done, then download the result zip
+    and read its ``full.md``.
+
+    Args:
+        file_name: Name reported to MinerU and echoed back in the result.
+        file_bytes: Raw PDF content to upload.
+        token: MinerU API token, sent as a Bearer credential.
+        client_factory: Optional callable used in place of
+            ``httpx.AsyncClient``; it is called with a ``timeout`` keyword
+            and must return an async context manager. The default client is
+            additionally created with ``trust_env=False``.
+
+    Returns:
+        A dict with the keys ``file_name``, ``markdown``, ``batch_id``,
+        ``data_id`` and ``source``.
+
+    Raises:
+        MinerUError: If MinerU reports an error or an unexpected state, a
+            response is missing required fields, parsing exceeds
+            ``POLL_TIMEOUT_SECONDS``, the Markdown is empty, or the request
+            fails with an HTTP error status, a connection error or a timeout.
+    """
+    data_id = f"nanadraw-{uuid.uuid4().hex}"
+    timeout = httpx.Timeout(connect=15.0, read=120.0, write=120.0, pool=15.0)
+    make_client = client_factory or httpx.AsyncClient
+    client_kwargs: dict[str, Any] = {"timeout": timeout}
+    if client_factory is None:
+        # Avoid inheriting broken/unsupported proxy envs in local runs.
+        client_kwargs["trust_env"] = False
+
+    try:
+        async with make_client(**client_kwargs) as client:
+            apply_body = {
+                "files": [
+                    {
+                        "name": file_name,
+                        "data_id": data_id,
+                        "is_ocr": False,
+                    }
+                ],
+                "model_version": "vlm",
+                "language": "ch",
+                "enable_table": True,
+                "enable_formula": True,
+            }
+
+            # Step 1: ask MinerU for an upload URL for this file.
+            apply_response = await client.post(
+                f"{MINERU_API_BASE}/file-urls/batch",
+                headers=_json_headers(token),
+                json=apply_body,
+            )
+            apply_response.raise_for_status()
+            apply_data = _ensure_success_payload(apply_response.json(), "upload URL request")
+
+            batch_id = str(apply_data.get("batch_id") or "")
+            file_urls = apply_data.get("file_urls")
+            if not batch_id or not isinstance(file_urls, list) or not file_urls:
+                raise MinerUError("MinerU upload URL response is missing batch_id or file_urls")
+
+            # Step 2: upload the raw bytes to the first returned URL.
+            upload_response = await client.put(str(file_urls[0]), content=file_bytes)
+            upload_response.raise_for_status()
+
+            # Step 3: poll until the file is done, failed, or the budget runs out.
+            deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
+            result: dict[str, Any] | None = None
+            while time.monotonic() < deadline:
+                await asyncio.sleep(POLL_INTERVAL_SECONDS)
+                poll_response = await client.get(
+                    f"{MINERU_API_BASE}/extract-results/batch/{batch_id}",
+                    headers=_json_headers(token),
+                )
+                poll_response.raise_for_status()
+                poll_data = _ensure_success_payload(poll_response.json(), "result polling")
+                result = _extract_result(poll_data, data_id, file_name)
+
+                state = str(result.get("state") or "").lower()
+                if state == "done":
+                    break
+                if state == "failed":
+                    raise MinerUError(str(result.get("err_msg") or "MinerU parsing failed"))
+                if state and state not in ACTIVE_STATES:
+                    raise MinerUError(f"Unexpected MinerU parsing state: {state}")
+            else:
+                # Loop condition became false without a break: budget exhausted.
+                raise MinerUError("MinerU parsing timed out")
+
+            if not result:
+                raise MinerUError("MinerU did not return a parsing result")
+
+            # Step 4: fetch the result archive and extract the Markdown.
+            full_zip_url = str(result.get("full_zip_url") or "")
+            if not full_zip_url:
+                raise MinerUError("MinerU result is missing full_zip_url")
+
+            markdown = await _download_full_markdown(client, full_zip_url)
+            if not markdown.strip():
+                raise MinerUError("MinerU returned empty Markdown")
+
+            return {
+                "file_name": file_name,
+                "markdown": markdown,
+                "batch_id": batch_id,
+                "data_id": str(result.get("data_id") or data_id),
+                "source": "mineru",
+            }
+    except httpx.HTTPStatusError as exc:
+        # Keep HTTP failures inside this module's error type, like the
+        # connection and timeout cases below, and report only the status code
+        # so request URLs do not end up in the message.
+        raise MinerUError(f"MinerU 请求失败（HTTP {exc.response.status_code}）") from exc
+    except httpx.ConnectError as exc:
+        raise MinerUError("无法连接 MinerU 服务，请检查网络、DNS 或代理设置") from exc
+    except httpx.TimeoutException as exc:
+        raise MinerUError("连接 MinerU 超时，请稍后重试") from exc
diff --git a/backend/app/services/settings_service.py b/backend/app/services/settings_service.py
@@ -18,6 +18,7 @@
     "llm_image_model": "gemini-3-pro-image-preview",
     "llm_component_model": "gemini-3.1-flash-image-preview",
     "api_format": "auto",
+    "mineru_api_token": "",
     "nana_soul": "",
     "language": "zh",
 }
@@ -103,6 +104,7 @@ def _persist_unlocked(merged: dict[str, Any]) -> dict[str, Any]:
         "llm_api_key": mask_api_key(str(to_store.get("llm_api_key", ""))),
         "image_api_key": mask_api_key(str(to_store.get("image_api_key", ""))),
         "vision_api_key": mask_api_key(str(to_store.get("vision_api_key", ""))),
+        "mineru_api_token": mask_api_key(str(to_store.get("mineru_api_token", ""))),
     }
     logger.info("Settings saved: %s", log_payload)
     return dict(to_store)

diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
@@ -0,0 +1,7 @@
+import sys
+from pathlib import Path
+
+
+# Directory two levels above this file's resolved path; it is put first on
+# sys.path unless it is already listed there.
+BACKEND_ROOT = Path(__file__).resolve().parent.parent
+if all(entry != str(BACKEND_ROOT) for entry in sys.path):
+    sys.path.insert(0, str(BACKEND_ROOT))