From 51e592228518d3114bacba12b46a1f8f5f17318c Mon Sep 17 00:00:00 2001
From: onemorelight2024 <1249767952@qq.com>
Date: Tue, 31 Mar 2026 18:19:55 +0800
Subject: [PATCH] feat: add GraphRAG pipeline

---
 fastapi_app/config/settings.py                |  20 +
 fastapi_app/main.py                           |   3 +
 fastapi_app/routers/graphrag_kb.py            | 347 ++++++++
 fastapi_app/source_manager.py                 | 130 ++-
 .../workflow_adapters/wa_graphrag_kb.py       | 152 ++++
 frontend_en/package-lock.json                 | 172 ++++
 frontend_en/package.json                      |   1 +
 .../graphrag-kb/GraphRAGKbPanel.tsx           | 757 +++++++++++++++++
 frontend_en/src/config/api.ts                 |   2 +
 frontend_en/src/pages/NotebookView.tsx        |  72 +-
 frontend_en/src/services/graphragKbService.ts |  84 ++
 frontend_en/src/types/graphragKb.ts           |  77 ++
 frontend_en/src/types/index.ts                |  12 +-
 .../src/utils/graphragMarkdownHighlight.ts    |  20 +
 frontend_en/src/vite-env.d.ts                 |   2 +
 frontend_en/vite.config.ts                    |   4 +-
 frontend_zh/package-lock.json                 | 172 ++++
 frontend_zh/package.json                      |   1 +
 .../graphrag-kb/GraphRAGKbPanel.tsx           | 760 ++++++++++++++++++
 frontend_zh/src/config/api.ts                 |   3 +
 frontend_zh/src/pages/NotebookView.tsx        |  76 +-
 frontend_zh/src/services/graphragKbService.ts |  93 +++
 frontend_zh/src/types/graphragKb.ts           |  77 ++
 frontend_zh/src/types/index.ts                |  12 +-
 .../src/utils/graphragMarkdownHighlight.ts    |  20 +
 frontend_zh/src/vite-env.d.ts                 |   2 +
 frontend_zh/vite.config.ts                    |   4 +-
 requirements-base.txt                         |  30 +-
 .../toolkits/graphrag_ms_tool/__init__.py     |  21 +
 .../toolkits/graphrag_ms_tool/indexer.py      | 284 +++++++
 .../toolkits/graphrag_ms_tool/judge.py        | 140 ++++
 .../toolkits/graphrag_ms_tool/querier.py      | 449 +++++++++++
 .../graphrag_ms_tool/subgraph_pruner.py       | 185 +++++
 .../toolkits/kggen_tool/__init__.py           |  12 +
 .../toolkits/kggen_tool/kg_extractor.py       | 219 +++++
 .../toolkits/kggen_tool/kg_merger.py          |  87 ++
 workflow_engine/workflow/wf_graphrag_kb.py    | 390 +++++++++
 37 files changed, 4859 insertions(+), 33 deletions(-)
 create mode 100644 fastapi_app/routers/graphrag_kb.py
 create mode 100644 fastapi_app/workflow_adapters/wa_graphrag_kb.py
 create mode 100644 frontend_en/src/components/graphrag-kb/GraphRAGKbPanel.tsx
 create mode 100644 frontend_en/src/services/graphragKbService.ts
 create mode 100644 frontend_en/src/types/graphragKb.ts
 create mode 100644 frontend_en/src/utils/graphragMarkdownHighlight.ts
 create mode 100644 frontend_zh/src/components/graphrag-kb/GraphRAGKbPanel.tsx
 create mode 100644 frontend_zh/src/services/graphragKbService.ts
 create mode 100644 frontend_zh/src/types/graphragKb.ts
 create mode 100644 frontend_zh/src/utils/graphragMarkdownHighlight.ts
 create mode 100644 workflow_engine/toolkits/graphrag_ms_tool/__init__.py
 create mode 100644 workflow_engine/toolkits/graphrag_ms_tool/indexer.py
 create mode 100644 workflow_engine/toolkits/graphrag_ms_tool/judge.py
 create mode 100644 workflow_engine/toolkits/graphrag_ms_tool/querier.py
 create mode 100644 workflow_engine/toolkits/graphrag_ms_tool/subgraph_pruner.py
 create mode 100644 workflow_engine/toolkits/kggen_tool/__init__.py
 create mode 100644 workflow_engine/toolkits/kggen_tool/kg_extractor.py
 create mode 100644 workflow_engine/toolkits/kggen_tool/kg_merger.py
 create mode 100644 workflow_engine/workflow/wf_graphrag_kb.py

diff --git a/fastapi_app/config/settings.py b/fastapi_app/config/settings.py
index 1bdeead..51f2d37 100644
--- a/fastapi_app/config/settings.py
+++ b/fastapi_app/config/settings.py
@@ -75,6 +75,26 @@ class AppSettings(BaseSettings):
     LOCAL_EMBEDDING_CUDA_VISIBLE_DEVICES: Optional[str] = None
     LOCAL_EMBEDDING_GPU_MEMORY_UTILIZATION: float = 0.3
 
+    # ── GraphRAG ──────────────────────────────────────────────────────────────
+    GRAPHRAG_LLM_MODEL: str = "gpt-5"              # chat model for entity/community extraction
+    GRAPHRAG_EMBEDDING_MODEL: str = "text-embedding-3-small"
+    GRAPHRAG_OUTPUT_DIR: str = "outputs/graphrag_kb"  # workspace root, layout: {dir}/{email}/{nb_id}/
+    GRAPHRAG_CMD: str = ""                          # graphrag CLI path; auto-detected from PATH if empty
+    GRAPHRAG_CHUNK_SIZE: int = 512                  # chars per chunk; also written to settings.yaml chunks.size
+    GRAPHRAG_CHUNK_OVERLAP: int = 64
+    GRAPHRAG_RESPONSE_TYPE: str = "Single Paragraph"  # passed to local/global_search response_type
+    GRAPHRAG_SUBGRAPH_PRUNE_ENABLED: bool = True    # run LLM subgraph pruning after each query
+    GRAPHRAG_SUBGRAPH_PRUNE_MAX_EDGES_INPUT: int = 80  # truncate input to pruner to this many edges
+    GRAPHRAG_MAX_HIGHLIGHT_HINTS: int = 10          # max highlight_hints returned (0 = unlimited)
+
+    # ── KGGen (optional triple extraction, disabled by default) ───────────────
+    KGGEN_MODEL: str = "deepseek-v3.2"
+    KGGEN_PER_CHUNK: bool = True                    # True = per-chunk calls; False = full-text single call
+    KGGEN_LOG_CHUNK_INTERVAL: int = 10              # log every N chunks (0 = first/last only)
+
+    # ── Judge (answer confidence scoring) ─────────────────────────────────────
+    JUDGE_MODEL: str = "gpt-5"                      # returns judge_score [0,1] and judge_rationale
+
     class Config:
         env_file = ".env"
         env_file_encoding = "utf-8"
diff --git a/fastapi_app/main.py b/fastapi_app/main.py
index fdbd9b1..b2d1047 100644
--- a/fastapi_app/main.py
+++ b/fastapi_app/main.py
@@ -41,6 +41,7 @@
 from fastapi.responses import FileResponse
 
 from fastapi_app.routers import auth, data_extract, files, kb, kb_embedding, paper2drawio, paper2ppt
+from fastapi_app.routers import graphrag_kb
 from fastapi_app.middleware.api_key import APIKeyMiddleware
 from fastapi_app.middleware.logging import LoggingMiddleware
 from workflow_engine.utils import get_project_root
@@ -476,6 +477,8 @@ def create_app() -> FastAPI:
     app.include_router(paper2drawio.router, prefix="/api/v1", tags=["Paper2Drawio"])
     app.include_router(paper2ppt.router, prefix="/api/v1", tags=["Paper2PPT"])
     app.include_router(auth.router, prefix="/api/v1", tags=["Auth"])
+    # GraphRAG 知识库：/api/v1/graphrag-kb/{index,query,merge,chunk-snippet} → wa_graphrag_kb → wf_graphrag_kb
+    app.include_router(graphrag_kb.router, prefix="/api/v1", tags=["GraphRAG KB"])
 
     # 静态文件：/outputs 下的文件（兼容 URL 中 %40 与 磁盘 @ 两种路径）
     project_root = get_project_root()
diff --git a/fastapi_app/routers/graphrag_kb.py b/fastapi_app/routers/graphrag_kb.py
new file mode 100644
index 0000000..24a1a7d
--- /dev/null
+++ b/fastapi_app/routers/graphrag_kb.py
@@ -0,0 +1,347 @@
+"""GraphRAG 知识库 HTTP 路由（前缀在 ``main`` 中与 ``/api/v1`` 拼接）。
+
+【端点与数据流】
+    POST ``/graphrag-kb/index``  → ``wa_graphrag_kb.run_index`` → 建索引 → ``IndexResponse``
+    POST ``/graphrag-kb/query``   → ``run_query`` → 检索 + Judge（+ 子图 CoT）→ ``QueryResponse``
+    POST ``/graphrag-kb/merge``  → ``run_merge`` → 合并两 workspace → ``MergeResponse``
+    POST ``/graphrag-kb/chunk-snippet`` → 按 ``chunk_id`` 从 workspace ``input/*.txt`` 抽取 ``[chunk:…]`` 块正文（供前端阅读器高亮）
+
+【安全】
+    ``_safe_workspace_dir`` 将路径解析到项目根目录下，防止目录穿越。
+
+【说明】
+    请求体携带与其它路由一致的 LLM 凭证；前端不直连 ``workflow_engine``，仅调本路由。
+"""
+from __future__ import annotations
+
+import json
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+
+from fastapi_app.config import settings
+from fastapi_app.workflow_adapters.wa_graphrag_kb import run_index, run_query, run_merge
+from workflow_engine.logger import get_logger
+from workflow_engine.utils import get_project_root
+
+log = get_logger(__name__)
+
+# Matches the header line that opens each chunk section of a GraphRAG input file (the section body runs until the next header or EOF).
+_CHUNK_HEAD = re.compile(r"\[chunk:([a-f0-9]+)\]\s*\n", re.IGNORECASE)
+
+
+def _extract_chunk_block_from_input_text(text: str, chunk_id: str) -> str:
+    """Return the stripped body that follows the ``[chunk:<chunk_id>]`` header in *text*, up to the next chunk header or EOF; ``""`` if the id is absent."""
+    want = chunk_id.strip().lower()  # ids are compared case-insensitively
+    matches = list(_CHUNK_HEAD.finditer(text))
+    for i, m in enumerate(matches):
+        if m.group(1).lower() != want:
+            continue
+        start = m.end()
+        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)  # the last section runs to EOF
+        return text[start:end].strip()
+    return ""
+
+
+def _safe_workspace_dir(raw: str) -> Path:
+    """Resolve *raw* to an absolute path under the project root; raise ``HTTPException(400)`` when it falls outside."""
+    root = get_project_root().resolve()
+    p = Path(raw.strip())
+    if not p.is_absolute():
+        p = (root / p).resolve()  # relative input is interpreted against the project root
+    else:
+        p = p.resolve()
+    try:
+        p.relative_to(root)  # raises ValueError when p is not located inside root
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail="workspace_dir must be under project root") from exc
+    return p
+
+router = APIRouter(prefix="/graphrag-kb", tags=["GraphRAG KB"])
+
+# ---------------------------------------------------------------------------
+# Pydantic request/response models
+# ---------------------------------------------------------------------------
+
+class _LLMBase(BaseModel):  # LLM connection fields shared by IndexRequest, QueryRequest and MergeRequest
+    api_url: str = Field(default_factory=lambda: settings.DEFAULT_LLM_API_URL)  # default is read from settings when the model is instantiated
+    api_key: str = ""
+    model: str = Field(default_factory=lambda: settings.GRAPHRAG_LLM_MODEL)  # default is read from settings when the model is instantiated
+
+
+class IndexRequest(_LLMBase):  # request body of POST /graphrag-kb/index
+    notebook_id: str
+    notebook_title: str = ""
+    email: str = ""
+    source_stems: Optional[List[str]] = None  # ``None`` is forwarded to the workflow as an empty list by ``run_index``
+    workspace_dir: str = ""
+    force_reindex: bool = False
+    # Run MinerU on un-parsed PDFs before chunk extraction.
+    # Set to False if MinerU was already triggered via /kb/upload.
+    parse_pdfs: bool = True
+    # Default True: do not run KGGen (user-facing path is GraphRAG-only).
+    skip_kggen: bool = True
+
+
+class IndexResponse(BaseModel):  # response body of POST /graphrag-kb/index
+    workspace_dir: str  # "" when the workflow result carries no ``workspace_dir`` key
+    num_chunks: int  # this and the two counters below fall back to 0 when missing from the workflow result
+    kg_entities: int
+    kg_relations: int
+
+
+class QueryRequest(_LLMBase):  # request body of POST /graphrag-kb/query
+    notebook_id: str
+    notebook_title: str = ""
+    email: str = ""
+    question: str
+    search_method: str = Field(default="local", pattern="^(local|global)$")  # only "local" or "global" pass validation
+    workspace_dir: str = ""
+
+
+class QueryResponse(BaseModel):  # response body of POST /graphrag-kb/query; every field has an empty default
+    answer: str
+    context_data: Dict[str, Any] = Field(default_factory=dict)
+    reasoning_subgraph: List[Dict[str, Any]] = Field(default_factory=list)
+    source_chunks: List[str] = Field(default_factory=list)
+    highlight_hints: List[Dict[str, Any]] = Field(default_factory=list)
+    judge_score: float = 0.0  # the query endpoint documents this as a confidence score in [0.0, 1.0]
+    judge_rationale: str = ""
+    reasoning_subgraph_cot: str = ""
+
+
+class MergeRequest(_LLMBase):  # request body of POST /graphrag-kb/merge
+    notebook_id: str = ""
+    notebook_title: str = ""
+    email: str = ""
+    workspace_dir_a: str  # forwarded to the workflow request as ``workspace_dir`` by ``run_merge``
+    workspace_dir_b: str  # forwarded to the workflow request under the same name
+    dedupe: bool = False
+
+
+class MergeResponse(BaseModel):  # response body of POST /graphrag-kb/merge
+    merged_workspace_dir: str  # "" when the workflow result carries no ``merged_workspace_dir`` key
+    num_chunks: int  # 0 when missing from the workflow result
+
+
+class ChunkSnippetRequest(BaseModel):
+    """Resolve *chunk_id* to raw text inside GraphRAG ``input/<stem>.txt`` markers."""
+
+    workspace_dir: str = Field(..., description="GraphRAG workspace root (contains chunk_meta.json + input/)")
+    chunk_id: str = Field(..., min_length=8, description="Hex chunk id from chunk_meta / query")
+    # Optional: pass reasoning_subgraph triples so the backend can ask an LLM to pick
+    # the exact sentence from the chunk that best expresses one of these relationships.
+    triples: Optional[List[Dict[str, Any]]] = None  # only the first 20 triples are put into the LLM prompt
+
+
+class ChunkSnippetResponse(BaseModel):
+    text: str = ""
+    source_stem: str = ""
+    found: bool = False
+    # LLM-extracted verbatim sentence from the chunk that best matches the triples.
+    # Empty string if triples were not provided, LLM extraction failed, or the reply was not a verbatim substring of the chunk.
+    highlighted_sentence: str = ""
+
+
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+
+
+def _extract_sentence_for_triples(
+    chunk_text: str,
+    triples: List[Dict[str, Any]],
+) -> str:
+    """Ask the LLM configured in ``settings`` for the verbatim sentence of *chunk_text* that best expresses one of *triples*.
+    Returns ``""`` when an input is empty, ``openai`` is not installed, the call fails, or the reply is not a substring of the chunk.
+    """
+    if not chunk_text.strip() or not triples:
+        return ""
+    try:
+        from openai import OpenAI
+    except ImportError:
+        log.debug("[ChunkSnippet] openai not installed; skipping sentence extraction")
+        return ""
+
+    triple_lines = "\n".join(
+        f"  ({t.get('source', '?')}) --[{t.get('relation', '?')}]--> ({t.get('target', '?')})"
+        for t in triples[:20]  # cap the prompt: only the first 20 triples are listed
+    )
+    system_prompt = (
+        "You are a precise text extraction assistant. "
+        "Return ONLY the verbatim sentence or short phrase from the provided chunk "
+        "that best expresses one of the given relationships. "
+        "Do NOT paraphrase, add explanation, or include any other text."
+    )
+    user_msg = (
+        f"Knowledge graph relationships:\n{triple_lines}\n\n"
+        f"Chunk text:\n{chunk_text}\n\n"
+        "Extract the EXACT sentence or phrase from the chunk that best matches "
+        "one of the relationships above. Return only that text."
+    )
+    try:
+        api_base = settings.DEFAULT_LLM_API_URL.rstrip("/")
+        import os
+        api_key = os.getenv("DF_API_KEY", "") or "none"  # key comes from the DF_API_KEY env var; "none" is a placeholder when it is unset
+        client = OpenAI(api_key=api_key, base_url=api_base)
+        resp = client.chat.completions.create(
+            model=settings.GRAPHRAG_LLM_MODEL,
+            max_tokens=256,  # NOTE(review): some newer models may reject max_tokens / temperature=0 — confirm against the configured model
+            temperature=0,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_msg},
+            ],
+        )
+        sentence = (resp.choices[0].message.content or "").strip()
+        # Sanity check: LLM must return something that actually appears in the chunk
+        if sentence and sentence in chunk_text:
+            return sentence
+        log.debug("[ChunkSnippet] LLM sentence not found verbatim in chunk; discarding")
+        return ""
+    except Exception as exc:
+        log.warning("[ChunkSnippet] LLM extraction failed: %s", exc)
+        return ""
+
+
+@router.post("/chunk-snippet", response_model=ChunkSnippetResponse, summary="Extract [chunk:…] text from GraphRAG input")
+async def chunk_snippet_endpoint(req: ChunkSnippetRequest) -> ChunkSnippetResponse:
+    """Return the exact indexed chunk for the notebook reader (not the full MinerU MD); an empty/not-found response when it cannot be resolved."""
+    from fastapi.concurrency import run_in_threadpool  # function-scope import keeps the module import block untouched
+    ws = _safe_workspace_dir(req.workspace_dir)
+    try:  # a missing or unreadable chunk_meta.json raises here, so no separate is_file() probe is needed
+        meta = json.loads((ws / "chunk_meta.json").read_text(encoding="utf-8"))
+    except Exception:
+        return ChunkSnippetResponse()
+    cid = req.chunk_id.strip().lower()
+    # chunk_meta.json must be an object keyed by chunk id; any other JSON shape is treated as "not found" instead of a 500.
+    entry = (meta.get(req.chunk_id.strip()) or meta.get(cid)) if isinstance(meta, dict) else None
+    if not isinstance(entry, dict):
+        return ChunkSnippetResponse()
+    stem = str(entry.get("source_stem") or "").strip()
+    if not stem:
+        return ChunkSnippetResponse()
+    txt_path = ws / "input" / f"{stem}.txt"
+    if not txt_path.is_file():
+        return ChunkSnippetResponse(source_stem=stem, found=False)
+    try:
+        raw = txt_path.read_text(encoding="utf-8", errors="replace")
+    except Exception:
+        return ChunkSnippetResponse(source_stem=stem, found=False)
+    block = _extract_chunk_block_from_input_text(raw, cid)
+    if not block:
+        return ChunkSnippetResponse(source_stem=stem, found=False)
+    highlighted_sentence = ""
+    if req.triples:
+        # The LLM helper is synchronous; run it in a worker thread so the event loop is not blocked while it waits.
+        highlighted_sentence = await run_in_threadpool(_extract_sentence_for_triples, block, req.triples)
+    return ChunkSnippetResponse(text=block, source_stem=stem, found=True, highlighted_sentence=highlighted_sentence)
+
+
+# ---------------------------------------------------------------------------
+# Index / query / merge
+# ---------------------------------------------------------------------------
+
+@router.post("/index", response_model=IndexResponse, summary="Build GraphRAG index from notebook sources")
+async def index_endpoint(req: IndexRequest):
+    """Chunk notebook sources and run GraphRAG index (KGGen off by default).
+
+    Sources must already be imported via the ``/kb`` upload endpoint so MinerU output exists; a non-empty ``workspace_dir`` outside the project root yields HTTP 400.
+    """
+    workspace_dir = str(_safe_workspace_dir(req.workspace_dir)) if req.workspace_dir.strip() else req.workspace_dir  # before ``try``: keep the 400
+    try:
+        result = await run_index(
+            notebook_id=req.notebook_id,
+            notebook_title=req.notebook_title,
+            email=req.email,
+            api_url=req.api_url,
+            api_key=req.api_key,
+            model=req.model,
+            source_stems=req.source_stems,
+            workspace_dir=workspace_dir,
+            force_reindex=req.force_reindex,
+            parse_pdfs=req.parse_pdfs,
+            skip_kggen=req.skip_kggen,
+        )
+        return IndexResponse(
+            workspace_dir=result.get("workspace_dir", ""),
+            num_chunks=result.get("num_chunks", 0),
+            kg_entities=result.get("kg_entities", 0),
+            kg_relations=result.get("kg_relations", 0),
+        )
+    except Exception as exc:
+        log.exception("[Router] /graphrag-kb/index error: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+
+@router.post("/query", response_model=QueryResponse, summary="Query GraphRAG index with Judge scoring")
+async def query_endpoint(req: QueryRequest):
+    """Run a local or global GraphRAG search and return a structured result.
+
+    Response fields (a non-empty ``workspace_dir`` outside the project root yields HTTP 400 instead):
+    - ``answer`` / ``context_data`` — model answer text and serialised evidence tables (entities, relations, sources…)
+    - ``reasoning_subgraph`` — edge list induced from context_data
+    - ``source_chunks``     — chunk_ids that contributed to the answer
+    - ``highlight_hints``   — page/bbox hints for PDF highlighting
+    - ``judge_score``       — confidence score in [0.0, 1.0]
+    - ``judge_rationale``   — one-sentence judge explanation
+    - ``reasoning_subgraph_cot`` — LLM chain-of-thought for minimal subgraph (hop analysis)
+    """
+    workspace_dir = str(_safe_workspace_dir(req.workspace_dir)) if req.workspace_dir.strip() else req.workspace_dir  # before ``try``: keep the 400
+    try:
+        result = await run_query(
+            notebook_id=req.notebook_id,
+            notebook_title=req.notebook_title,
+            email=req.email,
+            api_url=req.api_url,
+            api_key=req.api_key,
+            model=req.model,
+            question=req.question,
+            search_method=req.search_method,
+            workspace_dir=workspace_dir,
+        )
+        return QueryResponse(
+            answer=result.get("answer", ""),
+            context_data=result.get("context_data", {}),
+            reasoning_subgraph=result.get("reasoning_subgraph", []),
+            source_chunks=result.get("source_chunks", []),
+            highlight_hints=result.get("highlight_hints", []),
+            judge_score=float(result.get("judge_score", 0.0)),
+            judge_rationale=result.get("judge_rationale", ""),
+            reasoning_subgraph_cot=result.get("reasoning_subgraph_cot", ""),
+        )
+    except Exception as exc:
+        log.exception("[Router] /graphrag-kb/query error: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+
+@router.post("/merge", response_model=MergeResponse, summary="Merge two GraphRAG KG workspaces")
+async def merge_endpoint(req: MergeRequest):
+    """Merge two GraphRAG workspaces using KGGen aggregate and re-index.
+
+    Both ``workspace_dir_a`` and ``workspace_dir_b`` must be valid, previously indexed workspaces under the
+    project root (HTTP 400 otherwise).  The merged workspace is written to ``{workspace_dir_a}_merged/``.
+    """
+    ws_a, ws_b = (str(_safe_workspace_dir(d)) if d.strip() else d for d in (req.workspace_dir_a, req.workspace_dir_b))  # before ``try``: keep the 400
+    try:
+        result = await run_merge(
+            notebook_id=req.notebook_id,
+            notebook_title=req.notebook_title,
+            email=req.email,
+            api_url=req.api_url,
+            api_key=req.api_key,
+            model=req.model,
+            workspace_dir_a=ws_a,
+            workspace_dir_b=ws_b,
+            dedupe=req.dedupe,
+        )
+        return MergeResponse(
+            merged_workspace_dir=result.get("merged_workspace_dir", ""),
+            num_chunks=result.get("num_chunks", 0),
+        )
+    except Exception as exc:
+        log.exception("[Router] /graphrag-kb/merge error: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
diff --git a/fastapi_app/source_manager.py b/fastapi_app/source_manager.py
index 5778163..2645cbc 100644
--- a/fastapi_app/source_manager.py
+++ b/fastapi_app/source_manager.py
@@ -7,16 +7,19 @@
 - Generating unified markdown for every source type
 - Reading back markdown / MinerU data for feature cards
 - Fallback to legacy kb_data / kb_mineru paths
+- Structured chunk extraction with chunk_id / page_index / order / bbox (for GraphRAG)
 """
 from __future__ import annotations
 
 import asyncio
+import hashlib
+import json
 import re
 import shutil
 import time
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 from workflow_engine.logger import get_logger
 from workflow_engine.utils import get_project_root
@@ -237,6 +240,85 @@ def ensure_sam3_dir(self, source_stem: str) -> Path:
         sam3_dir.mkdir(parents=True, exist_ok=True)
         return sam3_dir
 
+    def get_chunks_with_meta(
+        self,
+        source_stem: str,
+        chunk_size: int = 512,    # default matches settings.GRAPHRAG_CHUNK_SIZE
+        chunk_overlap: int = 64,  # default matches settings.GRAPHRAG_CHUNK_OVERLAP
+    ) -> List[Dict[str, Any]]:
+        """Return structured chunks for a single source, used by GraphRAG indexing.
+
+        Each dict has keys: chunk_id, text, page_index, order, bbox, source_stem.
+        chunk_id = SHA1("{stem}:{order}")[:16], embedded as [chunk:ID] in input/*.txt.
+        Priority: MinerU content_list.json (exact page+bbox) → MinerU MD (estimated page)
+        → unified MD (page_index=-1).  chunk_size / chunk_overlap only affect the two MD fallbacks; [] if no text is found.
+        """
+        chunks: List[Dict[str, Any]] = []
+
+        # 1) MinerU content_list.json — exact page + bbox per block
+        mineru_root = self.get_mineru_root(source_stem)
+        if mineru_root:
+            content_list_path = None
+            # rglob to handle varying MinerU output directory layouts; NOTE(review): takes the first match under mineru_root.parent — confirm it cannot belong to another source
+            for candidate in mineru_root.parent.rglob("*_content_list.json"):
+                content_list_path = candidate
+                break
+            if content_list_path and content_list_path.exists():
+                try:
+                    raw_blocks = json.loads(
+                        content_list_path.read_text(encoding="utf-8")
+                    )
+                    order = 0
+                    for block in raw_blocks:
+                        # MinerU uses "text" or "content" depending on version
+                        text = (block.get("text") or block.get("content") or "").strip()
+                        if not text:
+                            continue  # skip image / formula blocks
+                        # MinerU uses "page_idx" or "page_index" depending on version
+                        page_idx = int(block.get("page_idx", block.get("page_index", -1)))
+                        bbox = block.get("bbox")  # [x1,y1,x2,y2] normalized, may be None
+                        # chunk_id = SHA1("{stem}:{order}")[:16], embedded as [chunk:ID] in input/*.txt
+                        chunk_id = hashlib.sha1(
+                            f"{source_stem}:{order}".encode()
+                        ).hexdigest()[:16]
+                        chunks.append(
+                            {
+                                "chunk_id": chunk_id,
+                                "text": text,
+                                "page_index": page_idx,
+                                "order": order,
+                                "bbox": bbox,
+                                "source_stem": source_stem,
+                            }
+                        )
+                        order += 1  # order counts only the blocks that produced text
+                    if chunks:
+                        return chunks
+                except Exception as e:
+                    log.debug(
+                        "[SourceManager] content_list.json parse failed for %s: %s",
+                        source_stem,
+                        e,
+                    )
+
+        # 2) MinerU markdown fallback — sliding window, estimated page_index
+        mineru_md = self.get_mineru_md(source_stem)
+        if mineru_md.strip():
+            chunks = self._split_text_to_chunks(
+                mineru_md, source_stem, chunk_size, chunk_overlap, estimate_pages=True
+            )
+            if chunks:
+                return chunks
+
+        # 3) Unified markdown fallback — no page info (Word/PPT/TXT)
+        md = self.get_markdown(source_stem)
+        if md.strip():
+            return self._split_text_to_chunks(
+                md, source_stem, chunk_size, chunk_overlap, estimate_pages=False
+            )
+
+        return []
+
     def get_all_markdowns(self) -> List[Tuple[str, str]]:
         """Return [(stem, markdown_text), ...] for all sources."""
         results: List[Tuple[str, str]] = []
@@ -393,3 +475,49 @@ def _find_in_sources(self, source_stem: str, subdir: str, pattern: str) -> str:
             except Exception:
                 continue
         return ""
+
+    @staticmethod
+    def _split_text_to_chunks(
+        text: str,
+        source_stem: str,
+        chunk_size: int,
+        chunk_overlap: int,
+        estimate_pages: bool,
+    ) -> List[Dict[str, Any]]:
+        """Sliding-window character chunking fallback when content_list is unavailable.
+
+        Windows are ``chunk_size`` chars long and start every ``max(1, chunk_size - chunk_overlap)``
+        chars; chunking stops once a window reaches the end of the text, so no chunk merely repeats
+        the tail of the previous one.  estimate_pages=True roughly estimates page_index at
+        ~2000 chars/page (otherwise -1).  Returns [] for blank text or a non-positive chunk_size.
+        """
+        chunks: List[Dict[str, Any]] = []
+        text = text.strip()
+        if not text or chunk_size <= 0:
+            return chunks
+
+        total_chars = len(text)
+        step = max(1, chunk_size - chunk_overlap)
+        chars_per_page = 2000  # rough estimate: ~2000 chars per page
+        order = 0
+        for pos in range(0, total_chars, step):
+            end = min(pos + chunk_size, total_chars)
+            snippet = text[pos:end].strip()
+            if snippet:
+                chunks.append(
+                    {
+                        "chunk_id": hashlib.sha1(f"{source_stem}:{order}".encode()).hexdigest()[:16],
+                        "text": snippet,
+                        "page_index": pos // chars_per_page if estimate_pages else -1,
+                        "order": order,
+                        "bbox": None,
+                        "source_stem": source_stem,
+                    }
+                )
+                order += 1
+            if end >= total_chars:
+                # This window already reached the end of the text: every later window would be
+                # a pure suffix of it, i.e. a duplicate chunk with no new content.
+                break
+
+        return chunks
diff --git a/fastapi_app/workflow_adapters/wa_graphrag_kb.py b/fastapi_app/workflow_adapters/wa_graphrag_kb.py
new file mode 100644
index 0000000..f15cdbf
--- /dev/null
+++ b/fastapi_app/workflow_adapters/wa_graphrag_kb.py
@@ -0,0 +1,152 @@
+"""GraphRAG KB 管线的工作流适配层。
+
+【职责】
+    在 FastAPI 路由（Pydantic 请求体）与 ``wf_graphrag_kb``（``GraphRAGKBState`` 数据类）之间做转换，
+    统一调用 ``run_workflow("graphrag_kb", state)``，再从 ``agent_results`` / ``temp_data.errors`` 取结果。
+
+【数据流】
+    ``run_index`` / ``run_query`` / ``run_merge`` → 组装 ``GraphRAGKBRequest.action`` →
+    ``GraphRAGKBState`` → LangGraph 执行 → 成功则返回对应 ``agent_results`` 字典；失败则 ``RuntimeError``（携带首条错误信息）。
+
+【约定】
+    与 ``wa_paper2ppt.py`` 类似：``_workflow_outcome`` 兼容 LangGraph 返回 dataclass 或 dict。
+"""
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional, Tuple
+
+from workflow_engine.logger import get_logger
+from workflow_engine.workflow import run_workflow
+from workflow_engine.workflow.wf_graphrag_kb import GraphRAGKBRequest, GraphRAGKBState
+
+log = get_logger(__name__)
+
+
+def _workflow_outcome(state: Any) -> Tuple[Dict[str, Any], Optional[list]]:
+    """Normalise a finished workflow state into ``(agent_results, errors)``; accepts both the dict and the dataclass return form."""
+    if isinstance(state, dict):
+        td = state.get("temp_data")
+        td = td if isinstance(td, dict) else {}  # tolerate a missing or non-dict temp_data
+        errors = td.get("errors")
+        ar = state.get("agent_results")
+        ar = ar if isinstance(ar, dict) else {}
+        return ar, errors
+    td = getattr(state, "temp_data", None)  # non-dict states are read through attributes instead of keys
+    td = td if isinstance(td, dict) else {}
+    errors = td.get("errors")
+    ar = getattr(state, "agent_results", None)
+    ar = ar if isinstance(ar, dict) else {}
+    return ar, errors
+
+
+# ---------------------------------------------------------------------------
+# Public adapter functions (called by routers)
+# ---------------------------------------------------------------------------
+
+async def run_index(
+    *,
+    notebook_id: str,
+    notebook_title: str,
+    email: str,
+    api_url: str,
+    api_key: str,
+    model: str,
+    source_stems: Optional[List[str]] = None,
+    workspace_dir: str = "",
+    force_reindex: bool = False,
+    parse_pdfs: bool = True,
+    skip_kggen: bool = True,
+) -> Dict[str, Any]:
+    """Run the indexing workflow; return ``agent_results["index"]`` (``{}`` if absent) or raise ``RuntimeError`` with the first workflow error."""
+    req = GraphRAGKBRequest(
+        action="index",
+        notebook_id=notebook_id,
+        notebook_title=notebook_title,
+        email=email,
+        chat_api_url=api_url,
+        api_key=api_key,
+        model=model,
+        source_stems=source_stems or [],  # ``None`` becomes an empty list
+        workspace_dir=workspace_dir,
+        force_reindex=force_reindex,
+        parse_pdfs=parse_pdfs,
+        skip_kggen=skip_kggen,
+    )
+    state = GraphRAGKBState(request=req)
+    state = await run_workflow("graphrag_kb", state)
+
+    agent_results, errors = _workflow_outcome(state)
+    if errors:
+        raise RuntimeError(f"Indexing failed: {errors[0]}")
+
+    return agent_results.get("index", {})
+
+
+async def run_query(
+    *,
+    notebook_id: str,
+    notebook_title: str,
+    email: str,
+    api_url: str,
+    api_key: str,
+    model: str,
+    question: str,
+    search_method: str = "local",
+    workspace_dir: str = "",
+) -> Dict[str, Any]:
+    """Run the query workflow; return ``agent_results["query"]`` (``{}`` if absent) or raise ``RuntimeError`` with the first workflow error."""
+    req = GraphRAGKBRequest(
+        action="query",
+        notebook_id=notebook_id,
+        notebook_title=notebook_title,
+        email=email,
+        chat_api_url=api_url,
+        api_key=api_key,
+        model=model,
+        question=question,
+        search_method=search_method,
+        workspace_dir=workspace_dir,
+    )
+    state = GraphRAGKBState(request=req)
+    state = await run_workflow("graphrag_kb", state)
+
+    agent_results, errors = _workflow_outcome(state)
+    if errors:
+        raise RuntimeError(f"Query failed: {errors[0]}")
+
+    return agent_results.get("query", {})
+
+
+async def run_merge(
+    *,
+    notebook_id: str,
+    notebook_title: str,
+    email: str,
+    api_url: str,
+    api_key: str,
+    model: str,
+    workspace_dir_a: str,
+    workspace_dir_b: str,
+    dedupe: bool = False,
+) -> Dict[str, Any]:
+    """Run the merge workflow; return ``agent_results["merge"]`` (``{}`` if absent) or raise ``RuntimeError`` with the first workflow error."""
+    req = GraphRAGKBRequest(
+        action="merge",
+        notebook_id=notebook_id,
+        notebook_title=notebook_title,
+        email=email,
+        chat_api_url=api_url,
+        api_key=api_key,
+        model=model,
+        workspace_dir=workspace_dir_a,  # workspace A travels in the request's generic ``workspace_dir`` field
+        workspace_dir_b=workspace_dir_b,
+        dedupe=dedupe,
+    )
+    state = GraphRAGKBState(request=req)
+    state = await run_workflow("graphrag_kb", state)
+
+    agent_results, errors = _workflow_outcome(state)
+    if errors:
+        raise RuntimeError(f"Merge failed: {errors[0]}")
+
+    return agent_results.get("merge", {})
diff --git a/frontend_en/package-lock.json b/frontend_en/package-lock.json
index 873bc08..df09701 100644
--- a/frontend_en/package-lock.json
+++ b/frontend_en/package-lock.json
@@ -19,6 +19,7 @@
         "react-dom": "^18.2.0",
         "react-markdown": "^9.1.0",
         "react-pdf": "^10.3.0",
+        "rehype-raw": "^7.0.0",
         "tailwind-merge": "^2.0.0",
         "zustand": "^4.4.7"
       },
@@ -2632,6 +2633,17 @@
       "integrity": "sha512-f/ZeWvW/BCXbhGEf1Ujp29EASo/lk1FDnETgNKwJrsVvGZhUWCZyg3xLJjAsxfOmt8KjswHmI5EwCQcPMpOYhQ==",
       "license": "EPL-2.0"
     },
+    "node_modules/entities": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz",
+      "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==",
+      "engines": {
+        "node": ">=0.12"
+      },
+      "funding": {
+        "url": "https://github.com/fb55/entities?sponsor=1"
+      }
+    },
     "node_modules/esbuild": {
       "version": "0.21.5",
       "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz",
@@ -2849,6 +2861,71 @@
         "node": ">= 0.4"
       }
     },
+    "node_modules/hast-util-from-parse5": {
+      "version": "8.0.3",
+      "resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.3.tgz",
+      "integrity": "sha512-3kxEVkEKt0zvcZ3hCRYI8rqrgwtlIOFMWkbclACvjlDw8Li9S2hk/d51OI0nr/gIpdMHNepwgOKqZ/sy0Clpyg==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "@types/unist": "^3.0.0",
+        "devlop": "^1.0.0",
+        "hastscript": "^9.0.0",
+        "property-information": "^7.0.0",
+        "vfile": "^6.0.0",
+        "vfile-location": "^5.0.0",
+        "web-namespaces": "^2.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
+    "node_modules/hast-util-from-parse5/node_modules/@types/unist": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
+      "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="
+    },
+    "node_modules/hast-util-parse-selector": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz",
+      "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==",
+      "dependencies": {
+        "@types/hast": "^3.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
+    "node_modules/hast-util-raw": {
+      "version": "9.1.0",
+      "resolved": "https://registry.npmjs.org/hast-util-raw/-/hast-util-raw-9.1.0.tgz",
+      "integrity": "sha512-Y8/SBAHkZGoNkpzqqfCldijcuUKh7/su31kEBp67cFY09Wy0mTRgtsLYsiIxMJxlu0f6AA5SUTbDR8K0rxnbUw==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "@types/unist": "^3.0.0",
+        "@ungap/structured-clone": "^1.0.0",
+        "hast-util-from-parse5": "^8.0.0",
+        "hast-util-to-parse5": "^8.0.0",
+        "html-void-elements": "^3.0.0",
+        "mdast-util-to-hast": "^13.0.0",
+        "parse5": "^7.0.0",
+        "unist-util-position": "^5.0.0",
+        "unist-util-visit": "^5.0.0",
+        "vfile": "^6.0.0",
+        "web-namespaces": "^2.0.0",
+        "zwitch": "^2.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
+    "node_modules/hast-util-raw/node_modules/@types/unist": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
+      "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="
+    },
     "node_modules/hast-util-to-jsx-runtime": {
       "version": "2.3.6",
       "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz",
@@ -2882,6 +2959,24 @@
       "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==",
       "license": "MIT"
     },
+    "node_modules/hast-util-to-parse5": {
+      "version": "8.0.1",
+      "resolved": "https://registry.npmjs.org/hast-util-to-parse5/-/hast-util-to-parse5-8.0.1.tgz",
+      "integrity": "sha512-MlWT6Pjt4CG9lFCjiz4BH7l9wmrMkfkJYCxFwKQic8+RTZgWPuWxwAfjJElsXkex7DJjfSJsQIt931ilUgmwdA==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "comma-separated-tokens": "^2.0.0",
+        "devlop": "^1.0.0",
+        "property-information": "^7.0.0",
+        "space-separated-tokens": "^2.0.0",
+        "web-namespaces": "^2.0.0",
+        "zwitch": "^2.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
     "node_modules/hast-util-whitespace": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz",
@@ -2895,6 +2990,22 @@
         "url": "https://opencollective.com/unified"
       }
     },
+    "node_modules/hastscript": {
+      "version": "9.0.1",
+      "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-9.0.1.tgz",
+      "integrity": "sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "comma-separated-tokens": "^2.0.0",
+        "hast-util-parse-selector": "^4.0.0",
+        "property-information": "^7.0.0",
+        "space-separated-tokens": "^2.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
     "node_modules/html-url-attributes": {
       "version": "3.0.1",
       "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz",
@@ -2905,6 +3016,15 @@
         "url": "https://opencollective.com/unified"
       }
     },
+    "node_modules/html-void-elements": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz",
+      "integrity": "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==",
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/wooorm"
+      }
+    },
     "node_modules/iceberg-js": {
       "version": "0.8.1",
       "resolved": "https://registry.npmjs.org/iceberg-js/-/iceberg-js-0.8.1.tgz",
@@ -5773,6 +5893,17 @@
         "url": "https://github.com/sponsors/wooorm"
       }
     },
+    "node_modules/parse5": {
+      "version": "7.3.0",
+      "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz",
+      "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==",
+      "dependencies": {
+        "entities": "^6.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/inikulin/parse5?sponsor=1"
+      }
+    },
     "node_modules/path-parse": {
       "version": "1.0.7",
       "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
@@ -6149,6 +6280,20 @@
         "node": ">=8.10.0"
       }
     },
+    "node_modules/rehype-raw": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/rehype-raw/-/rehype-raw-7.0.0.tgz",
+      "integrity": "sha512-/aE8hCfKlQeA8LmyeyQvQF3eBiLRGNlfBJEvWH7ivp9sBqs7TNqBL5X3v157rM4IFETqDnIOO+z5M/biZbo9Ww==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "hast-util-raw": "^9.0.0",
+        "vfile": "^6.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
     "node_modules/remark-parse": {
       "version": "11.0.0",
       "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz",
@@ -7361,6 +7506,24 @@
         "url": "https://opencollective.com/unified"
       }
     },
+    "node_modules/vfile-location": {
+      "version": "5.0.3",
+      "resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.3.tgz",
+      "integrity": "sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==",
+      "dependencies": {
+        "@types/unist": "^3.0.0",
+        "vfile": "^6.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
+    "node_modules/vfile-location/node_modules/@types/unist": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
+      "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="
+    },
     "node_modules/vfile-message": {
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz",
@@ -7469,6 +7632,15 @@
         "loose-envify": "^1.0.0"
       }
     },
+    "node_modules/web-namespaces": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz",
+      "integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==",
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/wooorm"
+      }
+    },
     "node_modules/web-worker": {
       "version": "1.5.0",
       "resolved": "https://registry.npmjs.org/web-worker/-/web-worker-1.5.0.tgz",
diff --git a/frontend_en/package.json b/frontend_en/package.json
index b6d9db4..d2b2fac 100644
--- a/frontend_en/package.json
+++ b/frontend_en/package.json
@@ -20,6 +20,7 @@
     "react-dom": "^18.2.0",
     "react-markdown": "^9.1.0",
     "react-pdf": "^10.3.0",
+    "rehype-raw": "^7.0.0",
     "tailwind-merge": "^2.0.0",
     "zustand": "^4.4.7"
   },
diff --git a/frontend_en/src/components/graphrag-kb/GraphRAGKbPanel.tsx b/frontend_en/src/components/graphrag-kb/GraphRAGKbPanel.tsx
new file mode 100644
index 0000000..98b148d
--- /dev/null
+++ b/frontend_en/src/components/graphrag-kb/GraphRAGKbPanel.tsx
@@ -0,0 +1,757 @@
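+/**
+ * GraphRAG knowledge-base side panel UI: index building, Local/Global query, reasoning-subgraph display, source-location cards, and workspace merging.
+ *
+ * Data flow: user action → ``graphragKbService`` → backend pipeline → ``queryResult`` state;
+ * "Open in knowledge base" hands sourceStem, chunkId and workspaceDir to NotebookView through the ``onOpenGraphragSource`` callback, which drives the reader and its highlighting.
+ */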
+import React, { useCallback, useEffect, useMemo, useState } from 'react';
+import ReactMarkdown from 'react-markdown';
+import { Loader2, Copy, Download, ChevronDown, ChevronRight, Network, ExternalLink } from 'lucide-react';
+import { getApiSettings } from '../../services/apiSettingsService';
+import {
+  indexGraphragKb,
+  queryGraphragKb,
+  mergeGraphragKb,
+  defaultGraphragModel,
+} from '../../services/graphragKbService';
+import type { QueryResponse, GraphragWorkspacePersist } from '../../types/graphragKb';
+import { MermaidPreview } from '../knowledge-base/tools/MermaidPreview';
+
+const KNOWN_HINT_KEYS = ['page', 'page_num', 'bbox', 'sentence', 'text', 'chunk_id', 'source', 'file', 'file_name'];
+
+/** Payload passed along when linking to the reader (source_stem corresponds to the file-name stem in the knowledge base). */
+export type GraphragOpenSourcePayload = {
+  sourceStem: string;
+  pageIndex: number;
+  chunkId?: string;
+  /** Root directory of the current notebook's GraphRAG workspace; used to fetch the ``[chunk:…]`` source text for highlighting. */
+  workspaceDir?: string;
+};
+
+function _parsePageIndex(v: unknown): number | undefined {
+  // Numbers pass through as-is; strings go through base-10 parseInt.
+  const candidate =
+    typeof v === 'number' ? v : typeof v === 'string' ? parseInt(v, 10) : undefined;
+  // NaN (from either path) and non-number/non-string inputs yield undefined.
+  if (candidate === undefined || Number.isNaN(candidate)) return undefined;
+  return candidate;
+}
+
+function getWorkspaceStorageKey(userId: string, notebookId: string): string {
+  return 'graphrag_workspace_'.concat(userId, '_', notebookId); // key shape: graphrag_workspace_<userId>_<notebookId>
+}
+
+function sanitizeMermaidLabel(s: string, max = 48): string {
+  return s.replace(/["[\]#]/g, ' ').slice(0, max).trim() || '?'; // blank out " [ ] #, cap at `max` chars, trim; '?' when nothing is left
+}
+
+/** Convert reasoning_subgraph edges into a Mermaid `graph LR` (edge count is capped to avoid UI lag). */
+export function reasoningSubgraphToMermaid(edges: Array<Record<string, unknown>>, maxEdges = 36): string | null {
+  if (edges.length === 0) return null;
+  // One `N<k>` id per distinct raw node name, assigned in first-seen order.
+  const nodeIds = new Map<string, string>();
+  let nextId = 0;
+  const idFor = (raw: string): string => {
+    const key = raw || `_${nextId}`;
+    let id = nodeIds.get(key);
+    if (id === undefined) {
+      id = `N${nextId++}`;
+      nodeIds.set(key, id);
+    }
+    return id;
+  };
+  const body = edges.slice(0, maxEdges).map((edge, i) => {
+    // Each field is read from the first alias that is present (not null/undefined).
+    const src = String(edge.source ?? edge.src ?? edge.from ?? edge.head ?? `s${i}`);
+    const tgt = String(edge.target ?? edge.tgt ?? edge.to ?? edge.tail ?? `t${i}`);
+    const rel = String(edge.relation ?? edge.relationship ?? edge.label ?? edge.predicate ?? '');
+    // Resolve the source id before the target id so numbering follows first-seen order.
+    const sid = idFor(src);
+    const tid = idFor(tgt);
+    const rl = sanitizeMermaidLabel(rel, 24);
+    return `  ${sid}["${sanitizeMermaidLabel(src)}"] -->|"${rl}"| ${tid}["${sanitizeMermaidLabel(tgt)}"]`;
+  });
+  return ['graph LR', ...body].join('\n');
+}
+
+const STR = { // UI string table, one entry per locale; the panel reads it as STR[locale]
+  zh: { // Chinese strings
+    headerTitle: 'GraphRAG 知识库',
+    headerSub: '分块（MinerU）+ GraphRAG 建索引与检索（用户路径不含 KGGen）',
+    apiWarn: '请先在设置中配置 API URL 与 API Key',
+    noNotebook: '缺少笔记本 ID',
+    indexBtn: '构建索引',
+    indexing: '索引构建中…',
+    indexOk: '索引构建完成',
+    forceReindex: '强制重建',
+    parsePdfs: '解析 PDF（MinerU）',
+    summary: '上次索引摘要',
+    chunks: '分块数',
+    workspace: '工作区目录',
+    copy: '复制',
+    copied: '已复制',
+    queryQ: '问题',
+    queryPlaceholder: '输入要问的问题…',
+    searchLocal: 'Local',
+    searchGlobal: 'Global',
+    queryBtn: '查询',
+    querying: '查询中…',
+    answer: '回答',
+    judge: 'Judge 分数',
+    rationale: '说明',
+    subgraph: '推理子图',
+    viewTable: '表格',
+    viewMermaid: 'Mermaid',
+    viewJson: 'JSON',
+    noSubgraph: '无子图数据',
+    subgraphCot: '最小子图推理（CoT / 跳数）',
+    hintDoc: '文档',
+    hintPage: '页码',
+    hintBbox: '区域框',
+    openInKb: '在知识库中打开',
+    hints: '文档定位',
+    context: 'context_data（体积可能较大）',
+    downloadJson: '下载 JSON',
+    copyJson: '复制 JSON',
+    mergeTitle: '合并工作区',
+    mergeA: 'workspace_dir A',
+    mergeB: 'workspace_dir B',
+    dedupe: '去重合并',
+    mergeBtn: '合并并重建索引',
+    merging: '合并中…',
+    mergeOk: '合并完成',
+    modelLabel: 'LLM 模型名',
+    mermaidTitle: '子图（Mermaid）',
+    copyFailed: '复制失败',
+  },
+  en: { // English strings; same keys as `zh`
+    headerTitle: 'GraphRAG Knowledge Base',
+    headerSub: 'Chunking (MinerU) + GraphRAG index & query (no KGGen on the default path)',
+    apiWarn: 'Configure API URL and API Key in Settings first',
+    noNotebook: 'Notebook ID is missing',
+    indexBtn: 'Build index',
+    indexing: 'Indexing…',
+    indexOk: 'Index completed',
+    forceReindex: 'Force reindex',
+    parsePdfs: 'Parse PDFs (MinerU)',
+    summary: 'Last index summary',
+    chunks: 'Chunks',
+    workspace: 'Workspace directory',
+    copy: 'Copy',
+    copied: 'Copied',
+    queryQ: 'Question',
+    queryPlaceholder: 'Ask a question…',
+    searchLocal: 'Local',
+    searchGlobal: 'Global',
+    queryBtn: 'Query',
+    querying: 'Querying…',
+    answer: 'Answer',
+    judge: 'Judge score',
+    rationale: 'Rationale',
+    subgraph: 'Reasoning subgraph',
+    viewTable: 'Table',
+    viewMermaid: 'Mermaid',
+    viewJson: 'JSON',
+    noSubgraph: 'No subgraph',
+    subgraphCot: 'Minimal subgraph reasoning (CoT / hops)',
+    hintDoc: 'Document',
+    hintPage: 'Page',
+    hintBbox: 'BBox',
+    openInKb: 'Open in knowledge base',
+    hints: 'Source location',
+    context: 'context_data (may be large)',
+    downloadJson: 'Download JSON',
+    copyJson: 'Copy JSON',
+    mergeTitle: 'Merge workspaces',
+    mergeA: 'workspace_dir A',
+    mergeB: 'workspace_dir B',
+    dedupe: 'Deduplicate when merging',
+    mergeBtn: 'Merge and re-index',
+    merging: 'Merging…',
+    mergeOk: 'Merge completed',
+    modelLabel: 'LLM model',
+    mermaidTitle: 'Subgraph (Mermaid)',
+    copyFailed: 'Copy failed',
+  },
+} as const;
+
+export interface GraphRAGKbPanelProps {
+  notebook: { id?: string; title?: string; name?: string };
+  userId: string | null; // when null, the panel uses 'global' in its localStorage key
+  email: string;
+  locale?: 'zh' | 'en';
+  showToast: (message: string, type?: 'success' | 'error' | 'warning') => void;
+  /** Open the matching source in the notebook sidebar and show its MinerU-parsed content (file name matched by stem). */
+  onOpenGraphragSource?: (payload: GraphragOpenSourcePayload) => void | Promise<void>;
+}
+
+export function GraphRAGKbPanel({
+  notebook,
+  userId,
+  email,
+  locale = 'zh',
+  showToast,
+  onOpenGraphragSource,
+}: GraphRAGKbPanelProps) {
+  const L = STR[locale];
+  const notebookId = notebook?.id || '';
+  const notebookTitle = notebook?.title || notebook?.name || '';
+
+  const [persist, setPersist] = useState<GraphragWorkspacePersist | null>(null);
+  const [forceReindex, setForceReindex] = useState(false);
+  const [parsePdfs, setParsePdfs] = useState(true);
+  const [indexLoading, setIndexLoading] = useState(false);
+  const [modelName, setModelName] = useState(defaultGraphragModel());
+
+  const [question, setQuestion] = useState('');
+  const [searchMethod, setSearchMethod] = useState<'local' | 'global'>('local');
+  const [queryLoading, setQueryLoading] = useState(false);
+  const [queryResult, setQueryResult] = useState<QueryResponse | null>(null);
+  const [subView, setSubView] = useState<'table' | 'mermaid' | 'json'>('table');
+  const [contextOpen, setContextOpen] = useState(false);
+
+  const [mergeA, setMergeA] = useState('');
+  const [mergeB, setMergeB] = useState('');
+  const [mergeDedupe, setMergeDedupe] = useState(false);
+  const [mergeLoading, setMergeLoading] = useState(false);
+
+  const storageKey = useMemo(() => {
+    const uid = userId || 'global';
+    if (!notebookId) return null;
+    return getWorkspaceStorageKey(uid, notebookId);
+  }, [userId, notebookId]);
+
+  const loadPersist = useCallback(() => {
+    if (!storageKey) {
+      setPersist(null);
+      return;
+    }
+    try {
+      const raw = localStorage.getItem(storageKey);
+      if (!raw) {
+        setPersist(null);
+        return;
+      }
+      const p = JSON.parse(raw) as GraphragWorkspacePersist;
+      if (p?.workspace_dir) setPersist(p);
+      else setPersist(null);
+    } catch {
+      setPersist(null);
+    }
+  }, [storageKey]);
+
+  useEffect(() => {
+    loadPersist();
+  }, [loadPersist]);
+
+  useEffect(() => {
+    if (persist?.workspace_dir) {
+      setMergeA((a) => (a ? a : persist.workspace_dir));
+    }
+  }, [persist?.workspace_dir]);
+
+  const llmBody = useCallback(() => {
+    const settings = getApiSettings(userId);
+    const api_url = settings?.apiUrl?.trim() || '';
+    const api_key = settings?.apiKey?.trim() || '';
+    const model = modelName.trim() || defaultGraphragModel();
+    return { api_url, api_key, model };
+  }, [userId, modelName]);
+
+  const copyText = async (text: string, okMsg?: string) => {
+    try {
+      await navigator.clipboard.writeText(text);
+      showToast(okMsg || L.copied, 'success');
+    } catch {
+      showToast(L.copyFailed, 'error');
+    }
+  };
+
+  const handleIndex = async () => {
+    if (!notebookId) {
+      showToast(L.noNotebook, 'warning');
+      return;
+    }
+    const { api_url, api_key, model } = llmBody();
+    if (!api_url || !api_key) {
+      showToast(L.apiWarn, 'warning');
+      return;
+    }
+    setIndexLoading(true);
+    try {
+      const res = await indexGraphragKb({
+        notebook_id: notebookId,
+        notebook_title: notebookTitle,
+        email: email || '',
+        api_url,
+        api_key,
+        model,
+        source_stems: null,
+        workspace_dir: persist?.workspace_dir || '',
+        force_reindex: forceReindex,
+        parse_pdfs: parsePdfs,
+        skip_kggen: true,
+      });
+      const next: GraphragWorkspacePersist = {
+        workspace_dir: res.workspace_dir,
+        updatedAt: Date.now(),
+        num_chunks: res.num_chunks,
+      };
+      if (storageKey) {
+        localStorage.setItem(storageKey, JSON.stringify(next));
+      }
+      setPersist(next);
+      showToast(L.indexOk, 'success');
+    } catch (e: unknown) {
+      const msg = e instanceof Error ? e.message : String(e);
+      showToast(msg, 'error');
+    } finally {
+      setIndexLoading(false);
+    }
+  };
+
+  const handleQuery = async () => {
+    if (!notebookId) {
+      showToast(L.noNotebook, 'warning');
+      return;
+    }
+    const ws = persist?.workspace_dir?.trim();
+    if (!ws) {
+      showToast(locale === 'zh' ? '请先构建索引或确认已持久化 workspace_dir' : 'Build index first or set workspace_dir', 'warning');
+      return;
+    }
+    const q = question.trim();
+    if (!q) {
+      showToast(locale === 'zh' ? '请输入问题' : 'Enter a question', 'warning');
+      return;
+    }
+    const { api_url, api_key, model } = llmBody();
+    if (!api_url || !api_key) {
+      showToast(L.apiWarn, 'warning');
+      return;
+    }
+    setQueryLoading(true);
+    setQueryResult(null);
+    try {
+      const res = await queryGraphragKb({
+        notebook_id: notebookId,
+        notebook_title: notebookTitle,
+        email: email || '',
+        api_url,
+        api_key,
+        model,
+        question: q,
+        search_method: searchMethod,
+        workspace_dir: ws,
+      });
+      setQueryResult(res);
+    } catch (e: unknown) {
+      const msg = e instanceof Error ? e.message : String(e);
+      showToast(msg, 'error');
+    } finally {
+      setQueryLoading(false);
+    }
+  };
+
+  const handleMerge = async () => {
+    if (!notebookId) {
+      showToast(L.noNotebook, 'warning');
+      return;
+    }
+    const a = mergeA.trim();
+    const b = mergeB.trim();
+    if (!a || !b) {
+      showToast(locale === 'zh' ? '请填写两个 workspace 路径' : 'Enter both workspace paths', 'warning');
+      return;
+    }
+    const { api_url, api_key, model } = llmBody();
+    if (!api_url || !api_key) {
+      showToast(L.apiWarn, 'warning');
+      return;
+    }
+    setMergeLoading(true);
+    try {
+      const res = await mergeGraphragKb({
+        notebook_id: notebookId,
+        notebook_title: notebookTitle,
+        email: email || '',
+        api_url,
+        api_key,
+        model,
+        workspace_dir_a: a,
+        workspace_dir_b: b,
+        dedupe: mergeDedupe,
+      });
+      const next: GraphragWorkspacePersist = {
+        workspace_dir: res.merged_workspace_dir,
+        updatedAt: Date.now(),
+        num_chunks: res.num_chunks,
+      };
+      if (storageKey) localStorage.setItem(storageKey, JSON.stringify(next));
+      setPersist(next);
+      setMergeA(res.merged_workspace_dir);
+      showToast(L.mergeOk, 'success');
+    } catch (e: unknown) {
+      const msg = e instanceof Error ? e.message : String(e);
+      showToast(msg, 'error');
+    } finally {
+      setMergeLoading(false);
+    }
+  };
+
+  const mermaidCode = useMemo(() => {
+    if (!queryResult?.reasoning_subgraph?.length) return null;
+    return reasoningSubgraphToMermaid(queryResult.reasoning_subgraph);
+  }, [queryResult?.reasoning_subgraph]);
+
+  const contextJson = useMemo(() => {
+    if (!queryResult?.context_data) return '';
+    try {
+      return JSON.stringify(queryResult.context_data, null, 2);
+    } catch {
+      return '{}';
+    }
+  }, [queryResult?.context_data]);
+
+  const downloadContext = () => {
+    const blob = new Blob([contextJson], { type: 'application/json' });
+    const url = URL.createObjectURL(blob);
+    const a = document.createElement('a');
+    a.href = url;
+    a.download = `graphrag_context_${notebookId || 'nb'}.json`;
+    a.click();
+    URL.revokeObjectURL(url);
+  };
+
+  const judgePct = queryResult ? Math.round(Math.max(0, Math.min(1, queryResult.judge_score)) * 100) : 0;
+
+  return (
+    <main className="flex-1 flex flex-col relative bg-white min-w-[300px] overflow-hidden">
+      <div className="flex items-center gap-2 px-6 py-3 border-b border-ios-gray-100 shrink-0">
+        <Network className="text-cyan-600" size={20} />
+        <div>
+          <div className="text-sm font-medium text-ios-gray-900">{L.headerTitle}</div>
+          <div className="text-xs text-ios-gray-400">{L.headerSub}</div>
+        </div>
+      </div>
+
+      <div className="flex-1 overflow-y-auto p-6 space-y-8 max-w-[960px] w-full mx-auto">
+        <section className="rounded-2xl border border-ios-gray-100 bg-ios-gray-50/40 p-4 space-y-3">
+          <h3 className="text-sm font-semibold text-ios-gray-800">{L.indexBtn}</h3>
+          <div className="flex flex-wrap gap-4 items-center text-sm">
+            <label className="flex items-center gap-2 cursor-pointer">
+              <input type="checkbox" checked={forceReindex} onChange={(e) => setForceReindex(e.target.checked)} />
+              {L.forceReindex}
+            </label>
+            <label className="flex items-center gap-2 cursor-pointer">
+              <input type="checkbox" checked={parsePdfs} onChange={(e) => setParsePdfs(e.target.checked)} />
+              {L.parsePdfs}
+            </label>
+          </div>
+          <div>
+            <label className="block text-xs font-medium text-ios-gray-500 mb-1">{L.modelLabel}</label>
+            <input
+              value={modelName}
+              onChange={(e) => setModelName(e.target.value)}
+              className="w-full max-w-md px-3 py-2 border border-ios-gray-200 rounded-lg text-sm"
+              placeholder={defaultGraphragModel()}
+            />
+          </div>
+          <button
+            type="button"
+            disabled={indexLoading || !notebookId}
+            onClick={handleIndex}
+            className="inline-flex items-center gap-2 px-4 py-2 rounded-ios bg-slate-900 text-white text-sm font-medium disabled:opacity-50"
+          >
+            {indexLoading ? <Loader2 size={16} className="animate-spin" /> : null}
+            {indexLoading ? L.indexing : L.indexBtn}
+          </button>
+
+          {persist && (
+            <div className="mt-4 rounded-xl border border-ios-gray-200 bg-white p-3 text-xs space-y-2">
+              <div className="font-medium text-ios-gray-700">{L.summary}</div>
+              <div className="grid grid-cols-1 sm:grid-cols-2 gap-2 text-ios-gray-600">
+                <span>{L.chunks}: <b>{persist.num_chunks ?? '—'}</b></span>
+              </div>
+              <div className="flex items-start gap-2 break-all">
+                <span className="shrink-0 text-ios-gray-500">{L.workspace}:</span>
+                <code className="flex-1 text-[11px] bg-ios-gray-50 p-2 rounded">{persist.workspace_dir}</code>
+                <button
+                  type="button"
+                  onClick={() => copyText(persist.workspace_dir)}
+                  className="shrink-0 p-1.5 rounded border border-ios-gray-200 hover:bg-ios-gray-50"
+                  title={L.copy}
+                >
+                  <Copy size={14} />
+                </button>
+              </div>
+            </div>
+          )}
+        </section>
+
+        <section className="rounded-2xl border border-ios-gray-100 p-4 space-y-3">
+          <h3 className="text-sm font-semibold text-ios-gray-800">{L.queryBtn}</h3>
+          <div>
+            <label className="block text-xs font-medium text-ios-gray-500 mb-1">{L.queryQ}</label>
+            <textarea
+              value={question}
+              onChange={(e) => setQuestion(e.target.value)}
+              rows={3}
+              placeholder={L.queryPlaceholder}
+              className="w-full px-3 py-2 border border-ios-gray-200 rounded-lg text-sm"
+            />
+          </div>
+          <div className="flex flex-wrap gap-3 items-center">
+            <span className="text-xs text-ios-gray-500">search_method</span>
+            <select
+              value={searchMethod}
+              onChange={(e) => setSearchMethod(e.target.value as 'local' | 'global')}
+              className="px-3 py-2 border border-ios-gray-200 rounded-lg text-sm"
+            >
+              <option value="local">{L.searchLocal}</option>
+              <option value="global">{L.searchGlobal}</option>
+            </select>
+            <button
+              type="button"
+              disabled={queryLoading}
+              onClick={handleQuery}
+              className="inline-flex items-center gap-2 px-4 py-2 rounded-ios bg-primary text-white text-sm font-medium disabled:opacity-50"
+            >
+              {queryLoading ? <Loader2 size={16} className="animate-spin" /> : null}
+              {queryLoading ? L.querying : L.queryBtn}
+            </button>
+          </div>
+        </section>
+
+        {queryResult && (
+          <>
+            <section className="rounded-2xl border border-ios-gray-100 p-4 space-y-2">
+              <h3 className="text-sm font-semibold text-ios-gray-800">{L.answer}</h3>
+              <div className="prose prose-sm max-w-none text-ios-gray-800">
+                <ReactMarkdown>{queryResult.answer || '—'}</ReactMarkdown>
+              </div>
+              <div className="rounded-xl bg-sky-50 border border-sky-100 px-3 py-2 text-sm">
+                <div className="font-medium text-sky-900">{L.judge}: {judgePct}%</div>
+                {queryResult.judge_rationale ? (
+                  <div className="text-xs text-sky-800 mt-1 opacity-90">{queryResult.judge_rationale}</div>
+                ) : null}
+              </div>
+            </section>
+
+            <section className="rounded-2xl border border-ios-gray-100 p-4 space-y-3">
+              <h3 className="text-sm font-semibold text-ios-gray-800">{L.subgraph}</h3>
+              <div className="flex gap-2 text-xs">
+                {(['table', 'mermaid', 'json'] as const).map((v) => (
+                  <button
+                    key={v}
+                    type="button"
+                    onClick={() => setSubView(v)}
+                    className={`px-3 py-1.5 rounded-lg border ${subView === v ? 'border-primary bg-primary/10' : 'border-ios-gray-200'}`}
+                  >
+                    {v === 'table' ? L.viewTable : v === 'mermaid' ? L.viewMermaid : L.viewJson}
+                  </button>
+                ))}
+              </div>
+              {subView === 'table' && (
+                <div className="overflow-x-auto rounded-lg border border-ios-gray-200">
+                  {queryResult.reasoning_subgraph?.length ? (
+                    <table className="min-w-full text-xs">
+                      <thead className="bg-ios-gray-100">
+                        <tr>
+                          {['source', 'target', 'relation', 'weight'].map((col) => (
+                            <th key={col} className="px-2 py-2 text-left font-medium capitalize">{col}</th>
+                          ))}
+                        </tr>
+                      </thead>
+                      <tbody>
+                        {queryResult.reasoning_subgraph.map((row, i) => (
+                          <tr key={i} className="border-t border-ios-gray-100">
+                            <td className="px-2 py-1.5">{String(row.source ?? row.src ?? row.from ?? '')}</td>
+                            <td className="px-2 py-1.5">{String(row.target ?? row.tgt ?? row.to ?? '')}</td>
+                            <td className="px-2 py-1.5">{String(row.relation ?? row.relationship ?? row.label ?? '')}</td>
+                            <td className="px-2 py-1.5">{row.weight != null ? String(row.weight) : ''}</td>
+                          </tr>
+                        ))}
+                      </tbody>
+                    </table>
+                  ) : (
+                    <div className="p-4 text-sm text-ios-gray-500">{L.noSubgraph}</div>
+                  )}
+                </div>
+              )}
+              {subView === 'mermaid' && (
+                <div className="bg-slate-900 rounded-xl p-2">
+                  {mermaidCode ? (
+                    <MermaidPreview mermaidCode={mermaidCode} title={L.mermaidTitle} />
+                  ) : (
+                    <div className="text-sm text-gray-400 p-4">{L.noSubgraph}</div>
+                  )}
+                </div>
+              )}
+              {subView === 'json' && (
+                <pre className="text-xs bg-ios-gray-50 border border-ios-gray-200 rounded-lg p-3 max-h-80 overflow-auto whitespace-pre-wrap">
+                  {JSON.stringify(queryResult.reasoning_subgraph, null, 2)}
+                </pre>
+              )}
+              {queryResult.reasoning_subgraph_cot ? (
+                <details className="text-xs rounded-lg border border-ios-gray-100 bg-ios-gray-50/60 p-3 mt-2">
+                  <summary className="cursor-pointer font-medium text-ios-gray-700 select-none">
+                    {L.subgraphCot}
+                  </summary>
+                  <div className="mt-2 text-ios-gray-800 whitespace-pre-wrap break-words">
+                    <ReactMarkdown>{queryResult.reasoning_subgraph_cot}</ReactMarkdown>
+                  </div>
+                </details>
+              ) : null}
+            </section>
+
+            <section className="rounded-2xl border border-ios-gray-100 p-4 space-y-2">
+              <h3 className="text-sm font-semibold text-ios-gray-800">{L.hints}</h3>
+              <div className="space-y-2">
+                {(queryResult.highlight_hints || []).map((hint, i) => {
+                  const stem = hint.source_stem != null ? String(hint.source_stem).trim() : '';
+                  const pi = _parsePageIndex(hint.page_index);
+                  const cid = hint.chunk_id != null ? String(hint.chunk_id) : '';
+                  const bbox = hint.bbox;
+                  const hasStructured = stem || pi != null || bbox != null;
+                  return (
+                    <div key={i} className="rounded-lg border border-ios-gray-100 bg-ios-gray-50/80 p-2.5 text-xs space-y-1.5">
+                      {hasStructured ? (
+                        <>
+                          <div className="flex gap-2 flex-wrap">
+                            <span className="text-ios-gray-500 shrink-0">{L.hintDoc}</span>
+                            <span className="break-all font-medium">{stem || '—'}</span>
+                          </div>
+                          <div className="flex gap-2">
+                            <span className="text-ios-gray-500 shrink-0">{L.hintPage}</span>
+                            <span>
+                              {pi != null && pi >= 0
+                                ? locale === 'zh'
+                                  ? `第 ${pi + 1} 页`
+                                  : `Page ${pi + 1}`
+                                : '—'}
+                            </span>
+                          </div>
+                          {bbox != null && String(bbox) !== '' ? (
+                            <div className="flex gap-2 break-all">
+                              <span className="text-ios-gray-500 shrink-0">{L.hintBbox}</span>
+                              <span className="font-mono text-[11px]">
+                                {typeof bbox === 'object' ? JSON.stringify(bbox) : String(bbox)}
+                              </span>
+                            </div>
+                          ) : null}
+                          {onOpenGraphragSource && stem ? (
+                            <button
+                              type="button"
+                              onClick={() =>
+                                onOpenGraphragSource({
+                                  sourceStem: stem,
+                                  pageIndex: pi ?? -1,
+                                  chunkId: cid || undefined,
+                                  workspaceDir: persist?.workspace_dir,
+                                })
+                              }
+                              className="inline-flex items-center gap-1 text-[11px] font-medium text-primary hover:underline pt-1"
+                            >
+                              <ExternalLink size={12} />
+                              {L.openInKb}
+                            </button>
+                          ) : null}
+                        </>
+                      ) : (
+                        <>
+                          {KNOWN_HINT_KEYS.filter((k) => hint[k] != null && hint[k] !== '').map((k) => (
+                            <div key={k} className="flex gap-2 break-all">
+                              <span className="text-ios-gray-500 shrink-0">{k}:</span>
+                              <span>{typeof hint[k] === 'object' ? JSON.stringify(hint[k]) : String(hint[k])}</span>
+                            </div>
+                          ))}
+                          {KNOWN_HINT_KEYS.every((k) => hint[k] == null || hint[k] === '') && (
+                            <pre className="whitespace-pre-wrap">{JSON.stringify(hint, null, 2)}</pre>
+                          )}
+                        </>
+                      )}
+                    </div>
+                  );
+                })}
+                {!(queryResult.highlight_hints || []).length && (
+                  <div className="text-xs text-ios-gray-400">—</div>
+                )}
+              </div>
+            </section>
+
+            <section className="rounded-2xl border border-ios-gray-100 p-4">
+              <button
+                type="button"
+                onClick={() => setContextOpen(!contextOpen)}
+                className="flex items-center gap-2 text-sm font-semibold text-ios-gray-800 w-full text-left"
+              >
+                {contextOpen ? <ChevronDown size={18} /> : <ChevronRight size={18} />}
+                {L.context}
+              </button>
+              {contextOpen && (
+                <div className="mt-3 space-y-2">
+                  <div className="flex gap-2">
+                    <button
+                      type="button"
+                      onClick={downloadContext}
+                      className="inline-flex items-center gap-1 px-3 py-1.5 text-xs rounded-lg border border-ios-gray-200 hover:bg-ios-gray-50"
+                    >
+                      <Download size={14} /> {L.downloadJson}
+                    </button>
+                    <button
+                      type="button"
+                      onClick={() => copyText(contextJson)}
+                      className="inline-flex items-center gap-1 px-3 py-1.5 text-xs rounded-lg border border-ios-gray-200 hover:bg-ios-gray-50"
+                    >
+                      <Copy size={14} /> {L.copyJson}
+                    </button>
+                  </div>
+                  <pre className="text-[11px] bg-ios-gray-50 border border-ios-gray-200 rounded-lg p-3 max-h-64 overflow-auto whitespace-pre-wrap">
+                    {contextJson.slice(0, 120_000)}
+                    {contextJson.length > 120_000 ? '\n…' : ''}
+                  </pre>
+                </div>
+              )}
+            </section>
+          </>
+        )}
+
+        <section className="rounded-2xl border border-dashed border-ios-gray-200 p-4 space-y-3">
+          <h3 className="text-sm font-semibold text-ios-gray-800">{L.mergeTitle}</h3>
+          <div>
+            <label className="block text-xs text-ios-gray-500 mb-1">{L.mergeA}</label>
+            <textarea
+              value={mergeA}
+              onChange={(e) => setMergeA(e.target.value)}
+              rows={2}
+              className="w-full px-3 py-2 border border-ios-gray-200 rounded-lg text-xs font-mono"
+            />
+          </div>
+          <div>
+            <label className="block text-xs text-ios-gray-500 mb-1">{L.mergeB}</label>
+            <textarea
+              value={mergeB}
+              onChange={(e) => setMergeB(e.target.value)}
+              rows={2}
+              className="w-full px-3 py-2 border border-ios-gray-200 rounded-lg text-xs font-mono"
+            />
+          </div>
+          <label className="flex items-center gap-2 text-sm cursor-pointer">
+            <input type="checkbox" checked={mergeDedupe} onChange={(e) => setMergeDedupe(e.target.checked)} />
+            {L.dedupe}
+          </label>
+          <button
+            type="button"
+            disabled={mergeLoading}
+            onClick={handleMerge}
+            className="inline-flex items-center gap-2 px-4 py-2 rounded-ios border border-ios-gray-300 text-sm font-medium disabled:opacity-50"
+          >
+            {mergeLoading ? <Loader2 size={16} className="animate-spin" /> : null}
+            {mergeLoading ? L.merging : L.mergeBtn}
+          </button>
+        </section>
+      </div>
+    </main>
+  );
+}
diff --git a/frontend_en/src/config/api.ts b/frontend_en/src/config/api.ts
index 2918579..5d0b98b 100644
--- a/frontend_en/src/config/api.ts
+++ b/frontend_en/src/config/api.ts
@@ -21,6 +21,8 @@ function getApiBaseUrl(): string {
 
 export const API_BASE_URL = getApiBaseUrl();
 
+export const GRAPHRAG_KB_BASE = '/api/v1/graphrag-kb';
+
 // API key for backend authentication
 export const API_KEY = import.meta.env.VITE_API_KEY || 'df-internal-2024-workflow-key';
 
diff --git a/frontend_en/src/pages/NotebookView.tsx b/frontend_en/src/pages/NotebookView.tsx
index 1961f3e..945f615 100644
--- a/frontend_en/src/pages/NotebookView.tsx
+++ b/frontend_en/src/pages/NotebookView.tsx
@@ -1,11 +1,11 @@
-import React, { useState, useEffect } from 'react';
+import React, { useState, useEffect, useMemo } from 'react';
 import { motion, AnimatePresence } from 'framer-motion';
 import {
   ChevronLeft, Plus, Share2, Settings, MessageSquare,
   BarChart2, Zap, AudioLines, Video, FileText,
   Filter, MoreVertical, Search, Image as ImageIcon, FileStack, Sparkles,
   Mic2, Video as VideoIcon, BrainCircuit, Send, Bot, User, Loader2, Upload, X,
-  Globe, Link2, Cloud, ChevronRight, LayoutGrid, Download, BookOpen, Brain
+  Globe, Link2, Cloud, ChevronRight, LayoutGrid, Download, BookOpen, Brain, Network
 } from 'lucide-react';
 import { useAuthStore } from '../stores/authStore';
 import { apiFetch } from '../config/api';
@@ -13,6 +13,8 @@ import { getApiSettings } from '../services/apiSettingsService';
 import { fetchWithCache, invalidateCacheByPrefix } from '../services/clientCache';
 import type { KnowledgeFile, ChatMessage, ToolType } from '../types';
 import ReactMarkdown from 'react-markdown';
+import rehypeRaw from 'rehype-raw';
+import { injectGraphragHighlightInMarkdown } from '../utils/graphragMarkdownHighlight';
 import { MermaidPreview } from '../components/knowledge-base/tools/MermaidPreview';
 import { SettingsModal } from '../components/SettingsModal';
 import DrawioInlineEditor from '../components/DrawioInlineEditor';
@@ -20,6 +22,8 @@ import { FlashcardViewer } from '../components/flashcards/FlashcardViewer';
 import { QuizContainer } from '../components/quiz/QuizContainer';
 import { NotionEditor } from '../components/notes/NotionEditor';
 import { useToast } from '../hooks/useToast';
+import { GraphRAGKbPanel } from '../components/graphrag-kb/GraphRAGKbPanel';
+import { fetchGraphragChunkSnippet } from '../services/graphragKbService';
 import katex from 'katex';
 import 'katex/dist/katex.min.css';
 
@@ -40,6 +44,7 @@ type CitationReference = {
   preview?: string;
   chunkIndex?: number | null;
   sourceNumber?: string;
+  graphragHighlightText?: string;
 };
 
 type CitationTooltipState = {
@@ -253,6 +258,7 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
 
   // Studio tools
   const studioTools: Array<{icon: React.ReactNode, label: string, id: ToolType}> = [
+    { icon: <Network className="text-cyan-600" />, label: 'GraphRAG KB', id: 'graphrag_kb' },
     { icon: <ImageIcon className="text-orange-500" />, label: 'PPT生成', id: 'ppt' },
     { icon: <BrainCircuit className="text-purple-500" />, label: '思维导图', id: 'mindmap' },
     // DrawIO 图表功能暂时隐藏，后续修复
@@ -266,11 +272,12 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
   ];
 
   // Studio：每个功能卡片各自配置，点卡片上的「…」翻转进该卡片的设置
-  type StudioToolId = 'ppt' | 'mindmap' | 'drawio' | 'flashcard' | 'quiz' | 'podcast' | 'video' | 'note';
+  type StudioToolId = 'graphrag_kb' | 'ppt' | 'mindmap' | 'drawio' | 'flashcard' | 'quiz' | 'podcast' | 'video' | 'note';
   const [studioPanelView, setStudioPanelView] = useState<'tools' | 'settings'>('tools');
   const [studioSettingsTool, setStudioSettingsTool] = useState<StudioToolId | null>(null);
   const STORAGE_STUDIO_CONFIG = `kb_studio_config_${effectiveUser?.id || 'default'}`;
   const defaultByTool: Record<StudioToolId, Record<string, string>> = {
+    graphrag_kb: {},
     ppt: { llmModel: 'deepseek-v3.2', genFigModel: 'gemini-2.5-flash-image', stylePreset: 'modern', stylePrompt: '', language: 'zh', page_count: '10' },
     mindmap: { llmModel: 'deepseek-v3.2', mindmapStyle: 'default' },
     drawio: { llmModel: 'deepseek-v3.2', diagramType: 'auto', diagramStyle: 'default', language: 'zh' },
@@ -1127,6 +1134,17 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
     return () => window.clearTimeout(timer);
   }, [sourceDetailCitationFocus, sourceDetailLoading, sourceDetailContent]);
 
+  const sourceDetailMarkdownWithHighlight = useMemo(() => {
+    if (sourceDetailFormat !== 'markdown' || !sourceDetailContent) return sourceDetailContent;
+    const hl = sourceDetailCitationFocus?.graphragHighlightText?.trim();
+    if (!hl) return sourceDetailContent;
+    return injectGraphragHighlightInMarkdown(sourceDetailContent, hl);
+  }, [
+    sourceDetailFormat,
+    sourceDetailContent,
+    sourceDetailCitationFocus?.graphragHighlightText,
+  ]);
+
   const runFastResearch = async () => {
     if (!fastResearchQuery.trim()) return;
     const settings = getApiSettings(effectiveUser?.id || null);
@@ -2590,8 +2608,10 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
                     <span className="ml-2 text-sm text-gray-500">解析中…</span>
                   </div>
                 ) : sourceDetailFormat === 'markdown' && sourceDetailContent ? (
-                  <div className="prose prose-sm max-w-none text-gray-700 prose-p:text-xs prose-headings:text-sm prose-pre:text-xs">
-                    <ReactMarkdown>{sourceDetailContent}</ReactMarkdown>
+                  <div className="prose prose-sm max-w-none text-gray-700 prose-p:text-xs prose-headings:text-sm prose-pre:text-xs prose-mark:bg-amber-200/90 prose-mark:rounded prose-mark:px-0.5">
+                    <ReactMarkdown rehypePlugins={[rehypeRaw]}>
+                      {sourceDetailMarkdownWithHighlight ?? ''}
+                    </ReactMarkdown>
                   </div>
                 ) : (
                   <pre className="whitespace-pre-wrap text-xs text-gray-700 font-sans leading-relaxed break-words">
@@ -2646,6 +2666,42 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
               }}
             />
           </div>
+        ) : activeTool === 'graphrag_kb' ? (
+          <GraphRAGKbPanel
+            notebook={notebook}
+            userId={effectiveUser?.id || null}
+            email={effectiveUser?.email || effectiveUser?.id || ''}
+            locale="en"
+            showToast={showToast}
+            onOpenGraphragSource={async (p) => {
+              const stem = p.sourceStem.trim();
+              const match = files.find((f) => {
+                const n = f.name || '';
+                const base = n.replace(/\.[^.]+$/, '');
+                return base === stem || n === stem || n.startsWith(`${stem}.`);
+              });
+              if (!match) {
+                showToast(`No source file matching "${stem}"`, 'warning');
+                return;
+              }
+              let graphragHighlightText: string | undefined;
+              if (p.workspaceDir && p.chunkId) {
+                try {
+                  const sn = await fetchGraphragChunkSnippet(p.workspaceDir, p.chunkId);
+                  if (sn.found && sn.text?.trim()) graphragHighlightText = sn.text.trim();
+                } catch {
+                  /* still open full preview */
+                }
+              }
+              await openSourceDetail(match, {
+                fileName: match.name,
+                filePath: match.url,
+                preview: `GraphRAG · ${p.pageIndex >= 0 ? `Page ${p.pageIndex + 1}` : 'unknown page'}`,
+                sourceNumber: 'GR',
+                graphragHighlightText,
+              });
+            }}
+          />
         ) : (
         <main className="flex-1 flex flex-col relative bg-white min-w-[300px] overflow-hidden">
           <div className="flex items-center justify-between px-6 py-3 border-b border-ios-gray-100 shrink-0">
@@ -2822,6 +2878,7 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
                 返回
               </button>
               <h3 className="text-sm font-semibold text-gray-800 mb-3">
+                {studioSettingsTool === 'graphrag_kb' && 'GraphRAG KB'}
                 {studioSettingsTool === 'ppt' && 'PPT 生成'}
                 {studioSettingsTool === 'mindmap' && '思维导图'}
                 {studioSettingsTool === 'drawio' && 'DrawIO 图表'}
@@ -2831,6 +2888,9 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
                 {/* {studioSettingsTool === 'video' && '视频讲解'} */}
               </h3>
               <div className="space-y-4">
+                {studioSettingsTool === 'graphrag_kb' && (
+                  <p className="text-sm text-gray-600">Indexing, query, and merge options are in the center GraphRAG Knowledge Base panel.</p>
+                )}
                 {studioSettingsTool === 'ppt' && (() => {
                   const c = getStudioConfig('ppt');
                   return (
@@ -3201,7 +3261,7 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
                 </motion.div>
               ))}
             </div>
-            {activeTool !== 'chat' && activeTool !== 'search' && (
+            {activeTool !== 'chat' && activeTool !== 'search' && activeTool !== 'graphrag_kb' && (
               <motion.button
                 whileTap={{ scale: 0.97 }}
                 type="button"
diff --git a/frontend_en/src/services/graphragKbService.ts b/frontend_en/src/services/graphragKbService.ts
new file mode 100644
index 0000000..b750d41
--- /dev/null
+++ b/frontend_en/src/services/graphragKbService.ts
@@ -0,0 +1,84 @@
+/**
+ * GraphRAG 知识库前端 API 封装。
+ *
+ * 数据流：调用 ``/api/v1/graphrag-kb/*`` → FastAPI ``graphrag_kb`` 路由 → ``wa_graphrag_kb`` → ``wf_graphrag_kb``。
+ * ``fetchGraphragChunkSnippet`` 用于侧栏打开来源时，按 chunk_id 拉取 ``input/*.txt`` 内嵌段正文，供 NotebookView 在 Markdown 中高亮。
+ */
+import { apiFetch, GRAPHRAG_KB_BASE } from '../config/api';
+import type {
+  IndexRequest,
+  IndexResponse,
+  QueryRequest,
+  QueryResponse,
+  MergeRequest,
+  MergeResponse,
+} from '../types/graphragKb';
+
+const DEFAULT_LLM_MODEL = 'deepseek-v3.2';
+
+/** Best-effort message for a failed response: JSON `detail` (string or `{ msg }[]`), then `message`, then raw text / status. */
+async function parseErrorDetail(res: Response): Promise<string> {
+  const raw = await res.text().catch(() => ''); // read the body ONCE: json() followed by text() rejects (stream consumed)
+  try {
+    const { detail: d, message } = JSON.parse(raw) ?? {};
+    if (typeof d === 'string') return d;
+    if (Array.isArray(d)) return d.map((x: { msg?: string }) => x?.msg || String(x)).join('; ');
+    return message || `HTTP ${res.status}`;
+  } catch {
+    return raw || `HTTP ${res.status}`;
+  }
+}
+
+export function defaultGraphragModel(): string {
+  return DEFAULT_LLM_MODEL; // module-level default model id ('deepseek-v3.2')
+}
+
+/** POST *body* as JSON to `${GRAPHRAG_KB_BASE}/index`; throws an Error built from `parseErrorDetail` when `ok` is false. */
+export async function indexGraphragKb(body: IndexRequest): Promise<IndexResponse> {
+  const payload = JSON.stringify(body);
+  const response = await apiFetch(`${GRAPHRAG_KB_BASE}/index`, {
+    method: 'POST', headers: { 'Content-Type': 'application/json' }, body: payload,
+  });
+  if (!response.ok) throw new Error(await parseErrorDetail(response));
+  return (await response.json()) as IndexResponse;
+}
+
+/** POST *body* as JSON to `${GRAPHRAG_KB_BASE}/query`; throws an Error built from `parseErrorDetail` when `ok` is false. */
+export async function queryGraphragKb(body: QueryRequest): Promise<QueryResponse> {
+  const payload = JSON.stringify(body);
+  const response = await apiFetch(`${GRAPHRAG_KB_BASE}/query`, {
+    method: 'POST', headers: { 'Content-Type': 'application/json' }, body: payload,
+  });
+  if (!response.ok) throw new Error(await parseErrorDetail(response));
+  return (await response.json()) as QueryResponse;
+}
+
+/** POST *body* as JSON to `${GRAPHRAG_KB_BASE}/merge`; throws an Error built from `parseErrorDetail` when `ok` is false. */
+export async function mergeGraphragKb(body: MergeRequest): Promise<MergeResponse> {
+  const payload = JSON.stringify(body);
+  const response = await apiFetch(`${GRAPHRAG_KB_BASE}/merge`, {
+    method: 'POST', headers: { 'Content-Type': 'application/json' }, body: payload,
+  });
+  if (!response.ok) throw new Error(await parseErrorDetail(response));
+  return (await response.json()) as MergeResponse;
+}
+
+export interface ChunkSnippetResponse { // JSON returned by POST `${GRAPHRAG_KB_BASE}/chunk-snippet`
+  text: string; // chunk body text; NotebookView uses it as highlight text only when `found` is true and it is non-blank
+  source_stem: string; // NOTE(review): presumably the stem of the source file the chunk belongs to — confirm against the router
+  found: boolean; // NOTE(review): presumably false when the chunk id could not be resolved — confirm against the router
+}
+
+/** Fetch the body text that ``[chunk:id]`` maps to in the GraphRAG workspace ``input/*.txt`` (for reader highlighting; not the full MinerU MD). */
+export async function fetchGraphragChunkSnippet(
+  workspaceDir: string,
+  chunkId: string
+): Promise<ChunkSnippetResponse> {
+  // The request body uses snake_case keys, so the camelCase arguments are mapped explicitly.
+  const payload = JSON.stringify({ workspace_dir: workspaceDir, chunk_id: chunkId });
+  const response = await apiFetch(`${GRAPHRAG_KB_BASE}/chunk-snippet`, {
+    method: 'POST', headers: { 'Content-Type': 'application/json' }, body: payload,
+  });
+  if (!response.ok) throw new Error(await parseErrorDetail(response));
+  return (await response.json()) as ChunkSnippetResponse;
+}
diff --git a/frontend_en/src/types/graphragKb.ts b/frontend_en/src/types/graphragKb.ts
new file mode 100644
index 0000000..a42c2a8
--- /dev/null
+++ b/frontend_en/src/types/graphragKb.ts
@@ -0,0 +1,77 @@
+/**
+ * GraphRAG KB 前后端 JSON 契约（与 ``fastapi_app/routers/graphrag_kb.py`` 一致，字段 snake_case）。
+ *
+ * - Index*：建索引请求/响应（workspace_dir、分块数、可选 KGGen 统计）。
+ * - Query*：查询响应含 answer、context_data、推理子图、source_chunks、highlight_hints、Judge、子图 CoT。
+ * - Merge*：两工作区合并后的路径与 chunk 数。
+ * - GraphragWorkspacePersist：前端 localStorage 持久化的上次索引摘要。
+ */
+
+export interface IndexRequest { // JSON body that indexGraphragKb POSTs to `/index`
+  notebook_id: string;
+  notebook_title?: string;
+  email?: string;
+  api_url: string; // NOTE(review): presumably the LLM endpoint the server should call — confirm
+  api_key: string; // NOTE(review): presumably the credential for `api_url` — confirm
+  model: string;
+  source_stems?: string[] | null; // NOTE(review): presumably limits indexing to these sources; null/omitted = all — confirm
+  workspace_dir?: string;
+  force_reindex?: boolean;
+  parse_pdfs?: boolean;
+  /** Default true: server skips KGGen; set false only for internal experiments. */
+  skip_kggen?: boolean;
+}
+
+export interface IndexResponse { // JSON returned by the index endpoint
+  workspace_dir: string; // workspace path; QueryRequest / MergeRequest take such a path as input
+  num_chunks: number; // number of chunks in the index
+  kg_entities: number; // KGGen statistic (the file header describes the KGGen stats as optional)
+  kg_relations: number; // KGGen statistic (the file header describes the KGGen stats as optional)
+}
+
+export interface QueryRequest { // JSON body that queryGraphragKb POSTs to `/query`
+  notebook_id: string;
+  notebook_title?: string;
+  email?: string;
+  api_url: string; // NOTE(review): presumably the LLM endpoint the server should call — confirm
+  api_key: string; // NOTE(review): presumably the credential for `api_url` — confirm
+  model: string;
+  question: string;
+  search_method: 'local' | 'global'; // only these two literals are accepted by the type
+  workspace_dir: string; // required here, unlike IndexRequest where it is optional
+}
+
+export interface QueryResponse { // JSON returned by the query endpoint
+  answer: string;
+  context_data: Record<string, unknown>;
+  reasoning_subgraph: Array<Record<string, unknown>>; // edge rows; the panel reads source|src|from, target|tgt|to, relation|relationship|label, weight
+  source_chunks: string[];
+  highlight_hints: Array<Record<string, unknown>>; // loosely typed; the panel reads source_stem, page_index, chunk_id, bbox when present
+  judge_score: number; // Judge output: score
+  judge_rationale: string; // Judge output: rationale text
+  /** LLM chain-of-thought for minimal subgraph selection (hop analysis) */
+  reasoning_subgraph_cot?: string;
+}
+
+export interface MergeRequest { // JSON body that mergeGraphragKb POSTs to `/merge`
+  notebook_id?: string; // optional here, required in IndexRequest / QueryRequest
+  notebook_title?: string;
+  email?: string;
+  api_url: string; // NOTE(review): presumably the LLM endpoint the server should call — confirm
+  api_key: string; // NOTE(review): presumably the credential for `api_url` — confirm
+  model: string;
+  workspace_dir_a: string; // first workspace to merge
+  workspace_dir_b: string; // second workspace to merge
+  dedupe?: boolean; // NOTE(review): presumably de-duplicates chunks while merging — confirm against the backend
+}
+
+export interface MergeResponse { // JSON returned by the merge endpoint
+  merged_workspace_dir: string; // path of the workspace produced by merging the two inputs
+  num_chunks: number; // chunk count after the merge
+}
+
+export interface GraphragWorkspacePersist { // summary of the last index run, persisted by the frontend in localStorage
+  workspace_dir: string; // the panel forwards this as `workspaceDir` when opening a source
+  updatedAt: number; // NOTE(review): presumably an epoch-milliseconds timestamp — confirm at the write site
+  num_chunks?: number;
+}
diff --git a/frontend_en/src/types/index.ts b/frontend_en/src/types/index.ts
index 517d293..d42ba31 100644
--- a/frontend_en/src/types/index.ts
+++ b/frontend_en/src/types/index.ts
@@ -1,3 +1,13 @@
+export type {
+  IndexRequest as GraphragIndexRequest,
+  IndexResponse as GraphragIndexResponse,
+  QueryRequest as GraphragQueryRequest,
+  QueryResponse as GraphragQueryResponse,
+  MergeRequest as GraphragMergeRequest,
+  MergeResponse as GraphragMergeResponse,
+  GraphragWorkspacePersist,
+} from './graphragKb';
+
 // Knowledge Base Types
 export type MaterialType = 'image' | 'doc' | 'video' | 'link' | 'audio';
 
@@ -34,4 +44,4 @@ export interface ChatMessage {
 }
 
 export type SectionType = 'library' | 'upload' | 'output' | 'settings';
-export type ToolType = 'chat' | 'ppt' | 'mindmap' | 'podcast' | 'video' | 'search' | 'drawio' | 'flashcard' | 'quiz' | 'note';
+export type ToolType = 'chat' | 'ppt' | 'mindmap' | 'podcast' | 'video' | 'search' | 'drawio' | 'flashcard' | 'quiz' | 'note' | 'graphrag_kb';
diff --git a/frontend_en/src/utils/graphragMarkdownHighlight.ts b/frontend_en/src/utils/graphragMarkdownHighlight.ts
new file mode 100644
index 0000000..f18997b
--- /dev/null
+++ b/frontend_en/src/utils/graphragMarkdownHighlight.ts
@@ -0,0 +1,20 @@
+/**
+ * Wrap the first occurrence of *snippet* in the Markdown source *full* with ``<mark>`` so that react-markdown +
+ * rehype-raw render it as an inline highlight. Data flow: ``fetchGraphragChunkSnippet`` yields the chunk text →
+ * NotebookView passes the full Markdown and the snippet → this function injects the HTML → ReactMarkdown shows it.
+ * Content is treated as trusted (local MinerU / index files); the trimmed snippet must match *full* verbatim, else *full* is returned as-is.
+ */
+export function injectGraphragHighlightInMarkdown(full: string, snippet: string): string {
+  const needle = snippet.trim();
+  const start = full && needle ? full.indexOf(needle) : -1;
+  if (start < 0) {
+    return full; // empty input or no verbatim match: leave the Markdown untouched
+  }
+  const escaped = needle.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+  const openTag =
+    '<mark class="bg-amber-200/90 rounded px-0.5 ring-1 ring-amber-300/60" data-graphrag-hl="1">';
+  const before = full.slice(0, start);
+  const after = full.slice(start + needle.length);
+  // Only the wrapped text is HTML-escaped; the surrounding Markdown is passed through byte-for-byte.
+  return `${before}${openTag}${escaped}</mark>${after}`;
+}
diff --git a/frontend_en/src/vite-env.d.ts b/frontend_en/src/vite-env.d.ts
index 4c5ff97..8d2ba4c 100644
--- a/frontend_en/src/vite-env.d.ts
+++ b/frontend_en/src/vite-env.d.ts
@@ -1,5 +1,7 @@
 /// <reference types="vite/client" />
 
+declare module 'rehype-raw';
+
 interface ImportMetaEnv {
   readonly VITE_SUPABASE_URL?: string
   readonly VITE_SUPABASE_ANON_KEY?: string
diff --git a/frontend_en/vite.config.ts b/frontend_en/vite.config.ts
index b322e8c..0330e8d 100644
--- a/frontend_en/vite.config.ts
+++ b/frontend_en/vite.config.ts
@@ -9,11 +9,11 @@ export default defineConfig({
     allowedHosts: true,
     proxy: {
       '/api': {
-        target: 'http://localhost:8213',
+        target: 'http://localhost:8212',
         changeOrigin: true,
       },
       '/outputs': {
-        target: 'http://localhost:8213',
+        target: 'http://localhost:8212',
         changeOrigin: true,
       },
     },
diff --git a/frontend_zh/package-lock.json b/frontend_zh/package-lock.json
index 873bc08..df09701 100644
--- a/frontend_zh/package-lock.json
+++ b/frontend_zh/package-lock.json
@@ -19,6 +19,7 @@
         "react-dom": "^18.2.0",
         "react-markdown": "^9.1.0",
         "react-pdf": "^10.3.0",
+        "rehype-raw": "^7.0.0",
         "tailwind-merge": "^2.0.0",
         "zustand": "^4.4.7"
       },
@@ -2632,6 +2633,17 @@
       "integrity": "sha512-f/ZeWvW/BCXbhGEf1Ujp29EASo/lk1FDnETgNKwJrsVvGZhUWCZyg3xLJjAsxfOmt8KjswHmI5EwCQcPMpOYhQ==",
       "license": "EPL-2.0"
     },
+    "node_modules/entities": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz",
+      "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==",
+      "engines": {
+        "node": ">=0.12"
+      },
+      "funding": {
+        "url": "https://github.com/fb55/entities?sponsor=1"
+      }
+    },
     "node_modules/esbuild": {
       "version": "0.21.5",
       "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz",
@@ -2849,6 +2861,71 @@
         "node": ">= 0.4"
       }
     },
+    "node_modules/hast-util-from-parse5": {
+      "version": "8.0.3",
+      "resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.3.tgz",
+      "integrity": "sha512-3kxEVkEKt0zvcZ3hCRYI8rqrgwtlIOFMWkbclACvjlDw8Li9S2hk/d51OI0nr/gIpdMHNepwgOKqZ/sy0Clpyg==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "@types/unist": "^3.0.0",
+        "devlop": "^1.0.0",
+        "hastscript": "^9.0.0",
+        "property-information": "^7.0.0",
+        "vfile": "^6.0.0",
+        "vfile-location": "^5.0.0",
+        "web-namespaces": "^2.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
+    "node_modules/hast-util-from-parse5/node_modules/@types/unist": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
+      "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="
+    },
+    "node_modules/hast-util-parse-selector": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz",
+      "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==",
+      "dependencies": {
+        "@types/hast": "^3.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
+    "node_modules/hast-util-raw": {
+      "version": "9.1.0",
+      "resolved": "https://registry.npmjs.org/hast-util-raw/-/hast-util-raw-9.1.0.tgz",
+      "integrity": "sha512-Y8/SBAHkZGoNkpzqqfCldijcuUKh7/su31kEBp67cFY09Wy0mTRgtsLYsiIxMJxlu0f6AA5SUTbDR8K0rxnbUw==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "@types/unist": "^3.0.0",
+        "@ungap/structured-clone": "^1.0.0",
+        "hast-util-from-parse5": "^8.0.0",
+        "hast-util-to-parse5": "^8.0.0",
+        "html-void-elements": "^3.0.0",
+        "mdast-util-to-hast": "^13.0.0",
+        "parse5": "^7.0.0",
+        "unist-util-position": "^5.0.0",
+        "unist-util-visit": "^5.0.0",
+        "vfile": "^6.0.0",
+        "web-namespaces": "^2.0.0",
+        "zwitch": "^2.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
+    "node_modules/hast-util-raw/node_modules/@types/unist": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
+      "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="
+    },
     "node_modules/hast-util-to-jsx-runtime": {
       "version": "2.3.6",
       "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz",
@@ -2882,6 +2959,24 @@
       "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==",
       "license": "MIT"
     },
+    "node_modules/hast-util-to-parse5": {
+      "version": "8.0.1",
+      "resolved": "https://registry.npmjs.org/hast-util-to-parse5/-/hast-util-to-parse5-8.0.1.tgz",
+      "integrity": "sha512-MlWT6Pjt4CG9lFCjiz4BH7l9wmrMkfkJYCxFwKQic8+RTZgWPuWxwAfjJElsXkex7DJjfSJsQIt931ilUgmwdA==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "comma-separated-tokens": "^2.0.0",
+        "devlop": "^1.0.0",
+        "property-information": "^7.0.0",
+        "space-separated-tokens": "^2.0.0",
+        "web-namespaces": "^2.0.0",
+        "zwitch": "^2.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
     "node_modules/hast-util-whitespace": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz",
@@ -2895,6 +2990,22 @@
         "url": "https://opencollective.com/unified"
       }
     },
+    "node_modules/hastscript": {
+      "version": "9.0.1",
+      "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-9.0.1.tgz",
+      "integrity": "sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "comma-separated-tokens": "^2.0.0",
+        "hast-util-parse-selector": "^4.0.0",
+        "property-information": "^7.0.0",
+        "space-separated-tokens": "^2.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
     "node_modules/html-url-attributes": {
       "version": "3.0.1",
       "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz",
@@ -2905,6 +3016,15 @@
         "url": "https://opencollective.com/unified"
       }
     },
+    "node_modules/html-void-elements": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz",
+      "integrity": "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==",
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/wooorm"
+      }
+    },
     "node_modules/iceberg-js": {
       "version": "0.8.1",
       "resolved": "https://registry.npmjs.org/iceberg-js/-/iceberg-js-0.8.1.tgz",
@@ -5773,6 +5893,17 @@
         "url": "https://github.com/sponsors/wooorm"
       }
     },
+    "node_modules/parse5": {
+      "version": "7.3.0",
+      "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz",
+      "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==",
+      "dependencies": {
+        "entities": "^6.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/inikulin/parse5?sponsor=1"
+      }
+    },
     "node_modules/path-parse": {
       "version": "1.0.7",
       "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
@@ -6149,6 +6280,20 @@
         "node": ">=8.10.0"
       }
     },
+    "node_modules/rehype-raw": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/rehype-raw/-/rehype-raw-7.0.0.tgz",
+      "integrity": "sha512-/aE8hCfKlQeA8LmyeyQvQF3eBiLRGNlfBJEvWH7ivp9sBqs7TNqBL5X3v157rM4IFETqDnIOO+z5M/biZbo9Ww==",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "hast-util-raw": "^9.0.0",
+        "vfile": "^6.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
     "node_modules/remark-parse": {
       "version": "11.0.0",
       "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz",
@@ -7361,6 +7506,24 @@
         "url": "https://opencollective.com/unified"
       }
     },
+    "node_modules/vfile-location": {
+      "version": "5.0.3",
+      "resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.3.tgz",
+      "integrity": "sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==",
+      "dependencies": {
+        "@types/unist": "^3.0.0",
+        "vfile": "^6.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
+    "node_modules/vfile-location/node_modules/@types/unist": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
+      "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="
+    },
     "node_modules/vfile-message": {
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz",
@@ -7469,6 +7632,15 @@
         "loose-envify": "^1.0.0"
       }
     },
+    "node_modules/web-namespaces": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz",
+      "integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==",
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/wooorm"
+      }
+    },
     "node_modules/web-worker": {
       "version": "1.5.0",
       "resolved": "https://registry.npmjs.org/web-worker/-/web-worker-1.5.0.tgz",
diff --git a/frontend_zh/package.json b/frontend_zh/package.json
index b6d9db4..d2b2fac 100644
--- a/frontend_zh/package.json
+++ b/frontend_zh/package.json
@@ -20,6 +20,7 @@
     "react-dom": "^18.2.0",
     "react-markdown": "^9.1.0",
     "react-pdf": "^10.3.0",
+    "rehype-raw": "^7.0.0",
     "tailwind-merge": "^2.0.0",
     "zustand": "^4.4.7"
   },
diff --git a/frontend_zh/src/components/graphrag-kb/GraphRAGKbPanel.tsx b/frontend_zh/src/components/graphrag-kb/GraphRAGKbPanel.tsx
new file mode 100644
index 0000000..b229886
--- /dev/null
+++ b/frontend_zh/src/components/graphrag-kb/GraphRAGKbPanel.tsx
@@ -0,0 +1,760 @@
+/**
+ * GraphRAG 知识库侧栏 UI：索引构建、Local/Global 查询、推理子图展示、文档定位卡片、合并工作区。
+ *
+ * 数据流：用户操作 → ``graphragKbService`` → 后端管线 → ``queryResult`` 状态；
+ * 「在知识库中打开」通过 ``onOpenGraphragSource`` 回调把 sourceStem、chunkId、workspaceDir 交给 NotebookView，联动阅读器与高亮。
+ */
+import React, { useCallback, useEffect, useMemo, useState } from 'react';
+import ReactMarkdown from 'react-markdown';
+import { Loader2, Copy, Download, ChevronDown, ChevronRight, Network, ExternalLink } from 'lucide-react';
+import { getApiSettings } from '../../services/apiSettingsService';
+import {
+  indexGraphragKb,
+  queryGraphragKb,
+  mergeGraphragKb,
+  defaultGraphragModel,
+} from '../../services/graphragKbService';
+import type { QueryResponse, GraphragWorkspacePersist } from '../../types/graphragKb';
+import { MermaidPreview } from '../knowledge-base/tools/MermaidPreview';
+
+const KNOWN_HINT_KEYS = ['page', 'page_num', 'bbox', 'sentence', 'text', 'chunk_id', 'source', 'file', 'file_name']; // hint fields listed as "key: value" rows when a hint carries no stem / page index / bbox
+
+/** Payload handed over when linking to the reader (sourceStem corresponds to the file-name stem of the document in the knowledge base). */
+export type GraphragOpenSourcePayload = {
+  sourceStem: string;
+  pageIndex: number; // this panel sends -1 when the hint has no usable page index
+  chunkId?: string;
+  /** Root directory of the current notebook's GraphRAG workspace, used to fetch the ``[chunk:…]`` source text for highlighting. */
+  workspaceDir?: string;
+  /** reasoning_subgraph triples, passed to the backend so the LLM can extract the most relevant original sentences from the chunk. */
+  triples?: Array<Record<string, unknown>>;
+};
+
+/** Coerce a raw page index (number or base-10 numeric string) to a number; undefined when it is neither or parses to NaN. */
+function _parsePageIndex(v: unknown): number | undefined {
+  if (typeof v === 'string') {
+    const parsed = parseInt(v, 10);
+    return Number.isNaN(parsed) ? undefined : parsed;
+  }
+  return typeof v === 'number' && !Number.isNaN(v) ? v : undefined;
+}
+
+function getWorkspaceStorageKey(userId: string, notebookId: string) {
+  return 'graphrag_workspace_' + userId + '_' + notebookId; // localStorage key, one per user + notebook pair
+}
+
+function sanitizeMermaidLabel(s: string, max = 48): string { // text safe inside a quoted Mermaid label; '?' when nothing is left
+  return Array.from(s.replace(/["[\]#]/g, ' ').trim()).slice(0, max).join('').trim() || '?'; // trim BEFORE cutting; cut by code point, not UTF-16 unit
+}
+
+/**
+ * Render reasoning_subgraph edges as Mermaid `graph LR` source, or null when `edges` is empty.
+ * Only the first `maxEdges` edges are emitted (the cap avoids UI lag on large subgraphs).
+ * Each distinct endpoint string gets a generated node id (N0, N1, …); visible labels are sanitized.
+ */
+export function reasoningSubgraphToMermaid(edges: Array<Record<string, unknown>>, maxEdges = 36): string | null {
+  if (edges.length === 0) return null;
+  const nodeIds = new Map<string, string>();
+  let counter = 0;
+  const nodeIdOf = (raw: string): string => {
+    const key = raw || `_${counter}`;
+    let id = nodeIds.get(key);
+    if (id === undefined) {
+      id = `N${counter++}`;
+      nodeIds.set(key, id);
+    }
+    return id;
+  };
+  const edgeLines = edges.slice(0, maxEdges).map((edge, i) => {
+    const from = String(edge.source ?? edge.src ?? edge.from ?? edge.head ?? `s${i}`);
+    const to = String(edge.target ?? edge.tgt ?? edge.to ?? edge.tail ?? `t${i}`);
+    const relation = String(edge.relation ?? edge.relationship ?? edge.label ?? edge.predicate ?? '');
+    const fromId = nodeIdOf(from); // source id is allocated before target id, which fixes the N-numbering
+    const toId = nodeIdOf(to);
+    return `  ${fromId}["${sanitizeMermaidLabel(from)}"] -->|"${sanitizeMermaidLabel(relation, 24)}"| ${toId}["${sanitizeMermaidLabel(to)}"]`;
+  });
+  return ['graph LR'].concat(edgeLines).join('\n');
+}
+
+const STR = { // UI copy keyed by locale; the zh and en tables define the same set of keys
+  zh: { // Simplified Chinese
+    headerTitle: 'GraphRAG 知识库',
+    headerSub: '分块（MinerU）+ GraphRAG 建索引与检索（用户路径不含 KGGen）',
+    apiWarn: '请先在设置中配置 API URL 与 API Key',
+    noNotebook: '缺少笔记本 ID',
+    indexBtn: '构建索引',
+    indexing: '索引构建中…',
+    indexOk: '索引构建完成',
+    forceReindex: '强制重建',
+    parsePdfs: '解析 PDF（MinerU）',
+    summary: '上次索引摘要',
+    chunks: '分块数',
+    workspace: '工作区目录',
+    copy: '复制',
+    copied: '已复制',
+    queryQ: '问题',
+    queryPlaceholder: '输入要问的问题…',
+    searchLocal: 'Local',
+    searchGlobal: 'Global',
+    queryBtn: '查询',
+    querying: '查询中…',
+    answer: '回答',
+    judge: 'Judge 分数',
+    rationale: '说明',
+    subgraph: '推理子图',
+    viewTable: '表格',
+    viewMermaid: 'Mermaid',
+    viewJson: 'JSON',
+    noSubgraph: '无子图数据',
+    subgraphCot: '最小子图推理（CoT / 跳数）',
+    hintDoc: '文档',
+    hintPage: '页码',
+    hintBbox: '区域框',
+    openInKb: '在知识库中打开',
+    hints: '文档定位',
+    context: 'context_data（体积可能较大）',
+    downloadJson: '下载 JSON',
+    copyJson: '复制 JSON',
+    mergeTitle: '合并工作区',
+    mergeA: 'workspace_dir A',
+    mergeB: 'workspace_dir B',
+    dedupe: '去重合并',
+    mergeBtn: '合并并重建索引',
+    merging: '合并中…',
+    mergeOk: '合并完成',
+    modelLabel: 'LLM 模型名',
+    mermaidTitle: '子图（Mermaid）',
+    copyFailed: '复制失败',
+  },
+  en: { // English
+    headerTitle: 'GraphRAG Knowledge Base',
+    headerSub: 'Chunking (MinerU) + GraphRAG index & query (no KGGen on the default path)',
+    apiWarn: 'Configure API URL and API Key in Settings first',
+    noNotebook: 'Notebook ID is missing',
+    indexBtn: 'Build index',
+    indexing: 'Indexing…',
+    indexOk: 'Index completed',
+    forceReindex: 'Force reindex',
+    parsePdfs: 'Parse PDFs (MinerU)',
+    summary: 'Last index summary',
+    chunks: 'Chunks',
+    workspace: 'Workspace directory',
+    copy: 'Copy',
+    copied: 'Copied',
+    queryQ: 'Question',
+    queryPlaceholder: 'Ask a question…',
+    searchLocal: 'Local',
+    searchGlobal: 'Global',
+    queryBtn: 'Query',
+    querying: 'Querying…',
+    answer: 'Answer',
+    judge: 'Judge score',
+    rationale: 'Rationale',
+    subgraph: 'Reasoning subgraph',
+    viewTable: 'Table',
+    viewMermaid: 'Mermaid',
+    viewJson: 'JSON',
+    noSubgraph: 'No subgraph',
+    subgraphCot: 'Minimal subgraph reasoning (CoT / hops)',
+    hintDoc: 'Document',
+    hintPage: 'Page',
+    hintBbox: 'BBox',
+    openInKb: 'Open in knowledge base',
+    hints: 'Source location',
+    context: 'context_data (may be large)',
+    downloadJson: 'Download JSON',
+    copyJson: 'Copy JSON',
+    mergeTitle: 'Merge workspaces',
+    mergeA: 'workspace_dir A',
+    mergeB: 'workspace_dir B',
+    dedupe: 'Deduplicate when merging',
+    mergeBtn: 'Merge and re-index',
+    merging: 'Merging…',
+    mergeOk: 'Merge completed',
+    modelLabel: 'LLM model',
+    mermaidTitle: 'Subgraph (Mermaid)',
+    copyFailed: 'Copy failed',
+  },
+} as const;
+
+export interface GraphRAGKbPanelProps {
+  notebook: { id?: string; title?: string; name?: string }; // title wins over name when both are set
+  userId: string | null; // null falls back to the 'global' storage-key namespace
+  email: string;
+  locale?: 'zh' | 'en'; // the component defaults this to 'zh'
+  showToast: (message: string, type?: 'success' | 'error' | 'warning') => void;
+  /** Open the matching source in the notebook sidebar and show its MinerU-parsed content (file name matched by stem). */
+  onOpenGraphragSource?: (payload: GraphragOpenSourcePayload) => void | Promise<void>;
+}
+
+export function GraphRAGKbPanel({
+  notebook,
+  userId,
+  email,
+  locale = 'zh',
+  showToast,
+  onOpenGraphragSource,
+}: GraphRAGKbPanelProps) {
+  const L = STR[locale];
+  const notebookId = notebook?.id || '';
+  const notebookTitle = notebook?.title || notebook?.name || '';
+
+  const [persist, setPersist] = useState<GraphragWorkspacePersist | null>(null);
+  const [forceReindex, setForceReindex] = useState(false);
+  const [parsePdfs, setParsePdfs] = useState(true);
+  const [indexLoading, setIndexLoading] = useState(false);
+  const [modelName, setModelName] = useState(defaultGraphragModel());
+
+  const [question, setQuestion] = useState('');
+  const [searchMethod, setSearchMethod] = useState<'local' | 'global'>('local');
+  const [queryLoading, setQueryLoading] = useState(false);
+  const [queryResult, setQueryResult] = useState<QueryResponse | null>(null);
+  const [subView, setSubView] = useState<'table' | 'mermaid' | 'json'>('table');
+  const [contextOpen, setContextOpen] = useState(false);
+
+  const [mergeA, setMergeA] = useState('');
+  const [mergeB, setMergeB] = useState('');
+  const [mergeDedupe, setMergeDedupe] = useState(false);
+  const [mergeLoading, setMergeLoading] = useState(false);
+
+  const storageKey = useMemo(() => {
+    const uid = userId || 'global';
+    if (!notebookId) return null;
+    return getWorkspaceStorageKey(uid, notebookId);
+  }, [userId, notebookId]);
+
+  const loadPersist = useCallback(() => {
+    if (!storageKey) {
+      setPersist(null);
+      return;
+    }
+    try {
+      const raw = localStorage.getItem(storageKey);
+      if (!raw) {
+        setPersist(null);
+        return;
+      }
+      const p = JSON.parse(raw) as GraphragWorkspacePersist;
+      if (p?.workspace_dir) setPersist(p);
+      else setPersist(null);
+    } catch {
+      setPersist(null);
+    }
+  }, [storageKey]);
+
+  useEffect(() => {
+    loadPersist();
+  }, [loadPersist]);
+
+  useEffect(() => {
+    if (persist?.workspace_dir) {
+      setMergeA((a) => (a ? a : persist.workspace_dir));
+    }
+  }, [persist?.workspace_dir]);
+
+  const llmBody = useCallback(() => {
+    const settings = getApiSettings(userId);
+    const api_url = settings?.apiUrl?.trim() || '';
+    const api_key = settings?.apiKey?.trim() || '';
+    const model = modelName.trim() || defaultGraphragModel();
+    return { api_url, api_key, model };
+  }, [userId, modelName]);
+
+  const copyText = async (text: string, okMsg?: string) => {
+    try {
+      await navigator.clipboard.writeText(text);
+      showToast(okMsg || L.copied, 'success');
+    } catch {
+      showToast(L.copyFailed, 'error');
+    }
+  };
+
+  const handleIndex = async () => {
+    if (!notebookId) {
+      showToast(L.noNotebook, 'warning');
+      return;
+    }
+    const { api_url, api_key, model } = llmBody();
+    if (!api_url || !api_key) {
+      showToast(L.apiWarn, 'warning');
+      return;
+    }
+    setIndexLoading(true);
+    try {
+      const res = await indexGraphragKb({
+        notebook_id: notebookId,
+        notebook_title: notebookTitle,
+        email: email || '',
+        api_url,
+        api_key,
+        model,
+        source_stems: null,
+        workspace_dir: persist?.workspace_dir || '',
+        force_reindex: forceReindex,
+        parse_pdfs: parsePdfs,
+        skip_kggen: true,
+      });
+      const next: GraphragWorkspacePersist = {
+        workspace_dir: res.workspace_dir,
+        updatedAt: Date.now(),
+        num_chunks: res.num_chunks,
+      };
+      if (storageKey) {
+        localStorage.setItem(storageKey, JSON.stringify(next));
+      }
+      setPersist(next);
+      showToast(L.indexOk, 'success');
+    } catch (e: unknown) {
+      const msg = e instanceof Error ? e.message : String(e);
+      showToast(msg, 'error');
+    } finally {
+      setIndexLoading(false);
+    }
+  };
+
+  const handleQuery = async () => {
+    if (!notebookId) {
+      showToast(L.noNotebook, 'warning');
+      return;
+    }
+    const ws = persist?.workspace_dir?.trim();
+    if (!ws) {
+      showToast(locale === 'zh' ? '请先构建索引或确认已持久化 workspace_dir' : 'Build index first or set workspace_dir', 'warning');
+      return;
+    }
+    const q = question.trim();
+    if (!q) {
+      showToast(locale === 'zh' ? '请输入问题' : 'Enter a question', 'warning');
+      return;
+    }
+    const { api_url, api_key, model } = llmBody();
+    if (!api_url || !api_key) {
+      showToast(L.apiWarn, 'warning');
+      return;
+    }
+    setQueryLoading(true);
+    setQueryResult(null);
+    try {
+      const res = await queryGraphragKb({
+        notebook_id: notebookId,
+        notebook_title: notebookTitle,
+        email: email || '',
+        api_url,
+        api_key,
+        model,
+        question: q,
+        search_method: searchMethod,
+        workspace_dir: ws,
+      });
+      setQueryResult(res);
+    } catch (e: unknown) {
+      const msg = e instanceof Error ? e.message : String(e);
+      showToast(msg, 'error');
+    } finally {
+      setQueryLoading(false);
+    }
+  };
+
+  const handleMerge = async () => {
+    if (!notebookId) {
+      showToast(L.noNotebook, 'warning');
+      return;
+    }
+    const a = mergeA.trim();
+    const b = mergeB.trim();
+    if (!a || !b) {
+      showToast(locale === 'zh' ? '请填写两个 workspace 路径' : 'Enter both workspace paths', 'warning');
+      return;
+    }
+    const { api_url, api_key, model } = llmBody();
+    if (!api_url || !api_key) {
+      showToast(L.apiWarn, 'warning');
+      return;
+    }
+    setMergeLoading(true);
+    try {
+      const res = await mergeGraphragKb({
+        notebook_id: notebookId,
+        notebook_title: notebookTitle,
+        email: email || '',
+        api_url,
+        api_key,
+        model,
+        workspace_dir_a: a,
+        workspace_dir_b: b,
+        dedupe: mergeDedupe,
+      });
+      const next: GraphragWorkspacePersist = {
+        workspace_dir: res.merged_workspace_dir,
+        updatedAt: Date.now(),
+        num_chunks: res.num_chunks,
+      };
+      if (storageKey) localStorage.setItem(storageKey, JSON.stringify(next));
+      setPersist(next);
+      setMergeA(res.merged_workspace_dir);
+      showToast(L.mergeOk, 'success');
+    } catch (e: unknown) {
+      const msg = e instanceof Error ? e.message : String(e);
+      showToast(msg, 'error');
+    } finally {
+      setMergeLoading(false);
+    }
+  };
+
+  const mermaidCode = useMemo(() => {
+    if (!queryResult?.reasoning_subgraph?.length) return null;
+    return reasoningSubgraphToMermaid(queryResult.reasoning_subgraph);
+  }, [queryResult?.reasoning_subgraph]);
+
+  const contextJson = useMemo(() => {
+    if (!queryResult?.context_data) return '';
+    try {
+      return JSON.stringify(queryResult.context_data, null, 2);
+    } catch {
+      return '{}';
+    }
+  }, [queryResult?.context_data]);
+
+  const downloadContext = () => {
+    const blob = new Blob([contextJson], { type: 'application/json' });
+    const url = URL.createObjectURL(blob);
+    const a = document.createElement('a');
+    a.href = url;
+    a.download = `graphrag_context_${notebookId || 'nb'}.json`;
+    a.click();
+    URL.revokeObjectURL(url);
+  };
+
+  const judgePct = queryResult ? Math.round(Math.max(0, Math.min(1, queryResult.judge_score)) * 100) : 0;
+
+  return (
+    <main className="flex-1 flex flex-col relative bg-white min-w-[300px] overflow-hidden">
+      <div className="flex items-center gap-2 px-6 py-3 border-b border-ios-gray-100 shrink-0">
+        <Network className="text-cyan-600" size={20} />
+        <div>
+          <div className="text-sm font-medium text-ios-gray-900">{L.headerTitle}</div>
+          <div className="text-xs text-ios-gray-400">{L.headerSub}</div>
+        </div>
+      </div>
+
+      <div className="flex-1 overflow-y-auto p-6 space-y-8 max-w-[960px] w-full mx-auto">
+        <section className="rounded-2xl border border-ios-gray-100 bg-ios-gray-50/40 p-4 space-y-3">
+          <h3 className="text-sm font-semibold text-ios-gray-800">{L.indexBtn}</h3>
+          <div className="flex flex-wrap gap-4 items-center text-sm">
+            <label className="flex items-center gap-2 cursor-pointer">
+              <input type="checkbox" checked={forceReindex} onChange={(e) => setForceReindex(e.target.checked)} />
+              {L.forceReindex}
+            </label>
+            <label className="flex items-center gap-2 cursor-pointer">
+              <input type="checkbox" checked={parsePdfs} onChange={(e) => setParsePdfs(e.target.checked)} />
+              {L.parsePdfs}
+            </label>
+          </div>
+          <div>
+            <label className="block text-xs font-medium text-ios-gray-500 mb-1">{L.modelLabel}</label>
+            <input
+              value={modelName}
+              onChange={(e) => setModelName(e.target.value)}
+              className="w-full max-w-md px-3 py-2 border border-ios-gray-200 rounded-lg text-sm"
+              placeholder={defaultGraphragModel()}
+            />
+          </div>
+          <button
+            type="button"
+            disabled={indexLoading || !notebookId}
+            onClick={handleIndex}
+            className="inline-flex items-center gap-2 px-4 py-2 rounded-ios bg-slate-900 text-white text-sm font-medium disabled:opacity-50"
+          >
+            {indexLoading ? <Loader2 size={16} className="animate-spin" /> : null}
+            {indexLoading ? L.indexing : L.indexBtn}
+          </button>
+
+          {persist && (
+            <div className="mt-4 rounded-xl border border-ios-gray-200 bg-white p-3 text-xs space-y-2">
+              <div className="font-medium text-ios-gray-700">{L.summary}</div>
+              <div className="grid grid-cols-1 sm:grid-cols-2 gap-2 text-ios-gray-600">
+                <span>{L.chunks}: <b>{persist.num_chunks ?? '—'}</b></span>
+              </div>
+              <div className="flex items-start gap-2 break-all">
+                <span className="shrink-0 text-ios-gray-500">{L.workspace}:</span>
+                <code className="flex-1 text-[11px] bg-ios-gray-50 p-2 rounded">{persist.workspace_dir}</code>
+                <button
+                  type="button"
+                  onClick={() => copyText(persist.workspace_dir)}
+                  className="shrink-0 p-1.5 rounded border border-ios-gray-200 hover:bg-ios-gray-50"
+                  title={L.copy}
+                >
+                  <Copy size={14} />
+                </button>
+              </div>
+            </div>
+          )}
+        </section>
+
+        <section className="rounded-2xl border border-ios-gray-100 p-4 space-y-3">
+          <h3 className="text-sm font-semibold text-ios-gray-800">{L.queryBtn}</h3>
+          <div>
+            <label className="block text-xs font-medium text-ios-gray-500 mb-1">{L.queryQ}</label>
+            <textarea
+              value={question}
+              onChange={(e) => setQuestion(e.target.value)}
+              rows={3}
+              placeholder={L.queryPlaceholder}
+              className="w-full px-3 py-2 border border-ios-gray-200 rounded-lg text-sm"
+            />
+          </div>
+          <div className="flex flex-wrap gap-3 items-center">
+            <span className="text-xs text-ios-gray-500">search_method</span>
+            <select
+              value={searchMethod}
+              onChange={(e) => setSearchMethod(e.target.value as 'local' | 'global')}
+              className="px-3 py-2 border border-ios-gray-200 rounded-lg text-sm"
+            >
+              <option value="local">{L.searchLocal}</option>
+              <option value="global">{L.searchGlobal}</option>
+            </select>
+            <button
+              type="button"
+              disabled={queryLoading}
+              onClick={handleQuery}
+              className="inline-flex items-center gap-2 px-4 py-2 rounded-ios bg-primary text-white text-sm font-medium disabled:opacity-50"
+            >
+              {queryLoading ? <Loader2 size={16} className="animate-spin" /> : null}
+              {queryLoading ? L.querying : L.queryBtn}
+            </button>
+          </div>
+        </section>
+
+        {queryResult && (
+          <>
+            <section className="rounded-2xl border border-ios-gray-100 p-4 space-y-2">
+              <h3 className="text-sm font-semibold text-ios-gray-800">{L.answer}</h3>
+              <div className="prose prose-sm max-w-none text-ios-gray-800">
+                <ReactMarkdown>{queryResult.answer || '—'}</ReactMarkdown>
+              </div>
+              <div className="rounded-xl bg-sky-50 border border-sky-100 px-3 py-2 text-sm">
+                <div className="font-medium text-sky-900">{L.judge}: {judgePct}%</div>
+                {queryResult.judge_rationale ? (
+                  <div className="text-xs text-sky-800 mt-1 opacity-90">{queryResult.judge_rationale}</div>
+                ) : null}
+              </div>
+            </section>
+
+            <section className="rounded-2xl border border-ios-gray-100 p-4 space-y-3">
+              <h3 className="text-sm font-semibold text-ios-gray-800">{L.subgraph}</h3>
+              <div className="flex gap-2 text-xs">
+                {(['table', 'mermaid', 'json'] as const).map((v) => (
+                  <button
+                    key={v}
+                    type="button"
+                    onClick={() => setSubView(v)}
+                    className={`px-3 py-1.5 rounded-lg border ${subView === v ? 'border-primary bg-primary/10' : 'border-ios-gray-200'}`}
+                  >
+                    {v === 'table' ? L.viewTable : v === 'mermaid' ? L.viewMermaid : L.viewJson}
+                  </button>
+                ))}
+              </div>
+              {subView === 'table' && (
+                <div className="overflow-x-auto rounded-lg border border-ios-gray-200">
+                  {queryResult.reasoning_subgraph?.length ? (
+                    <table className="min-w-full text-xs">
+                      <thead className="bg-ios-gray-100">
+                        <tr>
+                          {['source', 'target', 'relation', 'weight'].map((col) => (
+                            <th key={col} className="px-2 py-2 text-left font-medium capitalize">{col}</th>
+                          ))}
+                        </tr>
+                      </thead>
+                      <tbody>
+                        {queryResult.reasoning_subgraph.map((row, i) => (
+                          <tr key={i} className="border-t border-ios-gray-100">
+                            <td className="px-2 py-1.5">{String(row.source ?? row.src ?? row.from ?? '')}</td>
+                            <td className="px-2 py-1.5">{String(row.target ?? row.tgt ?? row.to ?? '')}</td>
+                            <td className="px-2 py-1.5">{String(row.relation ?? row.relationship ?? row.label ?? '')}</td>
+                            <td className="px-2 py-1.5">{row.weight != null ? String(row.weight) : ''}</td>
+                          </tr>
+                        ))}
+                      </tbody>
+                    </table>
+                  ) : (
+                    <div className="p-4 text-sm text-ios-gray-500">{L.noSubgraph}</div>
+                  )}
+                </div>
+              )}
+              {subView === 'mermaid' && (
+                <div className="bg-slate-900 rounded-xl p-2">
+                  {mermaidCode ? (
+                    <MermaidPreview mermaidCode={mermaidCode} title={L.mermaidTitle} />
+                  ) : (
+                    <div className="text-sm text-gray-400 p-4">{L.noSubgraph}</div>
+                  )}
+                </div>
+              )}
+              {subView === 'json' && (
+                <pre className="text-xs bg-ios-gray-50 border border-ios-gray-200 rounded-lg p-3 max-h-80 overflow-auto whitespace-pre-wrap">
+                  {JSON.stringify(queryResult.reasoning_subgraph, null, 2)}
+                </pre>
+              )}
+              {queryResult.reasoning_subgraph_cot ? (
+                <details className="text-xs rounded-lg border border-ios-gray-100 bg-ios-gray-50/60 p-3 mt-2">
+                  <summary className="cursor-pointer font-medium text-ios-gray-700 select-none">
+                    {L.subgraphCot}
+                  </summary>
+                  <div className="mt-2 text-ios-gray-800 whitespace-pre-wrap break-words">
+                    <ReactMarkdown>{queryResult.reasoning_subgraph_cot}</ReactMarkdown>
+                  </div>
+                </details>
+              ) : null}
+            </section>
+
+            <section className="rounded-2xl border border-ios-gray-100 p-4 space-y-2">
+              <h3 className="text-sm font-semibold text-ios-gray-800">{L.hints}</h3>
+              <div className="space-y-2">
+                {(queryResult.highlight_hints || []).map((hint, i) => {
+                  const stem = hint.source_stem != null ? String(hint.source_stem).trim() : '';
+                  const pi = _parsePageIndex(hint.page_index);
+                  const cid = hint.chunk_id != null ? String(hint.chunk_id) : '';
+                  const bbox = hint.bbox;
+                  const hasStructured = stem || pi != null || bbox != null;
+                  return (
+                    <div key={i} className="rounded-lg border border-ios-gray-100 bg-ios-gray-50/80 p-2.5 text-xs space-y-1.5">
+                      {hasStructured ? (
+                        <>
+                          <div className="flex gap-2 flex-wrap">
+                            <span className="text-ios-gray-500 shrink-0">{L.hintDoc}</span>
+                            <span className="break-all font-medium">{stem || '—'}</span>
+                          </div>
+                          <div className="flex gap-2">
+                            <span className="text-ios-gray-500 shrink-0">{L.hintPage}</span>
+                            <span>
+                              {pi != null && pi >= 0
+                                ? locale === 'zh'
+                                  ? `第 ${pi + 1} 页`
+                                  : `Page ${pi + 1}`
+                                : '—'}
+                            </span>
+                          </div>
+                          {bbox != null && String(bbox) !== '' ? (
+                            <div className="flex gap-2 break-all">
+                              <span className="text-ios-gray-500 shrink-0">{L.hintBbox}</span>
+                              <span className="font-mono text-[11px]">
+                                {typeof bbox === 'object' ? JSON.stringify(bbox) : String(bbox)}
+                              </span>
+                            </div>
+                          ) : null}
+                          {onOpenGraphragSource && stem ? (
+                            <button
+                              type="button"
+                              onClick={() =>
+                                onOpenGraphragSource({
+                                  sourceStem: stem,
+                                  pageIndex: pi ?? -1,
+                                  chunkId: cid || undefined,
+                                  workspaceDir: persist?.workspace_dir,
+                                  triples: queryResult?.reasoning_subgraph as Array<Record<string, unknown>> | undefined,
+                                })
+                              }
+                              className="inline-flex items-center gap-1 text-[11px] font-medium text-primary hover:underline pt-1"
+                            >
+                              <ExternalLink size={12} />
+                              {L.openInKb}
+                            </button>
+                          ) : null}
+                        </>
+                      ) : (
+                        <>
+                          {KNOWN_HINT_KEYS.filter((k) => hint[k] != null && hint[k] !== '').map((k) => (
+                            <div key={k} className="flex gap-2 break-all">
+                              <span className="text-ios-gray-500 shrink-0">{k}:</span>
+                              <span>{typeof hint[k] === 'object' ? JSON.stringify(hint[k]) : String(hint[k])}</span>
+                            </div>
+                          ))}
+                          {KNOWN_HINT_KEYS.every((k) => hint[k] == null || hint[k] === '') && (
+                            <pre className="whitespace-pre-wrap">{JSON.stringify(hint, null, 2)}</pre>
+                          )}
+                        </>
+                      )}
+                    </div>
+                  );
+                })}
+                {!(queryResult.highlight_hints || []).length && (
+                  <div className="text-xs text-ios-gray-400">—</div>
+                )}
+              </div>
+            </section>
+
+            <section className="rounded-2xl border border-ios-gray-100 p-4">
+              <button
+                type="button"
+                onClick={() => setContextOpen(!contextOpen)}
+                className="flex items-center gap-2 text-sm font-semibold text-ios-gray-800 w-full text-left"
+              >
+                {contextOpen ? <ChevronDown size={18} /> : <ChevronRight size={18} />}
+                {L.context}
+              </button>
+              {contextOpen && (
+                <div className="mt-3 space-y-2">
+                  <div className="flex gap-2">
+                    <button
+                      type="button"
+                      onClick={downloadContext}
+                      className="inline-flex items-center gap-1 px-3 py-1.5 text-xs rounded-lg border border-ios-gray-200 hover:bg-ios-gray-50"
+                    >
+                      <Download size={14} /> {L.downloadJson}
+                    </button>
+                    <button
+                      type="button"
+                      onClick={() => copyText(contextJson)}
+                      className="inline-flex items-center gap-1 px-3 py-1.5 text-xs rounded-lg border border-ios-gray-200 hover:bg-ios-gray-50"
+                    >
+                      <Copy size={14} /> {L.copyJson}
+                    </button>
+                  </div>
+                  <pre className="text-[11px] bg-ios-gray-50 border border-ios-gray-200 rounded-lg p-3 max-h-64 overflow-auto whitespace-pre-wrap">
+                    {contextJson.slice(0, 120_000)}
+                    {contextJson.length > 120_000 ? '\n…' : ''}
+                  </pre>
+                </div>
+              )}
+            </section>
+          </>
+        )}
+
+        <section className="rounded-2xl border border-dashed border-ios-gray-200 p-4 space-y-3">
+          <h3 className="text-sm font-semibold text-ios-gray-800">{L.mergeTitle}</h3>
+          <div>
+            <label className="block text-xs text-ios-gray-500 mb-1">{L.mergeA}</label>
+            <textarea
+              value={mergeA}
+              onChange={(e) => setMergeA(e.target.value)}
+              rows={2}
+              className="w-full px-3 py-2 border border-ios-gray-200 rounded-lg text-xs font-mono"
+            />
+          </div>
+          <div>
+            <label className="block text-xs text-ios-gray-500 mb-1">{L.mergeB}</label>
+            <textarea
+              value={mergeB}
+              onChange={(e) => setMergeB(e.target.value)}
+              rows={2}
+              className="w-full px-3 py-2 border border-ios-gray-200 rounded-lg text-xs font-mono"
+            />
+          </div>
+          <label className="flex items-center gap-2 text-sm cursor-pointer">
+            <input type="checkbox" checked={mergeDedupe} onChange={(e) => setMergeDedupe(e.target.checked)} />
+            {L.dedupe}
+          </label>
+          <button
+            type="button"
+            disabled={mergeLoading}
+            onClick={handleMerge}
+            className="inline-flex items-center gap-2 px-4 py-2 rounded-ios border border-ios-gray-300 text-sm font-medium disabled:opacity-50"
+          >
+            {mergeLoading ? <Loader2 size={16} className="animate-spin" /> : null}
+            {mergeLoading ? L.merging : L.mergeBtn}
+          </button>
+        </section>
+      </div>
+    </main>
+  );
+}
diff --git a/frontend_zh/src/config/api.ts b/frontend_zh/src/config/api.ts
index bbf9aea..422d232 100644
--- a/frontend_zh/src/config/api.ts
+++ b/frontend_zh/src/config/api.ts
@@ -23,6 +23,9 @@ function getApiBaseUrl(): string {
 
 export const API_BASE_URL = getApiBaseUrl();
 
+/** GraphRAG KB REST prefix (relative to API_BASE_URL). */
+export const GRAPHRAG_KB_BASE = '/api/v1/graphrag-kb';
+
 // API key for backend authentication
 export const API_KEY = import.meta.env.VITE_API_KEY || 'df-internal-2024-workflow-key';
 
diff --git a/frontend_zh/src/pages/NotebookView.tsx b/frontend_zh/src/pages/NotebookView.tsx
index 64c76bd..1389dc7 100644
--- a/frontend_zh/src/pages/NotebookView.tsx
+++ b/frontend_zh/src/pages/NotebookView.tsx
@@ -1,11 +1,11 @@
-import React, { useState, useEffect } from 'react';
+import React, { useState, useEffect, useMemo } from 'react';
 import { motion, AnimatePresence } from 'framer-motion';
 import {
   ChevronLeft, Plus, Share2, Settings, MessageSquare,
   BarChart2, Zap, AudioLines, Video, FileText,
   Filter, MoreVertical, Search, Image as ImageIcon, FileStack, Sparkles,
   Mic2, Video as VideoIcon, BrainCircuit, Send, Bot, User, Loader2, Upload, X,
-  Globe, Link2, Cloud, ChevronRight, LayoutGrid, Download, BookOpen, Brain
+  Globe, Link2, Cloud, ChevronRight, LayoutGrid, Download, BookOpen, Brain, Network
 } from 'lucide-react';
 import { useAuthStore } from '../stores/authStore';
 import { apiFetch } from '../config/api';
@@ -13,6 +13,8 @@ import { getApiSettings } from '../services/apiSettingsService';
 import { fetchWithCache, invalidateCacheByPrefix } from '../services/clientCache';
 import type { KnowledgeFile, ChatMessage, ToolType } from '../types';
 import ReactMarkdown from 'react-markdown';
+import rehypeRaw from 'rehype-raw';
+import { injectGraphragHighlightInMarkdown } from '../utils/graphragMarkdownHighlight';
 import { MermaidPreview } from '../components/knowledge-base/tools/MermaidPreview';
 import { SettingsModal } from '../components/SettingsModal';
 import DrawioInlineEditor from '../components/DrawioInlineEditor';
@@ -20,6 +22,8 @@ import { FlashcardViewer } from '../components/flashcards/FlashcardViewer';
 import { QuizContainer } from '../components/quiz/QuizContainer';
 import { NotionEditor } from '../components/notes/NotionEditor';
 import { useToast } from '../hooks/useToast';
+import { GraphRAGKbPanel } from '../components/graphrag-kb/GraphRAGKbPanel';
+import { fetchGraphragChunkSnippet } from '../services/graphragKbService';
 import katex from 'katex';
 import 'katex/dist/katex.min.css';
 
@@ -129,6 +133,8 @@ type CitationReference = {
   preview?: string;
   chunkIndex?: number | null;
   sourceNumber?: string;
+  /** GraphRAG 索引块原文（来自 workspace input 中 [chunk:…] 段，非整篇 MinerU 预览） */
+  graphragHighlightText?: string;
 };
 
 type CitationTooltipState = {
@@ -366,6 +372,7 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
   const dataExtractTool = { icon: <BarChart2 className="text-emerald-500" />, label: '智能取数', id: 'data_extract' as ToolType };
   const studioTools: Array<{icon: React.ReactNode, label: string, id: ToolType}> = [
     dataExtractTool,
+    { icon: <Network className="text-cyan-600" />, label: 'GraphRAG KB', id: 'graphrag_kb' },
     { icon: <ImageIcon className="text-orange-500" />, label: 'PPT生成', id: 'ppt' },
     { icon: <BrainCircuit className="text-purple-500" />, label: '思维导图', id: 'mindmap' },
     // DrawIO 图表功能暂时隐藏，后续修复
@@ -379,12 +386,13 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
   ];
 
   // Studio：每个功能卡片各自配置，点卡片上的「…」翻转进该卡片的设置
-  type StudioToolId = 'data_extract' | 'ppt' | 'mindmap' | 'drawio' | 'flashcard' | 'quiz' | 'podcast' | 'video' | 'note';
+  type StudioToolId = 'data_extract' | 'graphrag_kb' | 'ppt' | 'mindmap' | 'drawio' | 'flashcard' | 'quiz' | 'podcast' | 'video' | 'note';
   const [studioPanelView, setStudioPanelView] = useState<'tools' | 'settings'>('tools');
   const [studioSettingsTool, setStudioSettingsTool] = useState<StudioToolId | null>(null);
   const STORAGE_STUDIO_CONFIG = `kb_studio_config_${effectiveUser?.id || 'default'}`;
   const defaultByTool: Record<StudioToolId, Record<string, string>> = {
     data_extract: { resultFormat: 'json', executionStrategy: 'auto' },
+    graphrag_kb: {},
     ppt: { llmModel: 'deepseek-v3.2', genFigModel: 'gemini-2.5-flash-image', stylePreset: 'modern', stylePrompt: '', language: 'zh', page_count: '10' },
     mindmap: { llmModel: 'deepseek-v3.2', mindmapStyle: 'default' },
     drawio: { llmModel: 'deepseek-v3.2', diagramType: 'auto', diagramStyle: 'default', language: 'zh' },
@@ -1614,6 +1622,17 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
     return () => window.clearTimeout(timer);
   }, [sourceDetailCitationFocus, sourceDetailLoading, sourceDetailContent]);
 
+  const sourceDetailMarkdownWithHighlight = useMemo(() => {
+    if (sourceDetailFormat !== 'markdown' || !sourceDetailContent) return sourceDetailContent;
+    const hl = sourceDetailCitationFocus?.graphragHighlightText?.trim();
+    if (!hl) return sourceDetailContent;
+    return injectGraphragHighlightInMarkdown(sourceDetailContent, hl);
+  }, [
+    sourceDetailFormat,
+    sourceDetailContent,
+    sourceDetailCitationFocus?.graphragHighlightText,
+  ]);
+
   const runFastResearch = async () => {
     if (!fastResearchQuery.trim()) return;
     const settings = getApiSettings(effectiveUser?.id || null);
@@ -3077,8 +3096,10 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
                     <span className="ml-2 text-sm text-gray-500">解析中…</span>
                   </div>
                 ) : sourceDetailFormat === 'markdown' && sourceDetailContent ? (
-                  <div className="prose prose-sm max-w-none text-gray-700 prose-p:text-xs prose-headings:text-sm prose-pre:text-xs">
-                    <ReactMarkdown>{sourceDetailContent}</ReactMarkdown>
+                  <div className="prose prose-sm max-w-none text-gray-700 prose-p:text-xs prose-headings:text-sm prose-pre:text-xs prose-mark:bg-amber-200/90 prose-mark:rounded prose-mark:px-0.5">
+                    <ReactMarkdown rehypePlugins={[rehypeRaw]}>
+                      {sourceDetailMarkdownWithHighlight ?? ''}
+                    </ReactMarkdown>
                   </div>
                 ) : (
                   <pre className="whitespace-pre-wrap text-xs text-gray-700 font-sans leading-relaxed break-words">
@@ -3123,6 +3144,45 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
               initialBlocks={editingNote?.blocks}
             />
           </div>
+        ) : activeTool === 'graphrag_kb' ? (
+          <GraphRAGKbPanel
+            notebook={notebook}
+            userId={effectiveUser?.id || null}
+            email={effectiveUser?.email || effectiveUser?.id || ''}
+            showToast={showToast}
+            onOpenGraphragSource={async (p) => {
+              const stem = p.sourceStem.trim();
+              const match = files.find((f) => {
+                const n = f.name || '';
+                const base = n.replace(/\.[^.]+$/, '');
+                return base === stem || n === stem || n.startsWith(`${stem}.`);
+              });
+              if (!match) {
+                showToast(`未找到与「${stem}」匹配的来源文件`, 'warning');
+                return;
+              }
+              let graphragHighlightText: string | undefined;
+              if (p.workspaceDir && p.chunkId) {
+                try {
+                  const sn = await fetchGraphragChunkSnippet(p.workspaceDir, p.chunkId, p.triples);
+                  if (sn.found) {
+                    // Prefer LLM-extracted sentence (short + verbatim → indexOf succeeds reliably)
+                    // Fall back to full chunk text
+                    graphragHighlightText = sn.highlighted_sentence?.trim() || sn.text?.trim();
+                  }
+                } catch {
+                  /* 仍打开全文预览 */
+                }
+              }
+              await openSourceDetail(match, {
+                fileName: match.name,
+                filePath: match.url,
+                preview: `GraphRAG · ${p.pageIndex >= 0 ? `第 ${p.pageIndex + 1} 页` : '页码未知'}`,
+                sourceNumber: 'GR',
+                graphragHighlightText,
+              });
+            }}
+          />
         ) : activeTool === 'data_extract' ? (
           <main className="flex-1 flex flex-col relative bg-white min-w-[300px] overflow-hidden">
             <div className="flex items-center justify-between px-6 py-3 border-b border-ios-gray-100 shrink-0">
@@ -3644,6 +3704,7 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
               </button>
               <h3 className="text-sm font-semibold text-gray-800 mb-3">
                 {studioSettingsTool === 'data_extract' && '智能取数'}
+                {studioSettingsTool === 'graphrag_kb' && 'GraphRAG KB'}
                 {studioSettingsTool === 'ppt' && 'PPT 生成'}
                 {studioSettingsTool === 'mindmap' && '思维导图'}
                 {studioSettingsTool === 'drawio' && 'DrawIO 图表'}
@@ -3676,6 +3737,9 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
                     </>
                   );
                 })()}
+                {studioSettingsTool === 'graphrag_kb' && (
+                  <p className="text-sm text-gray-600">索引、查询与合并选项均在中间主面板的 GraphRAG 知识库中配置。</p>
+                )}
                 {studioSettingsTool === 'ppt' && (() => {
                   const c = getStudioConfig('ppt');
                   return (
@@ -4046,7 +4110,7 @@ const NotebookView = ({ notebook, onBack }: { notebook: any, onBack: () => void
                 </motion.div>
               ))}
             </div>
-            {activeTool !== 'chat' && activeTool !== 'search' && activeTool !== 'data_extract' && (
+            {activeTool !== 'chat' && activeTool !== 'search' && activeTool !== 'data_extract' && activeTool !== 'graphrag_kb' && (
               <motion.button
                 whileTap={{ scale: 0.97 }}
                 type="button"
diff --git a/frontend_zh/src/services/graphragKbService.ts b/frontend_zh/src/services/graphragKbService.ts
new file mode 100644
index 0000000..4eddb50
--- /dev/null
+++ b/frontend_zh/src/services/graphragKbService.ts
@@ -0,0 +1,93 @@
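+/**
+ * Frontend API wrapper for the GraphRAG knowledge base.
+ *
+ * Data flow: calls to ``/api/v1/graphrag-kb/*`` → FastAPI ``graphrag_kb`` router → ``wa_graphrag_kb`` → ``wf_graphrag_kb``.
+ * ``fetchGraphragChunkSnippet`` is used when the sidebar opens a source: it fetches, by chunk_id, the text of the segment embedded in ``input/*.txt`` so that NotebookView can highlight it in the Markdown.
+ */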
+import { apiFetch, GRAPHRAG_KB_BASE } from '../config/api';
+import type {
+  IndexRequest,
+  IndexResponse,
+  QueryRequest,
+  QueryResponse,
+  MergeRequest,
+  MergeResponse,
+} from '../types/graphragKb';
+
+const DEFAULT_LLM_MODEL = 'deepseek-v3.2';
+
+async function parseErrorDetail(res: Response): Promise<string> { // best-effort human-readable message for a non-OK response
+  const raw = await res.text().catch(() => ''); // read the body ONCE: after json() has consumed the stream, a later text() call rejects
+  try {
+    const body = JSON.parse(raw);
+    const d = body?.detail;
+    if (typeof d === 'string') return d;
+    if (Array.isArray(d)) return d.map((x: { msg?: string }) => x?.msg || String(x)).join('; '); // list of { msg } items → "a; b"
+    return body?.message || `HTTP ${res.status}`;
+  } catch {
+    return raw || `HTTP ${res.status}`; // body was not JSON (e.g. plain text / HTML): surface it verbatim, else fall back to the status
+  }
+}
+
+export function defaultGraphragModel(): string { // accessor for the module-level DEFAULT_LLM_MODEL constant
+  return DEFAULT_LLM_MODEL;
+}
+
+export async function indexGraphragKb(body: IndexRequest): Promise<IndexResponse> { // POSTs `body` as JSON to GRAPHRAG_KB_BASE + '/index'
+  const res = await apiFetch(`${GRAPHRAG_KB_BASE}/index`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(body),
+  });
+  if (!res.ok) throw new Error(await parseErrorDetail(res)); // non-OK status → Error carrying the message from parseErrorDetail
+  return res.json() as Promise<IndexResponse>; // cast only; the payload is not validated at runtime
+}
+
+export async function queryGraphragKb(body: QueryRequest): Promise<QueryResponse> { // POSTs `body` as JSON to GRAPHRAG_KB_BASE + '/query'
+  const res = await apiFetch(`${GRAPHRAG_KB_BASE}/query`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(body),
+  });
+  if (!res.ok) throw new Error(await parseErrorDetail(res)); // non-OK status → Error carrying the message from parseErrorDetail
+  return res.json() as Promise<QueryResponse>; // cast only; the payload is not validated at runtime
+}
+
+export async function mergeGraphragKb(body: MergeRequest): Promise<MergeResponse> { // POSTs `body` as JSON to GRAPHRAG_KB_BASE + '/merge'
+  const res = await apiFetch(`${GRAPHRAG_KB_BASE}/merge`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(body),
+  });
+  if (!res.ok) throw new Error(await parseErrorDetail(res)); // non-OK status → Error carrying the message from parseErrorDetail
+  return res.json() as Promise<MergeResponse>; // cast only; the payload is not validated at runtime
+}
+
+export interface ChunkSnippetResponse { // response shape returned by fetchGraphragChunkSnippet
+  text: string; // chunk body text
+  source_stem: string;
+  found: boolean; // callers check this before using `text` / `highlighted_sentence`
+  /** Verbatim sentence/phrase extracted by LLM that best matches the reasoning triples. */
+  highlighted_sentence?: string;
+}
+
+/** Resolve the body text for ``[chunk:id]`` from the GraphRAG workspace ``input/*.txt`` (used for reader highlighting; not the whole MinerU MD).
+ *  Optionally pass triples (reasoning_subgraph) so the backend can call an LLM to pinpoint the most relevant original sentence.
+ */
+export async function fetchGraphragChunkSnippet(
+  workspaceDir: string,
+  chunkId: string,
+  triples?: Array<Record<string, unknown>>,
+): Promise<ChunkSnippetResponse> {
+  const res = await apiFetch(`${GRAPHRAG_KB_BASE}/chunk-snippet`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      workspace_dir: workspaceDir,
+      chunk_id: chunkId,
+      ...(triples && triples.length > 0 ? { triples } : {}), // omit the key entirely when no triples are supplied
+    }),
+  });
+  if (!res.ok) throw new Error(await parseErrorDetail(res)); // non-OK status → Error carrying the message from parseErrorDetail
+  return res.json() as Promise<ChunkSnippetResponse>;
+}
diff --git a/frontend_zh/src/types/graphragKb.ts b/frontend_zh/src/types/graphragKb.ts
new file mode 100644
index 0000000..a42c2a8
--- /dev/null
+++ b/frontend_zh/src/types/graphragKb.ts
@@ -0,0 +1,77 @@
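+/**
+ * GraphRAG KB frontend/backend JSON contract (kept in sync with ``fastapi_app/routers/graphrag_kb.py``; fields are snake_case).
+ *
+ * - Index*: index-build request/response (workspace_dir, chunk count, optional KGGen statistics).
+ * - Query*: the query response carries answer, context_data, reasoning subgraph, source_chunks, highlight_hints, Judge result, and subgraph CoT.
+ * - Merge*: path and chunk count of the workspace produced by merging two workspaces.
+ * - GraphragWorkspacePersist: summary of the last index run, persisted by the frontend in localStorage.
+ */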
+
+export interface IndexRequest { // JSON body of the index-build request (fields are snake_case)
+  notebook_id: string;
+  notebook_title?: string;
+  email?: string;
+  api_url: string;
+  api_key: string;
+  model: string;
+  source_stems?: string[] | null; // presumably limits indexing to these sources — TODO confirm against the router
+  workspace_dir?: string;
+  force_reindex?: boolean;
+  parse_pdfs?: boolean;
+  /** Default true: server skips KGGen; set false only for internal experiments. */
+  skip_kggen?: boolean;
+}
+
+export interface IndexResponse { // JSON response of the index-build request
+  workspace_dir: string;
+  num_chunks: number; // chunk count
+  kg_entities: number; // KGGen statistic
+  kg_relations: number; // KGGen statistic
+}
+
+export interface QueryRequest { // JSON body of the query request (fields are snake_case)
+  notebook_id: string;
+  notebook_title?: string;
+  email?: string;
+  api_url: string;
+  api_key: string;
+  model: string;
+  question: string;
+  search_method: 'local' | 'global'; // presumably selects query_local vs query_global on the backend — TODO confirm
+  workspace_dir: string;
+}
+
+export interface QueryResponse { // JSON response of the query request
+  answer: string;
+  context_data: Record<string, unknown>;
+  reasoning_subgraph: Array<Record<string, unknown>>; // reasoning subgraph; forwarded as `triples` to the chunk-snippet call
+  source_chunks: string[];
+  highlight_hints: Array<Record<string, unknown>>; // loosely typed hint objects rendered by the panel
+  judge_score: number; // Judge result
+  judge_rationale: string; // Judge result
+  /** LLM chain-of-thought for minimal subgraph selection (hop analysis) */
+  reasoning_subgraph_cot?: string;
+}
+
+export interface MergeRequest { // JSON body of the merge request: combines two workspaces
+  notebook_id?: string;
+  notebook_title?: string;
+  email?: string;
+  api_url: string;
+  api_key: string;
+  model: string;
+  workspace_dir_a: string; // first workspace to merge
+  workspace_dir_b: string; // second workspace to merge
+  dedupe?: boolean;
+}
+
+export interface MergeResponse { // JSON response of the merge request
+  merged_workspace_dir: string; // path of the merged workspace
+  num_chunks: number; // chunk count of the merged workspace
+}
+
+export interface GraphragWorkspacePersist { // summary of the last index run, persisted by the frontend in localStorage
+  workspace_dir: string;
+  updatedAt: number; // presumably an epoch timestamp — TODO confirm at the write site
+  num_chunks?: number;
+}
diff --git a/frontend_zh/src/types/index.ts b/frontend_zh/src/types/index.ts
index 0b17279..1d4ff9c 100644
--- a/frontend_zh/src/types/index.ts
+++ b/frontend_zh/src/types/index.ts
@@ -1,3 +1,13 @@
+export type {
+  IndexRequest as GraphragIndexRequest,
+  IndexResponse as GraphragIndexResponse,
+  QueryRequest as GraphragQueryRequest,
+  QueryResponse as GraphragQueryResponse,
+  MergeRequest as GraphragMergeRequest,
+  MergeResponse as GraphragMergeResponse,
+  GraphragWorkspacePersist,
+} from './graphragKb';
+
 // Knowledge Base Types
 export type MaterialType = 'image' | 'doc' | 'video' | 'link' | 'audio' | 'dataset';
 
@@ -34,4 +44,4 @@ export interface ChatMessage {
 }
 
 export type SectionType = 'library' | 'upload' | 'output' | 'settings';
-export type ToolType = 'chat' | 'ppt' | 'mindmap' | 'podcast' | 'video' | 'search' | 'drawio' | 'flashcard' | 'quiz' | 'note' | 'data_extract';
+export type ToolType = 'chat' | 'ppt' | 'mindmap' | 'podcast' | 'video' | 'search' | 'drawio' | 'flashcard' | 'quiz' | 'note' | 'data_extract' | 'graphrag_kb';
diff --git a/frontend_zh/src/utils/graphragMarkdownHighlight.ts b/frontend_zh/src/utils/graphragMarkdownHighlight.ts
new file mode 100644
index 0000000..f18997b
--- /dev/null
+++ b/frontend_zh/src/utils/graphragMarkdownHighlight.ts
@@ -0,0 +1,20 @@
+/**
+ * Wrap the first occurrence of *snippet* in a Markdown source string with ``<mark>``, so that react-markdown + rehype-raw renders it as an in-body highlight.
+ *
+ * Data flow: ``fetchGraphragChunkSnippet`` yields the chunk text → NotebookView passes the full MD and the snippet → this function injects HTML → ReactMarkdown displays it.
+ * Content is treated as trusted (local MinerU / index files); the snippet must match the full text literally, otherwise ``indexOf`` fails and nothing is injected.
+ */
+export function injectGraphragHighlightInMarkdown(full: string, snippet: string): string {
+  const sn = snippet.trim();
+  if (!full || !sn) return full; // nothing to search in / for: return the input unchanged
+  const idx = full.indexOf(sn); // literal, case-sensitive match; only the first occurrence is used
+  if (idx < 0) return full;
+  const esc = (s: string) => s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;'); // HTML-escape the text placed inside <mark>
+  return (
+    full.slice(0, idx) +
+    '<mark class="bg-amber-200/90 rounded px-0.5 ring-1 ring-amber-300/60" data-graphrag-hl="1">' +
+    esc(sn) +
+    '</mark>' +
+    full.slice(idx + sn.length)
+  );
+}
diff --git a/frontend_zh/src/vite-env.d.ts b/frontend_zh/src/vite-env.d.ts
index 4c5ff97..8d2ba4c 100644
--- a/frontend_zh/src/vite-env.d.ts
+++ b/frontend_zh/src/vite-env.d.ts
@@ -1,5 +1,7 @@
 /// <reference types="vite/client" />
 
+declare module 'rehype-raw';
+
 interface ImportMetaEnv {
   readonly VITE_SUPABASE_URL?: string
   readonly VITE_SUPABASE_ANON_KEY?: string
diff --git a/frontend_zh/vite.config.ts b/frontend_zh/vite.config.ts
index b322e8c..0330e8d 100644
--- a/frontend_zh/vite.config.ts
+++ b/frontend_zh/vite.config.ts
@@ -9,11 +9,11 @@ export default defineConfig({
     allowedHosts: true,
     proxy: {
       '/api': {
-        target: 'http://localhost:8213',
+        target: 'http://localhost:8212',
         changeOrigin: true,
       },
       '/outputs': {
-        target: 'http://localhost:8213',
+        target: 'http://localhost:8212',
         changeOrigin: true,
       },
     },
diff --git a/requirements-base.txt b/requirements-base.txt
index 10b8650..bd7d8ec 100644
--- a/requirements-base.txt
+++ b/requirements-base.txt
@@ -17,19 +17,19 @@ charset-normalizer==3.4.4
 supabase==2.27.2
 
 # ------ LLM / LangChain ------
-openai==2.16.0
-aisuite>=0.1.14
-langchain==0.3.27
-langchain-chroma==0.2.6
-langchain-community==0.3.29
-langchain-core==0.3.76
-langchain-openai==0.3.33
-langchain-text-splitters==0.3.11
-langgraph==0.6.7
-langgraph-checkpoint==2.1.1
-langgraph-prebuilt==0.6.4
-langgraph-sdk==0.2.6
-sseclient-py>=1.8.0
+# openai==2.16.0
+# aisuite>=0.1.14
+# langchain==0.3.27
+# langchain-chroma==0.2.6
+# langchain-community==0.3.29
+# langchain-core==0.3.76
+# langchain-openai==0.3.33
+# langchain-text-splitters==0.3.11
+# langgraph==0.6.7
+# langgraph-checkpoint==2.1.1
+# langgraph-prebuilt==0.6.4
+# langgraph-sdk==0.2.6
+# sseclient-py>=1.8.0
 
 # ------ Vector Store / Embedding ------
 chromadb==1.5.5
@@ -130,7 +130,7 @@ google-api-python-client>=2.187.0
 librosa>=0.11.0
 soundfile>=0.13.0
 qwen-tts==0.1.1
-fireredtts2==0.1
+#fireredtts2==0.1
 
 # ------ OCR (PaddlePaddle / Local Serving) ------
 paddlepaddle==3.2.0
@@ -163,3 +163,5 @@ IPython==8.18.1
 
 # ------ Dev / Test ------
 pytest>=9.0.0
+
+graphrag==2.7.2
\ No newline at end of file
diff --git a/workflow_engine/toolkits/graphrag_ms_tool/__init__.py b/workflow_engine/toolkits/graphrag_ms_tool/__init__.py
new file mode 100644
index 0000000..9149a11
--- /dev/null
+++ b/workflow_engine/toolkits/graphrag_ms_tool/__init__.py
@@ -0,0 +1,21 @@
+"""Microsoft GraphRAG toolkit (indexing / querying / confidence judge).
+
+Data flow (used together with ``wf_graphrag_kb``):
+    Indexing: ``build_index`` ← chunk list from Step 1 → writes ``input/*.txt`` + ``chunk_meta.json`` → ``graphrag index``
+    Querying: ``query_local`` / ``query_global`` → ``QueryResult`` (answer, context_data, subgraph, chunk back-references)
+    Scoring: ``judge_confidence`` ← question + answer + reasoning-subgraph edge list → ``JudgeResult.score``
+
+This package does not handle HTTP; FastAPI invokes the workflow via ``wa_graphrag_kb``, and the workflow in turn calls the functions above.
+"""
+from workflow_engine.toolkits.graphrag_ms_tool.indexer import build_index, GraphRAGWorkspace
+from workflow_engine.toolkits.graphrag_ms_tool.querier import query_local, query_global, QueryResult
+from workflow_engine.toolkits.graphrag_ms_tool.judge import judge_confidence
+
+__all__ = [
+    "build_index",
+    "GraphRAGWorkspace",
+    "query_local",
+    "query_global",
+    "QueryResult",
+    "judge_confidence",
+]
diff --git a/workflow_engine/toolkits/graphrag_ms_tool/indexer.py b/workflow_engine/toolkits/graphrag_ms_tool/indexer.py
new file mode 100644
index 0000000..13cd11b
--- /dev/null
+++ b/workflow_engine/toolkits/graphrag_ms_tool/indexer.py
@@ -0,0 +1,284 @@
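+"""GraphRAG workspace construction and indexing entry point (Step 3).
+
+[Responsibilities]
+    Write the "text chunks with metadata" produced by Step 1 into the directory layout required by Microsoft GraphRAG 2.7.x,
+    then run ``graphrag index`` in a subprocess to produce entities, relationships, communities and other artifacts under ``output/``.
+
+[Data flow]
+    Input ``chunks``: each item contains ``chunk_id``, ``text``, ``page_index``, ``order``,
+    ``bbox``, ``source_stem`` (usually from ``SourceManager.get_chunks_with_meta``).
+    → Grouped by ``source_stem`` and written to ``input/{stem}.txt``, each segment prefixed with a ``[chunk:<id>]`` marker.
+    → Alongside, ``chunk_meta.json`` is written: ``chunk_id → {page_index, order, bbox, source_stem}``,
+      so that the query-stage ``querier`` can map chunks found in the evidence back to page number / bbox.
+    → ``settings.yaml`` is patched (LLM / embedding / chunk parameters), then the CLI is invoked to build the index.
+
+[Directory layout] (GraphRAG **2.7.x**)::
+
+    {workspace_dir}/
+    ├── prompts/             ← generated by ``graphrag init`` (required by 2.7)
+    ├── input/               ← plain text ingested by GraphRAG (with embedded [chunk:…])
+    ├── chunk_meta.json      ← project extension: mapping from chunk to page / source
+    ├── settings.yaml        ← models, output, local_search, etc.
+    ├── .env                 ← optional GRAPHRAG_API_KEY
+    └── output/              ← artifacts of ``graphrag index``
+
+Dependency: **graphrag==2.7.x**, invoked via subprocess; the Python version must match that package's requirements.
+"""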
+from __future__ import annotations
+
+import json
+import os
+import shutil
+import subprocess
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+
+from workflow_engine.logger import get_logger
+
+log = get_logger(__name__)
+
+
+def _settings_is_graphrag_v27(settings_path: Path) -> bool:
+    """Return whether ``settings.yaml`` uses the GraphRAG ≥2.5 style (top-level ``models`` block)."""
+    if not settings_path.is_file():
+        return False
+    try:
+        data = yaml.safe_load(settings_path.read_text(encoding="utf-8"))
+        return isinstance(data, dict) and "models" in data
+    except Exception:  # any read/parse failure is treated as "not the expected layout"
+        return False
+
+
+def _ensure_graphrag_v27_project(root: Path, *, force_init: bool) -> None:
+    """Call the official ``initialize_project_at`` under *root* to generate prompts and the default ``settings.yaml``."""
+    settings_path = root / "settings.yaml"
+    if not force_init and _settings_is_graphrag_v27(settings_path):
+        return  # settings already have the expected layout and no re-init was requested
+    try:
+        from graphrag.cli.initialize import initialize_project_at  # imported lazily so a missing package yields the RuntimeError below
+    except ImportError as exc:
+        raise RuntimeError(
+            "graphrag package is required. Install with: pip install graphrag==2.7.2"
+        ) from exc
+    log.info("[GraphRAGIndexer] Initializing GraphRAG 2.7 project layout at %s", root)
+    initialize_project_at(path=root, force=True)  # NOTE(review): always force=True — presumably overwrites existing project files; confirm
+
+
+_EMBEDDING_VECTOR_SIZES: Dict[str, int] = {  # embedding model name (lower-case) → vector dimension
+    "text-embedding-3-large": 3072,
+    "text-embedding-3-small": 1536,
+    "text-embedding-ada-002": 1536,
+}
+_DEFAULT_VECTOR_SIZE = 1536  # fallback dimension for model names not listed above
+
+
+def _embedding_vector_size(model: str) -> int:
+    """Return the output dimension for a known OpenAI embedding model; default 1536."""
+    return _EMBEDDING_VECTOR_SIZES.get(model.lower().strip(), _DEFAULT_VECTOR_SIZE)  # lookup ignores case and surrounding whitespace
+
+
+def _patch_settings_yaml(
+    settings_path: Path,
+    *,
+    api_key: str,
+    api_base: str,
+    llm_model: str,
+    embedding_model: str,
+    chunk_size: int,
+    chunk_overlap: int,
+) -> None:
+    """Inject runtime LLM / embedding / chunk params into settings.yaml and write .env.
+
+    Modifies models.default_chat_model, models.default_embedding_model, chunks.size/overlap,
+    and vector_store.default_vector_store.vector_size to match the embedding model dimension.
+    Each user must have an independent workspace_dir to avoid concurrent overwrites.
+    """
+    text = settings_path.read_text(encoding="utf-8")
+    data = yaml.safe_load(text)
+    # A graphrag 2.7 settings.yaml must have a top-level "models" block; otherwise the version is wrong
+    if not isinstance(data, dict) or "models" not in data:
+        raise RuntimeError("Invalid GraphRAG settings.yaml — expected 'models' block (graphrag 2.7 layout).")
+
+    models = data["models"]
+    for model_id, model_name in (
+        ("default_chat_model", llm_model),
+        ("default_embedding_model", embedding_model),
+    ):
+        if model_id not in models:
+            log.warning("[GraphRAGIndexer] settings missing model id %r; skipping patch for it", model_id)
+            continue
+        entry = models[model_id]
+        if isinstance(entry, dict):  # non-dict entries are left untouched
+            entry["api_key"] = api_key  # NOTE(review): the key is persisted in plaintext in settings.yaml
+            entry["model"] = model_name
+            if api_base:  # an empty api_base keeps whatever the file already had
+                entry["api_base"] = api_base
+
+    if "chunks" not in data:
+        data["chunks"] = {}
+    if isinstance(data["chunks"], dict):
+        data["chunks"]["size"] = int(chunk_size)
+        data["chunks"]["overlap"] = int(chunk_overlap)
+
+    # Ensure vector_store.vector_size matches the actual embedding dimension so that
+    # GraphRAG can open the LanceDB collection during queries (default 3072 causes
+    # 'NoneType has no attribute search' when the stored vectors are 1536-dim).
+    vec_size = _embedding_vector_size(embedding_model)
+    vs = data.setdefault("vector_store", {})
+    if isinstance(vs, dict):
+        store = vs.setdefault("default_vector_store", {})
+        if isinstance(store, dict):
+            store["vector_size"] = vec_size
+
+    settings_path.write_text(yaml.dump(data, default_flow_style=False, allow_unicode=True), encoding="utf-8")  # rewrites the whole file from the parsed data
+
+    dotenv = settings_path.parent / ".env"
+    dotenv.write_text(f"GRAPHRAG_API_KEY={api_key}\n", encoding="utf-8")  # overwrites any existing .env; key stored in plaintext
+
+
+@dataclass
+class GraphRAGWorkspace:
+    """Handle to the root directory of a ready GraphRAG workspace.
+
+    Attributes:
+        ``input_dir`` / ``output_dir`` / ``settings_path``: standard sub-paths;
+        ``load_chunk_meta()``: reads the ``chunk_meta.json`` written at index time, so the query side can resolve page numbers and sources.
+    """
+
+    root: Path
+    chunk_meta_path: Path = field(init=False)  # derived from root in __post_init__, not a constructor argument
+
+    def __post_init__(self) -> None:
+        self.chunk_meta_path = self.root / "chunk_meta.json"
+
+    @property
+    def input_dir(self) -> Path:
+        return self.root / "input"
+
+    @property
+    def output_dir(self) -> Path:
+        return self.root / "output"
+
+    @property
+    def settings_path(self) -> Path:
+        return self.root / "settings.yaml"
+
+    def load_chunk_meta(self) -> Dict[str, Any]:
+        """Read the ``chunk_id → metadata`` mapping from disk; return an empty dict if the file does not exist."""
+        if not self.chunk_meta_path.exists():
+            return {}
+        return json.loads(self.chunk_meta_path.read_text(encoding="utf-8"))
+
+
+def build_index(
+    chunks: List[Dict[str, Any]],
+    workspace_dir: str,
+    *,
+    llm_model: Optional[str] = None,
+    embedding_model: Optional[str] = None,
+    api_base: Optional[str] = None,
+    api_key: Optional[str] = None,
+    graphrag_cmd: Optional[str] = None,
+    chunk_size: Optional[int] = None,
+    chunk_overlap: Optional[int] = None,
+    force_reindex: bool = False,
+) -> GraphRAGWorkspace:
+    """Prepare a workspace from ``chunks``, run ``graphrag index`` and return the handle.
+
+    Always rewrites input/{stem}.txt (with [chunk:ID] markers), chunk_meta.json and settings.yaml;
+    the CLI run is skipped only when output/entities.parquet exists and force_reindex is False.
+    Raises RuntimeError if no CLI is found; a failing run raises CalledProcessError (check=True).
+    """
+    from fastapi_app.config.settings import settings as cfg
+
+    llm_model = llm_model or cfg.GRAPHRAG_LLM_MODEL
+    embedding_model = embedding_model or cfg.GRAPHRAG_EMBEDDING_MODEL
+    api_base = api_base or cfg.DEFAULT_LLM_API_URL.rstrip("/")
+    api_key = api_key or os.getenv("DF_API_KEY", "")
+    chunk_size = chunk_size or cfg.GRAPHRAG_CHUNK_SIZE
+    chunk_overlap = chunk_overlap or cfg.GRAPHRAG_CHUNK_OVERLAP  # an explicit 0 also falls back to the setting
+    graphrag_cmd = (
+        graphrag_cmd
+        or cfg.GRAPHRAG_CMD.strip()
+        or shutil.which("graphrag")
+    )
+    if not graphrag_cmd:
+        raise RuntimeError(
+            "graphrag CLI not found. Install with `pip install graphrag==2.7.2` or "
+            "set GRAPHRAG_CMD in .env to the executable path."
+        )
+
+    ws = GraphRAGWorkspace(root=Path(workspace_dir).resolve())
+    ws.root.mkdir(parents=True, exist_ok=True)
+    ws.input_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── Step A: make sure the GraphRAG 2.7 project skeleton exists (prompts/ + settings.yaml) ───
+    # NOTE(review): presumably wraps `graphrag init` (prompt templates + default settings.yaml) — confirm.
+    # Init is forced only on force_reindex or when settings.yaml fails the v2.7 check (intended to keep user config).
+    need_init = force_reindex or not _settings_is_graphrag_v27(ws.settings_path)
+    _ensure_graphrag_v27_project(ws.root, force_init=need_init)
+
+    # Step B: write input/*.txt with [chunk:ID] markers and chunk_meta.json
+    # [chunk:ID] tags are carried through text_units so querier can map them back to page/bbox
+    stem_to_chunks: Dict[str, List[Dict[str, Any]]] = {}
+    meta: Dict[str, Any] = {}
+    for chunk in chunks:
+        cid = chunk.get("chunk_id", "")
+        stem = chunk.get("source_stem", "unknown")
+        text = (chunk.get("text") or "").strip()
+        if not text or not cid:
+            continue  # chunks without text or without an id are neither written nor recorded
+        stem_to_chunks.setdefault(stem, []).append(chunk)
+        meta[cid] = {
+            "page_index": chunk.get("page_index", -1),
+            "order": chunk.get("order", -1),
+            "bbox": chunk.get("bbox"),
+            "source_stem": stem,
+        }
+
+    # One input file per source; chunks of the same source are written sorted by ``order``.
+    for stem, cks in stem_to_chunks.items():
+        txt_path = ws.input_dir / f"{stem}.txt"  # NOTE(review): stem is used verbatim as a file name
+        lines = []
+        for ck in sorted(cks, key=lambda c: c.get("order", 0)):  # NOTE(review): default 0 here, -1 in chunk_meta
+            # Format contract: "[chunk:<hex id>]\n<chunk text>" (the text is written unstripped)
+            lines.append(f"[chunk:{ck['chunk_id']}]\n{ck['text']}")
+        txt_path.write_text("\n\n".join(lines), encoding="utf-8")
+
+    ws.chunk_meta_path.write_text(
+        json.dumps(meta, ensure_ascii=False, indent=2),
+        encoding="utf-8",
+    )
+
+    # Step C: inject API key / model params into settings.yaml
+    _patch_settings_yaml(
+        ws.settings_path,
+        api_key=api_key,
+        api_base=api_base,
+        llm_model=llm_model,
+        embedding_model=embedding_model,
+        chunk_size=int(chunk_size),
+        chunk_overlap=int(chunk_overlap),
+    )
+
+    # Step D: run graphrag index (a forced re-index first deletes the whole output directory)
+    output_dir = ws.output_dir
+    if force_reindex and output_dir.exists():
+        shutil.rmtree(str(output_dir))
+
+    already_indexed = (output_dir / "entities.parquet").exists()  # presence of this file marks a finished index
+    if already_indexed and not force_reindex:
+        log.info("[GraphRAGIndexer] Skipping indexing — output already exists at %s", output_dir)
+        return ws
+
+    log.info("[GraphRAGIndexer] Running graphrag index at %s …", ws.root)
+    subprocess.run(
+        [graphrag_cmd, "index", "--root", str(ws.root)],
+        check=True,
+        text=True,
+    )
+    log.info("[GraphRAGIndexer] Indexing complete.")
+    return ws
diff --git a/workflow_engine/toolkits/graphrag_ms_tool/judge.py b/workflow_engine/toolkits/graphrag_ms_tool/judge.py
new file mode 100644
index 0000000..ce99f10
--- /dev/null
+++ b/workflow_engine/toolkits/graphrag_ms_tool/judge.py
@@ -0,0 +1,140 @@
+"""GraphRAG 答案置信度 Judge（独立 LLM 打分）。
+
+【输入】
+    用户问题（question）、GraphRAG 生成的答案（answer）、
+    推理子图边列表（reasoning_subgraph，由 ``querier._induce_subgraph`` 等得到，最多 50 条写入 prompt）。
+
+【输出】
+    ``JudgeResult``：``score`` ∈ [0,1]、``rationale`` 及可选三维 0–10 分（相关性 / 图支持 / 无过度推断）。
+
+【评分维度】（计划 §4.4，固定 rubric）
+    1. 相关性 — 是否答在问题上；
+    2. 图支持 — 子图证据是否支撑结论；
+    3. 无过度推断 — 是否超出证据范围。
+
+【数据流】
+    ``wf_graphrag_kb._action_query`` 在查询（及可选子图裁剪）之后调用；
+    配置来自 ``settings.JUDGE_MODEL`` 与 ``DEFAULT_LLM_API_URL``。
+    LLM 失败时返回 score=0.0 并记录 warning，不抛异常，便于接口仍返回部分结果。
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+from workflow_engine.logger import get_logger
+
+log = get_logger(__name__)
+
+_JUDGE_SYSTEM_PROMPT = """\
+You are a rigorous evidence quality judge. Given a user question, a
+knowledge-graph reasoning subgraph (as a list of edges), and a candidate
+answer, score the answer on three criteria:
+
+1. Relevance (0-10): Does the answer address the question?
+2. Graph support (0-10): Is the answer supported by the provided subgraph?
+3. No over-reach (0-10): Does the answer avoid making claims beyond the evidence?
+
+Output ONLY valid JSON:
+{
+  "relevance": <int 0-10>,
+  "graph_support": <int 0-10>,
+  "no_over_reach": <int 0-10>,
+  "score": <float 0-1, average of the three divided by 10>,
+  "rationale": "<1-2 sentences>"
+}
+"""
+
+
+@dataclass
+class JudgeResult:
+    score: float           # overall score; _parse_judge_response clamps it to 0.0–1.0
+    rationale: str = ""    # judge's explanation, or an error note when the LLM call failed
+    relevance: int = 0     # rubric sub-scores as reported by the judge model (the prompt asks for 0-10)
+    graph_support: int = 0
+    no_over_reach: int = 0
+
+
+def judge_confidence(
+    question: str,
+    answer: str,
+    reasoning_subgraph: List[Dict[str, Any]],
+    *,
+    model: Optional[str] = None,
+    api_base: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> JudgeResult:
+    """Ask the judge LLM to score ``answer``; on any failure return ``score=0.0`` with the error text."""
+    from fastapi_app.config.settings import settings as cfg
+
+    if not model:
+        model = cfg.JUDGE_MODEL
+    if not api_base:
+        api_base = cfg.DEFAULT_LLM_API_URL.rstrip("/")
+    if not api_key:
+        api_key = os.getenv("DF_API_KEY", "")
+
+    # Render at most the first 50 edges as "(source) --[relation]--> (target)" lines.
+    rendered: List[str] = []
+    for edge in reasoning_subgraph[:50]:
+        src, rel, dst = edge.get("source", "?"), edge.get("relation", "?"), edge.get("target", "?")
+        rendered.append(f"  ({src}) --[{rel}]--> ({dst})")
+    subgraph_text = "\n".join(rendered) if rendered else "  (no subgraph available)"
+
+    sections = [f"## Question\n{question}\n\n", f"## Reasoning Subgraph\n{subgraph_text}\n\n"]
+    user_msg = "".join(sections) + f"## Answer\n{answer}\n"
+
+    try:
+        reply = _call_llm(model, api_base, api_key, _JUDGE_SYSTEM_PROMPT, user_msg)
+        return _parse_judge_response(reply)
+    except Exception as exc:
+        log.warning("[Judge] LLM call failed: %s", exc)
+        return JudgeResult(score=0.0, rationale=f"Judge error: {exc}")
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+def _call_llm(model: str, api_base: str, api_key: str, system: str, user: str) -> str:
+    """Send one system+user chat completion to an OpenAI-compatible endpoint and return its text.
+
+    Uses ``temperature=0`` and ``max_tokens=512``; an empty API key is sent as the literal "none",
+    and a ``None`` message content is returned as "". Raises ImportError if ``openai`` is missing.
+    """
+    try:
+        from openai import OpenAI
+    except ImportError as exc:
+        raise ImportError("openai package required for Judge module") from exc
+
+    chat_messages = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": user},
+    ]
+    completions = OpenAI(api_key=api_key or "none", base_url=api_base).chat.completions
+    reply = completions.create(model=model, max_tokens=512, temperature=0, messages=chat_messages)
+    return reply.choices[0].message.content or ""
+
+
+def _parse_judge_response(raw: str) -> JudgeResult:
+    """Parse the judge's JSON reply into a JudgeResult with ``score`` clamped to [0, 1].
+
+    Text outside the outermost ``{...}`` (fences, prose) is ignored; null/missing sub-scores count as 0.
+    A missing, null or out-of-[0,1] ``score`` is replaced by the mean of the three sub-scores / 10.
+    """
+    start, end = raw.find("{"), raw.rfind("}")
+    parsed = json.loads(raw[start:end + 1] if 0 <= start < end else raw.strip())
+    if not isinstance(parsed, dict):
+        raise ValueError(f"judge reply is not a JSON object: {raw[:80]!r}")
+    rel, gs, nor = (int(float(parsed.get(k) or 0)) for k in ("relevance", "graph_support", "no_over_reach"))
+    try:
+        score = float(parsed["score"])
+    except (KeyError, TypeError, ValueError):
+        score = -1.0  # sentinel: forces the rubric-derived fallback below
+    if not 0.0 <= score <= 1.0:  # absent, unparsable, NaN or off-scale (e.g. given as 0-10)
+        score = min(1.0, max(0.0, (rel + gs + nor) / 30.0))
+    return JudgeResult(score=score, rationale=str(parsed.get("rationale") or ""),
+                       relevance=rel, graph_support=gs, no_over_reach=nor)
diff --git a/workflow_engine/toolkits/graphrag_ms_tool/querier.py b/workflow_engine/toolkits/graphrag_ms_tool/querier.py
new file mode 100644
index 0000000..e25d3d7
--- /dev/null
+++ b/workflow_engine/toolkits/graphrag_ms_tool/querier.py
@@ -0,0 +1,449 @@
+"""GraphRAG 查询封装：本地/全局检索 + 证据打包（与计划 §3.3 / §4.3 契约一致）。
+
+【QueryResult 字段含义】
+    answer               模型生成的自然语言回答（已去掉末尾 ``[Data:…]`` 引用尾标）
+    context_data         GraphRAG ``SearchResult.context_data`` 序列化后的表（实体、关系、text_units 等）
+    reasoning_subgraph   从 ``relationships``（或兼容键）归纳出的边列表，供 Judge 与子图裁剪
+    source_chunks        从证据文本中正则提取的 ``[chunk:十六进制]`` → chunk_id 列表（去重保序）
+    highlight_hints      每个 chunk_id 经 ``chunk_meta.json`` 映射得到的 ``{chunk_id, source_stem, page_index, bbox?}``
+
+【执行路径】
+    优先 ``graphrag.api.local_search`` / ``global_search``（与 CLI 同源配置）；
+    失败则回退 ``graphrag query`` 子进程。**CLI 回退时只有 answer**，其余证据字段为空。
+
+【数据流】
+    索引阶段写入的 ``[chunk:ID]`` 会出现在检索上下文的 text_units 文本中 →
+    ``_extract_source_chunks`` 收集 ID → ``_build_highlight_hints`` 查 meta → 前端「文档定位」。
+"""
+from __future__ import annotations
+
+import inspect
+import json
+import os
+import re
+import shutil
+import subprocess
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from workflow_engine.logger import get_logger
+from workflow_engine.toolkits.graphrag_ms_tool.indexer import (
+    GraphRAGWorkspace,
+    _patch_settings_yaml,
+)
+
+log = get_logger(__name__)
+
+
+def _strip_graphrag_data_citation_suffix(answer: str) -> str:
+    """Drop trailing ``[Data: ...]`` citation tags GraphRAG appends; blank input is returned as-is."""
+    trimmed = answer.strip() if answer else ""
+    tail_tags = r"(?:\s*\[Data:[^\]]+\])+\s*$"
+    return re.sub(tail_tags, "", trimmed) if trimmed else answer
+
+
+def _coalesce_ctx(ctx: Dict[str, Any], *keys: str) -> Any:
+    """Return the first value in ``ctx`` (probing *keys* in order) that is not ``None``.
+
+    Candidates are compared with ``is not None`` rather than chained with ``or`` because a
+    pandas DataFrame raises when evaluated in a boolean context.
+    Returns ``None`` when every key is absent or maps to ``None``.
+    """
+    candidates = (ctx.get(name) for name in keys)
+    # ``next`` with a default stops at the first hit, so later keys are never looked up.
+    return next((value for value in candidates if value is not None), None)
+
+
+@dataclass
+class QueryResult:
+    """Structured result of one GraphRAG query; only ``answer`` is set on the CLI fallback path.
+
+    answer: response text with the trailing GraphRAG ``[Data: ...]`` citation tags stripped.
+    context_data: the search ``context_data`` with DataFrames converted to JSON-ready record lists.
+    reasoning_subgraph: edge list [{source, target, relation, weight}] built by _induce_subgraph.
+    source_chunks: chunk ids found as ``[chunk:ID]`` markers in the context, deduplicated in order.
+    highlight_hints: [{chunk_id, source_stem, page_index, bbox?}] looked up in chunk_meta.json.
+    """
+
+    answer: str
+    context_data: Dict[str, Any] = field(default_factory=dict)
+    reasoning_subgraph: List[Dict[str, Any]] = field(default_factory=list)
+    source_chunks: List[str] = field(default_factory=list)
+    highlight_hints: List[Dict[str, Any]] = field(default_factory=list)
+
+
+# ---------------------------------------------------------------------------
+# 对外 API：local / global 查询入口
+# ---------------------------------------------------------------------------
+
+def query_local(
+    workspace: GraphRAGWorkspace,
+    question: str,
+    *,
+    llm_model: Optional[str] = None,
+    api_base: Optional[str] = None,
+    api_key: Optional[str] = None,
+    graphrag_cmd: Optional[str] = None,
+) -> QueryResult:
+    """Run a GraphRAG *local* search for ``question`` in ``workspace``.
+
+    Thin wrapper over ``_run_query`` with ``method="local"``; options left as ``None`` are
+    resolved there from settings. When the Python API path fails and the CLI fallback is
+    used, only ``answer`` is populated in the returned QueryResult.
+    """
+    options = dict(llm_model=llm_model, api_base=api_base, api_key=api_key)
+    options["graphrag_cmd"] = graphrag_cmd
+    # Forward every keyword-only option unchanged.
+    return _run_query(workspace, question, method="local", **options)
+
+
+def query_global(
+    workspace: GraphRAGWorkspace,
+    question: str,
+    *,
+    llm_model: Optional[str] = None,
+    api_base: Optional[str] = None,
+    api_key: Optional[str] = None,
+    graphrag_cmd: Optional[str] = None,
+) -> QueryResult:
+    """Run a GraphRAG *global* search for ``question`` in ``workspace``.
+
+    Thin wrapper over ``_run_query`` with ``method="global"``; options left as ``None`` are
+    resolved there from settings. ``source_chunks`` / ``highlight_hints`` are typically empty
+    for this mode because global search is not expected to return text_units.
+    """
+    options = dict(llm_model=llm_model, api_base=api_base, api_key=api_key)
+    options["graphrag_cmd"] = graphrag_cmd
+    # Forward every keyword-only option unchanged.
+    return _run_query(workspace, question, method="global", **options)
+
+
+# ---------------------------------------------------------------------------
+# 内部实现：Python API / CLI、结果解析与子图/chunk 归纳
+# ---------------------------------------------------------------------------
+
+def _run_query(
+    workspace: GraphRAGWorkspace,
+    question: str,
+    method: str,
+    *,
+    llm_model: Optional[str],
+    api_base: Optional[str],
+    api_key: Optional[str],
+    graphrag_cmd: Optional[str],
+) -> QueryResult:
+    """Answer ``question`` through the Python API, falling back to the CLI on any exception.
+
+    Falsy ``llm_model`` / ``api_base`` / ``api_key`` are filled from settings and ``DF_API_KEY``.
+    """
+    from fastapi_app.config.settings import settings as cfg
+
+    resolved_model = llm_model or cfg.GRAPHRAG_LLM_MODEL
+    resolved_base = api_base or cfg.DEFAULT_LLM_API_URL.rstrip("/")
+    resolved_key = api_key or os.getenv("DF_API_KEY", "")
+
+    try:
+        return _query_via_python_api(workspace, question, method, resolved_model, resolved_base, resolved_key)
+    except Exception as exc:
+        # Deliberately broad: any API-path failure degrades to the answer-only CLI path.
+        failure_msg = "[GraphRAGQuerier] Python API query failed (%s); falling back to CLI: %s"
+        log.warning(failure_msg, method, exc)
+
+    # Reached only when the Python API path raised.
+    return _query_via_cli(workspace, question, method, graphrag_cmd, cfg)
+
+
+def _query_via_python_api(
+    workspace: GraphRAGWorkspace,
+    question: str,
+    method: str,
+    llm_model: str,
+    api_base: str,
+    api_key: str,
+) -> QueryResult:
+    """Run the search in-process through ``graphrag.api`` (GraphRAG 2.7.x) and package the evidence.
+
+    Re-patches settings.yaml with the given key/model, loads the config, then awaits ``local_search``
+    or ``global_search`` via ``asyncio.run`` and hands the result to ``_parse_search_result``.
+    """
+    import asyncio
+    from types import SimpleNamespace
+
+    try:
+        from graphrag.config.load_config import load_config
+        from graphrag import api as graphrag_api
+        from graphrag.cli.query import _resolve_output_files  # NOTE(review): underscore-prefixed CLI helper — recheck on upgrade
+    except ImportError as exc:
+        raise ImportError(
+            "graphrag 2.7.x is required for the Python query path. "
+            "Install: pip install graphrag==2.7.2"
+        ) from exc
+
+    from fastapi_app.config.settings import settings as cfg
+
+    if not workspace.settings_path.is_file():
+        raise FileNotFoundError(f"Missing GraphRAG settings: {workspace.settings_path}")
+
+    # Refresh credentials in workspace yaml so query uses the current request keys.
+    _patch_settings_yaml(
+        workspace.settings_path,
+        api_key=api_key,
+        api_base=api_base,
+        llm_model=llm_model,
+        embedding_model=cfg.GRAPHRAG_EMBEDDING_MODEL,
+        chunk_size=int(cfg.GRAPHRAG_CHUNK_SIZE),
+        chunk_overlap=int(cfg.GRAPHRAG_CHUNK_OVERLAP),
+    )
+
+    # graphrag 3.x: load_config(root_dir, cli_overrides=...); 2.7.x accepts config_filepath= (passed only if supported)
+    _lc_sig = inspect.signature(load_config)
+    _lc_kw: Dict[str, Any] = {"cli_overrides": {}}
+    if "config_filepath" in _lc_sig.parameters:
+        _lc_kw["config_filepath"] = None
+    config = load_config(workspace.root.resolve(), **_lc_kw)
+
+    community_level = 2  # fixed community level passed to both search modes
+    response_type = str(cfg.GRAPHRAG_RESPONSE_TYPE or "Single Paragraph").strip() or "Single Paragraph"
+
+    if method == "local":
+        df = _resolve_output_files(
+            config=config,
+            output_list=[
+                "communities",
+                "community_reports",
+                "text_units",
+                "relationships",
+                "entities",
+            ],
+            optional_list=["covariates"],
+        )
+        if df.get("multi-index"):
+            raise RuntimeError("Multi-index GraphRAG workspaces are not supported by this adapter.")
+        response, context_data = asyncio.run(  # NOTE(review): asyncio.run raises if a loop is already running in this thread
+            graphrag_api.local_search(
+                config=config,
+                entities=df["entities"],
+                communities=df["communities"],
+                community_reports=df["community_reports"],
+                text_units=df["text_units"],
+                relationships=df["relationships"],
+                covariates=df.get("covariates"),
+                community_level=community_level,
+                response_type=response_type,
+                query=question,
+                verbose=False,
+            )
+        )
+    elif method == "global":
+        df = _resolve_output_files(
+            config=config,
+            output_list=["entities", "communities", "community_reports"],
+            optional_list=[],
+        )
+        if df.get("multi-index"):
+            raise RuntimeError("Multi-index GraphRAG workspaces are not supported by this adapter.")
+        response, context_data = asyncio.run(
+            graphrag_api.global_search(
+                config=config,
+                entities=df["entities"],
+                communities=df["communities"],
+                community_reports=df["community_reports"],
+                community_level=community_level,
+                dynamic_community_selection=False,
+                response_type=response_type,
+                query=question,
+                verbose=False,
+            )
+        )
+    else:
+        raise ValueError(f"Unknown search method: {method}")
+
+    if not isinstance(context_data, dict):
+        context_data = {}  # anything that is not a dict is discarded, so downstream parsing sees no tables
+
+    wrapped = SimpleNamespace(response=response or "", context_data=context_data)
+    return _parse_search_result(wrapped, workspace)
+
+
+def _query_via_cli(
+    workspace: GraphRAGWorkspace,
+    question: str,
+    method: str,
+    graphrag_cmd: Optional[str],
+    cfg: Any,
+) -> QueryResult:
+    """CLI fallback: run ``graphrag query`` and return its stdout as an answer-only QueryResult.
+
+    No ``context_data`` is available on this path, so subgraph, chunks and hints stay empty.
+    If stdout is empty the stderr text is returned instead (kept for backward compatibility);
+    a non-zero exit status is logged as a warning so such "answers" can be traced.
+    Raises RuntimeError when no graphrag executable can be located.
+    """
+    # ``or ""`` keeps an unset/None GRAPHRAG_CMD from raising AttributeError on ``.strip()``.
+    cmd = graphrag_cmd or (getattr(cfg, "GRAPHRAG_CMD", "") or "").strip() or shutil.which("graphrag")
+    if not cmd:
+        raise RuntimeError(
+            "graphrag CLI not found. Install graphrag or set GRAPHRAG_CMD."
+        )
+
+    argv = [cmd, "query", "--root", str(workspace.root), "--method", method, "--query", question]
+    proc = subprocess.run(argv, capture_output=True, text=True, check=False)
+    if proc.returncode != 0:
+        log.warning("[GraphRAGQuerier] CLI query exited with %s: %s", proc.returncode, proc.stderr.strip()[:500])
+    answer = _strip_graphrag_data_citation_suffix(proc.stdout.strip() or proc.stderr.strip())
+    log.info("[GraphRAGQuerier] CLI answer (%s): %s …", method, answer[:120])
+    return QueryResult(answer=answer)
+
+
+def _parse_search_result(result: Any, workspace: GraphRAGWorkspace) -> QueryResult:
+    """Convert a GraphRAG API result (``response`` + ``context_data``) into a QueryResult.
+
+    The answer is cleaned of its citation suffix, DataFrames in the context are turned into
+    JSON-ready record lists, and the edge list, chunk ids and highlight hints are derived
+    from the *raw* (unserialised) context.
+    """
+    raw_response = getattr(result, "response", "") or ""
+    answer = _strip_graphrag_data_citation_suffix(raw_response)
+    raw_ctx: Dict[str, Any] = getattr(result, "context_data", {}) or {}
+
+    # pandas is imported inside the guarded helper so a missing pandas degrades to ``str(value)``.
+    def _to_jsonable(table: Any) -> Any:
+        """Return record dicts for a DataFrame, the value itself otherwise, ``str(value)`` on error."""
+        try:
+            import pandas as pd
+            if not isinstance(table, pd.DataFrame):
+                return table
+            return json.loads(table.to_json(orient="records", force_ascii=False))
+        except Exception:
+            return str(table)
+
+    jsonable_ctx: Dict[str, Any] = {name: _to_jsonable(table) for name, table in raw_ctx.items()}
+
+    # Derived evidence: edges first, then chunk ids, then the hints that depend on those ids.
+    edges = _induce_subgraph(raw_ctx)
+    chunk_ids = _extract_source_chunks(raw_ctx, workspace)
+    hints = _build_highlight_hints(chunk_ids, workspace)
+
+    return QueryResult(
+        answer=answer,
+        context_data=jsonable_ctx,
+        reasoning_subgraph=edges,
+        source_chunks=chunk_ids,
+        highlight_hints=hints,
+    )
+
+
+def _induce_subgraph(ctx: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """Build a directed edge list from the ``relationships`` (or ``relations``) context table.
+
+    Accepts a DataFrame or an already serialised ``list[dict]`` (non-dict items are dropped);
+    any other type yields no edges. Each edge is ``{source, target, relation, weight}`` with
+    string endpoints/relation and a float weight (1.0 when missing or not convertible).
+    Failures are logged at debug level and the edges collected so far are returned.
+    """
+    # Candidate column names, probed in order; the first truthy value wins.
+    endpoint_from = ("source", "source_id", "head", "from")
+    endpoint_to = ("target", "target_id", "tail", "to")
+    label_keys = ("description", "relationship", "relation", "predicate", "label")
+
+    def _pick(record: Dict[str, Any], names: tuple) -> str:
+        """Return ``str`` of the first truthy ``record[name]``, or "" when none is truthy."""
+        for name in names:
+            value = record.get(name)
+            if value:
+                return str(value)
+        return ""
+
+    edges: List[Dict[str, Any]] = []
+    try:
+        import pandas as pd
+        table = _coalesce_ctx(ctx, "relationships", "relations")
+        if table is None:
+            return edges
+        # The API usually hands back DataFrames; JSON round-trips may give list[dict] instead.
+        if isinstance(table, pd.DataFrame):
+            records = table.to_dict(orient="records")
+        elif isinstance(table, list):
+            records = [item for item in table if isinstance(item, dict)]
+        else:
+            records = []
+
+        for record in records:
+            head, tail, label = _pick(record, endpoint_from), _pick(record, endpoint_to), _pick(record, label_keys)
+            try:
+                weight = float(record.get("weight", 1.0))  # may be str in JSON payloads
+            except Exception:
+                weight = 1.0
+            edges.append(
+                {"source": head, "target": tail, "relation": label, "weight": weight}
+            )
+    except Exception as exc:
+        log.debug("[GraphRAGQuerier] subgraph induction failed: %s", exc)
+    return edges
+
+
+def _extract_source_chunks(ctx: Dict[str, Any], workspace: GraphRAGWorkspace) -> List[str]:
+    """Collect ``[chunk:ID]`` markers from the retrieved ``sources`` / ``text_units`` rows.
+
+    IDs are lowercase-hex runs read from each row's ``text`` (or ``content``) field, returned
+    deduplicated in first-seen order and capped at ``GRAPHRAG_MAX_HIGHLIGHT_HINTS``
+    (default 10 when the setting is absent; 0 or a falsy value disables the cap).
+    ``workspace`` is not used by this function.
+    On an unexpected error the IDs gathered so far are returned as-is (not deduplicated).
+    """
+    from fastapi_app.config.settings import settings as cfg
+
+    found: List[str] = []
+    try:
+        import pandas as pd
+        table = _coalesce_ctx(ctx, "sources", "text_units")
+        if table is None:
+            return found
+        if isinstance(table, pd.DataFrame):
+            records = table.to_dict(orient="records")
+        elif isinstance(table, list):
+            records = table
+        else:
+            records = []
+        # Marker shape searched for in the row text: "[chunk:<lowercase hex id>]".
+        marker = re.compile(r"\[chunk:([a-f0-9]+)\]")
+        for record in records:
+            body = record.get("text") or record.get("content") or ""
+            found += marker.findall(str(body))
+        # dict.fromkeys keeps the first occurrence of every id in insertion order.
+        unique = list(dict.fromkeys(found))
+        limit = int(getattr(cfg, "GRAPHRAG_MAX_HIGHLIGHT_HINTS", 10) or 0)
+        return unique[:limit] if 0 < limit < len(unique) else unique
+    except Exception as exc:
+        log.debug("[GraphRAGQuerier] source_chunks extraction failed: %s", exc)
+    return found
+
+
+def _build_highlight_hints(
+    chunk_ids: List[str],
+    workspace: GraphRAGWorkspace,
+) -> List[Dict[str, Any]]:
+    """Resolve chunk ids to document-location hints using the workspace's chunk metadata.
+
+    Each hint is ``{chunk_id, source_stem, page_index}`` plus ``bbox`` when the stored value
+    is truthy; ``source_stem`` defaults to "" and ``page_index`` to -1. Ids with no (or an
+    empty) metadata entry are skipped. The metadata file is not read when ``chunk_ids`` is empty.
+    """
+    if not chunk_ids:
+        return []
+
+    lookup = workspace.load_chunk_meta()
+
+    def _hint_for(chunk_id: str, entry: Dict[str, Any]) -> Dict[str, Any]:
+        """Assemble one hint dict; ``bbox`` is appended only when its stored value is truthy."""
+        located = {
+            "chunk_id": chunk_id,
+            "source_stem": entry.get("source_stem", ""),
+            "page_index": entry.get("page_index", -1),
+        }
+        if entry.get("bbox"):
+            located["bbox"] = entry.get("bbox")
+        return located
+
+    return [_hint_for(cid, lookup[cid]) for cid in chunk_ids if lookup.get(cid)]
diff --git a/workflow_engine/toolkits/graphrag_ms_tool/subgraph_pruner.py b/workflow_engine/toolkits/graphrag_ms_tool/subgraph_pruner.py
new file mode 100644
index 0000000..97c5b13
--- /dev/null
+++ b/workflow_engine/toolkits/graphrag_ms_tool/subgraph_pruner.py
@@ -0,0 +1,185 @@
+"""基于 LLM 的推理子图裁剪：从完整关系边列表中选出「支撑答案所需」的最小子集，并输出 CoT。
+
+【数据流】
+    输入：来自 ``querier._induce_subgraph`` 的边列表（可能很长）、用户问题、GraphRAG 草稿答案。
+    处理：将边编号为 0..N-1 写入 prompt，要求模型输出 ``keep_indices``、``analysis``（链式思考）、``max_hops``。
+    输出：``SubgraphPruneResult`` — 保留的边列表、展示用 CoT 文本、跳数估计。
+
+【调用关系】
+    ``wf_graphrag_kb._action_query`` 在 ``GRAPHRAG_SUBGRAPH_PRUNE_ENABLED`` 为真且子图非空时调用；
+    裁剪后的边写回 ``result.reasoning_subgraph``，CoT 写入 ``reasoning_subgraph_cot`` 供前端展示。
+
+【失败策略】
+    LLM 解析失败或索引非法时，回退为截断后的边列表并附带错误说明，避免查询整体失败。
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+from workflow_engine.logger import get_logger
+
+log = get_logger(__name__)
+
+_SUBGRAPH_PRUNE_SYSTEM = """You are a knowledge-graph analyst.
+
+You will receive:
+1. A user question (Q)
+2. A draft answer (A) produced by GraphRAG retrieval + generation
+3. A numbered list of directed edges: (source) --[relation]--> (target)
+
+Your tasks:
+- In the JSON field "analysis", write a clear chain-of-thought: which edges are
+  strictly necessary to justify A for Q, and why. Discuss **hops**: when you
+  connect entities along these edges, what is the longest shortest-path length
+  (in edges) among pairs of entities that matter for the answer? Name approximate
+  hop counts (e.g. "entity X to Y is 2 hops via ...").
+- In "keep_indices", list the 0-based indices of edges to KEEP. Prefer a SMALL
+  minimal set (typically 3–15 edges) that still supports the answer. Indices
+  MUST refer only to edges in the provided numbered list (0 to N-1).
+- In "max_hops", give a single integer: your estimate of the maximum hop count
+  among important entity pairs in the kept subgraph (0 if a single edge or none).
+
+Output ONLY valid JSON (no markdown code fences):
+{
+  "analysis": "<chain-of-thought in English or Chinese>",
+  "keep_indices": [<int>, ...],
+  "max_hops": <int>
+}
+
+If the edge list is empty, return {"analysis":"(no edges)","keep_indices":[],"max_hops":0}.
+"""
+
+
+@dataclass
+class SubgraphPruneResult:
+    """Result of LLM-based subgraph pruning: kept edges, CoT text, and estimated max hops."""
+
+    edges: List[Dict[str, Any]]  # edges kept by the model, or a truncated fallback slice of the input
+    cot: str  # the model's "analysis" text (may carry an appended max_hops note) or a fallback message
+    max_hops: int = 0  # the model's hop estimate; 0 when absent or on fallback
+
+
+def _call_llm(
+    model: str,
+    api_base: str,
+    api_key: str,
+    system: str,
+    user: str,
+    *,
+    max_tokens: int = 2048,
+) -> str:
+    """Send one system+user chat completion (temperature 0.1) and return the reply text.
+
+    An empty ``api_key`` is sent as the literal "none"; a ``None`` reply content becomes "".
+    Raises ImportError when the ``openai`` package is not installed.
+    """
+    try:
+        from openai import OpenAI
+    except ImportError as exc:
+        raise ImportError("openai package required for subgraph pruner") from exc
+
+    request = {"model": model, "max_tokens": max_tokens, "temperature": 0.1}
+    request["messages"] = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": user},
+    ]
+    reply = OpenAI(api_key=api_key or "none", base_url=api_base).chat.completions.create(**request)
+    return reply.choices[0].message.content or ""
+
+
+def _parse_json_object(raw: str) -> Dict[str, Any]:
+    """Parse LLM output as JSON after removing one leading/trailing markdown code fence, if any."""
+    opening_fence = re.compile(r"^```(?:json)?\s*")
+    closing_fence = re.compile(r"\s*```$")
+    unfenced = closing_fence.sub("", opening_fence.sub("", raw.strip()))
+    return json.loads(unfenced)
+
+
+def prune_reasoning_subgraph_llm(
+    question: str,
+    answer: str,
+    edges: List[Dict[str, Any]],
+    *,
+    model: Optional[str] = None,
+    api_base: Optional[str] = None,
+    api_key: Optional[str] = None,
+    max_edges_input: int = 80,
+) -> SubgraphPruneResult:
+    """Ask the LLM for the minimal edge subset supporting ``answer``; a bad reply never raises.
+
+    Only the first ``max_edges_input`` edges (at least one) are numbered and sent. Kept edges
+    follow the order of the model's ``keep_indices`` (duplicates / out-of-range dropped); ``cot``
+    is its ``analysis`` plus a max_hops note when positive. Empty ``edges`` gives an empty result.
+
+    Fallback: when the call fails, the reply is not a JSON object, ``keep_indices`` is not a
+    list, or no index is valid, the first 12 input edges are returned with an explanatory
+    ``cot``. A non-integer ``max_hops`` is read as 0 instead of aborting the query.
+    """
+    from fastapi_app.config.settings import settings as cfg
+
+    model = model or cfg.GRAPHRAG_LLM_MODEL
+    api_base = (api_base or cfg.DEFAULT_LLM_API_URL).rstrip("/")
+    api_key = api_key or os.getenv("DF_API_KEY", "")
+
+    if not edges:
+        return SubgraphPruneResult(edges=[], cot="")
+
+    sl = edges[: max(1, int(max_edges_input))]
+    fallback_edges = sl[: min(12, len(sl))]  # shared by every fallback branch below
+    edge_block = "\n".join(
+        f"{i}: ({e.get('source', '?')}) --[{e.get('relation', '?')}]--> ({e.get('target', '?')})"
+        for i, e in enumerate(sl)
+    )
+    user_msg = (
+        f"## Question\n{question}\n\n"
+        f"## Draft answer\n{answer}\n\n"
+        f"## Edges (index 0..{len(sl)-1})\n{edge_block}\n"
+    )
+
+    try:
+        raw = _call_llm(model, api_base, api_key, _SUBGRAPH_PRUNE_SYSTEM, user_msg, max_tokens=2048)
+        parsed = _parse_json_object(raw)
+        if not isinstance(parsed, dict):  # valid JSON but e.g. a bare list: unusable reply
+            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
+    except Exception as exc:
+        log.warning("[SubgraphPruner] LLM prune failed: %s", exc)
+        return SubgraphPruneResult(
+            edges=fallback_edges,
+            cot=f"(automatic fallback: prune failed: {exc})",
+            max_hops=0,
+        )
+
+    analysis = str(parsed.get("analysis", "") or "")
+    try:
+        max_hops = int(parsed.get("max_hops", 0) or 0)
+    except (TypeError, ValueError, OverflowError):
+        max_hops = 0  # e.g. "2 hops" or a list: the estimate is display-only, so degrade to 0
+    cot_display = analysis
+    if max_hops > 0 and "**max_hops" not in analysis:
+        cot_display = f"{analysis}\n\n**max_hops (estimate):** {max_hops}"
+    idx_raw = parsed.get("keep_indices")
+    if not isinstance(idx_raw, list):
+        return SubgraphPruneResult(
+            edges=fallback_edges, cot=cot_display or "(invalid keep_indices; truncated)", max_hops=max_hops,
+        )
+
+    kept: List[Dict[str, Any]] = []
+    seen: set = set()
+    for x in idx_raw:
+        try:
+            i = int(x)
+        except (TypeError, ValueError, OverflowError):
+            continue
+        if 0 <= i < len(sl) and i not in seen:
+            seen.add(i)
+            kept.append(sl[i])
+
+    if not kept:
+        return SubgraphPruneResult(
+            edges=fallback_edges, cot=cot_display or "(empty keep_indices; truncated)", max_hops=max_hops,
+        )
+    return SubgraphPruneResult(edges=kept, cot=cot_display, max_hops=max_hops)
diff --git a/workflow_engine/toolkits/kggen_tool/__init__.py b/workflow_engine/toolkits/kggen_tool/__init__.py
new file mode 100644
index 0000000..97c9348
--- /dev/null
+++ b/workflow_engine/toolkits/kggen_tool/__init__.py
@@ -0,0 +1,12 @@
+"""KGGen toolkit: extract triples from text chunks and merge two ``kg_gen.Graph`` objects.
+
+``extract_kg`` / ``extract_kg_from_chunks`` return relations tagged with ``source_chunk_ids``;
+``merge_two_kgs`` set-unions two graphs (``dedupe`` is a placeholder). GraphRAG indexing defaults
+to ``skip_kggen=True`` and does not need this package; ``wf_graphrag_kb`` uses it only otherwise.
+"""
+try:
+    from workflow_engine.toolkits.kggen_tool.kg_extractor import extract_kg, extract_kg_from_chunks
+    from workflow_engine.toolkits.kggen_tool.kg_merger import merge_two_kgs
+    __all__ = ["extract_kg", "extract_kg_from_chunks", "merge_two_kgs"]
+except ImportError:  # submodule bodies are currently commented out, so the names may be missing
+    __all__ = []  # keep the package importable; callers fail at their own import site instead
diff --git a/workflow_engine/toolkits/kggen_tool/kg_extractor.py b/workflow_engine/toolkits/kggen_tool/kg_extractor.py
new file mode 100644
index 0000000..3726d73
--- /dev/null
+++ b/workflow_engine/toolkits/kggen_tool/kg_extractor.py
@@ -0,0 +1,219 @@
+"""KGGen-based KG extraction — currently disabled: the implementation below is commented out, so this module defines no names."""
+# from __future__ import annotations
+
+# import os
+# from typing import Any, Dict, List, Optional
+
+# from workflow_engine.logger import get_logger
+
+# log = get_logger(__name__)
+
+
+# def normalize_model_for_litellm(model: str) -> str:
+#     """dspy/kg-gen routes calls through LiteLLM, which requires ``provider/model``.
+
+#     Bare names like ``deepseek-v3.2`` raise *LLM Provider NOT provided*.  This project
+#     defaults to OpenAI-compatible gateways (``api_base`` + deployment id), so we
+#     prefix ``openai/`` when no provider is present.  If you use another LiteLLM
+#     provider, set the full id in config (e.g. ``deepseek/deepseek-chat``).
+#     """
+#     m = (model or "").strip()
+#     if not m or "/" in m:
+#         return m
+#     return f"openai/{m}"
+
+
+# def kggen_init_extras(litellm_model: str) -> Dict[str, Any]:
+#     """Extra ``KGGen(...)`` kwargs required by kg-gen's own validators.
+
+#     For GPT-5–family ids, ``kg_gen.KGGen`` enforces ``temperature == 1.0`` and
+#     ``max_tokens >= 16000`` (see ``validate_temperature`` / ``validate_max_tokens``).
+#     """
+#     m = (litellm_model or "").lower()
+#     if "gpt-5" in m:
+#         return {"temperature": 1.0, "max_tokens": 16000}
+#     return {}
+
+
+# def _get_kggen(model: str, api_base: str, api_key: str):
+#     """Import KGGen and return a configured instance.
+
+#     Raises ImportError if kg_gen is not installed.
+#     """
+#     try:
+#         from kg_gen import KGGen  # type: ignore[import]
+#     except ImportError as exc:
+#         raise ImportError(
+#             "kg-gen is not installed. Run: pip install kg-gen"
+#         ) from exc
+
+#     litellm_model = normalize_model_for_litellm(model)
+#     if litellm_model != model.strip():
+#         log.debug("[KGGen] LiteLLM model id: %r → %r", model, litellm_model)
+
+#     extras = kggen_init_extras(litellm_model)
+#     return KGGen(
+#         model=litellm_model,
+#         api_base=api_base,
+#         api_key=api_key,
+#         **extras,
+#     )
+
+
+# def _default_settings():
+#     from fastapi_app.config.settings import settings
+#     return settings
+
+
+# def extract_kg(
+#     text: str,
+#     source_chunk_ids: Optional[List[str]] = None,
+#     *,
+#     model: Optional[str] = None,
+#     api_base: Optional[str] = None,
+#     api_key: Optional[str] = None,
+# ) -> Dict[str, Any]:
+#     """Extract a knowledge graph from *text* and annotate with chunk IDs.
+
+#     Parameters
+#     ----------
+#     text:
+#         The raw text to extract triples from.
+#     source_chunk_ids:
+#         List of chunk_id values the text originated from.  Stored on every
+#         relation in the result under ``source_chunk_ids``.
+#     model / api_base / api_key:
+#         LLM settings; fall back to ``settings.KGGEN_MODEL`` / ``DEFAULT_LLM_API_URL``.
+
+#     Returns
+#     -------
+#     dict with keys:
+#         ``entities`` (list[str])
+#         ``relations`` (list[dict]) — each dict has keys:
+#             ``subject``, ``predicate``, ``object``, ``source_chunk_ids``
+#         ``raw_graph`` — the original ``kg_gen.Graph`` object
+#     """
+#     cfg = _default_settings()
+#     model = model or cfg.KGGEN_MODEL
+#     api_base = api_base or cfg.DEFAULT_LLM_API_URL.rstrip("/")
+#     api_key = api_key or os.getenv("DF_API_KEY", "")
+
+#     kggen = _get_kggen(model, api_base, api_key)
+#     # kg-gen 0.3.x / 0.4.x: first argument is *input_data* (str or message list), not input_text.
+#     graph = kggen.generate(text)
+
+#     chunk_ids = source_chunk_ids or []
+
+#     relations = []
+#     # Triples live on graph.relations (set of (s, p, o)). graph.edges is only predicate labels.
+#     for edge in (graph.relations or []):
+#         # KGGen edge can be a tuple (subj, pred, obj) or a dict
+#         if isinstance(edge, (list, tuple)) and len(edge) >= 3:
+#             subj, pred, obj = edge[0], edge[1], edge[2]
+#         elif isinstance(edge, dict):
+#             subj = edge.get("source") or edge.get("subject", "")
+#             pred = edge.get("relation") or edge.get("predicate", "")
+#             obj = edge.get("target") or edge.get("object", "")
+#         else:
+#             continue
+#         relations.append(
+#             {
+#                 "subject": str(subj),
+#                 "predicate": str(pred),
+#                 "object": str(obj),
+#                 "source_chunk_ids": chunk_ids,
+#             }
+#         )
+
+#     return {
+#         "entities": list(graph.entities or []),
+#         "relations": relations,
+#         "raw_graph": graph,
+#     }
+
+
+# def extract_kg_from_chunks(
+#     chunks: List[Dict[str, Any]],
+#     *,
+#     model: Optional[str] = None,
+#     api_base: Optional[str] = None,
+#     api_key: Optional[str] = None,
+# ) -> Dict[str, Any]:
+#     """Extract and merge KGs from a list of chunk dicts.
+
+#     Each item in *chunks* must have at least ``chunk_id`` and ``text`` keys
+#     (as produced by ``SourceManager.get_chunks_with_meta``).
+
+#     Returns the same shape as ``extract_kg`` but with relations carrying
+#     ``source_chunk_ids`` from their respective chunk.
+#     """
+#     cfg = _default_settings()
+#     per_chunk: bool = cfg.KGGEN_PER_CHUNK
+#     log_iv = int(getattr(cfg, "KGGEN_LOG_CHUNK_INTERVAL", 10) or 0)
+
+#     if per_chunk:
+#         all_entities: List[str] = []
+#         all_relations: List[Dict[str, Any]] = []
+#         raw_graphs = []
+
+#         to_process = [c for c in chunks if (c.get("text") or "").strip()]
+#         total = len(to_process)
+#         if log_iv == 0:
+#             log.info("[KGGen] per-chunk: starting %d chunks (progress logs off)", total)
+#         else:
+#             log.info(
+#                 "[KGGen] per-chunk: starting %d chunks (interval=%d, set KGGEN_LOG_CHUNK_INTERVAL=1 for each)",
+#                 total,
+#                 log_iv,
+#             )
+
+#         def _log_chunk_progress(i: int, cid: str) -> None:
+#             if log_iv == 0:
+#                 return
+#             if log_iv == 1:
+#                 log.info("[KGGen] chunk %d/%d id=%s", i, total, cid)
+#                 return
+#             if i == 1 or i == total or (i % log_iv) == 0:
+#                 log.info("[KGGen] chunk %d/%d id=%s", i, total, cid)
+
+#         for i, chunk in enumerate(to_process, start=1):
+#             cid = chunk.get("chunk_id", "")
+#             text = (chunk.get("text") or "").strip()
+#             _log_chunk_progress(i, cid or "?")
+#             try:
+#                 result = extract_kg(
+#                     text,
+#                     source_chunk_ids=[cid],
+#                     model=model,
+#                     api_base=api_base,
+#                     api_key=api_key,
+#                 )
+#                 all_entities.extend(result["entities"])
+#                 all_relations.extend(result["relations"])
+#                 raw_graphs.append(result["raw_graph"])
+#             except Exception as exc:
+#                 log.warning("[KGGen] chunk %s extraction failed: %s", cid, exc)
+
+#         if log_iv == 0:
+#             log.info("[KGGen] per-chunk: finished %d chunks", total)
+#         else:
+#             log.info("[KGGen] per-chunk: finished %d chunks", total)
+
+#         return {
+#             "entities": list(set(all_entities)),
+#             "relations": all_relations,
+#             "raw_graphs": raw_graphs,
+#         }
+
+#     # Whole-document mode: concatenate text, collect all chunk_ids
+#     n = len([c for c in chunks if (c.get("text") or "").strip()])
+#     log.info("[KGGen] whole-document mode: 1 LLM call over %d chunks (text merged)", n)
+#     full_text = "\n\n".join(c.get("text", "") for c in chunks if c.get("text"))
+#     all_ids = [c["chunk_id"] for c in chunks if c.get("chunk_id")]
+#     return extract_kg(
+#         full_text,
+#         source_chunk_ids=all_ids,
+#         model=model,
+#         api_base=api_base,
+#         api_key=api_key,
+#     )
diff --git a/workflow_engine/toolkits/kggen_tool/kg_merger.py b/workflow_engine/toolkits/kggen_tool/kg_merger.py
new file mode 100644
index 0000000..d4ca4c1
--- /dev/null
+++ b/workflow_engine/toolkits/kggen_tool/kg_merger.py
@@ -0,0 +1,87 @@
+"""KGGen-based KG merging — currently disabled: the implementation below is commented out, so this module defines no names."""
+# from __future__ import annotations
+
+# from typing import Any, Optional
+
+# from workflow_engine.logger import get_logger
+# from workflow_engine.toolkits.kggen_tool.kg_extractor import (
+#     kggen_init_extras,
+#     normalize_model_for_litellm,
+# )
+
+# log = get_logger(__name__)
+
+
+# def _deduplicate_merged_graph(merged: Any, kggen: Any) -> Any:
+#     """Semantic deduplication after aggregate. Placeholder for next release.
+
+#     Parameters are reserved for a future implementation (e.g. ``kggen`` or custom LLM).
+#     """
+#     pass
+
+#     return merged
+
+
+# def merge_two_kgs(
+#     graph_a: Any,
+#     graph_b: Any,
+#     *,
+#     dedupe: bool = False,
+#     model: Optional[str] = None,
+#     api_base: Optional[str] = None,
+#     api_key: Optional[str] = None,
+# ) -> Any:
+#     """合并两个 ``kg_gen.Graph`` 为一张图（实体/关系/边集合并）。
+
+#     ``graph_a`` / ``graph_b`` 通常来自 ``extract_kg`` 的 ``raw_graph``；
+#     ``dedupe=True`` 时在聚合后调用占位去重（当前无实际逻辑）；
+#     若未安装 kg-gen 则 ``ImportError``。
+#     """
+#     try:
+#         from kg_gen import KGGen  # type: ignore[import]
+#     except ImportError as exc:
+#         raise ImportError("kg-gen is not installed. Run: pip install kg-gen") from exc
+
+#     import os
+
+#     from kg_gen.models import Graph as KGGraph  # type: ignore[import]
+
+#     # kg-gen exposes aggregate as an instance method; mirror its set-union logic so we
+#     # do not rely on KGGen.aggregate([...]) (invalid call) or a dummy KGGen instance.
+#     def _union_graphs(graphs: list[Any]) -> Any:
+#         all_entities: set = set()
+#         all_relations: set = set()
+#         all_edges: set = set()
+#         for g in graphs:
+#             all_entities.update(g.entities)
+#             all_relations.update(g.relations)
+#             all_edges.update(g.edges)
+#         return KGGraph(entities=all_entities, relations=all_relations, edges=all_edges)
+
+#     merged = _union_graphs([graph_a, graph_b])
+#     log.info(
+#         "[KGMerger] aggregate → %d entities, %d edges",
+#         len(merged.entities or []),
+#         len(merged.edges or []),
+#     )
+
+#     if dedupe:
+#         cfg_model = model
+#         cfg_base = api_base
+#         cfg_key = api_key
+#         if not cfg_model:
+#             from fastapi_app.config.settings import settings
+#             cfg_model = settings.KGGEN_MODEL
+#             cfg_base = cfg_base or settings.DEFAULT_LLM_API_URL.rstrip("/")
+#             cfg_key = cfg_key or os.getenv("DF_API_KEY", "")
+
+#         lm_model = normalize_model_for_litellm(cfg_model)
+#         kggen = KGGen(
+#             model=lm_model,
+#             api_base=cfg_base,
+#             api_key=cfg_key,
+#             **kggen_init_extras(lm_model),
+#         )
+#         merged = _deduplicate_merged_graph(merged, kggen)
+
+#     return merged
diff --git a/workflow_engine/workflow/wf_graphrag_kb.py b/workflow_engine/workflow/wf_graphrag_kb.py
new file mode 100644
index 0000000..d66f115
--- /dev/null
+++ b/workflow_engine/workflow/wf_graphrag_kb.py
@@ -0,0 +1,390 @@
+"""GraphRAG 知识库管线工作流（注册名 ``"graphrag_kb"``）。
+
+【图结构】（单节点派发，便于维护）::
+
+    _start_ → _dispatch_ → END
+
+``_dispatch_`` 读取 ``state.request.action``，分别路由到：
+    ``index``  → ``_action_index``   （MinerU 可选 → 分块 → 可选 KGGen → GraphRAG 建索引）
+    ``query``  → ``_action_query``   （本地/全局检索 → 可选子图剪枝 → Judge）
+    ``merge``  → ``_action_merge``   （两工作区 chunk 合并 → 强制重索引）
+
+【数据流边界】
+    本模块**不处理 HTTP**；FastAPI 经 ``wa_graphrag_kb`` 构造 ``GraphRAGKBState`` 后 ``run_workflow``。
+    成功结果写入 ``state.agent_results``；异常写入 ``state.temp_data["errors"]``。
+"""
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from workflow_engine.graphbuilder.graph_builder import GenericGraphBuilder
+from workflow_engine.logger import get_logger
+from workflow_engine.state import MainRequest, MainState
+from workflow_engine.workflow.registry import register
+
+log = get_logger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Request / State  (dataclasses matching project convention)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class GraphRAGKBRequest(MainRequest):
+    """Single-action request for the GraphRAG KB pipeline; ``action`` selects index / query / merge."""
+
+    # ── Action selection ──────────────────────────────────────────────────────
+    # index   chunk the source files → (optional KGGen) → build the GraphRAG index
+    # query   run a local/global search on an existing workspace
+    # merge   merge two workspaces and re-index the result
+    action: str = "index"
+
+    # ── Index / Query shared ──────────────────────────────────────────────────
+    notebook_id: str = ""
+    notebook_title: str = ""
+    email: str = ""
+    workspace_dir: str = ""          # override default workspace path
+
+    # ── Index ─────────────────────────────────────────────────────────────────
+    source_stems: List[str] = field(default_factory=list)  # empty list = use every source
+    force_reindex: bool = False  # also forwarded as the MinerU "force" flag by the index action
+    # When True, run MinerU on any PDF that has not been parsed yet before chunk
+    # extraction.  Already-parsed PDFs are skipped unless ``force_reindex`` is set.
+    parse_pdfs: bool = True
+    # When False, run optional KGGen triple extraction (not used by GraphRAG index).
+    # Default True: user-facing index path is MinerU/chunks → GraphRAG only.
+    skip_kggen: bool = True
+
+    # ── Query ─────────────────────────────────────────────────────────────────
+    question: str = ""
+    search_method: str = "local"     # "local"; any other value runs the global search
+
+    # ── Merge ─────────────────────────────────────────────────────────────────
+    workspace_dir_b: str = ""  # second workspace read by the merge action
+    dedupe: bool = False  # not read by any action in this module
+
+
+@dataclass
+class GraphRAGKBState(MainState):
+    """State of the ``graphrag_kb`` workflow: results go to ``agent_results``, failures to ``temp_data["errors"]``."""
+
+    request: GraphRAGKBRequest = field(default_factory=GraphRAGKBRequest)
+
+
+# ---------------------------------------------------------------------------
+# Graph factory
+# ---------------------------------------------------------------------------
+
+@register("graphrag_kb")
+def create_graphrag_kb_graph() -> GenericGraphBuilder:
+    """Create the ``graphrag_kb`` workflow graph.
+
+    Two nodes are wired as ``_start_ → _dispatch_`` and the configured builder is returned.
+    """
+
+    async def _start_(state: GraphRAGKBState) -> GraphRAGKBState:
+        # Pass-through entry node.
+        return state
+
+    async def _dispatch_(state: GraphRAGKBState) -> GraphRAGKBState:
+        # Route on the normalised action name; failures are recorded in ``temp_data["errors"]``.
+        action = (state.request.action or "").strip().lower()
+        handlers = {"index": _action_index, "query": _action_query, "merge": _action_merge}
+        try:
+            handler = handlers.get(action)
+            if handler is not None:
+                await handler(state)
+            else:
+                state.temp_data["errors"] = [f"Unknown action: {action!r}"]
+        except Exception as exc:
+            log.exception("[GraphRAGKB] Workflow error (action=%s): %s", action, exc)
+            state.temp_data["errors"] = [str(exc)]
+        return state
+
+    builder = GenericGraphBuilder(state_model=GraphRAGKBState, entry_point="_start_")
+    graph_nodes = {"_start_": _start_, "_dispatch_": _dispatch_}
+    builder.add_nodes(graph_nodes).add_edges([("_start_", "_dispatch_")])
+    return builder
+
+
+# ---------------------------------------------------------------------------
+# Action implementations
+# ---------------------------------------------------------------------------
+
+async def _action_index(state: GraphRAGKBState) -> None:
+    """Chunk the notebook sources and build the GraphRAG index for ``state.request``.
+
+    Steps: optional MinerU PDF parsing, per-source chunking, optional best-effort KGGen
+    extraction, then ``build_index``; a summary goes to ``state.agent_results["index"]``.
+
+    Raises:
+        ValueError: if no text chunk could be collected from the selected sources.
+    """
+    import asyncio
+    from fastapi_app.notebook_paths import get_notebook_paths
+    from fastapi_app.source_manager import SourceManager
+    from fastapi_app.config.settings import settings as cfg
+    from workflow_engine.toolkits.graphrag_ms_tool.indexer import build_index
+
+    req = state.request
+    nb_paths = get_notebook_paths(req.notebook_id, req.notebook_title, req.email)
+    manager = SourceManager(nb_paths)
+
+    # Step 0 — collect sources and (optionally) trigger MinerU for unparsed PDFs
+    sources = manager.list_sources()
+    if req.source_stems:
+        sources = [s for s in sources if s.stem in req.source_stems]
+
+    if req.parse_pdfs:
+        await _ensure_mineru_parsed(manager, nb_paths, sources, req.force_reindex)
+
+    # Step 1 — collect structured chunks from all (or selected) sources
+    all_chunks: List[Dict[str, Any]] = []
+    n_src = len(sources)
+    for si, src in enumerate(sources, start=1):
+        chunks = manager.get_chunks_with_meta(
+            src.stem, chunk_size=cfg.GRAPHRAG_CHUNK_SIZE, chunk_overlap=cfg.GRAPHRAG_CHUNK_OVERLAP
+        )
+        all_chunks.extend(chunks)
+        log.info(
+            "[GraphRAGKB] Step1 source=%s → %d chunks (%d/%d sources)",
+            src.stem, len(chunks), si, n_src,
+        )
+
+    if not all_chunks:
+        raise ValueError(
+            "No text chunks found. Ensure sources have been imported into the notebook first."
+        )
+    log.info("[GraphRAGKB] Step1 done: %d total chunks", len(all_chunks))
+
+    # Step 2 — optional KGGen (not fed into GraphRAG; default off for user-facing index)
+    kg_result: Optional[Dict[str, Any]] = None
+    if not req.skip_kggen:
+        try:
+            # Imported inside the ``try``: an unavailable KGGen toolkit must only skip this step.
+            from workflow_engine.toolkits.kggen_tool.kg_extractor import extract_kg_from_chunks
+
+            kg_llm_model = req.model or cfg.GRAPHRAG_LLM_MODEL or cfg.KGGEN_MODEL
+            log.info(
+                "[GraphRAGKB] Step2 KGGen starting: %d chunks, per_chunk=%s, log_interval=%s",
+                len(all_chunks), cfg.KGGEN_PER_CHUNK, getattr(cfg, "KGGEN_LOG_CHUNK_INTERVAL", 10),
+            )
+            kg_result = await asyncio.to_thread(
+                extract_kg_from_chunks,
+                all_chunks,
+                model=kg_llm_model,
+                api_base=req.chat_api_url.rstrip("/"),
+                api_key=req.api_key,
+            )
+            log.info(
+                "[GraphRAGKB] KGGen → %d entities, %d relations",
+                len(kg_result.get("entities", [])),
+                len(kg_result.get("relations", [])),
+            )
+        except Exception as exc:
+            log.warning("[GraphRAGKB] KGGen extraction skipped: %s", exc)
+    else:
+        log.debug("[GraphRAGKB] KGGen skipped (skip_kggen=True)")
+
+    # Step 3 — GraphRAG workspace + indexing
+    workspace_dir = req.workspace_dir or _default_workspace_dir(req)
+    log.info("[GraphRAGKB] Step3 GraphRAG index → %s", workspace_dir)
+    ws = await asyncio.to_thread(
+        build_index,
+        all_chunks,
+        workspace_dir,
+        llm_model=req.model or cfg.GRAPHRAG_LLM_MODEL,
+        embedding_model=cfg.GRAPHRAG_EMBEDDING_MODEL,
+        api_base=req.chat_api_url.rstrip("/"),
+        api_key=req.api_key,
+        force_reindex=req.force_reindex,
+    )
+
+    state.agent_results["index"] = {
+        "workspace_dir": str(ws.root),
+        "num_chunks": len(all_chunks),
+        "kg_entities": len(kg_result.get("entities", [])) if kg_result else 0,
+        "kg_relations": len(kg_result.get("relations", [])) if kg_result else 0,
+    }
+
+
+async def _action_query(state: GraphRAGKBState) -> None:
+    """Answer ``state.request.question`` against an indexed GraphRAG workspace.
+
+    Runs the local or global search, optionally prunes the reasoning subgraph with an LLM,
+    scores the answer with the judge and stores everything in ``agent_results["query"]``.
+    """
+    import asyncio
+    from fastapi_app.config.settings import settings as cfg
+    from workflow_engine.toolkits.graphrag_ms_tool.indexer import GraphRAGWorkspace
+    from workflow_engine.toolkits.graphrag_ms_tool.querier import query_local, query_global
+    from workflow_engine.toolkits.graphrag_ms_tool.judge import judge_confidence
+    from workflow_engine.toolkits.graphrag_ms_tool.subgraph_pruner import prune_reasoning_subgraph_llm
+
+    req = state.request
+    workspace_dir = req.workspace_dir or _default_workspace_dir(req)
+    ws = GraphRAGWorkspace(root=Path(workspace_dir).resolve())
+    # Endpoint settings shared by the search, prune and judge calls.
+    llm_kwargs = {"api_base": req.chat_api_url.rstrip("/"), "api_key": req.api_key}
+
+    # Anything other than "local" selects the global search.
+    if req.search_method == "local":
+        search_fn = query_local
+    else:
+        search_fn = query_global
+    result = await asyncio.to_thread(search_fn, ws, req.question, **llm_kwargs)
+
+    cot = ""
+    if cfg.GRAPHRAG_SUBGRAPH_PRUNE_ENABLED and result.reasoning_subgraph:
+        pruned = await asyncio.to_thread(
+            prune_reasoning_subgraph_llm,
+            req.question,
+            result.answer,
+            result.reasoning_subgraph,
+            model=req.model,
+            max_edges_input=int(cfg.GRAPHRAG_SUBGRAPH_PRUNE_MAX_EDGES_INPUT),
+            **llm_kwargs,
+        )
+        # The pruned edge list replaces the raw subgraph for the judge and for the response.
+        result.reasoning_subgraph = pruned.edges
+        cot = pruned.cot
+
+    verdict = await asyncio.to_thread(
+        judge_confidence,
+        req.question,
+        result.answer,
+        result.reasoning_subgraph,
+        **llm_kwargs,
+    )
+
+    state.agent_results["query"] = {
+        "answer": result.answer,
+        "context_data": result.context_data,
+        "reasoning_subgraph": result.reasoning_subgraph,
+        "reasoning_subgraph_cot": cot,
+        "source_chunks": result.source_chunks,
+        "highlight_hints": result.highlight_hints,
+        "judge_score": verdict.score,
+        "judge_rationale": verdict.rationale,
+    }
+
+
+async def _action_merge(state: GraphRAGKBState) -> None:
+    """Rebuild the chunks of two workspaces and index them into ``<workspace A>_merged``.
+
+    Workspace A falls back to the default workspace (as index/query do); workspace B is required.
+    Raises ``ValueError`` if B is missing or no chunk is found; writes ``agent_results["merge"]``.
+    """
+    import asyncio
+    from fastapi_app.config.settings import settings as cfg
+    from workflow_engine.toolkits.graphrag_ms_tool.indexer import GraphRAGWorkspace, build_index
+
+    req = state.request
+    if not req.workspace_dir_b:
+        # Path("").resolve() is the process cwd — never merge from there by accident.
+        raise ValueError("workspace_dir_b is required for the merge action.")
+    ws_a = GraphRAGWorkspace(root=Path(req.workspace_dir or _default_workspace_dir(req)).resolve())
+    ws_b = GraphRAGWorkspace(root=Path(req.workspace_dir_b).resolve())
+
+    all_chunks: List[Dict[str, Any]] = []
+    _chunk_pattern = re.compile(r"\[chunk:([a-f0-9]+)\]\n")
+
+    for ws in (ws_a, ws_b):
+        meta = ws.load_chunk_meta()
+        for txt in sorted(ws.input_dir.glob("*.txt")):  # sorted → reproducible chunk order
+            # split() with one capture group yields ['', cid1, text1, cid2, text2, ...]
+            parts = _chunk_pattern.split(txt.read_text(encoding="utf-8"))
+            for cid, chunk_text in zip(parts[1::2], parts[2::2]):
+                m = meta.get(cid, {})
+                all_chunks.append(
+                    {
+                        "chunk_id": cid,
+                        "text": chunk_text.strip(),
+                        "page_index": m.get("page_index", -1),
+                        "order": m.get("order", -1),
+                        "bbox": m.get("bbox"),
+                        "source_stem": m.get("source_stem", txt.stem),
+                    }
+                )
+
+    if not all_chunks:
+        raise ValueError("No chunks found in either workspace.")
+
+    merged_dir = str(ws_a.root) + "_merged"
+    ws_merged = await asyncio.to_thread(
+        build_index,
+        all_chunks,
+        merged_dir,
+        llm_model=req.model or cfg.GRAPHRAG_LLM_MODEL,
+        embedding_model=cfg.GRAPHRAG_EMBEDDING_MODEL,
+        api_base=req.chat_api_url.rstrip("/"),
+        api_key=req.api_key,
+        force_reindex=True,
+    )
+
+    state.agent_results["merge"] = {
+        "merged_workspace_dir": str(ws_merged.root),
+        "num_chunks": len(all_chunks),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+async def _ensure_mineru_parsed(manager: Any, nb_paths: Any, sources: list, force: bool) -> None:
+    """Run MinerU on the PDF entries of *sources* and refresh their unified markdown.
+
+    A PDF for which ``manager.get_mineru_root`` already returns a root is skipped unless
+    *force* is true; non-PDF sources are skipped silently. A missing original file or a
+    MinerU error is only logged as a warning, and processing continues with the next source.
+
+    *manager* / *nb_paths* are the notebook's source manager and path helper; each item of
+    *sources* must expose ``stem`` and ``file_type``.
+    """
+    for src in sources:
+        if src.file_type != "pdf":
+            continue
+
+        already_parsed = manager.get_mineru_root(src.stem) is not None
+        if already_parsed and not force:
+            log.info("[GraphRAGKB] MinerU already done for %s — skipping", src.stem)
+            continue
+
+        orig = manager.get_original_path(src.stem)
+        if not orig or not orig.exists():
+            log.warning("[GraphRAGKB] Original PDF not found for %s — skipping MinerU", src.stem)
+            continue
+
+        mineru_dir = nb_paths.source_mineru_dir(orig.name)
+        mineru_dir.mkdir(parents=True, exist_ok=True)
+        log.info("[GraphRAGKB] Running MinerU on %s …", orig.name)
+        try:
+            await manager._run_mineru(orig, mineru_dir)
+        except Exception as exc:
+            log.warning("[GraphRAGKB] MinerU failed for %s: %s", orig.name, exc)
+            continue
+
+        # Regenerate unified markdown now that MinerU output exists
+        md_text = manager._generate_markdown(orig, ".pdf", mineru_dir)
+        if md_text:
+            md_dir = nb_paths.source_markdown_dir(orig.name)
+            md_dir.mkdir(parents=True, exist_ok=True)
+            (md_dir / f"{src.stem}.md").write_text(md_text, encoding="utf-8")
+        log.info("[GraphRAGKB] MinerU + markdown done for %s", orig.name)
+
+
+def _default_workspace_dir(req: GraphRAGKBRequest) -> str:
+    """Return ``<project root>/<GRAPHRAG_OUTPUT_DIR>/<sanitized email>/<path-safe notebook id>``."""
+    from workflow_engine.utils import get_project_root
+    from fastapi_app.config.settings import settings as cfg
+    from fastapi_app.notebook_paths import _sanitize_user_id
+
+    safe_email = _sanitize_user_id(req.email) if req.email else "local"
+    # Neutralise both path separators and pure-dot ids ("." / "..") so the id stays inside its parent.
+    nb_id = re.sub(r"[/\\]|^\.+$", "_", req.notebook_id or "default")[:64]
+    return str(get_project_root() / cfg.GRAPHRAG_OUTPUT_DIR / safe_email / nb_id)