diff --git a/backend/app/config.py b/backend/app/config.py
index aa78176..d139a2b 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -1,5 +1,6 @@
 """Application configuration using Pydantic Settings."""
 
+import os
 from pathlib import Path
 from typing import Literal
 
@@ -101,3 +102,8 @@ def cors_origin_list(self) -> list[str]:
 
 
 settings = Settings()
+
+# Propagate CUDA_VISIBLE_DEVICES to os.environ so PyTorch (which reads it
+# at import time, before our code runs) respects the user's .env config.
+if settings.cuda_visible_devices and "CUDA_VISIBLE_DEVICES" not in os.environ:
+    os.environ["CUDA_VISIBLE_DEVICES"] = settings.cuda_visible_devices
diff --git a/backend/app/pipelines/chat/nodes.py b/backend/app/pipelines/chat/nodes.py
index 7bfd42e..aad4a25 100644
--- a/backend/app/pipelines/chat/nodes.py
+++ b/backend/app/pipelines/chat/nodes.py
@@ -161,18 +161,12 @@ async def retrieve_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     results = await asyncio.gather(*tasks, return_exceptions=True)
 
     all_sources: list[dict[str, Any]] = []
-    all_contexts: list[str] = []
     for result in results:
         if isinstance(result, Exception):
             logger.warning("RAG query failed for a KB: %s", result)
             continue
         if result.get("sources"):
             all_sources.extend(result["sources"])
-            for src in result["sources"]:
-                all_contexts.append(
-                    f"[Source: {src.get('paper_title', 'Unknown')}, "
-                    f"p.{src.get('page_number', '?')}]\n{src.get('excerpt', '')}"
-                )
 
     _emit_thinking(
         writer,
@@ -183,7 +177,7 @@ async def retrieve_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
         summary=f"Found {len(all_sources)} relevant sources",
     )
 
-    return {"rag_results": all_sources, "all_contexts": all_contexts}
+    return {"rag_results": all_sources}
 
 
 # ---------------------------------------------------------------------------
@@ -212,14 +206,17 @@ async def rank_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     papers_by_id = {p.id: p for p in result.scalars().all()}
 
     citations: list[CitationDict] = []
+    all_contexts: list[str] = []
     for i, src in enumerate(all_sources, 1):
         paper = papers_by_id.get(src.get("paper_id")) if src.get("paper_id") else None
+        title = paper.title if paper and paper.title else src.get("paper_title", "")
+        excerpt = src.get("excerpt", "")
         cit: CitationDict = {
             "index": i,
             "paper_id": src.get("paper_id"),
-            "paper_title": src.get("paper_title", ""),
+            "paper_title": title,
             "page_number": src.get("page_number"),
-            "excerpt": src.get("excerpt", ""),
+            "excerpt": excerpt,
             "relevance_score": src.get("relevance_score", 0),
             "chunk_type": src.get("chunk_type", "text"),
             "authors": paper.authors if paper else None,
@@ -227,6 +224,7 @@ async def rank_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
             "doi": paper.doi if paper else None,
         }
         citations.append(cit)
+        all_contexts.append(f"[Source: {title}, p.{src.get('page_number', '?')}]\n{excerpt}")
         writer(
             {
                 "type": "data-citation",
@@ -245,7 +243,7 @@ async def rank_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
         summary=f"Selected {high_relevance} high-relevance citations (>60%)",
    )
 
-    return {"citations": citations}
+    return {"citations": citations, "all_contexts": all_contexts}
 
 
 # ---------------------------------------------------------------------------
@@ -430,6 +428,15 @@ async def persist_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     db.add(assistant_msg)
     await db.commit()
 
+    citation_count = len(state.get("citations") or [])
+    _emit_thinking(
+        writer,
+        "complete",
+        "Complete",
+        status="done",
+        summary=f"Generating answer · {citation_count} citations" if citation_count else "Generating answer",
+    )
+
     writer(
         {
             "type": "data-conversation",
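For orientation, this is the thinking-step payload shape the `_emit_thinking` calls above appear to produce for the frontend. The field names are assumptions inferred from the call sites in this diff and from `ThinkingChain.tsx` below, not an authoritative schema:

```ts
// Assumed event shape (hypothetical field names, inferred from this diff).
interface ThinkingStep {
  step: string;                // stable step key, e.g. "complete"
  title: string;               // display label, e.g. "Complete"
  status: 'running' | 'done';  // each step is emitted twice: running, then done
  summary?: string;            // e.g. "Found 3 relevant sources"
  durationMs?: number;         // rendered via formatDuration(), if present
}
```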
diff --git a/backend/app/services/ocr_service.py b/backend/app/services/ocr_service.py
index 0ac88c3..9199c73 100644
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -28,7 +28,7 @@ def extract_text_native(self, pdf_path: str) -> list[dict]:
         try:
             with pdfplumber.open(pdf_path) as pdf:
                 for i, page in enumerate(pdf.pages):
-                    text = page.extract_text() or ""
+                    text = page.extract_text(x_tolerance=1) or ""
                     tables = page.extract_tables() or []
 
                     page_data = {
diff --git a/backend/app/services/rag_service.py b/backend/app/services/rag_service.py
index 059a676..f31d646 100644
--- a/backend/app/services/rag_service.py
+++ b/backend/app/services/rag_service.py
@@ -245,7 +245,7 @@ async def query(
                     "page_number": meta.get("page_number"),
                     "chunk_type": meta.get("chunk_type", "text"),
                     "relevance_score": round(float(score), 3),
-                    "excerpt": text[:500] + "..." if len(text) > 500 else text,
+                    "excerpt": full_context[:800] + "..." if len(full_context) > 800 else full_context,
                 }
             )
 
diff --git a/docs/brainstorms/2026-03-12-thinking-chain-citation-quality-brainstorm.md b/docs/brainstorms/2026-03-12-thinking-chain-citation-quality-brainstorm.md
new file mode 100644
index 0000000..f1fad52
--- /dev/null
+++ b/docs/brainstorms/2026-03-12-thinking-chain-citation-quality-brainstorm.md
@@ -0,0 +1,61 @@
+---
+date: 2026-03-12
+topic: thinking-chain-citation-quality
+---
+
+# Thinking-Chain UX + Citation Quality
+
+## Problems to Solve
+
+Users hit four issues in the chat Playground:
+
+1. **Thinking-chain steps render twice**: each step (Understanding query, Searching knowledge base, etc.) shows up as two rows, because the backend emits two events per step (`running` + `done`) and the frontend never deduplicates them
+2. **No auto-collapse after completion**: the thinking chain should collapse to a one-line summary once generation finishes, expanding on click to review the process
+3. **Wrong paper titles**: all five citations show a `paper_title` of "This is an open access article published under a Creative Commons...", because PDF metadata extraction mistook the copyright notice for the title
+4. **Incomplete citation context**: expanding a citation card shows a truncated excerpt with run-together OCR text
+
+## Chosen Approach
+
+### Thinking-chain dedup + auto-collapse (frontend fix)
+
+Deduplicate by the `step` field before rendering in `ThinkingChain.tsx`, keeping the latest state per step (usage sketch below). Logic:
+
+- Walk the steps array, deduplicating with a `Map`
+- `running` overwrites a previous `running`; `done` overwrites a previous `running`
+- With duplicates gone, `allDoneLocal` evaluates correctly and auto-collapse works
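+
+A minimal usage sketch of the intended dedup (assuming each `ThinkingStep` carries a stable `step` key and a `status` field; see `deduplicateSteps` in the frontend diff below for the real implementation):
+
+```ts
+// Two raw events for the same step collapse to the latest one.
+const raw: ThinkingStep[] = [
+  { step: 'retrieve', title: 'Searching knowledge base', status: 'running' },
+  { step: 'retrieve', title: 'Searching knowledge base', status: 'done',
+    summary: 'Found 5 relevant sources' },
+];
+deduplicateSteps(raw);
+// => [{ step: 'retrieve', status: 'done', summary: 'Found 5 relevant sources', ... }]
+```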
+
+### Paper-title fix (backend fix)
+
+`rank_node` already loads the `Paper` model objects from the database (whose titles are enriched via Crossref/DOI), but `paper_title` was still taken from the chunk metadata.
+
+Fix: have `paper_title` prefer `paper.title` (DB) and fall back to `src.get("paper_title")`.
+
+### Citation-context expansion (backend fix)
+
+In the current `query()` method of `rag_service.py`:
+
+- `contexts` (what the LLM sees) already uses `full_context` (including adjacent chunks)
+- `sources` (the citation data sent to the frontend) only uses `text[:500]`
+
+Fix: build `excerpt` from a truncated `full_context` too, so the excerpt users see matches what the LLM saw.
+
+### OCR text quality
+
+Rely on the existing `clean_node` LLM cleanup; no extra regex post-processing for now.
+
+## Key Decisions
+
+- **Deduplicate on the frontend**: the backend's running/done double emission is intentional (it enables live status updates); the frontend owns dedup for display
+- **Prefer DB data for titles**: `Paper.title` in the DB may be enriched via the Crossref API and is more accurate than heuristic PDF extraction
+- **Use full_context for excerpts**: keeps excerpts consistent with the context the LLM saw and gives users more complete citation context
+
+## Files Involved
+
+| File | Change |
+|------|--------|
+| `frontend/src/components/playground/ThinkingChain.tsx` | Step dedup logic |
+| `backend/app/pipelines/chat/nodes.py` (`rank_node`) | `paper_title` prefers `paper.title` |
+| `backend/app/services/rag_service.py` (`query`) | `excerpt` uses `full_context` |
+
+## Next Step
+
+→ Implement the fixes directly (small diff; no detailed plan needed)
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index d0a7999..89562e8 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -33,8 +33,8 @@ function App() {
-          <Route path="/chat" element={<PlaygroundPage />} />
-          <Route path="/chat/:conversationId" element={<PlaygroundPage />} />
+          <Route path="/chat" element={<PlaygroundPage key="playground" />} />
+          <Route path="/chat/:conversationId" element={<PlaygroundPage key="playground" />} />
diff --git a/frontend/src/components/playground/CitationCard.tsx b/frontend/src/components/playground/CitationCard.tsx
index 391625a..4263f25 100644
--- a/frontend/src/components/playground/CitationCard.tsx
+++ b/frontend/src/components/playground/CitationCard.tsx
@@ -15,8 +15,8 @@ const RELEVANCE_STYLES = {
   low: "bg-zinc-100 text-zinc-500 dark:bg-zinc-800 dark:text-zinc-400",
 } as const;
 
-const EXCERPT_PREVIEW_LENGTH = 300;
-const EXCERPT_MAX_DISPLAY = 500;
+const EXCERPT_PREVIEW_LENGTH = 400;
+const EXCERPT_MAX_DISPLAY = 800;
 
 export const CITATION_COLORS = [
   "#3B82F6",
diff --git a/frontend/src/components/playground/ThinkingChain.tsx b/frontend/src/components/playground/ThinkingChain.tsx
index 838c2a1..e745d07 100644
--- a/frontend/src/components/playground/ThinkingChain.tsx
+++ b/frontend/src/components/playground/ThinkingChain.tsx
@@ -50,10 +50,19 @@ function formatDuration(ms?: number): string {
   return `${(ms / 1000).toFixed(1)}s`;
 }
 
-function ThinkingChain({ steps }: ThinkingChainProps) {
+function deduplicateSteps(raw: ThinkingStep[]): ThinkingStep[] {
+  const map = new Map<string, ThinkingStep>();
+  for (const step of raw) {
+    map.set(step.step, step);
+  }
+  return Array.from(map.values());
+}
+
+function ThinkingChain({ steps: rawSteps }: ThinkingChainProps) {
   const { t } = useTranslation();
   const [userOverride, setUserOverride] = useState<boolean | null>(null);
 
+  const steps = deduplicateSteps(rawSteps);
   const allDoneLocal = steps.every((s) => s.status !== 'running');
   const expanded = userOverride !== null ? userOverride : !allDoneLocal;
 
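The collapse rule in the hunk above reads more plainly as nullish coalescing — an equivalent sketch, assuming `userOverride` is `boolean | null`:

```ts
// Expanded while any step is still running; collapsed once all are done;
// an explicit user toggle always wins.
const expandedFor = (steps: ThinkingStep[], userOverride: boolean | null) =>
  userOverride ?? steps.some((s) => s.status === 'running');
```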
diff --git a/frontend/src/hooks/use-chat-stream.ts b/frontend/src/hooks/use-chat-stream.ts
index f9a1a36..0fb0de7 100644
--- a/frontend/src/hooks/use-chat-stream.ts
+++ b/frontend/src/hooks/use-chat-stream.ts
@@ -1,6 +1,6 @@
 import { useMemo, useDeferredValue, useRef, useCallback } from 'react';
 import { useChat } from '@ai-sdk/react';
-import { createChatTransport } from '@/lib/chat-transport';
+import { createRefChatTransport } from '@/lib/chat-transport';
 import type {
   OmeletteUIMessage,
   OmeletteDataParts,
@@ -44,17 +44,13 @@ export function useChatStream({
   const onConversationIdRef = useRef(onConversationId);
   onConversationIdRef.current = onConversationId;
 
-  const transport = useMemo(
-    () =>
-      createChatTransport({
-        conversationId,
-        knowledgeBaseIds,
-        toolMode,
-        model,
-      }),
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-    [conversationId, JSON.stringify(knowledgeBaseIds), toolMode, model],
-  );
+  // AI SDK's useChat stores the Chat instance in a useRef and never recreates
+  // it when the transport prop changes. To work around this, we create ONE
+  // stable transport that reads the latest options from a ref on every request.
+  const optionsRef = useRef({ conversationId, knowledgeBaseIds, toolMode, model });
+  optionsRef.current = { conversationId, knowledgeBaseIds, toolMode, model };
+
+  const transport = useMemo(() => createRefChatTransport(optionsRef), []);
 
   const chat = useChat({
     transport,
diff --git a/frontend/src/lib/chat-transport.ts b/frontend/src/lib/chat-transport.ts
index cf00e5c..c3e5867 100644
--- a/frontend/src/lib/chat-transport.ts
+++ b/frontend/src/lib/chat-transport.ts
@@ -1,28 +1,40 @@
+import type { MutableRefObject } from 'react';
 import { DefaultChatTransport } from 'ai';
 import type { OmeletteUIMessage } from '@/types/chat';
 import { getMessageText } from '@/types/chat';
 
-interface ChatTransportOptions {
+export interface ChatTransportOptions {
   conversationId?: number;
   knowledgeBaseIds?: number[];
   toolMode?: string;
   model?: string;
 }
 
-export function createChatTransport(options: ChatTransportOptions) {
+/**
+ * Create a stable transport that reads options from a ref on every request.
+ *
+ * AI SDK 5.0's `useChat` stores the `Chat` instance in a `useRef` and never
+ * recreates it when the `transport` prop changes. A ref-based transport
+ * ensures each request always uses the *latest* knowledge-base selection,
+ * tool mode, etc. without needing to recreate the Chat object.
+ */
+export function createRefChatTransport(
+  optionsRef: MutableRefObject<ChatTransportOptions>,
+) {
   return new DefaultChatTransport<OmeletteUIMessage>({
     api: '/api/v1/chat/stream',
     prepareSendMessagesRequest({ messages, trigger }) {
+      const opts = optionsRef.current;
       const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
       const messageText = lastUserMsg ? getMessageText(lastUserMsg) : '';
       return {
         body: {
           message: messageText,
-          conversation_id: options.conversationId ?? null,
-          knowledge_base_ids: options.knowledgeBaseIds ?? [],
-          tool_mode: options.toolMode ?? 'qa',
-          model: options.model ?? null,
+          conversation_id: opts.conversationId ?? null,
+          knowledge_base_ids: opts.knowledgeBaseIds ?? [],
+          tool_mode: opts.toolMode ?? 'qa',
+          model: opts.model ?? null,
           trigger,
         },
         headers: {
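A rough sketch of the `useChat` behavior this works around — a hypothetical simplification for illustration, not the actual AI SDK source:

```ts
import { useRef } from 'react';

// The Chat object is created once and kept in a ref, so a different
// transport passed on a later render is silently ignored.
function useChatSimplified<T>(options: { transport: T }) {
  const chatRef = useRef<{ transport: T } | null>(null);
  if (chatRef.current === null) {
    chatRef.current = { transport: options.transport };
  }
  return chatRef.current; // still holds the first transport forever
}
```

Hence the fix: keep one transport for the lifetime of the hook and route the mutable options through `optionsRef`, which is read at request time rather than captured at construction time.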
diff --git a/frontend/src/pages/PlaygroundPage.tsx b/frontend/src/pages/PlaygroundPage.tsx
index 91d888e..7913a18 100644
--- a/frontend/src/pages/PlaygroundPage.tsx
+++ b/frontend/src/pages/PlaygroundPage.tsx
@@ -81,10 +81,13 @@ export default function PlaygroundPage() {
     (cid: number) => {
       setNewConversationId(cid);
       if (!routeConvId) {
-        navigate(`/chat/${cid}`, { replace: true });
+        // Update URL without React Router navigation to avoid unmount/remount
+        // during streaming. The key="playground" on the Route prevents remount
+        // for sidebar clicks, but replaceState is safer during active streams.
+        window.history.replaceState(null, '', `/chat/${cid}`);
       }
     },
-    [routeConvId, navigate],
+    [routeConvId],
   );
 
   const {
@@ -102,6 +105,24 @@ export default function PlaygroundPage() {
     onError: (err) => toast.error(err.message || t('playground.streamError')),
   });
 
+  // When navigating to a different conversation via sidebar, load its messages
+  // into the existing Chat instance (which is preserved thanks to key="playground").
+  const prevConvIdRef = useRef(convIdNum);
+  useEffect(() => {
+    if (convIdNum !== prevConvIdRef.current) {
+      prevConvIdRef.current = convIdNum;
+      setNewConversationId(undefined);
+      setToolModeOverride(null);
+      setSelectedKBsOverride(null);
+    }
+  }, [convIdNum]);
+
+  useEffect(() => {
+    if (restoredConv && restoredMessages.length > 0 && convIdNum != null) {
+      setMessages(restoredMessages);
+    }
+  }, [restoredConv, restoredMessages, convIdNum, setMessages]);
+
   useEffect(() => {
     bottomRef.current?.scrollIntoView({ behavior: 'smooth' });
   }, [messages]);
@@ -120,7 +141,6 @@ export default function PlaygroundPage() {
   const handleNewChat = () => {
     stop();
     setMessages([]);
-    setMessages([]);
     setNewConversationId(undefined);
     setToolModeOverride(null);
     setSelectedKBsOverride(null);
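For reference, the trade-off behind the `replaceState` change, sketched (assuming `navigate` comes from react-router-dom's `useNavigate`):

```ts
// Re-renders the router tree; without a stable key on the route element,
// this can unmount and remount the page component mid-stream.
navigate(`/chat/${cid}`, { replace: true });

// Rewrites the address bar only; React does not re-render, so the streaming
// component tree is untouched. The router stays unaware of the new URL until
// the next real navigation, which is acceptable for a replace-style update.
window.history.replaceState(null, '', `/chat/${cid}`);
```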