diff --git a/backend/app/config.py b/backend/app/config.py
index aa78176..d139a2b 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -1,5 +1,6 @@
 """Application configuration using Pydantic Settings."""
 
+import os
 from pathlib import Path
 from typing import Literal
 
@@ -101,3 +102,8 @@ def cors_origin_list(self) -> list[str]:
 
 
 settings = Settings()
+
+# Propagate CUDA_VISIBLE_DEVICES to os.environ so PyTorch (which reads it
+# at import time, before our code runs) respects the user's .env config.
+if settings.cuda_visible_devices and "CUDA_VISIBLE_DEVICES" not in os.environ:
+    os.environ["CUDA_VISIBLE_DEVICES"] = settings.cuda_visible_devices
diff --git a/backend/app/pipelines/chat/nodes.py b/backend/app/pipelines/chat/nodes.py
index 7bfd42e..aad4a25 100644
--- a/backend/app/pipelines/chat/nodes.py
+++ b/backend/app/pipelines/chat/nodes.py
@@ -161,18 +161,12 @@ async def retrieve_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     results = await asyncio.gather(*tasks, return_exceptions=True)
 
     all_sources: list[dict[str, Any]] = []
-    all_contexts: list[str] = []
     for result in results:
         if isinstance(result, Exception):
             logger.warning("RAG query failed for a KB: %s", result)
             continue
         if result.get("sources"):
             all_sources.extend(result["sources"])
-            for src in result["sources"]:
-                all_contexts.append(
-                    f"[Source: {src.get('paper_title', 'Unknown')}, "
-                    f"p.{src.get('page_number', '?')}]\n{src.get('excerpt', '')}"
-                )
 
     _emit_thinking(
         writer,
@@ -183,7 +177,7 @@ async def retrieve_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
         summary=f"Found {len(all_sources)} relevant sources",
     )
 
-    return {"rag_results": all_sources, "all_contexts": all_contexts}
+    return {"rag_results": all_sources}
 
 
 # ---------------------------------------------------------------------------
@@ -212,14 +206,17 @@ async def rank_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     papers_by_id = {p.id: p for p in result.scalars().all()}
 
     citations: list[CitationDict] = []
+    all_contexts: list[str] = []
     for i, src in enumerate(all_sources, 1):
         paper = papers_by_id.get(src.get("paper_id")) if src.get("paper_id") else None
+        title = paper.title if paper and paper.title else src.get("paper_title", "")
+        excerpt = src.get("excerpt", "")
         cit: CitationDict = {
             "index": i,
             "paper_id": src.get("paper_id"),
-            "paper_title": src.get("paper_title", ""),
+            "paper_title": title,
             "page_number": src.get("page_number"),
-            "excerpt": src.get("excerpt", ""),
+            "excerpt": excerpt,
             "relevance_score": src.get("relevance_score", 0),
             "chunk_type": src.get("chunk_type", "text"),
             "authors": paper.authors if paper else None,
@@ -227,6 +224,7 @@ async def rank_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
             "doi": paper.doi if paper else None,
         }
         citations.append(cit)
+        all_contexts.append(f"[Source: {title}, p.{src.get('page_number', '?')}]\n{excerpt}")
         writer(
             {
                 "type": "data-citation",
@@ -245,7 +243,7 @@ async def rank_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
         summary=f"Selected {high_relevance} high-relevance citations (>60%)",
    )
 
-    return {"citations": citations}
+    return {"citations": citations, "all_contexts": all_contexts}
 
 
 # ---------------------------------------------------------------------------
@@ -430,6 +428,15 @@ async def persist_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     db.add(assistant_msg)
     await db.commit()
 
+    citation_count = len(state.get("citations") or [])
+    _emit_thinking(
+        writer,
+        "complete",
+        "Complete",
+        status="done",
+        summary=f"Generating answer · {citation_count} citations" if citation_count else "Generating answer",
+    )
+
     writer(
         {
             "type": "data-conversation",
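For orientation, this is the thinking-step payload shape the `_emit_thinking` calls above appear to produce for the frontend. The field names are assumptions inferred from the call sites in this diff and from `ThinkingChain.tsx` below, not an authoritative schema:

```ts
// Assumed event shape (hypothetical field names, inferred from this diff).
interface ThinkingStep {
  step: string;                // stable step key, e.g. "complete"
  title: string;               // display label, e.g. "Complete"
  status: 'running' | 'done';  // each step is emitted twice: running, then done
  summary?: string;            // e.g. "Found 3 relevant sources"
  durationMs?: number;         // rendered via formatDuration(), if present
}
```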
diff --git a/backend/app/services/ocr_service.py b/backend/app/services/ocr_service.py
index 0ac88c3..9199c73 100644
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -28,7 +28,7 @@ def extract_text_native(self, pdf_path: str) -> list[dict]:
         try:
             with pdfplumber.open(pdf_path) as pdf:
                 for i, page in enumerate(pdf.pages):
-                    text = page.extract_text() or ""
+                    text = page.extract_text(x_tolerance=1) or ""
                     tables = page.extract_tables() or []
 
                     page_data = {
diff --git a/backend/app/services/rag_service.py b/backend/app/services/rag_service.py
index 059a676..f31d646 100644
--- a/backend/app/services/rag_service.py
+++ b/backend/app/services/rag_service.py
@@ -245,7 +245,7 @@ async def query(
                     "page_number": meta.get("page_number"),
                     "chunk_type": meta.get("chunk_type", "text"),
                     "relevance_score": round(float(score), 3),
-                    "excerpt": text[:500] + "..." if len(text) > 500 else text,
+                    "excerpt": full_context[:800] + "..." if len(full_context) > 800 else full_context,
                 }
             )
 
diff --git a/docs/brainstorms/2026-03-12-thinking-chain-citation-quality-brainstorm.md b/docs/brainstorms/2026-03-12-thinking-chain-citation-quality-brainstorm.md
new file mode 100644
index 0000000..f1fad52
--- /dev/null
+++ b/docs/brainstorms/2026-03-12-thinking-chain-citation-quality-brainstorm.md
@@ -0,0 +1,61 @@
+---
+date: 2026-03-12
+topic: thinking-chain-citation-quality
+---
+
+# Thinking-Chain UX + Citation Quality
+
+## Problems to Solve
+
+Users hit four issues in the chat Playground:
+
+1. **Thinking-chain steps render twice**: each step (Understanding query, Searching knowledge base, etc.) shows up as two rows, because the backend emits two events per step (`running` + `done`) and the frontend never deduplicates them
+2. **No auto-collapse after completion**: the thinking chain should collapse to a one-line summary once generation finishes, expanding on click to review the process
+3. **Wrong paper titles**: all five citations show a `paper_title` of "This is an open access article published under a Creative Commons...", because PDF metadata extraction mistook the copyright notice for the title
+4. **Incomplete citation context**: expanding a citation card shows a truncated excerpt with run-together OCR text
+
+## Chosen Approach
+
+### Thinking-chain dedup + auto-collapse (frontend fix)
+
+Deduplicate by the `step` field before rendering in `ThinkingChain.tsx`, keeping the latest state per step (usage sketch below). Logic:
+
+- Walk the steps array, deduplicating with a `Map`
+- `running` overwrites a previous `running`; `done` overwrites a previous `running`
+- With duplicates gone, `allDoneLocal` evaluates correctly and auto-collapse works
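+
+A minimal usage sketch of the intended dedup (assuming each `ThinkingStep` carries a stable `step` key and a `status` field; see `deduplicateSteps` in the frontend diff below for the real implementation):
+
+```ts
+// Two raw events for the same step collapse to the latest one.
+const raw: ThinkingStep[] = [
+  { step: 'retrieve', title: 'Searching knowledge base', status: 'running' },
+  { step: 'retrieve', title: 'Searching knowledge base', status: 'done',
+    summary: 'Found 5 relevant sources' },
+];
+deduplicateSteps(raw);
+// => [{ step: 'retrieve', status: 'done', summary: 'Found 5 relevant sources', ... }]
+```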
+
+### Paper-title fix (backend fix)
+
+`rank_node` already loads the `Paper` model objects from the database (whose titles are enriched via Crossref/DOI), but `paper_title` was still taken from the chunk metadata.
+
+Fix: have `paper_title` prefer `paper.title` (DB) and fall back to `src.get("paper_title")`.
+
+### Citation-context expansion (backend fix)
+
+In the current `query()` method of `rag_service.py`:
+
+- `contexts` (what the LLM sees) already uses `full_context` (including adjacent chunks)
+- `sources` (the citation data sent to the frontend) only uses `text[:500]`
+
+Fix: build `excerpt` from a truncated `full_context` too, so the excerpt users see matches what the LLM saw.
+
+### OCR text quality
+
+Rely on the existing `clean_node` LLM cleanup; no extra regex post-processing for now.
+
+## Key Decisions
+
+- **Deduplicate on the frontend**: the backend's running/done double emission is intentional (it enables live status updates); the frontend owns dedup for display
+- **Prefer DB data for titles**: `Paper.title` in the DB may be enriched via the Crossref API and is more accurate than heuristic PDF extraction
+- **Use full_context for excerpts**: keeps excerpts consistent with the context the LLM saw and gives users more complete citation context
+
+## Files Involved
+
+| File | Change |
+|------|--------|
+| `frontend/src/components/playground/ThinkingChain.tsx` | Step dedup logic |
+| `backend/app/pipelines/chat/nodes.py` (`rank_node`) | `paper_title` prefers `paper.title` |
+| `backend/app/services/rag_service.py` (`query`) | `excerpt` uses `full_context` |
+
+## Next Step
+
+→ Implement the fixes directly (small diff; no detailed plan needed)
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index d0a7999..89562e8 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -33,8 +33,8 @@ function App() {
-          <Route path="/chat" element={<PlaygroundPage />} />
-          <Route path="/chat/:conversationId" element={<PlaygroundPage />} />
+          <Route path="/chat" element={<PlaygroundPage key="playground" />} />
+          <Route path="/chat/:conversationId" element={<PlaygroundPage key="playground" />} />
diff --git a/frontend/src/components/playground/CitationCard.tsx b/frontend/src/components/playground/CitationCard.tsx
index 391625a..4263f25 100644
--- a/frontend/src/components/playground/CitationCard.tsx
+++ b/frontend/src/components/playground/CitationCard.tsx
@@ -15,8 +15,8 @@ const RELEVANCE_STYLES = {
   low: "bg-zinc-100 text-zinc-500 dark:bg-zinc-800 dark:text-zinc-400",
 } as const;
 
-const EXCERPT_PREVIEW_LENGTH = 300;
-const EXCERPT_MAX_DISPLAY = 500;
+const EXCERPT_PREVIEW_LENGTH = 400;
+const EXCERPT_MAX_DISPLAY = 800;
 
 export const CITATION_COLORS = [
   "#3B82F6",
diff --git a/frontend/src/components/playground/ThinkingChain.tsx b/frontend/src/components/playground/ThinkingChain.tsx
index 838c2a1..e745d07 100644
--- a/frontend/src/components/playground/ThinkingChain.tsx
+++ b/frontend/src/components/playground/ThinkingChain.tsx
@@ -50,10 +50,19 @@ function formatDuration(ms?: number): string {
   return `${(ms / 1000).toFixed(1)}s`;
 }
 
-function ThinkingChain({ steps }: ThinkingChainProps) {
+function deduplicateSteps(raw: ThinkingStep[]): ThinkingStep[] {
+  const map = new Map<string, ThinkingStep>();
+  for (const step of raw) {
+    map.set(step.step, step);
+  }
+  return Array.from(map.values());
+}
+
+function ThinkingChain({ steps: rawSteps }: ThinkingChainProps) {
   const { t } = useTranslation();
   const [userOverride, setUserOverride] = useState<boolean | null>(null);
 
+  const steps = deduplicateSteps(rawSteps);
   const allDoneLocal = steps.every((s) => s.status !== 'running');
   const expanded = userOverride !== null ? userOverride : !allDoneLocal;
 
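The collapse rule in the hunk above reads more plainly as nullish coalescing — an equivalent sketch, assuming `userOverride` is `boolean | null`:

```ts
// Expanded while any step is still running; collapsed once all are done;
// an explicit user toggle always wins.
const expandedFor = (steps: ThinkingStep[], userOverride: boolean | null) =>
  userOverride ?? steps.some((s) => s.status === 'running');
```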
diff --git a/frontend/src/hooks/use-chat-stream.ts b/frontend/src/hooks/use-chat-stream.ts
index f9a1a36..0fb0de7 100644
--- a/frontend/src/hooks/use-chat-stream.ts
+++ b/frontend/src/hooks/use-chat-stream.ts
@@ -1,6 +1,6 @@
 import { useMemo, useDeferredValue, useRef, useCallback } from 'react';
 import { useChat } from '@ai-sdk/react';
-import { createChatTransport } from '@/lib/chat-transport';
+import { createRefChatTransport } from '@/lib/chat-transport';
 import type {
   OmeletteUIMessage,
   OmeletteDataParts,
@@ -44,17 +44,13 @@ export function useChatStream({
   const onConversationIdRef = useRef(onConversationId);
   onConversationIdRef.current = onConversationId;
 
-  const transport = useMemo(
-    () =>
-      createChatTransport({
-        conversationId,
-        knowledgeBaseIds,
-        toolMode,
-        model,
-      }),
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-    [conversationId, JSON.stringify(knowledgeBaseIds), toolMode, model],
-  );
+  // AI SDK's useChat stores the Chat instance in a useRef and never recreates
+  // it when the transport prop changes. To work around this, we create ONE
+  // stable transport that reads the latest options from a ref on every request.
+  const optionsRef = useRef({ conversationId, knowledgeBaseIds, toolMode, model });
+  optionsRef.current = { conversationId, knowledgeBaseIds, toolMode, model };
+
+  const transport = useMemo(() => createRefChatTransport(optionsRef), []);
 
   const chat = useChat({
     transport,
diff --git a/frontend/src/lib/chat-transport.ts b/frontend/src/lib/chat-transport.ts
index cf00e5c..c3e5867 100644
--- a/frontend/src/lib/chat-transport.ts
+++ b/frontend/src/lib/chat-transport.ts
@@ -1,28 +1,40 @@
+import type { MutableRefObject } from 'react';
 import { DefaultChatTransport } from 'ai';
 import type { OmeletteUIMessage } from '@/types/chat';
 import { getMessageText } from '@/types/chat';
 
-interface ChatTransportOptions {
+export interface ChatTransportOptions {
   conversationId?: number;
   knowledgeBaseIds?: number[];
   toolMode?: string;
   model?: string;
 }
 
-export function createChatTransport(options: ChatTransportOptions) {
+/**
+ * Create a stable transport that reads options from a ref on every request.
+ *
+ * AI SDK 5.0's `useChat` stores the `Chat` instance in a `useRef` and never
+ * recreates it when the `transport` prop changes. A ref-based transport
+ * ensures each request always uses the *latest* knowledge-base selection,
+ * tool mode, etc. without needing to recreate the Chat object.
+ */
+export function createRefChatTransport(
+  optionsRef: MutableRefObject<ChatTransportOptions>,
+) {
   return new DefaultChatTransport<OmeletteUIMessage>({
     api: '/api/v1/chat/stream',
     prepareSendMessagesRequest({ messages, trigger }) {
+      const opts = optionsRef.current;
       const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
       const messageText = lastUserMsg ? getMessageText(lastUserMsg) : '';
       return {
         body: {
           message: messageText,
-          conversation_id: options.conversationId ?? null,
-          knowledge_base_ids: options.knowledgeBaseIds ?? [],
-          tool_mode: options.toolMode ?? 'qa',
-          model: options.model ?? null,
+          conversation_id: opts.conversationId ?? null,
+          knowledge_base_ids: opts.knowledgeBaseIds ?? [],
+          tool_mode: opts.toolMode ?? 'qa',
+          model: opts.model ?? null,
           trigger,
         },
         headers: {
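A rough sketch of the `useChat` behavior this works around — a hypothetical simplification for illustration, not the actual AI SDK source:

```ts
import { useRef } from 'react';

// The Chat object is created once and kept in a ref, so a different
// transport passed on a later render is silently ignored.
function useChatSimplified<T>(options: { transport: T }) {
  const chatRef = useRef<{ transport: T } | null>(null);
  if (chatRef.current === null) {
    chatRef.current = { transport: options.transport };
  }
  return chatRef.current; // still holds the first transport forever
}
```

Hence the fix: keep one transport for the lifetime of the hook and route the mutable options through `optionsRef`, which is read at request time rather than captured at construction time.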
diff --git a/frontend/src/pages/PlaygroundPage.tsx b/frontend/src/pages/PlaygroundPage.tsx
index 91d888e..7913a18 100644
--- a/frontend/src/pages/PlaygroundPage.tsx
+++ b/frontend/src/pages/PlaygroundPage.tsx
@@ -81,10 +81,13 @@ export default function PlaygroundPage() {
     (cid: number) => {
       setNewConversationId(cid);
       if (!routeConvId) {
-        navigate(`/chat/${cid}`, { replace: true });
+        // Update URL without React Router navigation to avoid unmount/remount
+        // during streaming. The key="playground" on the Route prevents remount
+        // for sidebar clicks, but replaceState is safer during active streams.
+        window.history.replaceState(null, '', `/chat/${cid}`);
       }
     },
-    [routeConvId, navigate],
+    [routeConvId],
   );
 
   const {
@@ -102,6 +105,24 @@ export default function PlaygroundPage() {
     onError: (err) => toast.error(err.message || t('playground.streamError')),
   });
 
+  // When navigating to a different conversation via sidebar, load its messages
+  // into the existing Chat instance (which is preserved thanks to key="playground").
+  const prevConvIdRef = useRef(convIdNum);
+  useEffect(() => {
+    if (convIdNum !== prevConvIdRef.current) {
+      prevConvIdRef.current = convIdNum;
+      setNewConversationId(undefined);
+      setToolModeOverride(null);
+      setSelectedKBsOverride(null);
+    }
+  }, [convIdNum]);
+
+  useEffect(() => {
+    if (restoredConv && restoredMessages.length > 0 && convIdNum != null) {
+      setMessages(restoredMessages);
+    }
+  }, [restoredConv, restoredMessages, convIdNum, setMessages]);
+
   useEffect(() => {
     bottomRef.current?.scrollIntoView({ behavior: 'smooth' });
   }, [messages]);
@@ -120,7 +141,6 @@ export default function PlaygroundPage() {
   const handleNewChat = () => {
     stop();
     setMessages([]);
-    setMessages([]);
     setNewConversationId(undefined);
     setToolModeOverride(null);
     setSelectedKBsOverride(null);
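For reference, the trade-off behind the `replaceState` change, sketched (assuming `navigate` comes from react-router-dom's `useNavigate`):

```ts
// Re-renders the router tree; without a stable key on the route element,
// this can unmount and remount the page component mid-stream.
navigate(`/chat/${cid}`, { replace: true });

// Rewrites the address bar only; React does not re-render, so the streaming
// component tree is untouched. The router stays unaware of the new URL until
// the next real navigation, which is acceptable for a replace-style update.
window.history.replaceState(null, '', `/chat/${cid}`);
```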