6 changes: 6 additions & 0 deletions backend/app/config.py
@@ -1,5 +1,6 @@
 """Application configuration using Pydantic Settings."""

+import os
 from pathlib import Path
 from typing import Literal

@@ -101,3 +102,8 @@ def cors_origin_list(self) -> list[str]:


 settings = Settings()
+
+# Propagate CUDA_VISIBLE_DEVICES to os.environ so PyTorch (which reads it
+# at import time, before our code runs) respects the user's .env config.
+if settings.cuda_visible_devices and "CUDA_VISIBLE_DEVICES" not in os.environ:
+    os.environ["CUDA_VISIBLE_DEVICES"] = settings.cuda_visible_devices
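For context, a minimal sketch of the ordering pitfall this guards against, assuming `cuda_visible_devices` is an optional string field on `Settings`:

```python
# Hypothetical repro of the pitfall, not part of the PR. Per the comment in
# config.py, PyTorch reads CUDA_VISIBLE_DEVICES when it is imported, so the
# variable must be in os.environ before the first `import torch` anywhere.
import os

os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")  # must run first

import torch  # noqa: E402  (deliberately imported after the env var is set)

# Only the devices listed above are visible; setting the variable after this
# import would not change what torch.cuda sees.
print(torch.cuda.device_count())
```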
27 changes: 17 additions & 10 deletions backend/app/pipelines/chat/nodes.py
@@ -161,18 +161,12 @@ async def retrieve_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     results = await asyncio.gather(*tasks, return_exceptions=True)

     all_sources: list[dict[str, Any]] = []
-    all_contexts: list[str] = []
     for result in results:
         if isinstance(result, Exception):
             logger.warning("RAG query failed for a KB: %s", result)
             continue
         if result.get("sources"):
             all_sources.extend(result["sources"])
-            for src in result["sources"]:
-                all_contexts.append(
-                    f"[Source: {src.get('paper_title', 'Unknown')}, "
-                    f"p.{src.get('page_number', '?')}]\n{src.get('excerpt', '')}"
-                )

     _emit_thinking(
         writer,
@@ -183,7 +177,7 @@ async def retrieve_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
         summary=f"Found {len(all_sources)} relevant sources",
     )

-    return {"rag_results": all_sources, "all_contexts": all_contexts}
+    return {"rag_results": all_sources}


 # ---------------------------------------------------------------------------
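For orientation, the pipeline state fields this handoff touches, inferred only from the `return` statements in this diff; the real `ChatState` surely has more fields, so the sketch below is illustrative:

```python
# Inferred sketch of the state keys touched by this PR. After the change,
# retrieve_node fills only rag_results, and rank_node derives both the
# citations and the all_contexts strings (now built from DB-enriched titles).
from typing import Any, TypedDict


class ChatStateSketch(TypedDict, total=False):
    rag_results: list[dict[str, Any]]  # raw sources from retrieve_node
    all_contexts: list[str]            # built in rank_node after this PR
    citations: list[dict[str, Any]]    # enriched CitationDict entries
```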
@@ -212,21 +206,25 @@ async def rank_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     papers_by_id = {p.id: p for p in result.scalars().all()}

     citations: list[CitationDict] = []
+    all_contexts: list[str] = []
     for i, src in enumerate(all_sources, 1):
         paper = papers_by_id.get(src.get("paper_id")) if src.get("paper_id") else None
+        title = paper.title if paper and paper.title else src.get("paper_title", "")
+        excerpt = src.get("excerpt", "")
         cit: CitationDict = {
             "index": i,
             "paper_id": src.get("paper_id"),
-            "paper_title": src.get("paper_title", ""),
+            "paper_title": title,
             "page_number": src.get("page_number"),
-            "excerpt": src.get("excerpt", ""),
+            "excerpt": excerpt,
             "relevance_score": src.get("relevance_score", 0),
             "chunk_type": src.get("chunk_type", "text"),
             "authors": paper.authors if paper else None,
             "year": paper.year if paper else None,
             "doi": paper.doi if paper else None,
         }
         citations.append(cit)
+        all_contexts.append(f"[Source: {title}, p.{src.get('page_number', '?')}]\n{excerpt}")
         writer(
             {
                 "type": "data-citation",
@@ -245,7 +243,7 @@ async def rank_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
         summary=f"Selected {high_relevance} high-relevance citations (>60%)",
     )

-    return {"citations": citations}
+    return {"citations": citations, "all_contexts": all_contexts}


 # ---------------------------------------------------------------------------
@@ -430,6 +428,15 @@ async def persist_node(state: ChatState, config: RunnableConfig) -> dict[str, Any]:
     db.add(assistant_msg)
     await db.commit()

+    citation_count = len(state.get("citations") or [])
+    _emit_thinking(
+        writer,
+        "complete",
+        "Complete",
+        status="done",
+        summary=f"Generating answer · {citation_count} citations" if citation_count else "Generating answer",
+    )
+
     writer(
         {
             "type": "data-conversation",
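`_emit_thinking` itself is not shown in this PR; judging from the call sites, it presumably wraps `writer` along these lines (signature, field names, and event type are guesses):

```python
# Hypothetical reconstruction from the call sites above. Each pipeline step
# emits a "running" event and later a "done" event for the same step id,
# which is why the frontend deduplicates by step (see ThinkingChain.tsx).
def _emit_thinking(writer, step: str, label: str, *, status: str, summary: str | None = None) -> None:
    writer(
        {
            "type": "data-thinking",  # assumed event type
            "step": step,
            "label": label,
            "status": status,  # "running" | "done"
            "summary": summary,
        }
    )
```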
2 changes: 1 addition & 1 deletion backend/app/services/ocr_service.py
@@ -28,7 +28,7 @@ def extract_text_native(self, pdf_path: str) -> list[dict]:
         try:
             with pdfplumber.open(pdf_path) as pdf:
                 for i, page in enumerate(pdf.pages):
-                    text = page.extract_text() or ""
+                    text = page.extract_text(x_tolerance=1) or ""
                     tables = page.extract_tables() or []

                     page_data = {
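For background: pdfplumber starts a new word when the horizontal gap between characters exceeds `x_tolerance` (default 3), so a tighter tolerance restores the spaces that tightly set PDFs otherwise lose. A quick way to compare the two settings, with `sample.pdf` standing in for any affected paper:

```python
# Compare extraction tolerances (sample.pdf is a placeholder path).
# A lower x_tolerance splits words separated by narrow gaps, which fixes the
# run-together text seen in citation excerpts; too low a value can over-split
# within words, so 1 is a tradeoff rather than a universal fix.
import pdfplumber

with pdfplumber.open("sample.pdf") as pdf:
    page = pdf.pages[0]
    print(page.extract_text() or "")               # default x_tolerance=3
    print(page.extract_text(x_tolerance=1) or "")  # tighter word-break rule
```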
2 changes: 1 addition & 1 deletion backend/app/services/rag_service.py
@@ -245,7 +245,7 @@ async def query(
                 "page_number": meta.get("page_number"),
                 "chunk_type": meta.get("chunk_type", "text"),
                 "relevance_score": round(float(score), 3),
-                "excerpt": text[:500] + "..." if len(text) > 500 else text,
+                "excerpt": full_context[:800] + "..." if len(full_context) > 800 else full_context,
             }
         )

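The new 800-character cap lines up with `EXCERPT_MAX_DISPLAY = 800` in `CitationCard.tsx` further down. As a possible follow-up, not part of this PR, the policy could live in one helper:

```python
# Illustrative helper, not in the PR: a single place for the excerpt policy
# so the backend cap and the frontend display cap stay in sync via the API.
def make_excerpt(full_context: str, limit: int = 800) -> str:
    """Truncate a chunk's context for display, marking the cut with an ellipsis."""
    if len(full_context) > limit:
        return full_context[:limit] + "..."
    return full_context
```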
@@ -0,0 +1,61 @@
---
date: 2026-03-12
topic: thinking-chain-citation-quality
---

# Thinking-Chain UX + Citation Quality Improvements

## Problems to Solve

Users hit four issues in the chat Playground:

1. **Thinking-chain steps render twice**: each step (Understanding query, Searching knowledge base, etc.) shows up as two rows, because the backend emits both a `running` and a `done` event per step and the frontend does not deduplicate them
2. **No auto-collapse on completion**: the thinking chain should fold into a summary once generation finishes, expanding on click to reveal the process
3. **Wrong paper titles**: all five citations show `paper_title` as "This is an open access article published under a Creative Commons...", because PDF metadata extraction mistook the copyright notice for the title
4. **Incomplete citation context**: expanding a citation card shows a truncated excerpt with run-together OCR text

## Chosen Approach

### Thinking-chain dedup + auto-collapse (frontend fix)

Deduplicate by the `step` field before rendering in `ThinkingChain.tsx`, keeping the latest state for each step. The logic (sketched right after this list):

- Walk the steps array, deduplicating through a `Map<step, ThinkingStep>`
- running → overwrites the previous running; done → overwrites the previous running
- With duplicates gone, `allDoneLocal` evaluates correctly and auto-collapse kicks in
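A language-neutral sketch of the rule in Python (the actual implementation is the TypeScript `deduplicateSteps` in the `ThinkingChain.tsx` diff below):

```python
# Sketch only: the last event per step wins, so a "done" event replaces the
# earlier "running" event for the same step while keeping first-seen order.
def deduplicate_steps(events: list[dict]) -> list[dict]:
    latest: dict[str, dict] = {}
    for event in events:
        latest[event["step"]] = event  # dict preserves insertion order
    return list(latest.values())


steps = [
    {"step": "retrieve", "status": "running"},
    {"step": "retrieve", "status": "done"},
    {"step": "rank", "status": "running"},
]
assert deduplicate_steps(steps) == [
    {"step": "retrieve", "status": "done"},
    {"step": "rank", "status": "running"},
]
```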

### Paper title fix (backend fix)

`rank_node` already loads `Paper` model objects from the database (with titles enriched via Crossref/DOI), yet `paper_title` was still taken from chunk metadata.

Fix: `paper_title` prefers `paper.title` (DB) and falls back to `src.get("paper_title")`.

### Citation context expansion (backend fix)

In `rag_service.py`'s `query()` method today:
- `contexts` (what the LLM sees) uses `full_context` (including neighboring chunks)
- `sources` (the citation data sent to the frontend) only uses `text[:500]`

Fix: derive `excerpt` from a truncated `full_context` as well, so the excerpt users see matches what the LLM saw.

### OCR text quality

Rely on the existing `clean_node` LLM cleanup; no extra regex post-processing for now.

## Key Decisions

- **Dedup on the frontend**: the backend's running/done double-emit is intentional (it powers live updates); the frontend owns deduplicated display
- **Prefer DB titles**: `Paper.title` in the DB may have been enriched via the Crossref API, making it more accurate than heuristic PDF extraction
- **Excerpts from `full_context`**: keeps what users see consistent with the context the LLM saw, and gives fuller citation context

## Files Involved

| File | Change |
|------|--------|
| `frontend/src/components/playground/ThinkingChain.tsx` | Step dedup logic |
| `backend/app/pipelines/chat/nodes.py` (`rank_node`) | `paper_title` prefers `paper.title` |
| `backend/app/services/rag_service.py` (`query`) | `excerpt` uses `full_context` |

## Next Step

→ Implement the fixes directly (small diff; no detailed plan needed)
4 changes: 2 additions & 2 deletions frontend/src/App.tsx
@@ -33,8 +33,8 @@ function App() {
       <Suspense fallback={<LoadingState className="h-screen" />}>
         <Routes>
           <Route path="/" element={<AppShell />}>
-            <Route index element={<PlaygroundPage />} />
-            <Route path="chat/:conversationId" element={<PlaygroundPage />} />
+            <Route index element={<PlaygroundPage key="playground" />} />
+            <Route path="chat/:conversationId" element={<PlaygroundPage key="playground" />} />
             <Route path="knowledge-bases" element={<KnowledgeBasesPage />} />
             <Route path="history" element={<ChatHistoryPage />} />
             <Route path="settings" element={<SettingsPage />} />
4 changes: 2 additions & 2 deletions frontend/src/components/playground/CitationCard.tsx
@@ -15,8 +15,8 @@ const RELEVANCE_STYLES = {
   low: "bg-zinc-100 text-zinc-500 dark:bg-zinc-800 dark:text-zinc-400",
 } as const;

-const EXCERPT_PREVIEW_LENGTH = 300;
-const EXCERPT_MAX_DISPLAY = 500;
+const EXCERPT_PREVIEW_LENGTH = 400;
+const EXCERPT_MAX_DISPLAY = 800;

 export const CITATION_COLORS = [
   "#3B82F6",
11 changes: 10 additions & 1 deletion frontend/src/components/playground/ThinkingChain.tsx
@@ -50,10 +50,19 @@ function formatDuration(ms?: number): string {
   return `${(ms / 1000).toFixed(1)}s`;
 }

-function ThinkingChain({ steps }: ThinkingChainProps) {
+function deduplicateSteps(raw: ThinkingStep[]): ThinkingStep[] {
+  const map = new Map<string, ThinkingStep>();
+  for (const step of raw) {
+    map.set(step.step, step);
+  }
+  return Array.from(map.values());
+}
+
+function ThinkingChain({ steps: rawSteps }: ThinkingChainProps) {
   const { t } = useTranslation();
   const [userOverride, setUserOverride] = useState<boolean | null>(null);

+  const steps = deduplicateSteps(rawSteps);
   const allDoneLocal = steps.every((s) => s.status !== 'running');
   const expanded = userOverride !== null ? userOverride : !allDoneLocal;
20 changes: 8 additions & 12 deletions frontend/src/hooks/use-chat-stream.ts
@@ -1,6 +1,6 @@
 import { useMemo, useDeferredValue, useRef, useCallback } from 'react';
 import { useChat } from '@ai-sdk/react';
-import { createChatTransport } from '@/lib/chat-transport';
+import { createRefChatTransport } from '@/lib/chat-transport';
 import type {
   OmeletteUIMessage,
   OmeletteDataParts,
@@ -44,17 +44,13 @@ export function useChatStream({
   const onConversationIdRef = useRef(onConversationId);
   onConversationIdRef.current = onConversationId;

-  const transport = useMemo(
-    () =>
-      createChatTransport({
-        conversationId,
-        knowledgeBaseIds,
-        toolMode,
-        model,
-      }),
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-    [conversationId, JSON.stringify(knowledgeBaseIds), toolMode, model],
-  );
+  // AI SDK's useChat stores the Chat instance in a useRef and never recreates
+  // it when the transport prop changes. To work around this, we create ONE
+  // stable transport that reads the latest options from a ref on every request.
+  const optionsRef = useRef({ conversationId, knowledgeBaseIds, toolMode, model });
+  optionsRef.current = { conversationId, knowledgeBaseIds, toolMode, model };
+
+  const transport = useMemo(() => createRefChatTransport(optionsRef), []);

   const chat = useChat<OmeletteUIMessage>({
     transport,
24 changes: 18 additions & 6 deletions frontend/src/lib/chat-transport.ts
@@ -1,28 +1,40 @@
+import type { MutableRefObject } from 'react';
 import { DefaultChatTransport } from 'ai';
 import type { OmeletteUIMessage } from '@/types/chat';
 import { getMessageText } from '@/types/chat';

-interface ChatTransportOptions {
+export interface ChatTransportOptions {
   conversationId?: number;
   knowledgeBaseIds?: number[];
   toolMode?: string;
   model?: string;
 }

-export function createChatTransport(options: ChatTransportOptions) {
+/**
+ * Create a stable transport that reads options from a ref on every request.
+ *
+ * AI SDK 5.0's `useChat` stores the `Chat` instance in a `useRef` and never
+ * recreates it when the `transport` prop changes. A ref-based transport
+ * ensures each request always uses the *latest* knowledge-base selection,
+ * tool mode, etc. without needing to recreate the Chat object.
+ */
+export function createRefChatTransport(
+  optionsRef: MutableRefObject<ChatTransportOptions>,
+) {
   return new DefaultChatTransport<OmeletteUIMessage>({
     api: '/api/v1/chat/stream',
     prepareSendMessagesRequest({ messages, trigger }) {
+      const opts = optionsRef.current;
       const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
       const messageText = lastUserMsg ? getMessageText(lastUserMsg) : '';

       return {
         body: {
           message: messageText,
-          conversation_id: options.conversationId ?? null,
-          knowledge_base_ids: options.knowledgeBaseIds ?? [],
-          tool_mode: options.toolMode ?? 'qa',
-          model: options.model ?? null,
+          conversation_id: opts.conversationId ?? null,
+          knowledge_base_ids: opts.knowledgeBaseIds ?? [],
+          tool_mode: opts.toolMode ?? 'qa',
+          model: opts.model ?? null,
           trigger,
         },
         headers: {
26 changes: 23 additions & 3 deletions frontend/src/pages/PlaygroundPage.tsx
@@ -81,10 +81,13 @@ export default function PlaygroundPage() {
     (cid: number) => {
       setNewConversationId(cid);
       if (!routeConvId) {
-        navigate(`/chat/${cid}`, { replace: true });
+        // Update URL without React Router navigation to avoid unmount/remount
+        // during streaming. The key="playground" on the Route prevents remount
+        // for sidebar clicks, but replaceState is safer during active streams.
+        window.history.replaceState(null, '', `/chat/${cid}`);
       }
     },
-    [routeConvId, navigate],
+    [routeConvId],
   );

   const {
@@ -102,6 +105,24 @@
     onError: (err) => toast.error(err.message || t('playground.streamError')),
   });

+  // When navigating to a different conversation via sidebar, load its messages
+  // into the existing Chat instance (which is preserved thanks to key="playground").
+  const prevConvIdRef = useRef(convIdNum);
+  useEffect(() => {
+    if (convIdNum !== prevConvIdRef.current) {
+      prevConvIdRef.current = convIdNum;
+      setNewConversationId(undefined);
+      setToolModeOverride(null);
+      setSelectedKBsOverride(null);
+    }
+  }, [convIdNum]);
+
+  useEffect(() => {
+    if (restoredConv && restoredMessages.length > 0 && convIdNum != null) {
+      setMessages(restoredMessages);
+    }
+  }, [restoredConv, restoredMessages, convIdNum, setMessages]);
+
   useEffect(() => {
     bottomRef.current?.scrollIntoView({ behavior: 'smooth' });
   }, [messages]);
@@ -120,7 +141,6 @@
   const handleNewChat = () => {
     stop();
     setMessages([]);
-    setMessages([]);
     setNewConversationId(undefined);
     setToolModeOverride(null);
     setSelectedKBsOverride(null);