diff --git a/app/services/grok/processor.py b/app/services/grok/processor.py
index b50cb0ae..101ce5b5 100644
--- a/app/services/grok/processor.py
+++ b/app/services/grok/processor.py
@@ -1,6 +1,7 @@
 """
 OpenAI 响应格式处理器
 """
+import re
 import time
 import uuid
 import random
@@ -37,9 +38,64 @@ def _build_video_poster_preview(video_url: str, thumbnail_url: str = "") -> str:
     '''
 
 
+_GROK_RENDER_TAG_RE = re.compile(
+    r"<grok:render[^>]*>(?:<argument[^>]*>[^<]*</argument>)*\s*</grok:render>"
+)
+
+
+def _extract_web_sources(mr: dict) -> list[dict]:
+    """Extract web search sources from modelResponse fields."""
+    sources: list[dict] = []
+    seen: set[str] = set()
+
+    for item in mr.get("citedWebSearchResults", mr.get("webSearchResults", [])):
+        if isinstance(item, dict):
+            url = (item.get("url") or "").strip()
+            if url and url not in seen:
+                seen.add(url)
+                sources.append({
+                    "title": (item.get("title") or "").strip(),
+                    "url": url,
+                })
+
+    if not sources:
+        for raw in mr.get("cardAttachmentsJson", []):
+            try:
+                card = orjson.loads(raw) if isinstance(raw, (str, bytes)) else raw
+            except Exception:
+                continue
+            if not isinstance(card, dict):
+                continue
+            url = (card.get("url") or "").strip()
+            if url and url not in seen:
+                seen.add(url)
+                sources.append({
+                    "title": (card.get("title") or "").strip(),
+                    "url": url,
+                })
+
+    return sources
+
+
+def _strip_grok_render_tags(text: str) -> str:
+    """Remove citation placeholder tags from text."""
+    return _GROK_RENDER_TAG_RE.sub("", text)
+
+
+def _format_sources_as_references(sources: list[dict]) -> str:
+    """Format sources as a Markdown references section."""
+    if not sources:
+        return ""
+    lines = ["\n\n## References\n"]
+    for i, s in enumerate(sources, 1):
+        title = s.get("title") or s["url"]
+        lines.append(f"{i}. [{title}]({s['url']})")
+    return "\n".join(lines)
+
+
 class BaseProcessor:
     """基础处理器"""
-    
+
     def __init__(self, model: str, token: str = ""):
         self.model = model
         self.token = token
@@ -109,7 +165,7 @@ def _sse(self, content: str = "", role: str = None, finish: str = None) -> str:
 
 class StreamProcessor(BaseProcessor):
     """流式响应处理器"""
-    
+
     def __init__(self, model: str, token: str = "", think: bool = None):
         super().__init__(model, token)
         self.response_id: Optional[str] = None
@@ -118,7 +174,8 @@ def __init__(self, model: str, token: str = "", think: bool = None):
         self.role_sent: bool = False
         self.filter_tags = get_config("grok.filter_tags", [])
         self.image_format = get_config("app.image_format", "url")
-        
+        self.web_sources: list[dict] = []
+
         if think is None:
             self.show_think = get_config("grok.thinking", False)
         else:
@@ -166,7 +223,11 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, None]:
                             yield self._sse(msg + "\n")
                         yield self._sse("\n")
                         self.think_opened = False
-                    
+
+                    # 提取 web search sources
+                    if sources := _extract_web_sources(mr):
+                        self.web_sources = sources
+
                     # 处理生成的图片
                     for url in mr.get("generatedImageUrls", []):
                         parts = url.split("/")
@@ -190,11 +251,19 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, None]:
 
                 # 普通 token
                 if (token := resp.get("token")) is not None:
-                    if token and not (self.filter_tags and any(t in token for t in self.filter_tags)):
+                    if token:
+                        # 剥离 grok:render 标签而非丢弃整个 token
+                        if self.filter_tags and any(t in token for t in self.filter_tags):
+                            token = _strip_grok_render_tags(token)
+                            if not token.strip():
+                                continue
                         yield self._sse(token)
-            
+
             if self.think_opened:
                 yield self._sse("\n")
+            # 输出 web search references
+            if self.web_sources:
+                yield self._sse(_format_sources_as_references(self.web_sources))
             yield self._sse(finish="stop")
             yield "data: [DONE]\n\n"
         except Exception as e:
@@ -216,7 +285,8 @@ async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]:
         response_id = ""
         fingerprint = ""
         content = ""
-        
+        web_sources: list[dict] = []
+
        try:
             async for line in response:
                 if not line:
@@ -234,7 +304,10 @@ async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]:
                 if mr := resp.get("modelResponse"):
                     response_id = mr.get("responseId", "")
                     content = mr.get("message", "")
-                    
+
+                    # 提取 web search sources
+                    web_sources = _extract_web_sources(mr)
+
                     if urls := mr.get("generatedImageUrls"):
                         content += "\n"
                         for url in urls:
@@ -255,11 +328,16 @@ async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]:
 
                     if (meta := mr.get("metadata", {})).get("llm_info", {}).get("modelHash"):
                         fingerprint = meta["llm_info"]["modelHash"]
-                    
+
         except Exception as e:
             logger.error(f"Collect processing error: {e}", extra={"model": self.model})
         finally:
             await self.close()
+
+        # 清理 grok:render 标签并附加 web search references
+        content = _strip_grok_render_tags(content)
+        if web_sources:
+            content += _format_sources_as_references(web_sources)
 
         return {
             "id": response_id,
diff --git a/src/grok/processor.ts b/src/grok/processor.ts
index d4f2fe33..e9490cc1 100644
--- a/src/grok/processor.ts
+++ b/src/grok/processor.ts
@@ -168,6 +168,7 @@ export function createOpenAiStreamFromGrokNdjson(
   let thinkingFinished = false;
   let videoProgressStarted = false;
   let lastVideoProgress = -1;
+  const collectedSources = new Map<string, { title: string; url: string }>();
 
   let buffer = "";
 
@@ -253,6 +254,17 @@ export function createOpenAiStreamFromGrokNdjson(
           const userRespModel = grok.userResponse?.model;
           if (typeof userRespModel === "string" && userRespModel.trim()) currentModel = userRespModel.trim();
 
+          // Collect web search sources early (before rawToken checks that may skip this frame)
+          if (grok.webSearchResults?.results && Array.isArray(grok.webSearchResults.results)) {
+            for (const r of grok.webSearchResults.results) {
+              const url = typeof r.url === "string" ? r.url.trim() : "";
+              if (url && !collectedSources.has(url)) {
+                const title = typeof r.title === "string" ? r.title.trim() : url;
+                collectedSources.set(url, { title, url });
+              }
+            }
+          }
+
           // Video generation stream
           const videoResp = grok.streamingVideoGenerationResponse;
           if (videoResp) {
@@ -337,7 +349,15 @@ export function createOpenAiStreamFromGrokNdjson(
 
           if (typeof rawToken !== "string" || !rawToken) continue;
           let token = rawToken;
-          if (filteredTags.some((t) => token.includes(t))) continue;
+          // Strip filtered tags from token instead of dropping entire token
+          for (const t of filteredTags) {
+            if (token.includes(t)) {
+              token = token.replace(new RegExp(`<[^>]*${t.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}[^>]*>(?:<[^>]*>[^<]*<\/[^>]*>)*\\s*<\/[^>]*>`, "g"), "");
+              token = token.replace(new RegExp(`<[^>]*${t.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}[^>]*\\/?>`, "g"), "");
+            }
+          }
+          token = token.trim();
+          if (!token) continue;
 
           const currentIsThinking = Boolean(grok.isThinking);
           const messageTag = grok.messageTag;
@@ -382,6 +402,17 @@ export function createOpenAiStreamFromGrokNdjson(
             }
           }
 
+          // Append collected web search sources as References section
+          if (collectedSources.size > 0) {
+            const refLines = ["\n\n## References\n"];
+            let idx = 1;
+            for (const s of collectedSources.values()) {
+              refLines.push(`${idx}. [${s.title}](${s.url})`);
+              idx++;
+            }
+            controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, refLines.join("\n"))));
+          }
+
           controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, "", "stop")));
           controller.enqueue(encoder.encode(makeDone()));
           if (opts.onFinish) await opts.onFinish({ status: finalStatus, duration: (Date.now() - startTime) / 1000 });
@@ -417,6 +448,8 @@ export async function parseOpenAiFromGrokNdjson(
   let content = "";
   let model = requestedModel;
 
+  const collectedSources = new Map<string, { title: string; url: string }>();
+
   for (const line of lines) {
     let data: GrokNdjson;
     try {
@@ -431,6 +464,17 @@ export async function parseOpenAiFromGrokNdjson(
     const grok = (data as any).result?.response;
     if (!grok) continue;
 
+    // Collect web search sources from all frames
+    if (grok.webSearchResults?.results && Array.isArray(grok.webSearchResults.results)) {
+      for (const r of grok.webSearchResults.results) {
+        const url = typeof r.url === "string" ? r.url.trim() : "";
+        if (url && !collectedSources.has(url)) {
+          const title = typeof r.title === "string" ? r.title.trim() : url;
+          collectedSources.set(url, { title, url });
+        }
+      }
+    }
+
     const videoResp = grok.streamingVideoGenerationResponse;
     if (videoResp?.videoUrl && typeof videoResp.videoUrl === "string") {
       const videoPath = encodeAssetPath(videoResp.videoUrl);
@@ -476,6 +520,17 @@ export async function parseOpenAiFromGrokNdjson(
     break;
   }
 
+  // Append collected web search sources as References section
+  if (collectedSources.size > 0) {
+    const refLines = ["\n\n## References\n"];
+    let idx = 1;
+    for (const s of collectedSources.values()) {
+      refLines.push(`${idx}. [${s.title}](${s.url})`);
+      idx++;
+    }
+    content += refLines.join("\n");
+  }
+
   return {
     id: `chatcmpl-${crypto.randomUUID()}`,
     object: "chat.completion",