diff --git a/powerrag/parser/mineru_parser.py b/powerrag/parser/mineru_parser.py
index 1ad3765c1..81405bfde 100644
--- a/powerrag/parser/mineru_parser.py
+++ b/powerrag/parser/mineru_parser.py
@@ -248,18 +248,10 @@ def store_images(self, md_content: str, images: ImageDict, output_dir: str) -> s
# Store image in OceanBase
STORAGE_IMPL.put(output_dir, img_name, img_bytes)
- # Generate URL for the image using PowerRAG image access endpoint
- # Get PowerRAG server configuration
- powerrag_config = get_base_config("powerrag", {}) or {}
- server_url = os.environ.get("PUBLIC_SERVER_URL", "http://localhost:6000")
-
- # Ensure server_url has protocol prefix
- if not server_url.startswith("http://") and not server_url.startswith("https://"):
- server_url = f"http://{server_url}"
-
- # Construct the image URL using PowerRAG chunk image endpoint
+ # Use relative path so frontend treats it as same-origin (proxied to backend).
+ # Avoids sanitizer blocking external img src while preventing data exfiltration.
kb_id = output_dir.split('/')[0] if '/' in output_dir else output_dir
- image_url = f"{server_url}/api/v1/powerrag/chunk/image/{kb_id}/{img_name}"
+ image_url = f"/api/v1/powerrag/chunk/image/{kb_id}/{img_name}"
# Add to result list
image_info.append((img_name, image_url))
diff --git a/powerrag/parser/vllm_parser.py b/powerrag/parser/vllm_parser.py
index 30778d1b7..6ba1d9d4d 100644
--- a/powerrag/parser/vllm_parser.py
+++ b/powerrag/parser/vllm_parser.py
@@ -415,10 +415,8 @@ def layoutjson2md(self, image: Image.Image, cells: list, text_key: str = 'text',
# Store image in storage (bucket)
STORAGE_IMPL.put(output_dir, img_filename, img_bytes)
- # Generate URL for the image
- powerrag_config = get_base_config("powerrag", {}) or {}
- api_url = os.environ.get("PUBLIC_SERVER_URL", "http://localhost:6000")
- image_url = f"http://{api_url}/v1/chunk/image/{output_dir}/{img_filename}"
+ # Use relative path so frontend treats it as same-origin (proxied to backend)
+ image_url = f"/v1/chunk/image/{output_dir}/{img_filename}"
# Use HTML img tag with URL
text_items.append(f'
')
@@ -662,13 +660,9 @@ def store_images(self, md_content: str, images: ImageDict, output_dir: str) -> s
# Store image in storage
STORAGE_IMPL.put(output_dir, img_name, img_bytes)
- # Generate URL for the image using RAGFlow image access endpoint
- # Get RAGFlow server configuration
- powerrag_config = get_base_config("powerrag", {}) or {}
- api_url = os.environ.get("PUBLIC_SERVER_URL", "http://localhost:6000")
-
- # Construct the image URL using the auth_image endpoint
- image_url = f"http://{api_url}/v1/chunk/image/{output_dir}/{img_name}"
+ # Use relative path so frontend treats it as same-origin (proxied to backend).
+ # Avoids sanitizer blocking external img src while preventing data exfiltration.
+ image_url = f"/v1/chunk/image/{output_dir}/{img_name}"
# Add to result list
image_info.append((img_name, image_url))
diff --git a/web/.env b/web/.env
deleted file mode 100644
index a6cbd9ccd..000000000
--- a/web/.env
+++ /dev/null
@@ -1,2 +0,0 @@
-PORT=9222
-DID_YOU_KNOW=none
\ No newline at end of file
diff --git a/web/.gitignore b/web/.gitignore
index fe4e6bf38..b934a778d 100644
--- a/web/.gitignore
+++ b/web/.gitignore
@@ -1,4 +1,5 @@
/node_modules
+/.env
/.env.local
/.umirc.local.ts
/config/config.local.ts
diff --git a/web/src/components/floating-chat-widget-markdown.tsx b/web/src/components/floating-chat-widget-markdown.tsx
index b4a7db1f2..f7b316a39 100644
--- a/web/src/components/floating-chat-widget-markdown.tsx
+++ b/web/src/components/floating-chat-widget-markdown.tsx
@@ -14,10 +14,10 @@ import {
showImage,
} from '@/utils/chat';
import { getExtension } from '@/utils/document-util';
+import { sanitizeChatContent } from '@/utils/sanitize';
import { InfoCircleOutlined } from '@ant-design/icons';
import { Button, Flex, Popover, Tooltip } from 'antd';
import classNames from 'classnames';
-import DOMPurify from 'dompurify';
import 'katex/dist/katex.min.css';
import { omit } from 'lodash';
import { pipe } from 'lodash/fp';
@@ -57,7 +57,8 @@ const FloatingChatWidgetMarkdown = ({
const isDarkTheme = useIsDarkTheme();
const contentWithCursor = useMemo(() => {
- let text = content === '' ? t('chat.searching') : content;
+ let text =
+ content === '' ? t('chat.searching') : sanitizeChatContent(content);
const nextText = replaceTextByOldReg(text);
return pipe(replaceThinkToSection, preprocessLaTeX)(nextText);
}, [content, t]);
@@ -179,7 +180,7 @@ const FloatingChatWidgetMarkdown = ({
diff --git a/web/src/components/highlight-markdown/index.tsx b/web/src/components/highlight-markdown/index.tsx
index 50b752680..0b1394c2f 100644
--- a/web/src/components/highlight-markdown/index.tsx
+++ b/web/src/components/highlight-markdown/index.tsx
@@ -13,6 +13,7 @@ import remarkMath from 'remark-math';
import 'katex/dist/katex.min.css'; // `rehype-katex` does not import the CSS for you
import { preprocessLaTeX } from '@/utils/chat';
+import { sanitizeChatContent } from '@/utils/sanitize';
import { useIsDarkTheme } from '../theme-provider';
import styles from './index.less';
@@ -51,7 +52,7 @@ const HighLightMarkdown = ({
} as any
}
>
- {children ? preprocessLaTeX(children) : children}
+ {children ? preprocessLaTeX(sanitizeChatContent(children)) : children}
);
};
diff --git a/web/src/components/markdown-content/index.tsx b/web/src/components/markdown-content/index.tsx
index 6e93bf134..3d9bf161e 100644
--- a/web/src/components/markdown-content/index.tsx
+++ b/web/src/components/markdown-content/index.tsx
@@ -2,7 +2,7 @@ import Image from '@/components/image';
import SvgIcon from '@/components/svg-icon';
import { IReference, IReferenceChunk } from '@/interfaces/database/chat';
import { getExtension } from '@/utils/document-util';
-import DOMPurify from 'dompurify';
+import { sanitizeChatContent } from '@/utils/sanitize';
import { useCallback, useEffect, useMemo } from 'react';
import Markdown from 'react-markdown';
import SyntaxHighlighter from 'react-syntax-highlighter';
@@ -52,10 +52,7 @@ const MarkdownContent = ({
const { setDocumentIds, data: fileThumbnails } =
useFetchDocumentThumbnailsByIds();
const contentWithCursor = useMemo(() => {
- let text = DOMPurify.sanitize(content, {
- ADD_TAGS: ['think', 'section'],
- ADD_ATTR: ['class'],
- });
+ let text = sanitizeChatContent(content);
// let text = content;
if (text === '') {
@@ -166,7 +163,7 @@ const MarkdownContent = ({
diff --git a/web/src/components/next-markdown-content/index.tsx b/web/src/components/next-markdown-content/index.tsx
index 8a300b226..be4a0d3dd 100644
--- a/web/src/components/next-markdown-content/index.tsx
+++ b/web/src/components/next-markdown-content/index.tsx
@@ -2,7 +2,7 @@ import Image from '@/components/image';
import SvgIcon from '@/components/svg-icon';
import { IReferenceChunk, IReferenceObject } from '@/interfaces/database/chat';
import { getExtension } from '@/utils/document-util';
-import DOMPurify from 'dompurify';
+import { sanitizeChatContent } from '@/utils/sanitize';
import { memo, useCallback, useEffect, useMemo } from 'react';
import Markdown from 'react-markdown';
import SyntaxHighlighter from 'react-syntax-highlighter';
@@ -53,10 +53,7 @@ function MarkdownContent({
const { setDocumentIds, data: fileThumbnails } =
useFetchDocumentThumbnailsByIds();
const contentWithCursor = useMemo(() => {
- let text = DOMPurify.sanitize(content, {
- ADD_TAGS: ['think', 'section'],
- ADD_ATTR: ['class'],
- });
+ let text = sanitizeChatContent(content);
// let text = content;
if (text === '') {
text = t('chat.searching');
@@ -168,7 +165,7 @@ function MarkdownContent({
diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/chunk-card/index.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/chunk-card/index.tsx
index 97a5af714..5c50ac666 100644
--- a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/chunk-card/index.tsx
+++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/chunk-card/index.tsx
@@ -10,9 +10,9 @@ import {
} from '@/components/ui/tooltip';
import type { ChunkDocType, IChunk } from '@/interfaces/database/knowledge';
import { cn } from '@/lib/utils';
+import { sanitizeChatContent } from '@/utils/sanitize';
import { CheckedState } from '@radix-ui/react-checkbox';
import classNames from 'classnames';
-import DOMPurify from 'dompurify';
import { useEffect, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { ChunkTextMode } from '../../constant';
@@ -124,7 +124,7 @@ const ChunkCard = ({
>
{
- let text = DOMPurify.sanitize(content, {
- ADD_TAGS: ['think', 'section'],
- ADD_ATTR: ['class'],
- });
+ let text = sanitizeChatContent(content);
// let text = content;
if (text === '') {
text = t('chat.searching');
@@ -168,7 +165,7 @@ const MarkdownContent = ({
diff --git a/web/src/pages/next-search/search-view.tsx b/web/src/pages/next-search/search-view.tsx
index 890e28072..4b1068355 100644
--- a/web/src/pages/next-search/search-view.tsx
+++ b/web/src/pages/next-search/search-view.tsx
@@ -14,7 +14,7 @@ import {
import { RAGFlowPagination } from '@/components/ui/ragflow-pagination';
import { IReference } from '@/interfaces/database/chat';
import { cn } from '@/lib/utils';
-import DOMPurify from 'dompurify';
+import { sanitizeChatContent } from '@/utils/sanitize';
import { isEmpty } from 'lodash';
import { BrainCircuit, Search, X } from 'lucide-react';
import { Dispatch, SetStateAction, useEffect, useState } from 'react';
@@ -208,7 +208,7 @@ export default function SearchingView({
{
>
s.trim().split(/\s+/)[0])
+ .filter(Boolean);
+}
+
+function isSrcsetAllowed(srcset: string): boolean { // true only if the srcset yields URLs and every one passes isAllowedImageUrl
+ const urls = getUrlsFromSrcset(srcset); // candidate URLs extracted from the srcset string
+ return urls.length > 0 && urls.every((url) => isAllowedImageUrl(url)); // no URLs at all, or any rejected URL => false
+}
+
+function sanitizeImageUrlAttributes(node: Element): void { // removes disallowed src/srcset attributes from the node in place
+ const tag = node.tagName; // NOTE(review): compared against uppercase names below — assumes HTML-namespace elements; confirm
+ if (tag !== 'IMG' && tag !== 'SOURCE') return; // only IMG and SOURCE elements are inspected; everything else is left untouched
+
+ const src = node.getAttribute('src');
+ if (src && !isAllowedImageUrl(src)) { // a missing or empty src is skipped
+ node.removeAttribute('src'); // only the attribute is dropped; the element itself stays
+ }
+
+ const srcset = node.getAttribute('srcset');
+ if (srcset && !isSrcsetAllowed(srcset)) { // a missing or empty srcset is skipped
+ node.removeAttribute('srcset'); // a single rejected candidate removes the whole srcset
+ }
+}
+
+let secureImageHookAdded = false; // module-level guard so the hook below is registered at most once
+
+function ensureSecureImageHook(): void { // registers the image-URL hook on DOMPurify on first call; later calls are no-ops
+ if (secureImageHookAdded) return; // already registered
+ DOMPurify.addHook('afterSanitizeAttributes', (node) => { // NOTE(review): added on the shared DOMPurify object, so presumably it also runs for other sanitize() calls — confirm
+ if (node.nodeType === 1) { // 1 is the DOM element node type; other node kinds are ignored
+ sanitizeImageUrlAttributes(node as Element);
+ }
+ });
+ secureImageHookAdded = true;
+}
+
+/**
+ * Sanitize content for safe rendering. Restricts img/source `src` and `srcset` to same-origin, relative,
+ * or data: URLs only (the decision is made by isAllowedImageUrl). Use for all chat/AI output and user content.
+ */
+export function sanitizeChatContent(dirty: string, config?: Config): string {
+ ensureSecureImageHook(); // make sure the image-URL hook is registered before sanitizing
+ const result = DOMPurify.sanitize(dirty, {
+ ADD_TAGS: ['think', 'section'],
+ ADD_ATTR: ['class'],
+ ...config, // spread last: caller-supplied keys (including ADD_TAGS / ADD_ATTR) replace the defaults above
+ });
+ return typeof result === 'string' ? result : ''; // any non-string result is replaced by an empty string
+}