From 25a7b50ee121f0c853a479ecf9d5b202fece3cf0 Mon Sep 17 00:00:00 2001 From: He Wang Date: Wed, 25 Feb 2026 17:17:03 +0800 Subject: [PATCH] fix: use module-level access for STORAGE_IMPL to avoid stale None binding `from common.settings import STORAGE_IMPL` captures the value at import time (None), which is never updated when init_settings() later reassigns it. Switch to `from common import settings` and access via `settings.STORAGE_IMPL` so the current value is resolved at call time. Co-authored-by: Cursor --- powerrag/parser/mineru_parser.py | 4 ++-- powerrag/parser/vllm_parser.py | 6 +++--- powerrag/server/services/convert_service.py | 4 ++-- powerrag/server/services/parse_service.py | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/powerrag/parser/mineru_parser.py b/powerrag/parser/mineru_parser.py index 1ad3765c1..b8ce9b9ea 100644 --- a/powerrag/parser/mineru_parser.py +++ b/powerrag/parser/mineru_parser.py @@ -26,7 +26,7 @@ import pdfplumber from typing import Union, Dict, TypedDict, Tuple from api.utils.configs import get_base_config -from common.settings import STORAGE_IMPL +from common import settings from PIL import Image LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber" @@ -246,7 +246,7 @@ def store_images(self, md_content: str, images: ImageDict, output_dir: str) -> s img_bytes = base64.b64decode(img_base64) # Store image in OceanBase - STORAGE_IMPL.put(output_dir, img_name, img_bytes) + settings.STORAGE_IMPL.put(output_dir, img_name, img_bytes) # Generate URL for the image using PowerRAG image access endpoint # Get PowerRAG server configuration diff --git a/powerrag/parser/vllm_parser.py b/powerrag/parser/vllm_parser.py index 30778d1b7..b960da269 100644 --- a/powerrag/parser/vllm_parser.py +++ b/powerrag/parser/vllm_parser.py @@ -24,7 +24,7 @@ import pdfplumber from typing import Union, Dict, TypedDict, Tuple, List, Optional from api.utils.configs import get_base_config -from common.settings import STORAGE_IMPL +from common import settings from openai import OpenAI from PIL import Image import io @@ -413,7 +413,7 @@ def layoutjson2md(self, image: Image.Image, cells: list, text_key: str = 'text', img_bytes = buffered.getvalue() # Store image in storage (bucket) - STORAGE_IMPL.put(output_dir, img_filename, img_bytes) + settings.STORAGE_IMPL.put(output_dir, img_filename, img_bytes) # Generate URL for the image powerrag_config = get_base_config("powerrag", {}) or {} @@ -660,7 +660,7 @@ def store_images(self, md_content: str, images: ImageDict, output_dir: str) -> s img_bytes = base64.b64decode(img_base64) # Store image in storage - STORAGE_IMPL.put(output_dir, img_name, img_bytes) + settings.STORAGE_IMPL.put(output_dir, img_name, img_bytes) # Generate URL for the image using RAGFlow image access endpoint # Get RAGFlow server configuration diff --git a/powerrag/server/services/convert_service.py b/powerrag/server/services/convert_service.py index 25b72a760..fd4ac8a26 100644 --- a/powerrag/server/services/convert_service.py +++ b/powerrag/server/services/convert_service.py @@ -28,7 +28,7 @@ from api.db.services.document_service import DocumentService from api.db.services.file2document_service import File2DocumentService -from common.settings import STORAGE_IMPL +from common import settings from powerrag.parser import MinerUPdfParser, DotsOcrParser logger = logging.getLogger(__name__) @@ -86,7 +86,7 @@ def convert_document(self, doc_id: str, source_format: str, target_format: str, # Get binary data bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id) - binary = STORAGE_IMPL.get(bucket, name) + binary = settings.STORAGE_IMPL.get(bucket, name) if not binary: raise ValueError(f"Document binary not found for {doc_id}") diff --git a/powerrag/server/services/parse_service.py b/powerrag/server/services/parse_service.py index 227e779a7..d24bb40ce 100644 --- a/powerrag/server/services/parse_service.py +++ b/powerrag/server/services/parse_service.py @@ -30,7 +30,7 @@ from api.db.services.document_service import DocumentService from api.db.services.file2document_service import File2DocumentService from common.constants import ParserType -from common.settings import STORAGE_IMPL +from common import settings # Import split service for text chunking from powerrag.server.services.split_service import PowerRAGSplitService @@ -119,7 +119,7 @@ def parse_document(self, doc_id: str) -> Dict[str, Any]: parser_config = doc.get["parser_config"] # Get document binary data from storage bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id) - binary = STORAGE_IMPL.get(bucket, name) + binary = settings.STORAGE_IMPL.get(bucket, name) if not binary: raise ValueError(f"Document binary data not found for {doc_id}") @@ -662,7 +662,7 @@ def _parse_to_markdown_for_task(self, doc_id: str = None, filename: str = None, if not bucket or not name: raise ValueError(f"Invalid storage address for document {doc_id}: bucket={bucket}, name={name}") - storage = STORAGE_IMPL + storage = settings.STORAGE_IMPL if not storage: raise ValueError("Storage implementation not available")