From 25a7b50ee121f0c853a479ecf9d5b202fece3cf0 Mon Sep 17 00:00:00 2001
From: He Wang <wanghechn@qq.com>
Date: Wed, 25 Feb 2026 17:17:03 +0800
Subject: [PATCH] fix: use module-level access for STORAGE_IMPL to avoid stale
 None binding

`from common.settings import STORAGE_IMPL` binds a module-local name to
the value present at import time (None). That binding is not updated
when init_settings() later reassigns `common.settings.STORAGE_IMPL`, so
these modules keep using the stale None. Switch to
`from common import settings` and access `settings.STORAGE_IMPL` so the
current value is resolved at call time.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 powerrag/parser/mineru_parser.py            | 4 ++--
 powerrag/parser/vllm_parser.py              | 6 +++---
 powerrag/server/services/convert_service.py | 4 ++--
 powerrag/server/services/parse_service.py   | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/powerrag/parser/mineru_parser.py b/powerrag/parser/mineru_parser.py
index 1ad3765c1..b8ce9b9ea 100644
--- a/powerrag/parser/mineru_parser.py
+++ b/powerrag/parser/mineru_parser.py
@@ -26,7 +26,7 @@
 import pdfplumber
 from typing import Union, Dict, TypedDict, Tuple
 from api.utils.configs import get_base_config
-from common.settings import STORAGE_IMPL
+from common import settings
 from PIL import Image
 
 LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber"
@@ -246,7 +246,7 @@ def store_images(self, md_content: str, images: ImageDict, output_dir: str) -> s
                 img_bytes = base64.b64decode(img_base64)
 
                 # Store image in OceanBase
-                STORAGE_IMPL.put(output_dir, img_name, img_bytes)
+                settings.STORAGE_IMPL.put(output_dir, img_name, img_bytes)
 
                 # Generate URL for the image using PowerRAG image access endpoint
                 # Get PowerRAG server configuration
diff --git a/powerrag/parser/vllm_parser.py b/powerrag/parser/vllm_parser.py
index 30778d1b7..b960da269 100644
--- a/powerrag/parser/vllm_parser.py
+++ b/powerrag/parser/vllm_parser.py
@@ -24,7 +24,7 @@
 import pdfplumber
 from typing import Union, Dict, TypedDict, Tuple, List, Optional
 from api.utils.configs import get_base_config
-from common.settings import STORAGE_IMPL
+from common import settings
 from openai import OpenAI
 from PIL import Image
 import io
@@ -413,7 +413,7 @@ def layoutjson2md(self, image: Image.Image, cells: list, text_key: str = 'text',
                         img_bytes = buffered.getvalue()
                         
                         # Store image in storage (bucket)
-                        STORAGE_IMPL.put(output_dir, img_filename, img_bytes)
+                        settings.STORAGE_IMPL.put(output_dir, img_filename, img_bytes)
                         
                         # Generate URL for the image
                         powerrag_config = get_base_config("powerrag", {}) or {}
@@ -660,7 +660,7 @@ def store_images(self, md_content: str, images: ImageDict, output_dir: str) -> s
                 img_bytes = base64.b64decode(img_base64)
 
                 # Store image in storage
-                STORAGE_IMPL.put(output_dir, img_name, img_bytes)
+                settings.STORAGE_IMPL.put(output_dir, img_name, img_bytes)
 
                 # Generate URL for the image using RAGFlow image access endpoint
                 # Get RAGFlow server configuration
diff --git a/powerrag/server/services/convert_service.py b/powerrag/server/services/convert_service.py
index 25b72a760..fd4ac8a26 100644
--- a/powerrag/server/services/convert_service.py
+++ b/powerrag/server/services/convert_service.py
@@ -28,7 +28,7 @@
 
 from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
-from common.settings import STORAGE_IMPL
+from common import settings
 from powerrag.parser import MinerUPdfParser, DotsOcrParser
 
 logger = logging.getLogger(__name__)
@@ -86,7 +86,7 @@ def convert_document(self, doc_id: str, source_format: str, target_format: str,
             
             # Get binary data
             bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id)
-            binary = STORAGE_IMPL.get(bucket, name)
+            binary = settings.STORAGE_IMPL.get(bucket, name)
             
             if not binary:
                 raise ValueError(f"Document binary not found for {doc_id}")
diff --git a/powerrag/server/services/parse_service.py b/powerrag/server/services/parse_service.py
index 227e779a7..d24bb40ce 100644
--- a/powerrag/server/services/parse_service.py
+++ b/powerrag/server/services/parse_service.py
@@ -30,7 +30,7 @@
 from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from common.constants import ParserType
-from common.settings import STORAGE_IMPL
+from common import settings
 
 # Import split service for text chunking
 from powerrag.server.services.split_service import PowerRAGSplitService
@@ -119,7 +119,7 @@ def parse_document(self, doc_id: str) -> Dict[str, Any]:
             parser_config = doc.get["parser_config"]
             # Get document binary data from storage
             bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id)
-            binary = STORAGE_IMPL.get(bucket, name)
+            binary = settings.STORAGE_IMPL.get(bucket, name)
             
             if not binary:
                 raise ValueError(f"Document binary data not found for {doc_id}")
@@ -662,7 +662,7 @@ def _parse_to_markdown_for_task(self, doc_id: str = None, filename: str = None,
             if not bucket or not name:
                 raise ValueError(f"Invalid storage address for document {doc_id}: bucket={bucket}, name={name}")
             
-            storage = STORAGE_IMPL
+            storage = settings.STORAGE_IMPL
             
             if not storage:
                 raise ValueError("Storage implementation not available")