Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions powerrag/parser/mineru_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import pdfplumber
from typing import Union, Dict, TypedDict, Tuple
from api.utils.configs import get_base_config
from common.settings import STORAGE_IMPL
from common import settings
from PIL import Image

LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber"
Expand Down Expand Up @@ -246,7 +246,7 @@ def store_images(self, md_content: str, images: ImageDict, output_dir: str) -> s
img_bytes = base64.b64decode(img_base64)

# Store image in OceanBase
STORAGE_IMPL.put(output_dir, img_name, img_bytes)
settings.STORAGE_IMPL.put(output_dir, img_name, img_bytes)

# Generate URL for the image using PowerRAG image access endpoint
# Get PowerRAG server configuration
Expand Down
6 changes: 3 additions & 3 deletions powerrag/parser/vllm_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pdfplumber
from typing import Union, Dict, TypedDict, Tuple, List, Optional
from api.utils.configs import get_base_config
from common.settings import STORAGE_IMPL
from common import settings
from openai import OpenAI
from PIL import Image
import io
Expand Down Expand Up @@ -413,7 +413,7 @@ def layoutjson2md(self, image: Image.Image, cells: list, text_key: str = 'text',
img_bytes = buffered.getvalue()

# Store image in storage (bucket)
STORAGE_IMPL.put(output_dir, img_filename, img_bytes)
settings.STORAGE_IMPL.put(output_dir, img_filename, img_bytes)

# Generate URL for the image
powerrag_config = get_base_config("powerrag", {}) or {}
Expand Down Expand Up @@ -660,7 +660,7 @@ def store_images(self, md_content: str, images: ImageDict, output_dir: str) -> s
img_bytes = base64.b64decode(img_base64)

# Store image in storage
STORAGE_IMPL.put(output_dir, img_name, img_bytes)
settings.STORAGE_IMPL.put(output_dir, img_name, img_bytes)

# Generate URL for the image using RAGFlow image access endpoint
# Get RAGFlow server configuration
Expand Down
4 changes: 2 additions & 2 deletions powerrag/server/services/convert_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from common.settings import STORAGE_IMPL
from common import settings
from powerrag.parser import MinerUPdfParser, DotsOcrParser

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -86,7 +86,7 @@ def convert_document(self, doc_id: str, source_format: str, target_format: str,

# Get binary data
bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id)
binary = STORAGE_IMPL.get(bucket, name)
binary = settings.STORAGE_IMPL.get(bucket, name)

if not binary:
raise ValueError(f"Document binary not found for {doc_id}")
Expand Down
6 changes: 3 additions & 3 deletions powerrag/server/services/parse_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from common.constants import ParserType
from common.settings import STORAGE_IMPL
from common import settings

# Import split service for text chunking
from powerrag.server.services.split_service import PowerRAGSplitService
Expand Down Expand Up @@ -119,7 +119,7 @@ def parse_document(self, doc_id: str) -> Dict[str, Any]:
parser_config = doc.get["parser_config"]
# Get document binary data from storage
bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id)
binary = STORAGE_IMPL.get(bucket, name)
binary = settings.STORAGE_IMPL.get(bucket, name)

if not binary:
raise ValueError(f"Document binary data not found for {doc_id}")
Expand Down Expand Up @@ -662,7 +662,7 @@ def _parse_to_markdown_for_task(self, doc_id: str = None, filename: str = None,
if not bucket or not name:
raise ValueError(f"Invalid storage address for document {doc_id}: bucket={bucket}, name={name}")

storage = STORAGE_IMPL
storage = settings.STORAGE_IMPL

if not storage:
raise ValueError("Storage implementation not available")
Expand Down