diff --git a/crates/ov_cli/README.md b/crates/ov_cli/README.md index 12115a0f..e14b0a71 100644 --- a/crates/ov_cli/README.md +++ b/crates/ov_cli/README.md @@ -13,6 +13,8 @@ curl -fsSL https://raw.githubusercontent.com/volcengine/OpenViking/main/crates/o ### From Source ```bash +# openviking need rust >= 1.88, please upgrade it if necessary +# brew upgrade rust cargo install --path crates/ov_cli ``` diff --git a/examples/ov.conf.example b/examples/ov.conf.example index 5f78f734..54e938e2 100644 --- a/examples/ov.conf.example +++ b/examples/ov.conf.example @@ -6,10 +6,10 @@ "cors_origins": ["*"] }, "storage": { + "workspace": "./data", "vectordb": { "name": "context", "backend": "local", - "path": "./data", "project": "default", "volcengine": { "region": "cn-beijing", @@ -20,7 +20,6 @@ "agfs": { "port": 1833, "log_level": "warn", - "path": "./data", "backend": "local", "timeout": 10, "retry_times": 3, diff --git a/examples/server_client/ov.conf.example b/examples/server_client/ov.conf.example index 13eb55db..57fe2ef6 100644 --- a/examples/server_client/ov.conf.example +++ b/examples/server_client/ov.conf.example @@ -6,15 +6,14 @@ "cors_origins": ["*"] }, "storage": { + "workspace": "./data", "vectordb": { "name": "context", - "backend": "local", - "path": "./data" + "backend": "local" }, "agfs": { "port": 1833, "log_level": "warn", - "path": "./data", "backend": "local" } }, diff --git a/openviking/parse/directory_scan.py b/openviking/parse/directory_scan.py index 8da532f2..07b10283 100644 --- a/openviking/parse/directory_scan.py +++ b/openviking/parse/directory_scan.py @@ -175,7 +175,7 @@ def _classify_file( def scan_directory( root: Union[str, Path], registry: Optional[ParserRegistry] = None, - strict: bool = True, + strict: bool = False, ignore_dirs: Optional[Set[str]] = None, include: Optional[str] = None, exclude: Optional[str] = None, @@ -272,7 +272,10 @@ def scan_directory( f"Unsupported: {unsupported_paths[:10]}{'...' if len(unsupported_paths) > 10 else ''}" ) if strict: + logger.error(msg) raise UnsupportedDirectoryFilesError(msg, unsupported_paths) + else: + logger.warning(msg) result.warnings.append(msg) for rel in unsupported_paths: result.warnings.append(f" - {rel}") diff --git a/openviking/server/routers/pack.py b/openviking/server/routers/pack.py index e486870b..6a29d4ce 100644 --- a/openviking/server/routers/pack.py +++ b/openviking/server/routers/pack.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 """Pack endpoints for OpenViking HTTP Server.""" +from typing import Optional + from fastapi import APIRouter, Depends from pydantic import BaseModel @@ -22,7 +24,8 @@ class ExportRequest(BaseModel): class ImportRequest(BaseModel): """Request model for import.""" - file_path: str + file_path: Optional[str] = None + temp_path: Optional[str] = None parent: str force: bool = False vectorize: bool = True @@ -46,8 +49,13 @@ async def import_ovpack( ): """Import .ovpack file.""" service = get_service() + + file_path = request.file_path + if request.temp_path: + file_path = request.temp_path + result = await service.pack.import_ovpack( - request.file_path, + file_path, request.parent, force=request.force, vectorize=request.vectorize, diff --git a/openviking/server/routers/resources.py b/openviking/server/routers/resources.py index 7291dc2f..b1705988 100644 --- a/openviking/server/routers/resources.py +++ b/openviking/server/routers/resources.py @@ -2,14 +2,18 @@ # SPDX-License-Identifier: Apache-2.0 """Resource endpoints for OpenViking HTTP Server.""" +import time +import uuid +from pathlib import Path from typing import Any, Optional -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, File, UploadFile from pydantic import BaseModel from openviking.server.auth import verify_api_key from openviking.server.dependencies import get_service from openviking.server.models import Response +from openviking_cli.utils.config.open_viking_config import get_openviking_config router = APIRouter(prefix="/api/v1", tags=["resources"]) @@ -17,7 +21,8 @@ class AddResourceRequest(BaseModel): """Request model for add_resource.""" - path: str + path: Optional[str] = None + temp_path: Optional[str] = None target: Optional[str] = None reason: str = "" instruction: str = "" @@ -33,6 +38,44 @@ class AddSkillRequest(BaseModel): timeout: Optional[float] = None +def _cleanup_temp_files(temp_dir: Path, max_age_hours: int = 1): + """Clean up temporary files older than max_age_hours.""" + if not temp_dir.exists(): + return + + now = time.time() + max_age_seconds = max_age_hours * 3600 + + for file_path in temp_dir.iterdir(): + if file_path.is_file(): + file_age = now - file_path.stat().st_mtime + if file_age > max_age_seconds: + file_path.unlink(missing_ok=True) + + +@router.post("/resources/temp_upload") +async def temp_upload( + file: UploadFile = File(...), + _: bool = Depends(verify_api_key), +): + """Upload a temporary file for add_resource or import_ovpack.""" + config = get_openviking_config() + temp_dir = config.storage.get_upload_temp_dir() + + # Clean up old temporary files + _cleanup_temp_files(temp_dir) + + # Save the uploaded file + file_ext = Path(file.filename).suffix if file.filename else ".tmp" + temp_filename = f"upload_{uuid.uuid4().hex}{file_ext}" + temp_file_path = temp_dir / temp_filename + + with open(temp_file_path, "wb") as f: + f.write(await file.read()) + + return Response(status="ok", result={"temp_path": str(temp_file_path)}) + + @router.post("/resources") async def add_resource( request: AddResourceRequest, @@ -40,8 +83,13 @@ async def add_resource( ): """Add resource to OpenViking.""" service = get_service() + + path = request.path + if request.temp_path: + path = request.temp_path + result = await service.resources.add_resource( - path=request.path, + path=path, target=request.target, reason=request.reason, instruction=request.instruction, diff --git a/openviking/utils/media_processor.py b/openviking/utils/media_processor.py index 3ff58b2e..3e2475bc 100644 --- a/openviking/utils/media_processor.py +++ b/openviking/utils/media_processor.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 """Unified resource processor with strategy-based routing.""" +import tempfile +import zipfile from pathlib import Path from typing import TYPE_CHECKING, Optional @@ -103,6 +105,15 @@ async def _process_file( instruction: str, ) -> ParseResult: """Process file with unified parsing.""" + # Check if it's a zip file + if zipfile.is_zipfile(file_path): + temp_dir = Path(tempfile.mkdtemp()) + try: + with zipfile.ZipFile(file_path, "r") as zipf: + zipf.extractall(temp_dir) + return await self._process_directory(temp_dir, instruction) + finally: + pass # Don't delete temp_dir yet, it will be used by TreeBuilder return await parse( str(file_path), instruction=instruction, diff --git a/openviking_cli/client/http.py b/openviking_cli/client/http.py index a5cb6903..b223ffc2 100644 --- a/openviking_cli/client/http.py +++ b/openviking_cli/client/http.py @@ -5,6 +5,10 @@ Implements BaseClient interface using HTTP calls to OpenViking Server. """ +import tempfile +import uuid +import zipfile +from pathlib import Path from typing import Any, Dict, List, Optional, Union import httpx @@ -219,6 +223,42 @@ def _raise_exception(self, error: Dict[str, Any]) -> None: else: raise exc_class(message) + def _is_local_server(self) -> bool: + """Check if the server URL is localhost or 127.0.0.1.""" + from urllib.parse import urlparse + + parsed_url = urlparse(self._url) + hostname = parsed_url.hostname + return hostname in ("localhost", "127.0.0.1") + + def _zip_directory(self, dir_path: str) -> str: + """Create a temporary zip file from a directory.""" + dir_path = Path(dir_path) + if not dir_path.is_dir(): + raise ValueError(f"Path {dir_path} is not a directory") + + temp_dir = tempfile.gettempdir() + zip_path = Path(temp_dir) / f"temp_upload_{uuid.uuid4().hex}.zip" + + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf: + for file_path in dir_path.rglob("*"): + if file_path.is_file(): + arcname = file_path.relative_to(dir_path) + zipf.write(file_path, arcname=arcname) + + return str(zip_path) + + async def _upload_temp_file(self, file_path: str) -> str: + """Upload a file to /api/v1/resources/temp_upload and return the temp_path.""" + with open(file_path, "rb") as f: + files = {"file": (Path(file_path).name, f, "application/octet-stream")} + response = await self._http.post( + "/api/v1/resources/temp_upload", + files=files, + ) + result = self._handle_response(response) + return result.get("temp_path", "") + # ============= Resource Management ============= async def add_resource( @@ -231,16 +271,28 @@ async def add_resource( timeout: Optional[float] = None, ) -> Dict[str, Any]: """Add resource to OpenViking.""" + request_data = { + "target": target, + "reason": reason, + "instruction": instruction, + "wait": wait, + "timeout": timeout, + } + + path_obj = Path(path) + if path_obj.exists() and path_obj.is_dir() and not self._is_local_server(): + zip_path = self._zip_directory(path) + try: + temp_path = await self._upload_temp_file(zip_path) + request_data["temp_path"] = temp_path + finally: + Path(zip_path).unlink(missing_ok=True) + else: + request_data["path"] = path + response = await self._http.post( "/api/v1/resources", - json={ - "path": path, - "target": target, - "reason": reason, - "instruction": instruction, - "wait": wait, - "timeout": timeout, - }, + json=request_data, ) return self._handle_response(response) @@ -554,14 +606,22 @@ async def import_ovpack( ) -> str: """Import .ovpack file.""" parent = VikingURI.normalize(parent) + request_data = { + "parent": parent, + "force": force, + "vectorize": vectorize, + } + + file_path_obj = Path(file_path) + if file_path_obj.exists() and file_path_obj.is_file() and not self._is_local_server(): + temp_path = await self._upload_temp_file(file_path) + request_data["temp_path"] = temp_path + else: + request_data["file_path"] = file_path + response = await self._http.post( "/api/v1/pack/import", - json={ - "file_path": file_path, - "parent": parent, - "force": force, - "vectorize": vectorize, - }, + json=request_data, ) result = self._handle_response(response) return result.get("uri", "") diff --git a/openviking_cli/utils/config/agfs_config.py b/openviking_cli/utils/config/agfs_config.py index ef40abe1..a152042e 100644 --- a/openviking_cli/utils/config/agfs_config.py +++ b/openviking_cli/utils/config/agfs_config.py @@ -71,7 +71,10 @@ def validate_config(self): class AGFSConfig(BaseModel): """Configuration for AGFS (Agent Global File System).""" - path: str = Field(default="./data", description="AGFS data storage path") + path: Optional[str] = Field( + default=None, + description="[Deprecated in favor of `storage.workspace`] AGFS data storage path. This will be ignored if `storage.workspace` is set.", + ) port: int = Field(default=1833, description="AGFS service port") @@ -110,8 +113,7 @@ def validate_config(self): ) if self.backend == "local": - if not self.path: - raise ValueError("AGFS local backend requires 'path' to be set") + pass elif self.backend == "s3": # Validate S3 configuration diff --git a/openviking_cli/utils/config/open_viking_config.py b/openviking_cli/utils/config/open_viking_config.py index ac615e9c..14a36b74 100644 --- a/openviking_cli/utils/config/open_viking_config.py +++ b/openviking_cli/utils/config/open_viking_config.py @@ -138,7 +138,7 @@ def from_dict(cls, config: Dict[str, Any]) -> "OpenVikingConfig": # Remove sections managed by other loaders (e.g. server config) config_copy.pop("server", None) - + # Handle parser configurations from nested "parsers" section parser_configs = {} if "parsers" in config_copy: @@ -316,7 +316,7 @@ def initialize_openviking_config( Args: user: UserIdentifier for session management - path: Local storage path for embedded mode + path: Local storage path (workspace) for embedded mode Returns: Configured OpenVikingConfig instance @@ -337,9 +337,8 @@ def initialize_openviking_config( if path: # Embedded mode: local storage config.storage.agfs.backend = config.storage.agfs.backend or "local" - config.storage.agfs.path = path config.storage.vectordb.backend = config.storage.vectordb.backend or "local" - config.storage.vectordb.path = path + config.storage.workspace = path # Ensure vector dimension is synced if not set in storage if config.storage.vectordb.dimension == 0: diff --git a/openviking_cli/utils/config/storage_config.py b/openviking_cli/utils/config/storage_config.py index b6ce378a..8daf6a79 100644 --- a/openviking_cli/utils/config/storage_config.py +++ b/openviking_cli/utils/config/storage_config.py @@ -1,15 +1,27 @@ # Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. # SPDX-License-Identifier: Apache-2.0 +from pathlib import Path from typing import Any, Dict -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator + +from openviking_cli.utils.logger import get_logger from .agfs_config import AGFSConfig from .vectordb_config import VectorDBBackendConfig +logger = get_logger(__name__) + class StorageConfig(BaseModel): - """Configuration for storage backend.""" + """Configuration for storage backend. + + The `workspace` field is the primary configuration for local data storage. + When `workspace` is set, it overrides the deprecated `path` fields in + `agfs` and `vectordb` configurations. + """ + + workspace: str = Field(default="./data", description="Local data storage path (primary)") agfs: AGFSConfig = Field(default_factory=lambda: AGFSConfig(), description="AGFS configuration") @@ -23,3 +35,37 @@ class StorageConfig(BaseModel): ) model_config = {"extra": "forbid"} + + @model_validator(mode="after") + def resolve_paths(self): + if self.agfs.path is not None: + logger.warning( + f"StorageConfig: 'agfs.path' is deprecated and will be ignored. " + f"Using '{self.workspace}' from workspace instead of '{self.agfs.path}'" + ) + + if self.vectordb.path is not None: + logger.warning( + f"StorageConfig: 'vectordb.path' is deprecated and will be ignored. " + f"Using '{self.workspace}' from workspace instead of '{self.vectordb.path}'" + ) + + # Update paths to use workspace + workspace_path = Path(self.workspace).resolve() + workspace_path.mkdir(parents=True, exist_ok=True) + self.workspace = str(workspace_path) + self.agfs.path = self.workspace + self.vectordb.path = self.workspace + # logger.info(f"StorageConfig: Using workspace '{self.workspace}' for storage") + return self + + def get_upload_temp_dir(self) -> Path: + """Get the temporary directory for file uploads. + + Returns: + Path to {workspace}/temp/upload directory + """ + workspace_path = Path(self.workspace).resolve() + upload_temp_dir = workspace_path / "temp" / "upload" + upload_temp_dir.mkdir(parents=True, exist_ok=True) + return upload_temp_dir diff --git a/openviking_cli/utils/config/vectordb_config.py b/openviking_cli/utils/config/vectordb_config.py index 090e1c88..4052e0c4 100644 --- a/openviking_cli/utils/config/vectordb_config.py +++ b/openviking_cli/utils/config/vectordb_config.py @@ -47,7 +47,10 @@ class VectorDBBackendConfig(BaseModel): name: Optional[str] = Field(default=COLLECTION_NAME, description="Collection name for VectorDB") - path: Optional[str] = Field(default="./data", description="Local storage path for 'local' type") + path: Optional[str] = Field( + default=None, + description="[Deprecated in favor of `storage.workspace`] Local storage path for 'local' type. This will be ignored if `storage.workspace` is set.", + ) url: Optional[str] = Field( default=None, @@ -98,8 +101,7 @@ def validate_config(self): ) if self.backend == "local": - if not self.path: - raise ValueError("VectorDB local backend requires 'path' to be set") + pass elif self.backend == "http": if not self.url: diff --git a/pyproject.toml b/pyproject.toml index b6efd649..0924e749 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ dependencies = [ "pdfminer-six>=20251230", "typer>=0.12.0", "litellm>=1.0.0", + "python-multipart>=0.0.22", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 07a6ba5d..678904d5 100644 --- a/uv.lock +++ b/uv.lock @@ -1915,6 +1915,7 @@ dependencies = [ { name = "pyagfs" }, { name = "pydantic" }, { name = "python-docx" }, + { name = "python-multipart" }, { name = "python-pptx" }, { name = "pyyaml" }, { name = "readabilipy" }, @@ -1973,6 +1974,7 @@ requires-dist = [ { name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=0.21.0" }, { name = "pytest-cov", marker = "extra == 'test'", specifier = ">=4.0.0" }, { name = "python-docx", specifier = ">=1.0.0" }, + { name = "python-multipart", specifier = ">=0.0.22" }, { name = "python-pptx", specifier = ">=1.0.0" }, { name = "pyyaml", specifier = ">=6.0" }, { name = "readabilipy", specifier = ">=0.2.0" }, @@ -2588,6 +2590,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, ] +[[package]] +name = "python-multipart" +version = "0.0.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, +] + [[package]] name = "python-pptx" version = "1.0.2"