Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions openviking/core/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def __init__(
self,
uri: str,
parent_uri: Optional[str] = None,
temp_uri: Optional[str] = None,
is_leaf: bool = False,
abstract: str = "",
context_type: Optional[str] = None,
Expand All @@ -78,6 +79,7 @@ def __init__(
self.id = id or str(uuid4())
self.uri = uri
self.parent_uri = parent_uri
self.temp_uri = temp_uri
self.is_leaf = is_leaf
self.abstract = abstract
self.context_type = context_type or self._derive_context_type()
Expand Down Expand Up @@ -159,6 +161,7 @@ def to_dict(self) -> Dict[str, Any]:
"id": self.id,
"uri": self.uri,
"parent_uri": self.parent_uri,
"temp_uri": self.temp_uri,
"is_leaf": self.is_leaf,
"abstract": self.abstract,
"context_type": self.context_type,
Expand Down Expand Up @@ -194,6 +197,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "Context":
obj = cls(
uri=data["uri"],
parent_uri=data.get("parent_uri"),
temp_uri=data.get("temp_uri"),
is_leaf=data.get("is_leaf", False),
abstract=data.get("abstract", ""),
context_type=data.get("context_type"),
Expand Down
2 changes: 2 additions & 0 deletions openviking/parse/parsers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
".graphql",
".gql",
".prisma",
".conf",
}

# Documentation file extensions for file type detection
Expand Down Expand Up @@ -224,6 +225,7 @@
".yarnrc",
".env",
".env.example",
".jsonl",
}

# Common text encodings to try for encoding detection (in order of likelihood)
Expand Down
5 changes: 5 additions & 0 deletions openviking/parse/parsers/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ async def parse(
viking_fs = self._get_viking_fs()
temp_uri = self._create_temp_uri()
target_uri = f"{temp_uri}/{dir_name}"
logger.info(
f"Scanning directory: {source_path}, "
f"processable files: {len(processable_files)}, "
f"warnings: {warnings}"
)
await viking_fs.mkdir(temp_uri, exist_ok=True)
await viking_fs.mkdir(target_uri, exist_ok=True)

Expand Down
1 change: 1 addition & 0 deletions openviking/parse/parsers/upload_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"NEWS",
"NOTICE",
"TODO",
"BUILD",
}


Expand Down
99 changes: 4 additions & 95 deletions openviking/parse/tree_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,16 +138,6 @@ async def finalize_from_temp(
base_uri = parent_uri or auto_base_uri
# 3. Determine candidate_uri
if to_uri:
# Exact target URI: must not exist yet
try:
await viking_fs.stat(to_uri, ctx=ctx)
# If we get here, it already exists
raise FileExistsError(f"Target URI already exists: {to_uri}")
except FileExistsError:
raise
except Exception:
# It doesn't exist, good to use
pass
candidate_uri = to_uri
else:
if parent_uri:
Expand All @@ -160,34 +150,7 @@ async def finalize_from_temp(
raise ValueError(f"Parent URI is not a directory: {parent_uri}")
candidate_uri = VikingURI(base_uri).join(final_doc_name).uri

final_uri = await self._resolve_unique_uri(candidate_uri, ctx=ctx)

if final_uri != candidate_uri:
logger.info(f"[TreeBuilder] Resolved name conflict: {candidate_uri} -> {final_uri}")
else:
logger.info(f"[TreeBuilder] Finalizing from temp: {final_uri}")

# 4. Move directory tree from temp to final location in AGFS
await self._move_temp_to_dest(viking_fs, temp_doc_uri, final_uri, ctx=ctx)
logger.info(f"[TreeBuilder] Moved temp tree: {temp_doc_uri} -> {final_uri}")

# 5. Cleanup temporary root directory
try:
await viking_fs.delete_temp(temp_uri, ctx=ctx)
logger.info(f"[TreeBuilder] Cleaned up temp root: {temp_uri}")
except Exception as e:
logger.warning(f"[TreeBuilder] Failed to cleanup temp root: {e}")

# 6. Enqueue to SemanticQueue for async semantic generation
if trigger_semantic:
try:
await self._enqueue_semantic_generation(final_uri, "resource", ctx=ctx)
logger.info(f"[TreeBuilder] Enqueued semantic generation for: {final_uri}")
except Exception as e:
logger.error(
f"[TreeBuilder] Failed to enqueue semantic generation: {e}", exc_info=True
)

final_uri = candidate_uri
# 7. Return simple BuildingTree (no scanning needed)
tree = BuildingTree(
source_path=source_path,
Expand All @@ -196,39 +159,11 @@ async def finalize_from_temp(
tree._root_uri = final_uri

# Create a minimal Context object for the root so that tree.root is not None
root_context = Context(uri=final_uri)
root_context = Context(uri=final_uri, temp_uri=temp_doc_uri)
tree.add_context(root_context)

return tree

async def _resolve_unique_uri(
self, uri: str, max_attempts: int = 100, ctx: Optional[RequestContext] = None
) -> str:
"""Return a URI that does not collide with an existing resource.

If *uri* is free, return it unchanged. Otherwise append ``_1``,
``_2``, … until a free name is found (like macOS Finder / Windows
Explorer).
"""
viking_fs = get_viking_fs()

async def _exists(u: str) -> bool:
try:
await viking_fs.stat(u, ctx=ctx)
return True
except Exception:
return False

if not await _exists(uri):
return uri

for i in range(1, max_attempts + 1):
candidate = f"{uri}_{i}"
if not await _exists(candidate):
return candidate

raise FileExistsError(f"Cannot resolve unique name for {uri} after {max_attempts} attempts")

async def _move_temp_to_dest(
self, viking_fs, src_uri: str, dst_uri: str, ctx: RequestContext
) -> None:
Expand Down Expand Up @@ -261,7 +196,7 @@ async def _ensure_parent_dirs(self, uri: str, ctx: RequestContext) -> None:
logger.debug(f"Parent dir {parent_uri} may already exist: {e}")

async def _enqueue_semantic_generation(
self, uri: str, context_type: str, ctx: RequestContext
self, uri: str, final_uri: str, context_type: str, ctx: RequestContext
) -> None:
"""
Enqueue a directory for semantic generation.
Expand All @@ -284,32 +219,6 @@ async def _enqueue_semantic_generation(
user_id=ctx.user.user_id,
agent_id=ctx.user.agent_id,
role=ctx.role.value,
target_uri=final_uri,
)
await semantic_queue.enqueue(msg)

async def _load_content(self, uri: str, content_type: str) -> str:
"""Helper to load content with proper type handling"""
import json

if content_type == "abstract":
result = await get_viking_fs().abstract(uri)
elif content_type == "overview":
result = await get_viking_fs().overview(uri)
elif content_type == "detail":
result = await get_viking_fs().read_file(uri)
else:
return ""

# Handle different return types
if isinstance(result, str):
return result
elif isinstance(result, bytes):
return result.decode("utf-8")
elif hasattr(result, "to_dict") and not isinstance(result, list):
# Handle FindResult by converting to dict (skip lists)
return str(result.to_dict())
elif isinstance(result, list):
# Handle list results
return json.dumps(result)
else:
return str(result)
1 change: 1 addition & 0 deletions openviking/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class Response(BaseModel):
"INVALID_URI": 400,
"NOT_FOUND": 404,
"ALREADY_EXISTS": 409,
"CONFLICT": 409,
"PERMISSION_DENIED": 403,
"UNAUTHENTICATED": 401,
"RESOURCE_EXHAUSTED": 429,
Expand Down
4 changes: 3 additions & 1 deletion openviking/service/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,9 @@ async def initialize(self) -> None:
)

# Initialize processors
self._resource_processor = ResourceProcessor(vikingdb=self._vikingdb_manager)
self._resource_processor = ResourceProcessor(
vikingdb=self._vikingdb_manager,
)
self._skill_processor = SkillProcessor(vikingdb=self._vikingdb_manager)
self._session_compressor = SessionCompressor(vikingdb=self._vikingdb_manager)

Expand Down
Loading
Loading