Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion bot/vikingbot/config/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
import os
from pathlib import Path
from typing import Any

from loguru import logger

from vikingbot.config.schema import Config

CONFIG_PATH = None


def get_config_path() -> Path:
"""Get the path to ov.conf config file.

Expand Down Expand Up @@ -217,4 +220,4 @@ def camel_to_snake(name: str) -> str:
def snake_to_camel(name: str) -> str:
"""Convert snake_case to camelCase."""
components = name.split("_")
return components[0] + "".join(x.title() for x in components[1:])
return components[0] + "".join(x.title() for x in components[1:])
13 changes: 10 additions & 3 deletions bot/vikingbot/config/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ class SandboxMode(str, Enum):
SHARED = "shared"
PER_CHANNEL = "per-channel"


class AgentMemoryMode(str, Enum):
"""Agent memory mode enumeration."""

PER_SESSION = "per-session"
SHARED = "shared"
PER_CHANNEL = "per-channel"
Expand Down Expand Up @@ -109,7 +111,10 @@ class FeishuChannelConfig(BaseChannelConfig):
encrypt_key: str = ""
verification_token: str = ""
allow_from: list[str] = Field(default_factory=list) ## 允许更新Agent对话的Feishu用户ID列表
thread_require_mention: bool = Field(default=True, description="话题群模式下是否需要@才响应:默认True=所有消息必须@才响应;False=新话题首条消息无需@,后续回复必须@")
thread_require_mention: bool = Field(
default=True,
description="话题群模式下是否需要@才响应:默认True=所有消息必须@才响应;False=新话题首条消息无需@,后续回复必须@",
)

def channel_id(self) -> str:
# Use app_id directly as the ID
Expand Down Expand Up @@ -396,7 +401,9 @@ class ProviderConfig(BaseModel):

api_key: str = ""
api_base: Optional[str] = None
extra_headers: Optional[dict[str, str]] = Field(default_factory=dict) # Custom headers (e.g. APP-Code for AiHubMix)
extra_headers: Optional[dict[str, str]] = Field(
default_factory=dict
) # Custom headers (e.g. APP-Code for AiHubMix)


class ProvidersConfig(BaseModel):
Expand Down Expand Up @@ -734,4 +741,4 @@ def from_safe_name(safe_name: str):
file_name_split = safe_name.split("__")
return SessionKey(
type=file_name_split[0], channel_id=file_name_split[1], chat_id=file_name_split[2]
)
)
15 changes: 8 additions & 7 deletions bot/vikingbot/openviking_mount/ov_server.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import asyncio
import hashlib
from typing import List, Dict, Any, Optional
import time
from typing import Any, Dict, List, Optional

from loguru import logger
import time

import openviking as ov
from vikingbot.config.loader import load_config
Expand Down Expand Up @@ -99,9 +99,7 @@ async def find(self, query: str, target_uri: Optional[str] = None):
return await self.client.find(query, target_uri=target_uri)
return await self.client.find(query)

async def add_resource(
self, local_path: str, desc: str
) -> Optional[Dict[str, Any]]:
async def add_resource(self, local_path: str, desc: str) -> Optional[Dict[str, Any]]:
"""添加资源到 Viking"""
result = await self.client.add_resource(path=local_path, reason=desc)
return result
Expand Down Expand Up @@ -327,7 +325,9 @@ async def search_memory(

async def grep(self, uri: str, pattern: str, case_insensitive: bool = False) -> Dict[str, Any]:
"""通过模式(正则表达式)搜索内容"""
return await self.client.grep(uri, pattern, case_insensitive=case_insensitive, node_limit=10)
return await self.client.grep(
uri, pattern, case_insensitive=case_insensitive, node_limit=10
)

async def glob(self, pattern: str, uri: Optional[str] = None) -> Dict[str, Any]:
"""通过 glob 模式匹配文件"""
Expand All @@ -337,7 +337,8 @@ async def commit(self, session_id: str, messages: list[dict[str, Any]], user_id:
"""提交会话"""
import re
import uuid
from openviking.message.part import Part, TextPart, ToolPart

from openviking.message.part import TextPart, ToolPart

user_exists = await self._check_user_exists(user_id)
if not user_exists:
Expand Down
2 changes: 1 addition & 1 deletion openviking/models/embedder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
)
from openviking.models.embedder.jina_embedders import JinaDenseEmbedder
from openviking.models.embedder.openai_embedders import OpenAIDenseEmbedder
from openviking.models.embedder.voyage_embedders import VoyageDenseEmbedder
from openviking.models.embedder.vikingdb_embedders import (
VikingDBDenseEmbedder,
VikingDBHybridEmbedder,
Expand All @@ -36,6 +35,7 @@
VolcengineHybridEmbedder,
VolcengineSparseEmbedder,
)
from openviking.models.embedder.voyage_embedders import VoyageDenseEmbedder

__all__ = [
# Base classes
Expand Down
13 changes: 10 additions & 3 deletions openviking/parse/parsers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ def _format_xls_cell(cell, wb, xlrd) -> str:
dt = xlrd.xldate_as_tuple(cell.value, wb.datemode)
# Include time component if non-zero
if dt[3] or dt[4] or dt[5]:
return f"{dt[0]:04d}-{dt[1]:02d}-{dt[2]:02d} {dt[3]:02d}:{dt[4]:02d}:{dt[5]:02d}"
return (
f"{dt[0]:04d}-{dt[1]:02d}-{dt[2]:02d} {dt[3]:02d}:{dt[4]:02d}:{dt[5]:02d}"
)
return f"{dt[0]:04d}-{dt[1]:02d}-{dt[2]:02d}"
except Exception:
return str(cell.value)
Expand All @@ -151,8 +153,13 @@ def _format_xls_cell(cell, wb, xlrd) -> str:
if cell.ctype == xlrd.XL_CELL_ERROR:
# xlrd error code map
error_map = {
0x00: "#NULL!", 0x07: "#DIV/0!", 0x0F: "#VALUE!",
0x17: "#REF!", 0x1D: "#NAME?", 0x24: "#NUM!", 0x2A: "#N/A",
0x00: "#NULL!",
0x07: "#DIV/0!",
0x0F: "#VALUE!",
0x17: "#REF!",
0x1D: "#NAME?",
0x24: "#NUM!",
0x2A: "#N/A",
}
return error_map.get(cell.value, f"#ERR({cell.value})")
if cell.ctype == xlrd.XL_CELL_NUMBER:
Expand Down
21 changes: 13 additions & 8 deletions openviking/parse/parsers/legacy_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
logger = get_logger(__name__)


# Max stream size to read (50MB) — prevents DoS from crafted files
# Max stream size to read (50MB) — prevents DoS from crafted files
_MAX_STREAM_SIZE = 50 * 1024 * 1024
# Max character count sanity cap for ccpText
_MAX_CCP_TEXT = 10_000_000
Expand Down Expand Up @@ -154,9 +154,7 @@ def _extract_from_ole(self, ole) -> str:
if fc_clx <= 0 or lcb_clx <= 0 or fc_clx + lcb_clx > len(table_data):
return self._simple_text_extract(word_doc, ccp_text)

return self._extract_via_clx(
word_doc, table_data, fc_clx, lcb_clx, ccp_text
)
return self._extract_via_clx(word_doc, table_data, fc_clx, lcb_clx, ccp_text)

def _simple_text_extract(self, word_doc: bytes, ccp_text: int) -> str:
"""
Expand All @@ -177,7 +175,10 @@ def _simple_text_extract(self, word_doc: bytes, ccp_text: int) -> str:
raw = word_doc[text_start:end]
text = raw.decode("utf-16-le", errors="replace")
# Sanity: if mostly printable, it's likely correct
if sum(1 for c in text[:200] if c.isprintable() or c in "\n\r\t") > len(text[:200]) * 0.5:
if (
sum(1 for c in text[:200] if c.isprintable() or c in "\n\r\t")
> len(text[:200]) * 0.5
):
return self._clean_word_text(text)

# Fall back to CP1252 single-byte
Expand Down Expand Up @@ -277,7 +278,9 @@ def _extract_via_clx(
raw = word_doc[byte_offset:byte_end]
text_parts.append(self._decode_cp1252(raw))
else:
logger.warning(f"Piece {i} extends beyond stream ({byte_end} > {len(word_doc)})")
logger.warning(
f"Piece {i} extends beyond stream ({byte_end} > {len(word_doc)})"
)
else:
# UTF-16LE
byte_offset = fc_real
Expand All @@ -286,7 +289,9 @@ def _extract_via_clx(
raw = word_doc[byte_offset:byte_end]
text_parts.append(raw.decode("utf-16-le", errors="replace"))
else:
logger.warning(f"Piece {i} extends beyond stream ({byte_end} > {len(word_doc)})")
logger.warning(
f"Piece {i} extends beyond stream ({byte_end} > {len(word_doc)})"
)

chars_extracted += piece_char_count

Expand All @@ -305,7 +310,7 @@ def _clean_word_text(text: str) -> str:
"""Normalize Word control characters to readable equivalents."""
text = text.replace("\r\n", "\n").replace("\r", "\n")
# \x07 = cell/row end, \x0B = soft line break, \x0C = section break
text = text.replace("\x07", "\t").replace("\x0B", "\n").replace("\x0C", "\n\n")
text = text.replace("\x07", "\t").replace("\x0b", "\n").replace("\x0c", "\n\n")
return text

def _fallback_extract(self, path: Path) -> str:
Expand Down
6 changes: 3 additions & 3 deletions openviking/parse/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@

# Import will be handled dynamically to avoid dependency issues
from openviking.parse.parsers.html import HTMLParser

# Import markitdown-inspired parsers
from openviking.parse.parsers.legacy_doc import LegacyDocParser
from openviking.parse.parsers.markdown import MarkdownParser
from openviking.parse.parsers.media import AudioParser, ImageParser, VideoParser
from openviking.parse.parsers.pdf import PDFParser
from openviking.parse.parsers.powerpoint import PowerPointParser
from openviking.parse.parsers.text import TextParser

# Import markitdown-inspired parsers
from openviking.parse.parsers.legacy_doc import LegacyDocParser
from openviking.parse.parsers.word import WordParser
from openviking.parse.parsers.zip_parser import ZipParser

Expand Down
Loading