Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions openviking/models/embedder/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,19 @@ def _estimate_tokens(self, text: str) -> int:
# CJK text (3 bytes per char in UTF-8) is not underestimated.
return max(len(text) // 3, len(text.encode("utf-8")) // 4)

def _chunk_text(self, text: str) -> List[str]:
def _chunk_text(self, text: str, override_max_tokens: Optional[int] = None) -> List[str]:
"""Split text into chunks, each within max_tokens.
Splitting priority: paragraphs (\\n\\n) > sentences (。.!?\\n) > fixed length.
Args:
text: Input text
override_max_tokens: If set, use this instead of self.max_tokens
Returns:
List of text chunks
"""
max_tok = self.max_tokens
max_tok = override_max_tokens if override_max_tokens is not None else self.max_tokens
if self._estimate_tokens(text) <= max_tok:
return [text]

Expand Down Expand Up @@ -188,7 +189,12 @@ def _fixed_length_split(self, text: str, max_tok: int) -> List[str]:
start = end
return chunks

def _chunk_and_embed(self, text: str, is_query: bool = False) -> EmbedResult:
def _chunk_and_embed(
self,
text: str,
is_query: bool = False,
override_max_tokens: Optional[int] = None,
) -> EmbedResult:
"""Chunk text if it exceeds max_tokens, embed each chunk, and merge results.
For text within limits, delegates to _embed_single directly.
Expand All @@ -198,15 +204,17 @@ def _chunk_and_embed(self, text: str, is_query: bool = False) -> EmbedResult:
Args:
text: Input text
is_query: Flag to indicate if this is a query embedding
override_max_tokens: If set, use this instead of self.max_tokens
Returns:
EmbedResult with merged embedding
"""
max_tok = override_max_tokens if override_max_tokens is not None else self.max_tokens
estimated = self._estimate_tokens(text)
if estimated <= self.max_tokens:
if estimated <= max_tok:
return self._embed_single(text, is_query=is_query)

chunks = self._chunk_text(text)
chunks = self._chunk_text(text, override_max_tokens=override_max_tokens)
logger.debug(
"Chunking text: original ~%d tokens -> %d chunks",
estimated,
Expand Down
28 changes: 26 additions & 2 deletions openviking/models/embedder/openai_embedders.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,9 @@ def _embed_single(self, text: str, is_query: bool = False) -> EmbedResult:
def embed(self, text: str, is_query: bool = False) -> EmbedResult:
    """Embed single text, with automatic chunking for oversized input.

    If the initial embedding fails due to input length exceeding the model's
    limit, retries with progressively smaller chunk sizes: the chunk budget
    is halved on each attempt, down to a floor of 128 tokens, so models whose
    true limit is far below the configured ``max_tokens`` still succeed.

    Args:
        text: Input text
        is_query: Flag to indicate if this is a query embedding

    Returns:
        EmbedResult: Result containing only dense_vector

    Raises:
        RuntimeError: When API call fails after all retry attempts, or when
            the failure is unrelated to input length.
    """

    def _is_length_error(err: RuntimeError) -> bool:
        # Heuristic substring match: OpenAI-compatible backends report
        # over-length input with differing messages and no shared error code.
        msg = str(err).lower()
        return (
            ("input" in msg and "too large" in msg)
            or ("token" in msg and ("too long" in msg or "too many" in msg))
            or "context length" in msg
        )

    # Empty input: delegate directly; provider decides how to handle it.
    if not text:
        return self._embed_single(text, is_query=is_query)

    # Estimated oversize: chunk up front without waiting for an API error.
    if self._estimate_tokens(text) > self.max_tokens:
        return self._chunk_and_embed(text, is_query=is_query)

    try:
        return self._embed_single(text, is_query=is_query)
    except RuntimeError as e:
        if not _is_length_error(e):
            raise
        # Token estimation was wrong (common with non-OpenAI models).
        # Progressively halve the chunk budget until the request fits.
        limit = self.max_tokens
        while limit > 128:
            limit = max(limit // 2, 128)
            logger.warning(
                "Embedding failed due to input length. "
                "Retrying with chunk size %d tokens. "
                "Set embedding.dense.max_tokens in ov.conf to match "
                "your model's actual limit.",
                limit,
            )
            try:
                return self._chunk_and_embed(
                    text, is_query=is_query, override_max_tokens=limit
                )
            except RuntimeError as retry_err:
                # Keep shrinking only while the failure is still
                # length-related and the floor has not been reached.
                if not _is_length_error(retry_err) or limit <= 128:
                    raise
        raise

def embed_batch(self, texts: List[str], is_query: bool = False) -> List[EmbedResult]:
"""Batch embedding with automatic chunking for oversized inputs.
Expand Down
Loading