Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions docs/en/guides/01-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -410,11 +410,27 @@ Reranking model for search result refinement.
}
```

**OpenAI-compatible provider (e.g. DashScope qwen3-rerank):**

```json
{
"rerank": {
"provider": "openai",
"api_key": "your-api-key",
"api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
"model": "qwen3-rerank",
"threshold": 0.1
}
}
```

| Parameter | Type | Description |
|-----------|------|-------------|
| `provider` | str | `"volcengine"` |
| `provider` | str | `"vikingdb"` (default) or `"openai"` |
| `api_key` | str | API key |
| `model` | str | Model name |
| `api_base` | str | Endpoint URL (openai provider only) |
| `threshold` | float | Score threshold; results below this are filtered out. Default: `0.1` |

If rerank is not configured, search uses vector similarity only.

Expand Down Expand Up @@ -729,9 +745,11 @@ For details on the lock mechanism, see [Path Locks and Crash Recovery](../concep
"stream": false
},
"rerank": {
"provider": "volcengine",
"provider": "volcengine|openai",
"api_key": "string",
"model": "string"
"model": "string",
"api_base": "string",
"threshold": 0.1
},
"storage": {
"workspace": "string",
Expand Down
24 changes: 21 additions & 3 deletions docs/zh/guides/01-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -383,11 +383,27 @@ AST 提取支持:Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。
}
```

**OpenAI 兼容提供方(如 DashScope qwen3-rerank):**

```json
{
"rerank": {
"provider": "openai",
"api_key": "your-api-key",
"api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
"model": "qwen3-rerank",
"threshold": 0.1
}
}
```

| 参数 | 类型 | 说明 |
|------|------|------|
| `provider` | str | `"volcengine"` |
| `provider` | str | `"vikingdb"`(默认)或 `"openai"` |
| `api_key` | str | API Key |
| `model` | str | 模型名称 |
| `api_base` | str | 接口地址(openai 提供方专用) |
| `threshold` | float | 分数阈值,低于此值的结果会被过滤。默认:`0.1` |

如果未配置 Rerank,搜索仅使用向量相似度。

Expand Down Expand Up @@ -704,9 +720,11 @@ HTTP 客户端(`SyncHTTPClient` / `AsyncHTTPClient`)和 CLI 工具连接远
"stream": false
},
"rerank": {
"provider": "volcengine",
"provider": "volcengine|openai",
"api_key": "string",
"model": "string"
"model": "string",
"api_base": "string",
"threshold": 0.1
},
"storage": {
"workspace": "string",
Expand Down
13 changes: 11 additions & 2 deletions examples/ov.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,22 @@
"thinking": false
},
"rerank": {
"ak": null,
"sk": null,
"provider": "vikingdb",
"ak": "{your-ak}",
"sk": "{your-sk}",
"host": "api-vikingdb.vikingdb.cn-beijing.volces.com",
"model_name": "doubao-seed-rerank",
"model_version": "251028",
"threshold": 0.1
},
"rerank_openai_example": {
"_comment": "For OpenAI-compatible rerank providers (e.g. DashScope qwen3-rerank):",
"provider": "openai",
"api_key": "{your-api-key}",
"api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
"model": "qwen3-rerank",
"threshold": 0.1
},
"auto_generate_l0": true,
"auto_generate_l1": true,
"default_search_mode": "thinking",
Expand Down
25 changes: 23 additions & 2 deletions openviking_cli/utils/config/rerank_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,46 @@
# SPDX-License-Identifier: Apache-2.0
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, model_validator


class RerankConfig(BaseModel):
    """Configuration for rerank API (VikingDB or OpenAI-compatible providers).

    Which fields are consulted depends on ``provider``:

    * ``"vikingdb"`` (default): ``ak`` / ``sk`` are the credentials checked by
      ``is_available``; ``host``, ``model_name`` and ``model_version`` carry
      VikingDB defaults.
    * ``"openai"``: ``api_key`` and ``api_base`` are required; ``model`` is
      optional.

    ``threshold`` is shared by both providers. Unknown keys are rejected
    (``extra="forbid"``).
    """

    provider: str = Field(default="vikingdb", description="Rerank provider: 'vikingdb' or 'openai'")

    # VikingDB fields
    ak: Optional[str] = Field(default=None, description="VikingDB Access Key")
    sk: Optional[str] = Field(default=None, description="VikingDB Secret Key")
    host: str = Field(
        default="api-vikingdb.vikingdb.cn-beijing.volces.com", description="VikingDB API host"
    )
    model_name: str = Field(default="doubao-seed-rerank", description="Rerank model name")
    model_version: str = Field(default="251028", description="Rerank model version")

    # OpenAI-compatible fields
    api_key: Optional[str] = Field(
        default=None, description="Bearer token for OpenAI-compatible providers"
    )
    api_base: Optional[str] = Field(default=None, description="Custom endpoint URL")
    model: Optional[str] = Field(
        default=None, description="Model name for OpenAI-compatible providers"
    )

    threshold: float = Field(
        default=0.1, description="Relevance threshold (score > threshold is relevant)"
    )

    # `model_name` / `model_version` start with pydantic's protected "model_"
    # prefix; clearing the protected namespaces keeps those public field names
    # without a namespace-conflict warning at class creation.
    model_config = {"extra": "forbid", "protected_namespaces": ()}

    @model_validator(mode="after")
    def validate_provider_fields(self) -> "RerankConfig":
        """Reject unknown providers and incomplete OpenAI-compatible settings.

        VikingDB credentials are not enforced here; ``is_available`` reports
        whether they are set.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If ``provider`` is not supported, or if it is
                ``"openai"`` and ``api_key`` or ``api_base`` is missing or empty.
        """
        allowed = ["vikingdb", "openai"]
        if self.provider not in allowed:
            raise ValueError(f"Rerank provider must be one of {allowed}, got '{self.provider}'")
        if self.provider == "openai" and not (self.api_key and self.api_base):
            raise ValueError("OpenAI-compatible rerank provider requires 'api_key' and 'api_base'")
        return self

    def is_available(self) -> bool:
        """Check if rerank is configured.

        Returns:
            True when the credentials for the selected provider are set and
            non-empty. Empty strings count as "not configured", matching the
            truthiness check used by ``validate_provider_fields``.
        """
        if self.provider == "openai":
            return bool(self.api_key and self.api_base)
        return bool(self.ak and self.sk)
4 changes: 4 additions & 0 deletions openviking_cli/utils/rerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ def from_config(cls, config) -> Optional["RerankClient"]:
if not config or not config.is_available():
return None

if config.provider == "openai":
from openviking_cli.utils.rerank_openai import OpenAIRerankClient
return OpenAIRerankClient.from_config(config)

return cls(
ak=config.ak,
sk=config.sk,
Expand Down
122 changes: 122 additions & 0 deletions openviking_cli/utils/rerank_openai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
# SPDX-License-Identifier: Apache-2.0
"""
OpenAI-compatible Rerank API Client.

Supports third-party rerank services like Alibaba Cloud DashScope (qwen3-rerank)
via api_key + api_base configuration.
"""

from typing import List, Optional

import requests

from openviking_cli.utils.logger import get_logger

logger = get_logger(__name__)


class OpenAIRerankClient:
    """
    OpenAI-compatible rerank API client using Bearer token auth.

    Compatible with services like Alibaba Cloud DashScope.
    """

    # Model used by from_config() when the config does not name one.
    DEFAULT_MODEL = "qwen3-rerank"

    def __init__(self, api_key: str, api_base: str, model_name: str, timeout: float = 30):
        """
        Initialize OpenAI-compatible rerank client.

        Args:
            api_key: Bearer token for authentication
            api_base: Full endpoint URL for the rerank API
            model_name: Model name to use for reranking
            timeout: Timeout in seconds passed to the HTTP request
        """
        self.api_key = api_key
        self.api_base = api_base
        self.model_name = model_name
        self.timeout = timeout

    def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]]:
        """
        Batch rerank documents against a query.

        Args:
            query: Query text
            documents: List of document texts to rank

        Returns:
            List of rerank scores for each document (same order as input),
            or None when rerank fails and the caller should fall back
        """
        if not documents:
            return []

        req_body = {
            "model": self.model_name,
            "query": query,
            "documents": documents,
        }

        try:
            response = requests.post(
                url=self.api_base,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json=req_body,
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()
        except Exception as e:
            # Deliberately broad: any transport, HTTP or JSON-decoding failure
            # is reported as None so the caller can fall back.
            logger.error("[OpenAIRerankClient] Rerank failed: %s", e)
            return None

        try:
            scores = self._scores_from_response(result, len(documents))
        except ValueError as e:
            logger.warning("[OpenAIRerankClient] %s", e)
            return None

        logger.debug("[OpenAIRerankClient] Reranked %s documents", len(documents))
        return scores

    @staticmethod
    def _scores_from_response(result: object, expected: int) -> List[float]:
        """
        Extract per-document scores from a decoded rerank response.

        Expects the standard OpenAI/Cohere rerank format:
        ``{"results": [{"index": int, "relevance_score": number}, ...]}``.

        Args:
            result: Decoded JSON body of the rerank response
            expected: Number of documents that were sent

        Returns:
            Scores ordered by the original document position. An entry without
            a ``relevance_score`` key scores 0.0.

        Raises:
            ValueError: If the payload is not in the expected format, the
                number of results differs from ``expected``, an index is
                missing, non-integer, out of range or repeated, or a score is
                not numeric.
        """
        results = result.get("results") if isinstance(result, dict) else None
        if not results or not isinstance(results, list):
            raise ValueError(f"Unexpected response format: {result}")

        if len(results) != expected:
            raise ValueError(
                f"Unexpected rerank result length: expected={expected} actual={len(results)}"
            )

        # Results may not be in original order — place each score by its index
        scores = [0.0] * expected
        seen = set()
        for item in results:
            idx = item.get("index") if isinstance(item, dict) else None
            if not isinstance(idx, int) or isinstance(idx, bool) or not 0 <= idx < expected:
                raise ValueError(f"Out-of-bounds or missing index in result: {item}")
            if idx in seen:
                # A repeated index means some other document received no score;
                # leaving it at 0.0 would silently misrank it.
                raise ValueError(f"Duplicate index in result: {item}")
            seen.add(idx)
            try:
                scores[idx] = float(item.get("relevance_score", 0.0))
            except (TypeError, ValueError, OverflowError):
                raise ValueError(f"Invalid relevance_score in result: {item}") from None
        return scores

    @classmethod
    def from_config(cls, config) -> Optional["OpenAIRerankClient"]:
        """
        Create OpenAIRerankClient from RerankConfig.

        Args:
            config: RerankConfig instance with provider='openai'

        Returns:
            OpenAIRerankClient instance or None if config is not available
        """
        if not config or not config.is_available():
            return None
        return cls(
            api_key=config.api_key,
            api_base=config.api_base,
            model_name=config.model or cls.DEFAULT_MODEL,
        )
Loading
Loading