Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 76 additions & 7 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,86 @@ on:
branches: [ "main" ]
workflow_call:

env:
POETRY_HOME: "/opt/poetry"

jobs:
test:
name: Tests
# Exercises the import-error path: the package is installed without the
# `local` extra, and this job sets no API key.
unit-tests-no-local-deps:
  name: Unit Tests (no local deps)
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      python-version:
        - "3.9"
        - "3.10"
        - "3.11"
        - "3.12"
        - "3.13"
        - "3.14"
    # Let every interpreter finish even when one of them fails.
    fail-fast: false
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    # Poetry is pinned and lives in its own virtualenv at $POETRY_HOME.
    - name: Install poetry
      run: |
        python3 -m venv $POETRY_HOME
        $POETRY_HOME/bin/pip install poetry==1.8.4
        $POETRY_HOME/bin/poetry --version

    # ffmpeg is taken from the ubuntuhandbook1/ffmpeg7 PPA.
    - name: Install ffmpeg 7
      run: |
        sudo apt-get update
        sudo apt-get install -y software-properties-common
        sudo add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg7
        sudo apt-get update
        sudo apt-get install -y ffmpeg
        ffmpeg -version

    # No --extras flag, so the optional `local` packages are not requested.
    - name: Install package (without local extras)
      run: $POETRY_HOME/bin/poetry install

    - name: Run unit tests (no API key, no local deps)
      run: >-
        $POETRY_HOME/bin/poetry run pytest
        tests/test_client_local.py
        tests/test_client_local_async.py
        -v

# Runs the local-model test files with the `local` extra installed; this job
# sets no API key.
local-model-tests:
  name: Local Model Tests
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      python-version:
        - "3.9"
        - "3.10"
        - "3.11"
        - "3.12"
        - "3.13"
        - "3.14"
    # Let every interpreter finish even when one of them fails.
    fail-fast: false
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    # Poetry is pinned and lives in its own virtualenv at $POETRY_HOME.
    - name: Install poetry
      run: |
        python3 -m venv $POETRY_HOME
        $POETRY_HOME/bin/pip install poetry==1.8.4
        $POETRY_HOME/bin/poetry --version

    # ffmpeg is taken from the ubuntuhandbook1/ffmpeg7 PPA.
    - name: Install ffmpeg 7
      run: |
        sudo apt-get update
        sudo apt-get install -y software-properties-common
        sudo add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg7
        sudo apt-get update
        sudo apt-get install -y ffmpeg
        ffmpeg -version

    # `--extras local` requests the optional packages listed under the `local` extra.
    - name: Install package with local extras
      run: $POETRY_HOME/bin/poetry install --extras local

    - name: Run local model tests (no API key)
      run: >-
        $POETRY_HOME/bin/poetry run pytest
        tests/test_client_local.py
        tests/test_client_local_async.py
        -v

# Full integration tests with API key and all dependencies
integration-tests:
name: Integration Tests
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
env:
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
POETRY_HOME: "/opt/poetry"
steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand All @@ -38,7 +107,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y ffmpeg
ffmpeg -version
- name: Install package
run: $POETRY_HOME/bin/poetry install
- name: Run tests
run: $POETRY_HOME/bin/poetry run pytest
- name: Install package with local extras
run: $POETRY_HOME/bin/poetry install --extras local
- name: Run all tests
run: $POETRY_HOME/bin/poetry run pytest
13 changes: 12 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "voyageai"
version = "0.3.8"
version = "0.3.9"
description = ""
authors = ["Yujie Qian <yujieq@voyageai.com>"]
readme = "README.md"
Expand All @@ -20,6 +20,11 @@ pydantic = ">=1.10.8"
tokenizers = ">=0.14.0"
langchain-text-splitters = ">=0.3.8"
ffmpeg-python = "*"
sentence-transformers = {version = ">=3.0.0", optional = true}
torch = {version = ">=2.0.0", optional = true}

# Optional feature sets; `local` is the one selected by `poetry install --extras local`.
[tool.poetry.extras]
local = [
    "sentence-transformers",
    "torch",
]

[tool.poetry.group.test.dependencies]
pytest = "^7.4.2"
Expand All @@ -40,6 +45,12 @@ quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false

[tool.pytest.ini_options]
# Async test mode for the pytest asyncio plugin.
asyncio_mode = "auto"
# Custom markers are registered here so `pytest -m integration` can select them.
markers = ["integration: marks tests as integration tests (require external dependencies or API)"]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
132 changes: 132 additions & 0 deletions tests/test_client_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Tests for local model support in Client."""

import pytest

# ruff: noqa: F401

# Probe for the optional local-inference packages by importing them once;
# the result decides which tests in this module are skipped.
try:
    import sentence_transformers
    import torch
except ImportError:
    REAL_DEPS_AVAILABLE = False
else:
    REAL_DEPS_AVAILABLE = True


class TestLocalModelSupport:
    """Checks on dependency detection for local model support."""

    @pytest.mark.skipif(REAL_DEPS_AVAILABLE, reason="Only run when deps not installed")
    def test_import_error_when_deps_missing(self):
        """The dependency guard raises ImportError carrying an install hint."""
        from voyageai.local import _ensure_local_deps

        with pytest.raises(ImportError) as raised:
            _ensure_local_deps()

        message = str(raised.value)
        assert "pip install voyageai[local]" in message

    def test_has_local_constant(self):
        """``voyageai.HAS_LOCAL`` agrees with this module's own import probe."""
        import voyageai

        reported = voyageai.HAS_LOCAL
        assert reported == REAL_DEPS_AVAILABLE


@pytest.mark.integration
class TestLocalModelIntegration:
    """Integration checks that drive the "voyage-4-nano" model through ``Client``.

    Run with: pytest -m integration
    """

    @pytest.fixture
    def check_deps(self):
        """Skip the requesting test unless the optional packages imported."""
        if REAL_DEPS_AVAILABLE:
            return
        pytest.skip("sentence-transformers or torch not installed")

    def test_seamless_local_embedding(self, check_deps):
        """``Client.embed()`` returns one 2048-wide vector for a single text."""
        from voyageai import Client

        # The client is constructed without passing an API key.
        local_client = Client()
        response = local_client.embed(["Hello, world!"], model="voyage-4-nano", input_type="document")
        vectors = response.embeddings

        assert len(vectors) == 1
        assert len(vectors[0]) == 2048
        assert response.total_tokens > 0

    def test_all_dimensions(self, check_deps):
        """Each requested ``output_dimension`` yields vectors of that length."""
        from voyageai import Client

        local_client = Client()

        for expected_dim in (256, 512, 1024, 2048):
            response = local_client.embed(
                ["Test text"],
                model="voyage-4-nano",
                input_type="document",
                output_dimension=expected_dim,
            )
            actual_dim = len(response.embeddings[0])
            assert actual_dim == expected_dim, f"Expected {expected_dim}, got {actual_dim}"

    def test_float32_dtype(self, check_deps):
        """With ``output_dtype="float32"`` the components are Python floats."""
        from voyageai import Client

        local_client = Client()

        response = local_client.embed(
            ["Test"], model="voyage-4-nano", input_type="document", output_dtype="float32"
        )
        first_component = response.embeddings[0][0]
        assert isinstance(first_component, float)

    def test_query_vs_document_different(self, check_deps):
        """The same text embeds differently as a query than as a document."""
        from voyageai import Client

        local_client = Client()
        question = "What is machine learning?"

        as_query = local_client.embed([question], model="voyage-4-nano", input_type="query")
        as_document = local_client.embed([question], model="voyage-4-nano", input_type="document")

        # Only input_type differs between the two calls.
        assert as_query.embeddings[0] != as_document.embeddings[0]

    def test_batch_embedding(self, check_deps):
        """Three input texts produce three 2048-wide vectors."""
        from voyageai import Client

        local_client = Client()

        documents = ["First document", "Second document", "Third document"]
        response = local_client.embed(documents, model="voyage-4-nano", input_type="document")

        assert len(response.embeddings) == 3
        for vector in response.embeddings:
            assert len(vector) == 2048

    def test_invalid_dimension_raises_error(self, check_deps):
        """An unsupported ``output_dimension`` is rejected with ValueError."""
        from voyageai import Client

        local_client = Client()

        with pytest.raises(ValueError) as raised:
            local_client.embed(["test"], model="voyage-4-nano", output_dimension=999)

        message = str(raised.value)
        assert "Invalid output_dimension" in message

    def test_invalid_dtype_raises_error(self, check_deps):
        """An unsupported ``output_dtype`` is rejected with ValueError."""
        from voyageai import Client

        local_client = Client()

        with pytest.raises(ValueError) as raised:
            local_client.embed(["test"], model="voyage-4-nano", output_dtype="invalid")

        message = str(raised.value)
        assert "Invalid output_dtype" in message
101 changes: 101 additions & 0 deletions tests/test_client_local_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Tests for async local model support in AsyncClient."""

import asyncio

import pytest

# ruff: noqa: F401

# Probe for the optional local-inference packages by importing them once;
# the result decides whether the tests in this module are skipped.
try:
    import sentence_transformers
    import torch
except ImportError:
    REAL_DEPS_AVAILABLE = False
else:
    REAL_DEPS_AVAILABLE = True


@pytest.mark.integration
class TestAsyncLocalModelIntegration:
    """Integration checks that drive the "voyage-4-nano" model through ``AsyncClient``.

    Run with: pytest -m integration
    """

    @pytest.fixture
    def check_deps(self):
        """Skip the requesting test unless the optional packages imported."""
        if REAL_DEPS_AVAILABLE:
            return
        pytest.skip("sentence-transformers or torch not installed")

    @pytest.mark.asyncio
    async def test_seamless_async_local_embedding(self, check_deps):
        """``AsyncClient.embed()`` returns one 2048-wide vector for a single text."""
        from voyageai import AsyncClient

        async_client = AsyncClient()
        response = await async_client.embed(
            ["Hello, world!"], model="voyage-4-nano", input_type="document"
        )
        vectors = response.embeddings

        assert len(vectors) == 1
        assert len(vectors[0]) == 2048
        assert response.total_tokens > 0

    @pytest.mark.asyncio
    async def test_concurrent_local_embeddings(self, check_deps):
        """Three embed calls awaited together each return one 2048-wide vector."""
        from voyageai import AsyncClient

        async_client = AsyncClient()

        batches = (
            ["First document"],
            ["Second document"],
            ["Third document"],
        )

        # All coroutines are created up front, then awaited via asyncio.gather.
        responses = await asyncio.gather(
            *(async_client.embed(batch, model="voyage-4-nano", input_type="document") for batch in batches)
        )

        assert len(responses) == 3
        for response in responses:
            assert len(response.embeddings) == 1
            assert len(response.embeddings[0]) == 2048

    @pytest.mark.asyncio
    async def test_async_all_dimensions(self, check_deps):
        """Each requested ``output_dimension`` yields vectors of that length."""
        from voyageai import AsyncClient

        async_client = AsyncClient()

        for expected_dim in (256, 512, 1024, 2048):
            response = await async_client.embed(
                ["Test text"],
                model="voyage-4-nano",
                input_type="document",
                output_dimension=expected_dim,
            )
            actual_dim = len(response.embeddings[0])
            assert actual_dim == expected_dim, f"Expected {expected_dim}, got {actual_dim}"

    @pytest.mark.asyncio
    async def test_async_query_vs_document(self, check_deps):
        """The same text embeds differently as a query than as a document."""
        from voyageai import AsyncClient

        async_client = AsyncClient()
        question = "What is AI?"

        as_query = await async_client.embed([question], model="voyage-4-nano", input_type="query")
        as_document = await async_client.embed([question], model="voyage-4-nano", input_type="document")

        assert as_query.embeddings[0] != as_document.embeddings[0]

    @pytest.mark.asyncio
    async def test_async_batch_embedding(self, check_deps):
        """Three input texts produce three embeddings."""
        from voyageai import AsyncClient

        async_client = AsyncClient()

        documents = ["First", "Second", "Third"]
        response = await async_client.embed(documents, model="voyage-4-nano", input_type="document")
        assert len(response.embeddings) == 3
assert len(result.embeddings) == 3

14 changes: 14 additions & 0 deletions voyageai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@
)
from voyageai.version import VERSION


def _is_local_available() -> bool:
"""Check if sentence-transformers and torch are installed."""
try:
import sentence_transformers # noqa: F401
import torch # noqa: F401

return True
except ImportError:
return False


HAS_LOCAL = _is_local_available()

if TYPE_CHECKING:
import requests
from aiohttp import ClientSession
Expand Down
Loading