Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 118 additions & 82 deletions src/codeweaver/providers/embedding/fastembed_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@

from __future__ import annotations

from typing import overload
from typing import TYPE_CHECKING, Any, overload

from codeweaver.core.di import dependency_provider
from codeweaver.core.utils import has_package

_FASTEMBED_AVAILABLE = has_package("fastembed") or has_package("fastembed-gpu")

try:
if TYPE_CHECKING:
from fastembed.common.model_description import (
BaseModelDescription,
DenseModelDescription,
Expand All @@ -24,87 +26,118 @@
from fastembed.rerank.cross_encoder import TextCrossEncoder
from fastembed.sparse import SparseTextEmbedding
from fastembed.text import TextEmbedding
Comment on lines 16 to 28
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_FASTEMBED_AVAILABLE is based on package presence, but the fastembed imports in the next block are not guarded. If fastembed (or fastembed-gpu) is installed but fails to import (missing binary deps, incompatible Python, etc.), this module will still raise at import time. To actually defer import errors to use-time, wrap these imports in try/except ImportError and fall back to the Any placeholders + _require_fastembed() raising ConfigurationError when the exported helpers are called.

Copilot uses AI. Check for mistakes.
elif _FASTEMBED_AVAILABLE:
try:
from fastembed.common.model_description import (
BaseModelDescription,
DenseModelDescription,
ModelSource,
PoolingType,
)
from fastembed.rerank.cross_encoder import TextCrossEncoder
from fastembed.sparse import SparseTextEmbedding
from fastembed.text import TextEmbedding
except ImportError:
_FASTEMBED_AVAILABLE = False

if not (TYPE_CHECKING or _FASTEMBED_AVAILABLE):
BaseModelDescription = Any
DenseModelDescription = Any
ModelSource = Any
PoolingType = Any
TextCrossEncoder = Any
SparseTextEmbedding = Any
TextEmbedding = Any


def _require_fastembed() -> None:
    """Ensure fastembed is usable; raise ConfigurationError when it is not.

    No-op when the optional fastembed dependency was imported successfully.
    """
    if _FASTEMBED_AVAILABLE:
        return
    # Imported lazily so this module stays importable without fastembed.
    from codeweaver.core import ConfigurationError

    raise ConfigurationError(
        "fastembed is not installed. Please install it with "
        "`pip install code-weaver[fastembed]` or `pip install code-weaver[fastembed-gpu]`."
    )


if _FASTEMBED_AVAILABLE:
"""
SPARSE_MODELS = (
SparseModelDescription(
model="prithivida/Splade_PP_en_v2",
vocab_size=30522, # BERT base uncased vocab
description="SPLADE++ v2",
license="apache-2.0",
size_in_GB=0.6,
sources=ModelSource(hf="prithivida/Splade_PP_en_v2"),
model_file="model.onnx",
),
)
"""
DENSE_MODELS = (
DenseModelDescription(
model="Alibaba-NLP/gte-modernbert-base",
license="apache-2.0",
sources=ModelSource(hf="Alibaba-NLP/gte-modernbert-base"),
description="""Text embeddings, Unimodal (text), multilingual, 8192 input tokens truncation, Prefixes for queries/documents: not necessary, 2024 year.""",
model_file="onnx/model.onnx",
size_in_GB=0.60,
dim=768,
),
DenseModelDescription(
model="BAAI/bge-m3",
license="mit",
sources=ModelSource(hf="BAAI/bge-m3"),
# if this seems like a strange description, it's because it mirrors the FastEmbed format, which gets parsed
description="""Text embeddings, Unimodal (text), multilingual, 8192 input tokens truncation, Prefixes for queries/documents: not necessary, 2024 year.""",
model_file="onnx/model.onnx",
additional_files=["onnx/model.onnx_data"],
size_in_GB=2.27,
dim=1024,
),
DenseModelDescription(
model="WhereIsAI/UAE-Large-V1",
license="mit",
sources=ModelSource(hf="WhereIsAI/UAE-Large-V1"),
description="""Text embeddings, Unimodal (text), multilingual, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year.""",
model_file="onnx/model.onnx",
size_in_GB=1.23,
dim=1024,
),
DenseModelDescription(
model="snowflake/snowflake-arctic-embed-l-v2.0",
license="apache-2.0",
sources=ModelSource(hf="Snowflake/snowflake-arctic-embed-l-v2.0"),
description="""Text embeddings, Unimodal (text), multilingual, 8192 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year.""",
model_file="onnx/model.onnx",
size_in_GB=1.79,
dim=1024,
),
DenseModelDescription(
model="snowflake/snowflake-arctic-embed-m-v2.0",
license="apache-2.0",
sources=ModelSource(hf="Snowflake/snowflake-arctic-embed-m-v2.0"),
description="""Text embeddings, Unimodal (text), multilingual, 8192 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year.""",
model_file="onnx/model.onnx",
size_in_GB=1.23,
dim=768,
),
)

except ImportError as e:
from codeweaver.core import ConfigurationError

raise ConfigurationError(
"fastembed is not installed. Please install it with `pip install code-weaver[fastembed]` or `codeweaver[fastembed-gpu]`."
) from e

"""
SPARSE_MODELS = (
SparseModelDescription(
model="prithivida/Splade_PP_en_v2",
vocab_size=30522, # BERT base uncased vocab
description="SPLADE++ v2",
license="apache-2.0",
size_in_GB=0.6,
sources=ModelSource(hf="prithivida/Splade_PP_en_v2"),
model_file="model.onnx",
),
)
"""
DENSE_MODELS = (
DenseModelDescription(
model="Alibaba-NLP/gte-modernbert-base",
license="apache-2.0",
sources=ModelSource(hf="Alibaba-NLP/gte-modernbert-base"),
description="""Text embeddings, Unimodal (text), multilingual, 8192 input tokens truncation, Prefixes for queries/documents: not necessary, 2024 year.""",
model_file="onnx/model.onnx",
size_in_GB=0.60,
dim=768,
),
DenseModelDescription(
model="BAAI/bge-m3",
license="mit",
sources=ModelSource(hf="BAAI/bge-m3"),
# if this seems like a strange description, it's because it mirrors the FastEmbed format, which gets parsed
description="""Text embeddings, Unimodal (text), multilingual, 8192 input tokens truncation, Prefixes for queries/documents: not necessary, 2024 year.""",
model_file="onnx/model.onnx",
additional_files=["onnx/model.onnx_data"],
size_in_GB=2.27,
dim=1024,
),
DenseModelDescription(
model="WhereIsAI/UAE-Large-V1",
license="mit",
sources=ModelSource(hf="WhereIsAI/UAE-Large-V1"),
description="""Text embeddings, Unimodal (text), multilingual, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year.""",
model_file="onnx/model.onnx",
size_in_GB=1.23,
dim=1024,
),
DenseModelDescription(
model="snowflake/snowflake-arctic-embed-l-v2.0",
license="apache-2.0",
sources=ModelSource(hf="Snowflake/snowflake-arctic-embed-l-v2.0"),
description="""Text embeddings, Unimodal (text), multilingual, 8192 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year.""",
model_file="onnx/model.onnx",
size_in_GB=1.79,
dim=1024,
),
DenseModelDescription(
model="snowflake/snowflake-arctic-embed-m-v2.0",
license="apache-2.0",
sources=ModelSource(hf="Snowflake/snowflake-arctic-embed-m-v2.0"),
description="""Text embeddings, Unimodal (text), multilingual, 8192 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year.""",
model_file="onnx/model.onnx",
size_in_GB=1.23,
dim=768,
),
)

RERANKING_MODELS: tuple[BaseModelDescription, ...] = (
BaseModelDescription(
model="Alibaba-NLP/gte-reranker-modernbert-base",
license="apache-2.0",
sources=ModelSource(hf="Alibaba-NLP/gte-reranker-modernbert-base"),
description="""A lightweight high-performance cross-encoder with 8192 token context length.""",
model_file="onnx/model_fp16.onnx",
size_in_GB=0.3,
),
)
RERANKING_MODELS: tuple[BaseModelDescription, ...] = (
BaseModelDescription(
model="Alibaba-NLP/gte-reranker-modernbert-base",
license="apache-2.0",
sources=ModelSource(hf="Alibaba-NLP/gte-reranker-modernbert-base"),
description="""A lightweight high-performance cross-encoder with 8192 token context length.""",
model_file="onnx/model_fp16.onnx",
size_in_GB=0.3,
),
)
else:
DENSE_MODELS = ()
RERANKING_MODELS = ()


@overload
Expand Down Expand Up @@ -161,6 +194,7 @@ def get_cross_encoder() -> type[TextCrossEncoder]:
"""
Get the cross encoder with added custom models.
"""
_require_fastembed()
return add_models(TextCrossEncoder, RERANKING_MODELS)


Expand All @@ -171,6 +205,7 @@ def get_sparse_embedder() -> type[SparseTextEmbedding]:

TODO: Temporarily disabled until we can work out the bugs on added sparse models in FastEmbed.
"""
_require_fastembed()
# splade_pp.supported_splade_models.append(SPARSE_MODELS[0])
return SparseTextEmbedding

Expand All @@ -182,6 +217,7 @@ def get_text_embedder() -> type[TextEmbedding]:

Only adds models that aren't already in FastEmbed's native registry to avoid conflicts.
"""
_require_fastembed()
from fastembed.common.model_description import PoolingType

# we don't add these yet, but they're here for when we do
Expand Down
47 changes: 38 additions & 9 deletions src/codeweaver/providers/embedding/providers/fastembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import logging

from collections.abc import Callable, Iterable, Sequence
from typing import Any, ClassVar, Literal, cast, override
from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast, override

import numpy as np

Expand All @@ -26,10 +26,10 @@
from codeweaver.core import (
CodeChunk,
CodeWeaverSparseEmbedding,
ConfigurationError,
Provider,
rpartial,
)
from codeweaver.core.utils import has_package
from codeweaver.providers.embedding.capabilities.base import SparseEmbeddingModelCapabilities
from codeweaver.providers.embedding.providers.base import (
EmbeddingCustomDeps,
Expand All @@ -38,22 +38,49 @@
SparseEmbeddingProvider,
)

_FASTEMBED_AVAILABLE = has_package("fastembed") or has_package("fastembed-gpu")

try:
if TYPE_CHECKING:
from fastembed.sparse import SparseTextEmbedding
from fastembed.text import TextEmbedding

Comment on lines +41 to 46
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file now defers the absence of fastembed, but it no longer defers ImportError during fastembed import (the from fastembed... imports run whenever has_package(...) returns true). To fully meet the PR’s goal, the imports should be protected so that a broken/partial fastembed install doesn’t raise at module import time; instead, record the failure and raise ConfigurationError when FastEmbedEmbeddingProvider/FastEmbedSparseProvider functionality is invoked.

Copilot uses AI. Check for mistakes.
from codeweaver.providers.embedding.fastembed_extensions import (
get_sparse_embedder,
get_text_embedder,
)
except ImportError as e:
raise ConfigurationError(
r"FastEmbed is not installed. Please install it with `pip install code-weaver\[fastembed]` or `code-weaver\[fastembed-gpu]`."
) from e
elif _FASTEMBED_AVAILABLE:
try:
from fastembed.sparse import SparseTextEmbedding
from fastembed.text import TextEmbedding

from codeweaver.providers.embedding.fastembed_extensions import (
get_sparse_embedder,
get_text_embedder,
)
except ImportError:
_FASTEMBED_AVAILABLE = False

if not (TYPE_CHECKING or _FASTEMBED_AVAILABLE):
TextEmbedding = Any
SparseTextEmbedding = Any

if _FASTEMBED_AVAILABLE:
_TextEmbedding = get_text_embedder()
_SparseTextEmbedding = get_sparse_embedder()
else:
_TextEmbedding = None
_SparseTextEmbedding = None

_TextEmbedding = get_text_embedder()
_SparseTextEmbedding = get_sparse_embedder()

def _require_fastembed() -> None:
    """Guard for provider entry points: fail fast when fastembed is absent.

    Raises:
        ConfigurationError: if the optional fastembed dependency could not
            be imported at module load time.
    """
    if not _FASTEMBED_AVAILABLE:
        # Local import keeps module import side-effect free when fastembed is missing.
        from codeweaver.core import ConfigurationError

        message = (
            "fastembed is not installed. Please install it with "
            "`pip install code-weaver[fastembed]` or `pip install code-weaver[fastembed-gpu]`."
        )
        raise ConfigurationError(message)


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -109,6 +136,7 @@ def _initialize(
**kwargs: Any,
) -> None:
"""Initialize the FastEmbed client."""
_require_fastembed()

@property
def base_url(self) -> str | None:
Expand Down Expand Up @@ -176,6 +204,7 @@ def _initialize(
**kwargs: Any,
) -> None:
"""Initialize the FastEmbed sparse client."""
_require_fastembed()
# impl_deps and custom_deps are ignored for FastEmbed sparse provider;
# caps may be passed as a keyword argument via **kwargs from the base class.
# 1. Set caps using object.__setattr__ because pydantic model isn't fully initialized yet
Expand Down
33 changes: 23 additions & 10 deletions src/codeweaver/providers/reranking/providers/fastembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,41 @@

from collections.abc import Callable, Sequence
from functools import partial
from typing import Any, ClassVar, cast
from typing import TYPE_CHECKING, Any, ClassVar, cast

import numpy as np

from codeweaver.core import Provider, ProviderError
from codeweaver.core.constants import DEFAULT_RERANKING_MAX_RESULTS
from codeweaver.core.utils import has_package
from codeweaver.providers.reranking.providers.base import RerankingProvider


logger = logging.getLogger(__name__)

try:
_FASTEMBED_AVAILABLE = has_package("fastembed") or has_package("fastembed-gpu")

if TYPE_CHECKING:
from fastembed.rerank.cross_encoder import TextCrossEncoder
elif _FASTEMBED_AVAILABLE:
try:
from fastembed.rerank.cross_encoder import TextCrossEncoder
except ImportError:
_FASTEMBED_AVAILABLE = False

if not (TYPE_CHECKING or _FASTEMBED_AVAILABLE):
TextCrossEncoder = Any
Comment on lines +26 to +37
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_FASTEMBED_AVAILABLE is computed from has_package(...), but the subsequent from fastembed... import is unconditional when that flag is true. If the distribution is present but import fastembed fails (e.g., missing platform wheels/compiled deps), this will still crash at module import time, which defeats the goal of deferring fastembed import errors to use-time. Consider wrapping the fastembed import in try/except ImportError and, on failure, treating fastembed as unavailable (optionally stash the ImportError) so you can raise a ConfigurationError from a small _require_fastembed() guard when the provider is actually used.

Copilot uses AI. Check for mistakes.


except ImportError as e:
logger.warning(
"Failed to import TextCrossEncoder from fastembed.rerank.cross_encoder", exc_info=True
)
from codeweaver.core import ConfigurationError
def _require_fastembed() -> None:
"""Raise ConfigurationError if fastembed is not installed."""
if not _FASTEMBED_AVAILABLE:
from codeweaver.core import ConfigurationError

raise ConfigurationError(
r"FastEmbed is not installed. Please install it with `pip install code-weaver\[fastembed]` or `codeweaver\[fastembed-gpu]`."
) from e
raise ConfigurationError(
"fastembed is not installed. Please install it with "
"`pip install code-weaver[fastembed]` or `pip install code-weaver[fastembed-gpu]`."
)


class FastEmbedRerankingProvider(RerankingProvider[TextCrossEncoder]):
Expand All @@ -55,6 +67,7 @@ async def _execute_rerank(
**kwargs: Any,
) -> Any:
"""Execute the reranking process."""
_require_fastembed()
try:
# our batch_size needs to be the number of documents because we only get back the scores.
# If we set it to a lower number, we wouldn't know what documents the scores correspond to without some extra setup.
Expand Down
Loading