2 changes: 1 addition & 1 deletion langchain/langchain_vectorize/retrievers.py
@@ -160,7 +160,7 @@ def _get_relevant_documents(
        rerank: bool | None = None,
        metadata_filters: list[dict[str, Any]] | None = None,
    ) -> list[Document]:
        request = RetrieveDocumentsRequest(  # type: ignore[call-arg]
        request = RetrieveDocumentsRequest(
            question=query,
            num_results=num_results or self.num_results,
            rerank=rerank or self.rerank,
9 changes: 8 additions & 1 deletion langchain/pyproject.toml
@@ -7,7 +7,7 @@ keywords = ["langchain", "vectorize", "retrieval", "search"]
authors = [
    { name = "Vectorize", email = "contact@vectorize.io" },
]
requires-python = ">=3.9"
requires-python = ">=3.9,<4.0"
dependencies = [
"langchain-core>=0.3.45",
"vectorize-client>=0.4.0",
@@ -33,7 +33,11 @@ Issues = "https://github.com/vectorize-io/integrations-python/issues"
dev = [
"mypy>=1.17.1,<1.18",
"pytest>=8.3.3",
"pytest-asyncio>=0.26.0",
"ruff>=0.12.7,<0.13",
"langchain-tests>=0.3.21",
"requests>=2.31.0",
"types-requests>=2.31.0",
]

[tool.ruff.lint]
@@ -58,6 +62,7 @@ pyupgrade.keep-runtime-typing = true
]

[tool.mypy]
plugins = ["pydantic.mypy"]
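# Presumably this is what allows dropping the `# type: ignore[call-arg]` in
# retrievers.py above: the plugin teaches mypy the __init__ signatures that
# pydantic generates for the vectorize-client models.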
strict = true
strict_bytes = true
enable_error_code = "deprecated"
@@ -73,3 +78,5 @@ packages = ["langchain_vectorize"]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.pytest.ini_options]
asyncio_mode = "auto"
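# Presumably set so the async retriever tests shipped with langchain-tests run
# without per-test asyncio markers.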
212 changes: 212 additions & 0 deletions langchain/tests/conftest.py
@@ -0,0 +1,212 @@
import json
import logging
import os
import time
from collections.abc import Iterator
from pathlib import Path
from typing import Literal

import pytest
import requests
from vectorize_client.api.ai_platform_connectors_api import AIPlatformConnectorsApi
from vectorize_client.api.destination_connectors_api import DestinationConnectorsApi
from vectorize_client.api.pipelines_api import PipelinesApi
from vectorize_client.api.source_connectors_api import SourceConnectorsApi
from vectorize_client.api.uploads_api import UploadsApi
from vectorize_client.api_client import ApiClient
from vectorize_client.configuration import Configuration
from vectorize_client.exceptions import ApiException
from vectorize_client.models.ai_platform_config_schema import AIPlatformConfigSchema
from vectorize_client.models.ai_platform_type_for_pipeline import (
    AIPlatformTypeForPipeline,
)
from vectorize_client.models.create_source_connector_request import (
    CreateSourceConnectorRequest,
)
from vectorize_client.models.destination_connector_type_for_pipeline import (
    DestinationConnectorTypeForPipeline,
)
from vectorize_client.models.file_upload import FileUpload
from vectorize_client.models.pipeline_ai_platform_connector_schema import (
    PipelineAIPlatformConnectorSchema,
)
from vectorize_client.models.pipeline_configuration_schema import (
    PipelineConfigurationSchema,
)
from vectorize_client.models.pipeline_destination_connector_schema import (
    PipelineDestinationConnectorSchema,
)
from vectorize_client.models.pipeline_source_connector_schema import (
    PipelineSourceConnectorSchema,
)
from vectorize_client.models.retrieve_documents_request import RetrieveDocumentsRequest
from vectorize_client.models.schedule_schema import ScheduleSchema
from vectorize_client.models.schedule_schema_type import ScheduleSchemaType
from vectorize_client.models.source_connector_type import SourceConnectorType
from vectorize_client.models.start_file_upload_to_connector_request import (
    StartFileUploadToConnectorRequest,
)

logger = logging.getLogger(__name__)


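# The session-scoped fixtures below read the Vectorize credentials from the
# environment: VECTORIZE_TOKEN and VECTORIZE_ORG are required, and VECTORIZE_ENV
# optionally selects the target deployment (defaults to "prod").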
@pytest.fixture(scope="session")
def api_token() -> str:
    token = os.getenv("VECTORIZE_TOKEN")
    if not token:
        msg = "Please set the VECTORIZE_TOKEN environment variable"
        raise ValueError(msg)
    return token


@pytest.fixture(scope="session")
def org_id() -> str:
    org = os.getenv("VECTORIZE_ORG")
    if not org:
        msg = "Please set the VECTORIZE_ORG environment variable"
        raise ValueError(msg)
    return org


@pytest.fixture(scope="session")
def environment() -> Literal["prod", "dev", "local", "staging"]:
    env = os.getenv("VECTORIZE_ENV", "prod")
    if env not in {"prod", "dev", "local", "staging"}:
        msg = "Invalid VECTORIZE_ENV environment variable."
        raise ValueError(msg)
    return env  # type: ignore[return-value]


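# Build an ApiClient for the selected environment; the local deployment
# additionally passes the token via the x-lambda-api-key header.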
@pytest.fixture(scope="session")
def api_client(api_token: str, environment: str) -> Iterator[ApiClient]:
    header_name = None
    header_value = None
    if environment == "prod":
        host = "https://api.vectorize.io/v1"
    elif environment == "dev":
        host = "https://api-dev.vectorize.io/v1"
    elif environment == "local":
        host = "http://localhost:3000/api"
        header_name = "x-lambda-api-key"
        header_value = api_token
    else:
        host = "https://api-staging.vectorize.io/v1"

    with ApiClient(
        Configuration(host=host, access_token=api_token, debug=True),
        header_name,
        header_value,
    ) as api:
        yield api


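# End-to-end fixture: create a file-upload source connector, upload a sample PDF,
# wire it to the built-in Vectorize AI platform and vector database, and poll until
# the pipeline can serve documents. The pipeline is deleted at session teardown.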
@pytest.fixture(scope="session")
def pipeline_id(api_client: ApiClient, org_id: str) -> Iterator[str]:
    pipelines = PipelinesApi(api_client)

    connectors_api = SourceConnectorsApi(api_client)
    response = connectors_api.create_source_connector(
        org_id,
        CreateSourceConnectorRequest(FileUpload(name="from api", type="FILE_UPLOAD")),
    )
    source_connector_id = response.connector.id
    logger.info("Created source connector %s", source_connector_id)

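    # Upload the sample PDF through the pre-signed URL returned by the uploads API.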
    uploads_api = UploadsApi(api_client)
    upload_response = uploads_api.start_file_upload_to_connector(
        org_id,
        source_connector_id,
        StartFileUploadToConnectorRequest(
            name="research.pdf",
            content_type="application/pdf",
            metadata=json.dumps({"created-from-api": True}),
        ),
    )

    this_dir = Path(__file__).parent
    file_path = this_dir / "research.pdf"

    with file_path.open("rb") as f:
        http_response = requests.put(
            upload_response.upload_url,
            data=f,
            headers={
                "Content-Type": "application/pdf",
            },
            timeout=60,
        )
    http_response.raise_for_status()

    logger.info("Upload successful")

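    # Use the built-in ("VECTORIZE") AI platform and destination connectors.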
    ai_platforms = AIPlatformConnectorsApi(api_client).get_ai_platform_connectors(
        org_id
    )
    builtin_ai_platform = next(
        c.id for c in ai_platforms.ai_platform_connectors if c.type == "VECTORIZE"
    )
    logger.info("Using AI platform %s", builtin_ai_platform)

    vector_databases = DestinationConnectorsApi(api_client).get_destination_connectors(
        org_id
    )
    builtin_vector_db = next(
        c.id for c in vector_databases.destination_connectors if c.type == "VECTORIZE"
    )
    logger.info("Using destination connector %s", builtin_vector_db)

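    # Assemble the pipeline from the connectors created above, on a manual schedule.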
    pipeline_response = pipelines.create_pipeline(
        org_id,
        PipelineConfigurationSchema(
            source_connectors=[
                PipelineSourceConnectorSchema(
                    id=source_connector_id,
                    type=SourceConnectorType.FILE_UPLOAD,
                    config={},
                )
            ],
            destination_connector=PipelineDestinationConnectorSchema(
                id=builtin_vector_db,
                type=DestinationConnectorTypeForPipeline.VECTORIZE,
                config={},
            ),
            ai_platform_connector=PipelineAIPlatformConnectorSchema(
                id=builtin_ai_platform,
                type=AIPlatformTypeForPipeline.VECTORIZE,
                config=AIPlatformConfigSchema(),
            ),
            pipeline_name="Test pipeline",
            schedule=ScheduleSchema(type=ScheduleSchemaType.MANUAL),
        ),
    )
    pipeline_id = pipeline_response.data.id

    # Poll until the pipeline has indexed the upload and returns results for a
    # probe query; 503 responses are expected while the pipeline is still building.
    request = RetrieveDocumentsRequest(
        question="query",
        num_results=2,
    )
    start = time.time()
    while True:
        try:
            doc_response = pipelines.retrieve_documents(org_id, pipeline_id, request)
        except ApiException as e:
            if "503" not in str(e):
                raise
        else:
            docs = doc_response.documents
            if len(docs) == 2:
                break
        if time.time() - start > 180:
            msg = "Docs not retrieved in time"
            raise RuntimeError(msg)
        time.sleep(1)

logger.info("Created pipeline %s", pipeline_id)

yield pipeline_id

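    # Best-effort cleanup: delete the test pipeline once the session is done.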
    try:
        pipelines.delete_pipeline(org_id, pipeline_id)
    except Exception:
        logger.exception("Failed to delete pipeline %s", pipeline_id)
42 changes: 42 additions & 0 deletions langchain/tests/test_langchain_integration_tests.py
@@ -0,0 +1,42 @@
from typing import Any, Literal

import pytest
from langchain_tests.integration_tests import RetrieversIntegrationTests

from langchain_vectorize import VectorizeRetriever


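# Runs the standard LangChain retriever integration suite against a live
# Vectorize pipeline provisioned by the conftest.py fixtures.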
class TestVectorizeRetrieverIntegration(RetrieversIntegrationTests):
    @pytest.fixture(autouse=True)
    def setup(
        self,
        environment: Literal["prod", "dev", "local", "staging"],
        api_token: str,
        org_id: str,
        pipeline_id: str,
    ) -> None:
        self._environment = environment
        self._api_token = api_token
        self._org_id = org_id
        self._pipeline_id = pipeline_id

    @property
    def retriever_constructor(self) -> type[VectorizeRetriever]:
        return VectorizeRetriever

    @property
    def retriever_constructor_params(self) -> dict[str, Any]:
        return {
            "environment": self._environment,
            "api_token": self._api_token,
            "organization": self._org_id,
            "pipeline_id": self._pipeline_id,
        }

    @property
    def retriever_query_example(self) -> str:
        return "What are you?"

    @property
    def num_results_arg_name(self) -> str:
        return "num_results"