Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/scripts/check_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

LIB_DIRS = [
"libs/langchain-mongodb",
"libs/langchain-mongodb-retrievers",
"libs/langgraph-checkpoint-mongodb",
"libs/langgraph-store-mongodb",
]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
fail-fast: false
matrix:
python-version:
- "3.9"
- "3.10"
- "3.13"
name: "run test #${{ matrix.python-version }}"
steps:
Expand Down
1 change: 1 addition & 0 deletions libs/langchain-mongodb-retrievers/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__
7 changes: 7 additions & 0 deletions libs/langchain-mongodb-retrievers/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Changelog

---

## Changes in version 1.0 (2025/XX/XX)

- Initial release, supporting LangChain 1.0.
21 changes: 21 additions & 0 deletions libs/langchain-mongodb-retrievers/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 LangChain, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
38 changes: 38 additions & 0 deletions libs/langchain-mongodb-retrievers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# langchain-mongodb-retrievers

# Installation
```
pip install -U langchain-mongodb-retrievers
```

# Usage
- See [Integrate Atlas Vector Search with LangChain](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/#get-started-with-the-langchain-integration) for a walkthrough of building your first LangChain implementation with MongoDB Atlas.

## Using MongoDBAtlasVectorSearch
```python
import os
from langchain_mongodb_retrievers import MongoDBAtlasVectorSearch
from langchain_openai import OpenAIEmbeddings

# Pull MongoDB Atlas URI from environment variables
MONGODB_ATLAS_CONNECTION_STRING = os.environ["MONGODB_CONNECTION_STRING"]
DB_NAME = "langchain_db"
COLLECTION_NAME = "test"
VECTOR_SEARCH_INDEX_NAME = "index_name"

MODEL_NAME = "text-embedding-3-large"
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]


vectorstore = MongoDBAtlasVectorSearch.from_connection_string(
connection_string=MONGODB_ATLAS_CONNECTION_STRING,
namespace=DB_NAME + "." + COLLECTION_NAME,
embedding=OpenAIEmbeddings(model=MODEL_NAME),
index_name=VECTOR_SEARCH_INDEX_NAME,
)

retrieved_docs = vectorstore.similarity_search(
"How do I deploy MongoDBAtlasVectorSearch in our production environment?")
```
40 changes: 40 additions & 0 deletions libs/langchain-mongodb-retrievers/justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
set shell := ["bash", "-c"]
set dotenv-load
set dotenv-filename := "../../.local_atlas_uri"

# Default target executed when no arguments are given.
[private]
default:
@just --list

install:
uv sync --frozen

[group('test')]
integration_tests *args="":
uv run pytest tests/integration_tests/ {{args}}

[group('test')]
unit_tests *args="":
uv run pytest tests/unit_tests {{args}}

[group('test')]
tests *args="":
uv run pytest {{args}}

[group('test')]
test_watch filename:
uv run ptw --snapshot-update --now . -- -vv {{filename}}

[group('lint')]
lint:
git ls-files -- '*.py' | xargs uv run pre-commit run ruff --files
git ls-files -- '*.py' | xargs uv run pre-commit run ruff-format --files

[group('lint')]
typing:
uv run mypy --install-types --non-interactive .

[group('lint')]
codespell:
git ls-files -- '*.py' | xargs uv run pre-commit run --hook-stage manual codespell --files
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@
to create MongoDB's core Vector Search Retriever.
"""

from langchain_mongodb.retrievers.full_text_search import (
from langchain_mongodb_retrievers.full_text_search import (
MongoDBAtlasFullTextSearchRetriever,
)
from langchain_mongodb.retrievers.graphrag import MongoDBGraphRAGRetriever
from langchain_mongodb.retrievers.hybrid_search import MongoDBAtlasHybridSearchRetriever
from langchain_mongodb.retrievers.parent_document import (
from langchain_mongodb_retrievers.hybrid_search import MongoDBAtlasHybridSearchRetriever
from langchain_mongodb_retrievers.parent_document import (
MongoDBAtlasParentDocumentRetriever,
)
from langchain_mongodb.retrievers.self_querying import MongoDBAtlasSelfQueryRetriever
from langchain_mongodb_retrievers.self_querying import MongoDBAtlasSelfQueryRetriever
from langchain_mongodb_retrievers.vectorstores import MongoDBAtlasVectorSearch

__all__ = [
"MongoDBAtlasHybridSearchRetriever",
"MongoDBAtlasFullTextSearchRetriever",
"MongoDBAtlasParentDocumentRetriever",
"MongoDBGraphRAGRetriever",
"MongoDBAtlasSelfQueryRetriever",
"MongoDBAtlasVectorSearch",
]
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
from langchain_core.load.dump import dumps
from langchain_core.load.load import loads
from langchain_core.outputs import Generation
from langchain_mongodb.utils import DRIVER_METADATA
from pymongo import MongoClient
from pymongo.collection import Collection
from pymongo.database import Database

from langchain_mongodb.utils import DRIVER_METADATA
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
from langchain_mongodb_retrievers.vectorstores import MongoDBAtlasVectorSearch

# Name the logger after the module rather than its file path, so that it takes
# part in the package's logger hierarchy and picks up handlers and levels
# configured on parent loggers.
logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from pydantic import Field
from pymongo.collection import Collection

from langchain_mongodb.pipelines import text_search_stage
from langchain_mongodb.utils import _append_client_metadata, make_serializable
from pydantic import Field
from pymongo.collection import Collection


class MongoDBAtlasFullTextSearchRetriever(BaseRetriever):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from pydantic import Field
from pymongo.collection import Collection

from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_mongodb.pipelines import (
combine_pipelines,
final_hybrid_stage,
Expand All @@ -16,6 +12,10 @@
vector_search_stage,
)
from langchain_mongodb.utils import make_serializable
from pydantic import Field
from pymongo.collection import Collection

from langchain_mongodb_retrievers.vectorstores import MongoDBAtlasVectorSearch


class MongoDBAtlasHybridSearchRetriever(BaseRetriever):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,23 @@
from typing import Any, List, Optional

import pymongo
from langchain.retrievers.parent_document_retriever import ParentDocumentRetriever
from langchain_classic.retrievers.parent_document_retriever import (
ParentDocumentRetriever,
)
from langchain_core.callbacks import (
AsyncCallbackManagerForRetrieverRun,
CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.runnables import run_in_executor
from langchain_text_splitters import TextSplitter
from pymongo import MongoClient

from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_mongodb.docstores import MongoDBDocStore
from langchain_mongodb.pipelines import vector_search_stage
from langchain_mongodb.utils import DRIVER_METADATA, make_serializable
from langchain_text_splitters import TextSplitter
from pymongo import MongoClient

from langchain_mongodb_retrievers.vectorstores import MongoDBAtlasVectorSearch


class MongoDBAtlasParentDocumentRetriever(ParentDocumentRetriever):
Expand All @@ -39,8 +41,8 @@ class MongoDBAtlasParentDocumentRetriever(ParentDocumentRetriever):
by the same MongoDB Collection.

For more details, see superclasses
:class:`~langchain.retrievers.parent_document_retriever.ParentDocumentRetriever`
and :class:`~langchain.retrievers.MultiVectorRetriever`.
:class:`~langchain_classic.retrievers.parent_document_retriever.ParentDocumentRetriever`
and :class:`~langchain_classic.retrievers.MultiVectorRetriever`.

Examples:
>>> from langchain_mongodb.retrievers.parent_document import (
Expand Down Expand Up @@ -148,8 +150,8 @@ def from_connection_string(
"""Construct Retriever using one Collection for VectorStore and one for DocStore

See parent classes
:class:`~langchain.retrievers.parent_document_retriever.ParentDocumentRetriever`
and :class:`~langchain.retrievers.MultiVectorRetriever` for further details.
:class:`~langchain_classic.retrievers.parent_document_retriever.ParentDocumentRetriever`
and :class:`~langchain_classic.retrievers.MultiVectorRetriever` for further details.

Args:
connection_string: A valid MongoDB Atlas connection URI.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Any, Dict, Sequence, Tuple, Union

from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_classic.chains.query_constructor.schema import AttributeInfo
from langchain_classic.retrievers.self_query.base import SelfQueryRetriever
from langchain_core.language_models import BaseLanguageModel
from langchain_core.runnables import Runnable
from langchain_core.structured_query import (
Expand All @@ -15,7 +15,7 @@
from langchain_core.vectorstores import VectorStore
from pydantic import Field

from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_mongodb_retrievers.vectorstores import MongoDBAtlasVectorSearch


class MongoDBStructuredQueryTranslator(Visitor):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Various Utility Functions"""

from __future__ import annotations

import logging
from importlib.metadata import version

from pymongo import MongoClient
from pymongo.driver_info import DriverInfo

logger = logging.getLogger(__name__)

# Driver identification for this package: the distribution name paired with the
# installed version reported by importlib.metadata.version().
# NOTE(review): version() raises PackageNotFoundError at import time when the
# distribution is not installed (e.g. running from a bare source checkout) --
# confirm that is acceptable for this module.
DRIVER_METADATA = DriverInfo(
name="langchain-mongodb-retrievers", version=version("langchain-mongodb-retrievers")
)


def _append_client_metadata(client: MongoClient) -> None:
# append_metadata was added in PyMongo 4.14.0, but is a valid database name on earlier versions
if callable(client.append_metadata):
client.append_metadata(DRIVER_METADATA)
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@
from langchain_core.embeddings import Embeddings
from langchain_core.runnables.config import run_in_executor
from langchain_core.vectorstores import VectorStore
from pymongo import MongoClient, ReplaceOne
from pymongo.collection import Collection
from pymongo.errors import CollectionInvalid

from langchain_mongodb.index import (
create_vector_search_index,
update_vector_search_index,
Expand All @@ -39,6 +35,9 @@
oid_to_str,
str_to_oid,
)
from pymongo import MongoClient, ReplaceOne
from pymongo.collection import Collection
from pymongo.errors import CollectionInvalid

VST = TypeVar("VST", bound=VectorStore)

Expand Down Expand Up @@ -356,7 +355,7 @@ def add_texts(
metadatas_batch = []
size = 0
i = 0
for j, (text, metadata) in enumerate(zip(texts, _metadatas)):
for j, (text, metadata) in enumerate(zip(texts, _metadatas, strict=False)):
size += len(text) + len(metadata)
texts_batch.append(text)
metadatas_batch.append(metadata)
Expand Down Expand Up @@ -442,7 +441,9 @@ def bulk_embed_and_insert_texts(
self._embedding_key: embedding,
**m,
}
for i, t, m, embedding in zip(ids, texts, metadatas, embeddings)
for i, t, m, embedding in zip(
ids, texts, metadatas, embeddings, strict=False
)
]
operations = [ReplaceOne({"_id": doc["_id"]}, doc, upsert=True) for doc in docs]
# insert the documents in MongoDB Atlas
Expand Down Expand Up @@ -478,7 +479,8 @@ def add_documents(
start = 0
for end in range(batch_size, n_docs + batch_size, batch_size):
texts, metadatas = zip(
*[(doc.page_content, doc.metadata) for doc in documents[start:end]]
*[(doc.page_content, doc.metadata) for doc in documents[start:end]],
strict=False,
)
result_ids.extend(
self.bulk_embed_and_insert_texts(
Expand Down
Loading
Loading