Skip to content

Commit 99ffeb2

Browse files
authored
add ingest for mongo (langchain-ai#12897)
1 parent ce21308 commit 99ffeb2

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

templates/rag-mongo/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@ from rag_mongo import chain as rag_mongo_chain
4040
add_routes(app, rag_mongo_chain, path="/rag-mongo")
4141
```
4242

43+
If you want to set up an ingestion pipeline, you can add the following code to your `server.py` file:
44+
```python
45+
from rag_mongo import ingest as rag_mongo_ingest
46+
47+
add_routes(app, rag_mongo_ingest, path="/rag-mongo-ingest")
48+
```
49+
4350
(Optional) Let's now configure LangSmith.
4451
LangSmith will help us trace, monitor and debug LangChain applications.
4552
LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/).

templates/rag-mongo/rag_mongo/chain.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
import os
22

33
from langchain.chat_models import ChatOpenAI
4+
from langchain.document_loaders import PyPDFLoader
45
from langchain.embeddings import OpenAIEmbeddings
56
from langchain.prompts import ChatPromptTemplate
67
from langchain.pydantic_v1 import BaseModel
78
from langchain.schema.output_parser import StrOutputParser
8-
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
9+
from langchain.schema.runnable import (
10+
RunnableLambda,
11+
RunnableParallel,
12+
RunnablePassthrough,
13+
)
14+
from langchain.text_splitter import RecursiveCharacterTextSplitter
915
from langchain.vectorstores import MongoDBAtlasVectorSearch
1016
from pymongo import MongoClient
1117

@@ -54,3 +60,24 @@ class Question(BaseModel):
5460

5561

5662
# Declare `Question` as the chain's input type.
chain = chain.with_types(input_type=Question)
63+
64+
65+
def _ingest(url: str) -> dict:
    """Load the PDF at ``url``, split it into chunks, and store them in MongoDB Atlas.

    Args:
        url: Location of the PDF, passed straight to ``PyPDFLoader``.

    Returns:
        Always an empty dict; the vector store object created by the insert
        is not returned to the caller.
    """
    loaded = PyPDFLoader(url).load()

    # Chunk the loaded documents (chunk_size=500, no overlap between chunks).
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    chunks = splitter.split_documents(loaded)

    # Embed the chunks and write them into the configured collection / index.
    # The returned vector store is intentionally discarded.
    MongoDBAtlasVectorSearch.from_documents(
        documents=chunks,
        embedding=OpenAIEmbeddings(disallowed_special=()),
        collection=MONGODB_COLLECTION,
        index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    )
    return {}
81+
82+
83+
# Expose `_ingest` as a runnable so it can be mounted with `add_routes`
# (e.g. at "/rag-mongo-ingest").
ingest = RunnableLambda(_ingest)

0 commit comments

Comments
 (0)