Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Compose a postgres database together with the extension pgvector
# source: https://github.com/johannesocean/pgvector-demo/blob/main/docker-compose.yml
services:
  db:
    hostname: db
    # Postgres image with the pgvector extension preinstalled
    image: ankane/pgvector
    ports:
      # Host port 5555 -> container's default Postgres port 5432
      # (matches the connection strings used by the services in this repo)
      - 5555:5432
    restart: always
    environment:
      - POSTGRES_DB=vectordb
      - POSTGRES_USER=testuser
      - POSTGRES_PASSWORD=testpwd
      # NOTE(review): "trust" disables password authentication entirely, which
      # makes POSTGRES_PASSWORD above moot — fine for a local demo, but confirm
      # this is never used in a shared/production environment.
      - POSTGRES_HOST_AUTH_METHOD=trust
    volumes:
      # Schema/extension bootstrap script, run once on first container start
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql
1 change: 0 additions & 1 deletion dockers/llm.rag.service/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ RUN pip3 install --no-cache-dir \

COPY __init__.py .
COPY serveragllm.py .
COPY serverragllm_jira_cvs_local.py .
COPY common.py .
COPY pyproject.toml .

Expand Down
108 changes: 108 additions & 0 deletions dockers/llm.rag.service/serverragllm_csv_to_pgvector_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "click",
#     "faiss-cpu",
#     "fastapi",
#     "langchain-community",
#     "langchain-huggingface",
#     "langchain-postgres",
#     "openai",
#     "psycopg2-binary",
#     "uvicorn",
# ]
# ///

import os
import sys
import uvicorn

from functools import partial
from typing import Union

import click
from fastapi import FastAPI
from openai import OpenAI

from common import get_answer_with_settings


def setup(
    relevant_docs: int,
    llm_server_url: str,
    model_id: str,
    max_tokens: int,
    model_temperature: float,
):
    """Build the FastAPI app that answers questions via RAG over pgvector.

    Wires together a PGVector retriever, an OpenAI-compatible client for the
    hosted LLM, and a single ``GET /answer/{question}`` route.

    Args:
        relevant_docs: Number of documents the retriever returns per query.
        llm_server_url: Base URL of the OpenAI-compatible LLM server.
        model_id: Model identifier passed through to the LLM server.
        max_tokens: Completion token limit for the LLM.
        model_temperature: Sampling temperature for the LLM.

    Returns:
        The configured FastAPI application.
    """
    app = FastAPI()

    # Imported lazily so importing this module does not require the heavy
    # langchain/embedding stack.  TODO: move to imports.
    from langchain_postgres import PGVector
    from langchain_huggingface import HuggingFaceEmbeddings

    # TODO: pass through settings or params
    connection_string = "postgresql+psycopg2://testuser:testpwd@localhost:5555/vectordb"
    collection_name = "jira_tickets"
    embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"

    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

    vectorstore = PGVector(
        embeddings=embeddings,
        collection_name=collection_name,
        connection=connection_string,
        use_jsonb=True,
    )

    # int() guards against a string slipping through from os.getenv at the
    # call site — "k" must be an integer for the retriever.
    retriever = vectorstore.as_retriever(search_kwargs={"k": int(relevant_docs)})
    print("Created Vector DB retriever successfully. \n")

    print("Creating an OpenAI client to the hosted model at URL: ", llm_server_url)
    try:
        client = OpenAI(base_url=llm_server_url, api_key="na")
    except Exception as e:
        print("Error creating client:", e)
        sys.exit(1)

    # Pre-bind all model settings so the route handler only supplies the question.
    get_answer = partial(
        get_answer_with_settings,
        retriever=retriever,
        client=client,
        model_id=model_id,
        max_tokens=max_tokens,
        model_temperature=model_temperature,
    )

    @app.get("/answer/{question}")
    def read_item(question: Union[str, None] = None):
        print(f"Received question: {question}")
        answer = get_answer(question)
        return {"question": question, "answer": answer}

    return app


# Known model identifiers and default settings; all runtime values below can
# be overridden through environment variables.
MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
MOSAICML_MODEL_ID = "mosaicml/mpt-7b-chat"
RELEVANT_DOCS_DEFAULT = 2
MAX_TOKENS_DEFAULT = 64
MODEL_TEMPERATURE_DEFAULT = 0.01

# BUG FIX: cast to int, consistent with MAX_TOKENS/MODEL_TEMPERATURE below —
# os.getenv returns a str when RELEVANT_DOCS is set, which would be passed as
# a non-integer "k" to the retriever.
relevant_docs = int(os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT))
llm_server_url = os.getenv("LLM_SERVER_URL", "http://localhost:11434/v1")
model_id = os.getenv("MODEL_ID", "llama2")
max_tokens = int(os.getenv("MAX_TOKENS", MAX_TOKENS_DEFAULT))
model_temperature = float(os.getenv("MODEL_TEMPERATURE", MODEL_TEMPERATURE_DEFAULT))

# Module-level app so uvicorn can import "serverragllm_csv_to_pgvector_local:app".
app = setup(relevant_docs, llm_server_url, model_id, max_tokens, model_temperature)


@click.command()
@click.option("--host", default="127.0.0.1", help="Host for the FastAPI server (default: 127.0.0.1)")
@click.option("--port", type=int, default=8000, help="Port for the FastAPI server (default: 8000)")
def run(host, port):
    """CLI entry point: serve the module-level ``app`` with Uvicorn."""
    # Serve the app using Uvicorn
    # reload=True watches source files and restarts on change — a development
    # setting; disable for production deployments.
    uvicorn.run("serverragllm_csv_to_pgvector_local:app", host=host, port=port, reload=True)


if __name__ == "__main__":
    run()
1 change: 0 additions & 1 deletion dockers/llm.vdb.service/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ RUN pip3 install --no-cache-dir \

COPY __init__.py .
COPY createvectordb.py .
COPY createvectordb_jira_csv_local.py .
COPY common.py .
COPY pyproject.toml .

Expand Down
39 changes: 39 additions & 0 deletions dockers/llm.vdb.service/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
from typing import List

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
Expand Down Expand Up @@ -79,3 +80,41 @@ def create_vectordb(
print("Convert to FAISS vectorstore")
vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
return vectorstore


def create_vectordb_pgvector(
    local_tmp_dir: str,
    embedding_model_name: str,
    connection_string: str,
    collection_name: str,
    chunk_size: int = EMBEDDING_CHUNK_SIZE_DEFAULT,
    chunk_overlap: int = EMBEDDING_CHUNK_OVERLAP_DEFAULT,
):
    """Embed JSONL documents from *local_tmp_dir* and store them in pgvector.

    Args:
        local_tmp_dir: Directory containing the .jsonl files to ingest.
        embedding_model_name: HuggingFace sentence-embedding model name.
        connection_string: SQLAlchemy connection string for the database.
        collection_name: PGVector collection to write the documents into.
        chunk_size: Character length of each text chunk.
        chunk_overlap: Character overlap between consecutive chunks.

    Returns:
        The populated PGVector store.
    """
    data = load_jsonl_files_from_directory(local_tmp_dir)

    # Chunk the texts before embedding; for whole-document (no-chunking)
    # ingestion use get_documents_with_metadata(data) instead.
    texts, metadatas = chunk_documents_with_metadata(data, chunk_size, chunk_overlap)

    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

    # TODO: move to imports
    from langchain_postgres import PGVector
    from langchain_core.documents import Document

    # Pair each chunk with its metadata as a langchain Document.
    documents: List[Document] = [
        Document(page_content=txt, metadata=met)
        for txt, met in zip(texts, metadatas)
    ]

    return PGVector.from_documents(
        embedding=embeddings,
        documents=documents,
        collection_name=collection_name,
        connection=connection_string,
    )
42 changes: 42 additions & 0 deletions dockers/llm.vdb.service/createvectordb_csv_to_pgvector_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "click",
# "langchain",
# "langchain-community",
# "langchain-huggingface",
# "langchain_postgres",
# "pgvector",
# "psycopg2-binary",
# ]
# ///

# Source: dockers/llm.vdb.service/createvectordb.py

import click

from common import create_vectordb_pgvector


@click.command()
@click.argument("local_tmp_dir", type=click.Path(exists=True))
@click.argument(
    "embedding_model_name", default="sentence-transformers/all-MiniLM-L6-v2"
)
@click.option(
    "--connection-string",
    default="postgresql+psycopg2://testuser:testpwd@localhost:5555/vectordb",
    show_default=True,
    help="SQLAlchemy connection string for the pgvector database.",
)
@click.option(
    "--collection-name",
    default="jira_tickets",
    show_default=True,
    help="PGVector collection the documents are written to.",
)
def run(
    local_tmp_dir: str,
    embedding_model_name: str,
    connection_string: str,
    collection_name: str,
):
    """Embed the JSONL files in LOCAL_TMP_DIR and store them in pgvector.

    The connection string and collection name were previously hard-coded
    (TODO in the original); they are now defaulted options so existing
    invocations keep working unchanged.
    """
    db = create_vectordb_pgvector(
        local_tmp_dir,
        embedding_model_name,
        connection_string,
        collection_name,
    )

    print(f"Data saved to {db.collection_name}")


if __name__ == "__main__":
    run()