diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..17a252d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,16 @@
+# Compose a postgres database together with the extension pgvector
+# source: https://github.com/johannesocean/pgvector-demo/blob/main/docker-compose.yml
+services:
+  db:
+    hostname: db
+    image: ankane/pgvector
+    ports:
+      - 5555:5432
+    restart: always
+    environment:
+      - POSTGRES_DB=vectordb
+      - POSTGRES_USER=testuser
+      - POSTGRES_PASSWORD=testpwd
+      - POSTGRES_HOST_AUTH_METHOD=trust
+    volumes:
+      - ./init.sql:/docker-entrypoint-initdb.d/init.sql
diff --git a/dockers/llm.rag.service/Dockerfile b/dockers/llm.rag.service/Dockerfile
index 9f8e478..4bc063a 100644
--- a/dockers/llm.rag.service/Dockerfile
+++ b/dockers/llm.rag.service/Dockerfile
@@ -42,7 +42,6 @@ RUN pip3 install --no-cache-dir \
 
 COPY __init__.py .
 COPY serveragllm.py .
-COPY serverragllm_jira_cvs_local.py .
 COPY common.py .
 COPY pyproject.toml .
 
diff --git a/dockers/llm.rag.service/serverragllm_csv_to_pgvector_local.py b/dockers/llm.rag.service/serverragllm_csv_to_pgvector_local.py
new file mode 100644
index 0000000..fb82d14
--- /dev/null
+++ b/dockers/llm.rag.service/serverragllm_csv_to_pgvector_local.py
@@ -0,0 +1,109 @@
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "click",
+#     "faiss-cpu",
+#     "fastapi",
+#     "langchain-community",
+#     "langchain-huggingface",
+#     "langchain-postgres",
+#     "openai",
+#     "psycopg2-binary",
+#     "uvicorn",
+# ]
+# ///
+
+import os
+import sys
+import uvicorn
+
+from functools import partial
+from typing import Union
+
+import click
+from fastapi import FastAPI
+from openai import OpenAI
+
+from common import get_answer_with_settings
+
+
+def setup(
+    relevant_docs: int,
+    llm_server_url: str,
+    model_id: str,
+    max_tokens: int,
+    model_temperature: float,
+):
+    app = FastAPI()
+
+    # TODO: move to imports
+    from langchain_postgres import PGVector
+    from langchain_huggingface import HuggingFaceEmbeddings
+
+    # TODO: pass through settings or params
+    connection_string = "postgresql+psycopg2://testuser:testpwd@localhost:5555/vectordb"
+    collection_name = "jira_tickets"
+    embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
+
+    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
+
+    vectorstore = PGVector(
+        embeddings=embeddings,
+        collection_name=collection_name,
+        connection=connection_string,
+        use_jsonb=True,
+    )
+
+    retriever = vectorstore.as_retriever(search_kwargs={"k": relevant_docs})
+    print("Created Vector DB retriever successfully. \n")
+
+    print("Creating an OpenAI client to the hosted model at URL: ", llm_server_url)
+    try:
+        client = OpenAI(base_url=llm_server_url, api_key="na")
+    except Exception as e:
+        print("Error creating client:", e)
+        sys.exit(1)
+
+    get_answer = partial(
+        get_answer_with_settings,
+        retriever=retriever,
+        client=client,
+        model_id=model_id,
+        max_tokens=max_tokens,
+        model_temperature=model_temperature,
+    )
+
+    @app.get("/answer/{question}")
+    def read_item(question: Union[str, None] = None):
+        print(f"Received question: {question}")
+        answer = get_answer(question)
+        return {"question": question, "answer": answer}
+
+    return app
+
+
+MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
+MOSAICML_MODEL_ID = "mosaicml/mpt-7b-chat"
+RELEVANT_DOCS_DEFAULT = 2
+MAX_TOKENS_DEFAULT = 64
+MODEL_TEMPERATURE_DEFAULT = 0.01
+
+relevant_docs = int(os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT))
+llm_server_url = os.getenv("LLM_SERVER_URL", "http://localhost:11434/v1")
+model_id = os.getenv("MODEL_ID", "llama2")
+max_tokens = int(os.getenv("MAX_TOKENS", MAX_TOKENS_DEFAULT))
+model_temperature = float(os.getenv("MODEL_TEMPERATURE", MODEL_TEMPERATURE_DEFAULT))
+
+app = setup(relevant_docs, llm_server_url, model_id, max_tokens, model_temperature)
+
+
+@click.command()
+@click.option("--host", default="127.0.0.1", help="Host for the FastAPI server (default: 127.0.0.1)")
+@click.option("--port", type=int, default=8000, help="Port for the FastAPI server (default: 8000)")
+def run(host, port):
+    # Serve the app using Uvicorn
+    uvicorn.run("serverragllm_csv_to_pgvector_local:app", host=host, port=port, reload=True)
+
+
+if __name__ == "__main__":
+    run()
diff --git a/dockers/llm.vdb.service/Dockerfile b/dockers/llm.vdb.service/Dockerfile
index 8628d1d..a2019dd 100644
--- a/dockers/llm.vdb.service/Dockerfile
+++ b/dockers/llm.vdb.service/Dockerfile
@@ -44,7 +44,6 @@ RUN pip3 install --no-cache-dir \
 
 COPY __init__.py .
 COPY createvectordb.py .
-COPY createvectordb_jira_csv_local.py .
 COPY common.py .
 COPY pyproject.toml .
 
diff --git a/dockers/llm.vdb.service/common.py b/dockers/llm.vdb.service/common.py
index e548794..e35e212 100644
--- a/dockers/llm.vdb.service/common.py
+++ b/dockers/llm.vdb.service/common.py
@@ -1,5 +1,6 @@
 import json
 import os
+from typing import List
 
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
@@ -79,3 +80,41 @@ def create_vectordb(
     print("Convert to FAISS vectorstore")
     vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
     return vectorstore
+
+
+def create_vectordb_pgvector(
+    local_tmp_dir: str,
+    embedding_model_name: str,
+    connection_string: str,
+    collection_name: str,
+    chunk_size: int = EMBEDDING_CHUNK_SIZE_DEFAULT,
+    chunk_overlap: int = EMBEDDING_CHUNK_OVERLAP_DEFAULT,
+):
+    data = load_jsonl_files_from_directory(local_tmp_dir)
+
+    # no chunking
+    # texts, metadatas = get_documents_with_metadata(data)
+    # with chunking texts
+    texts, metadatas = chunk_documents_with_metadata(data, chunk_size, chunk_overlap)
+
+    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
+
+    # TODO: move to imports
+    from langchain_postgres import PGVector
+    from langchain_core.documents import Document
+
+    # adapt data
+    documents: List[Document] = []
+    for txt, met in zip(texts, metadatas):
+        document = Document(
+            page_content=txt,
+            metadata=met
+        )
+        documents.append(document)
+
+    return PGVector.from_documents(
+        embedding=embeddings,
+        documents=documents,
+        collection_name=collection_name,
+        connection=connection_string,
+    )
diff --git a/dockers/llm.vdb.service/createvectordb_csv_to_pgvector_local.py b/dockers/llm.vdb.service/createvectordb_csv_to_pgvector_local.py
new file mode 100644
index 0000000..2676e32
--- /dev/null
+++ b/dockers/llm.vdb.service/createvectordb_csv_to_pgvector_local.py
@@ -0,0 +1,42 @@
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "click",
+#     "langchain",
+#     "langchain-community",
+#     "langchain-huggingface",
+#     "langchain_postgres",
+#     "pgvector",
+#     "psycopg2-binary",
+# ]
+# ///
+
+# Source: dockers/llm.vdb.service/createvectordb.py
+
+import click
+
+from common import create_vectordb_pgvector
+
+
+@click.command()
+@click.argument("local_tmp_dir", type=click.Path(exists=True))
+@click.argument(
+    "embedding_model_name", default="sentence-transformers/all-MiniLM-L6-v2"
+)
+def run(local_tmp_dir: str, embedding_model_name: str):
+    # TODO: pass through settings or params
+    connection_string = "postgresql+psycopg2://testuser:testpwd@localhost:5555/vectordb"
+    collection_name = "jira_tickets"
+
+    db = create_vectordb_pgvector(
+        local_tmp_dir,
+        embedding_model_name,
+        connection_string,
+        collection_name,
+    )
+
+    print(f"Data saved to {db.collection_name}")
+
+
+if __name__ == "__main__":
+    run()