diff --git a/llama2-medical-chatbot/.env.example b/llama2-medical-chatbot/.env.example
new file mode 100644
index 0000000..7c29f49
--- /dev/null
+++ b/llama2-medical-chatbot/.env.example
@@ -0,0 +1,2 @@
+# Template only: copy to `.env` and set your own token. Never commit real credentials.
+HUGGINGFACEHUB_API_TOKEN=your_huggingface_token_here
diff --git a/llama2-medical-chatbot/.gitignore b/llama2-medical-chatbot/.gitignore
new file mode 100644
index 0000000..ae412d6
--- /dev/null
+++ b/llama2-medical-chatbot/.gitignore
@@ -0,0 +1,2 @@
+env/
+.env
diff --git a/llama2-medical-chatbot/README.md b/llama2-medical-chatbot/README.md
new file mode 100644
index 0000000..ff067a5
--- /dev/null
+++ b/llama2-medical-chatbot/README.md
@@ -0,0 +1,60 @@
+# Llama2 Medical Chatbot
+
+## Overview
+
+Llama2 Medical Chatbot is an intelligent assistant designed to provide information and assistance related to medical queries.
+
+## Libraries Used
+- Streamlit: A framework for creating web applications.
+- streamlit_chat: Custom module for displaying chat messages.
+- LangChain: A library for building applications with large language models (LLMs).
+- langchain_community: Extensions for LangChain.
+- PyPDFLoader and DirectoryLoader: For loading PDF documents.
+- HuggingFaceEmbeddings: For creating embeddings using Hugging Face models.
+- CTransformers: For using transformer models.
+- RecursiveCharacterTextSplitter: For splitting text into manageable chunks.
+- FAISS: A library for efficient similarity search.
+- ConversationBufferMemory: For maintaining chat history.
+
+## How the Program Works
+1. Imports the libraries needed for document processing, creating embeddings, setting up the chatbot, and building the web interface with Streamlit.
+2. Uses DirectoryLoader to load all PDF files from the `data/` directory.
+3. Splits the content of the PDF files into smaller chunks using RecursiveCharacterTextSplitter.
+4. Uses a Hugging Face model to generate embeddings for the text chunks.
+5. 
Stores the text chunks and their embeddings in a FAISS vector store for efficient retrieval.
+6. Loads a pre-trained language model using CTransformers.
+7. Creates a ConversationalRetrievalChain that uses the language model and vector store to handle queries, along with conversation memory to track chat history.
+8. Sets up the Streamlit app interface, including the title and layout for displaying chat history and user input.
+9. Initializes session state variables to store chat history and generated responses.
+10. Displays the chat interface, handling user input and displaying the chat history using Streamlit.
+
+## Uses of the Chatbot
+- Provides a user-friendly interface for interacting with a healthcare chatbot.
+- Users can ask questions related to the content of the loaded PDF documents.
+- Answers user queries based on the content of the PDF documents.
+- Maintains the context of the conversation using conversation memory.
+- Enhances user experience by providing a continuous conversation flow.
+- Runs as a web application using Streamlit, making it accessible through a web browser.
+- Allows for easy deployment and use without requiring extensive setup by end users.
+- Handles user queries in real time, providing immediate responses.
+- Useful for quick information retrieval and assistance based on the provided documents.
+
+## To Run the Program
+
+```
+pip install -r requirements.txt
+```
+Activate the environment in which you installed these dependencies, then run:
+
+```
+streamlit run app.py
+```
+Make sure to download
+`llama-2-7b-chat.ggmlv3.q4_0.bin`
+from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main and place it in the project directory next to `app.py`, which loads it by that file name.
+
+## FOR STEP BY STEP PROCESS CHECK THIS VIDEO
+[Process](https://www.youtube.com/watch?v=XNmFIkViEBU)
+
+## OUTPUT -
+![alt text](image.png)
diff --git a/llama2-medical-chatbot/app.py b/llama2-medical-chatbot/app.py
new file mode 100644
index 0000000..c6d068b
--- /dev/null
+++ b/llama2-medical-chatbot/app.py
@@ -0,0 +1,129 @@
+"""Streamlit front end for a PDF-grounded medical chatbot.
+
+PDFs in ``data/`` are split into chunks, embedded and indexed with FAISS; a
+local Llama 2 model loaded through CTransformers answers questions over the
+retrieved chunks.
+"""
+
+import streamlit as st
+from streamlit_chat import message
+from langchain.chains import ConversationalRetrievalChain
+from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.llms import CTransformers
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+
+
+@st.cache_resource
+def load_vector_store():
+    """Build the FAISS index over the PDFs in ``data/``.
+
+    Cached because Streamlit re-executes this script on every interaction and
+    re-embedding the corpus on each rerun is needlessly slow.
+
+    Raises:
+        FileNotFoundError: If no PDF could be loaded from ``data/``.
+    """
+    loader = DirectoryLoader('data/', glob="*.pdf", loader_cls=PyPDFLoader)
+    documents = loader.load()
+    if not documents:
+        raise FileNotFoundError("No PDF documents found in 'data/'.")
+
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    text_chunks = text_splitter.split_documents(documents)
+
+    embeddings = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-MiniLM-L6-v2",
+        model_kwargs={'device': "cpu"},
+    )
+    return FAISS.from_documents(text_chunks, embeddings)
+
+
+@st.cache_resource
+def load_llm():
+    """Load the local Llama 2 weights once and reuse them across reruns."""
+    return CTransformers(
+        model="llama-2-7b-chat.ggmlv3.q4_0.bin",
+        model_type="llama",
+        config={'max_new_tokens': 128, 'temperature': 0.01},
+    )
+
+
+def get_chain():
+    """Return the current session's retrieval chain, creating it on first use.
+
+    The chain owns a ConversationBufferMemory, so it is kept in session state
+    rather than in the shared resource cache: each browser session gets its
+    own conversation history, and that history survives script reruns.
+    """
+    if 'chain' not in st.session_state:
+        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+        st.session_state['chain'] = ConversationalRetrievalChain.from_llm(
+            llm=load_llm(),
+            chain_type='stuff',
+            retriever=load_vector_store().as_retriever(search_kwargs={"k": 2}),
+            memory=memory,
+        )
+    return st.session_state['chain']
+
+
+def conversation_chat(query):
+    """Answer ``query`` and record the exchange in the session transcript.
+
+    Args:
+        query: The user's question.
+
+    Returns:
+        The answer text produced by the chain.
+    """
+    # The chain's memory supplies ``chat_history``; only the question is passed.
+    result = get_chain().invoke({"question": query})
+    answer = result["answer"]
+    st.session_state['history'].append((query, answer))
+    return answer
+
+
+def initialize_session_state():
+    """Seed the per-session transcript lists on the first run of a session."""
+    if 'history' not in st.session_state:
+        st.session_state['history'] = []
+
+    if 'generated' not in st.session_state:
+        st.session_state['generated'] = ["Hello! Ask me anything"]
+
+    if 'past' not in st.session_state:
+        st.session_state['past'] = ["Hey! 👋"]
+
+
+def display_chat_history():
+    """Render the question form and the conversation so far."""
+    reply_container = st.container()
+    container = st.container()
+
+    with container:
+        with st.form(key='my_form', clear_on_submit=True):
+            user_input = st.text_input("Question:", placeholder="Type your question here", key='input')
+            submit_button = st.form_submit_button(label='Send')
+
+        if submit_button and user_input:
+            output = conversation_chat(user_input)
+
+            st.session_state['past'].append(user_input)
+            st.session_state['generated'].append(output)
+
+    if st.session_state['generated']:
+        with reply_container:
+            turns = zip(st.session_state['past'], st.session_state['generated'])
+            for i, (question, answer) in enumerate(turns):
+                message(question, is_user=True, key=str(i) + '_user', avatar_style="thumbs")
+                message(answer, key=str(i), avatar_style="fun-emoji")
+
+
+def main():
+    """Draw the page: title, session bootstrap, then the chat UI."""
+    st.title("HealthCare ChatBot 🧑🏽‍⚕️")
+    initialize_session_state()
+    display_chat_history()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/llama2-medical-chatbot/data/Allergies.pdf b/llama2-medical-chatbot/data/Allergies.pdf
new file mode 100644
index 0000000..ed37f4f
Binary files /dev/null and b/llama2-medical-chatbot/data/Allergies.pdf differ
diff --git a/llama2-medical-chatbot/data/med1.pdf b/llama2-medical-chatbot/data/med1.pdf
new file mode 100644
index 0000000..cdf39d5
Binary files /dev/null and b/llama2-medical-chatbot/data/med1.pdf differ
diff --git a/llama2-medical-chatbot/data/med2.pdf b/llama2-medical-chatbot/data/med2.pdf
new file mode 100644
index 0000000..417b9d3
Binary files /dev/null and b/llama2-medical-chatbot/data/med2.pdf differ
diff --git a/llama2-medical-chatbot/data/mental_health_Document.pdf b/llama2-medical-chatbot/data/mental_health_Document.pdf
new file mode 100644
index 0000000..de2a556
Binary files 
/dev/null and b/llama2-medical-chatbot/data/mental_health_Document.pdf differ
diff --git a/llama2-medical-chatbot/image.png b/llama2-medical-chatbot/image.png
new file mode 100644
index 0000000..42b7c17
Binary files /dev/null and b/llama2-medical-chatbot/image.png differ
diff --git a/llama2-medical-chatbot/requirements.txt b/llama2-medical-chatbot/requirements.txt
new file mode 100644
index 0000000..2ec9624
--- /dev/null
+++ b/llama2-medical-chatbot/requirements.txt
@@ -0,0 +1,17 @@
+# app.py imports langchain.chains / langchain.memory / langchain.text_splitter (pre-1.0 layout).
+langchain>=0.2,<1.0
+langchain-community
+langchain-huggingface
+ctransformers
+pypdf
+torch
+accelerate
+#bitsandbytes
+transformers
+sentence_transformers
+streamlit
+streamlit_chat
+faiss-cpu
+altair
+tiktoken
+huggingface-hub