1 change: 1 addition & 0 deletions llama2-medical-chatbot/.env
@@ -0,0 +1 @@
HUGGINGFACEHUB_API_TOKEN=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
1 change: 1 addition & 0 deletions llama2-medical-chatbot/.gitignore
@@ -0,0 +1 @@
env/
60 changes: 60 additions & 0 deletions llama2-medical-chatbot/README.md
@@ -0,0 +1,60 @@
# Llama2 Medical Chatbot

## Overview

Llama2 Medical Chatbot is an assistant that answers medical questions from the content of user-supplied PDF documents, using retrieval-augmented generation with a locally run, quantized Llama 2 model.

## Libraries Used
- Streamlit: A framework for creating web applications.
- streamlit_chat: A Streamlit component for displaying chat messages.
- LangChain: A library for building applications with large language models (LLMs).
- langchain_community: Community-maintained integrations for LangChain.
- PyPDFLoader and DirectoryLoader: For loading PDF documents from a directory.
- HuggingFaceEmbeddings: For creating embeddings using Hugging Face models.
- CTransformers: For running quantized (GGML) transformer models on CPU.
- RecursiveCharacterTextSplitter: For splitting text into manageable chunks.
- FAISS: A library for efficient similarity search.
- ConversationBufferMemory: For maintaining chat history.

## How the Program Works
1. Imports necessary libraries for document processing, creating embeddings, setting up the chatbot, and the web interface using Streamlit.
2. Uses DirectoryLoader to load all PDF files from the data/ directory.
3. Splits the content of the PDF files into smaller chunks using RecursiveCharacterTextSplitter.
4. Uses a Hugging Face model to generate embeddings for the text chunks.
5. Stores the text chunks and their embeddings in a FAISS vector store for efficient retrieval.
6. Loads a pre-trained language model using CTransformers.
7. Creates a ConversationalRetrievalChain that uses the language model and vector store to handle queries, along with conversation memory to track chat history.
8. Sets up the Streamlit app interface, including the title and layout for displaying chat history and user input.
9. Initializes session state variables to store chat history and generated responses.
10. Displays the chat interface, handling user input and displaying the chat history using Streamlit.
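
Condensed into code, the pipeline looks roughly like this (a sketch mirroring app.py below, minus the Streamlit UI; it assumes the model file and data/ directory described in the setup section, and the example question is hypothetical):

```
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory

# Steps 2-3: load the PDFs and split them into overlapping chunks
documents = DirectoryLoader('data/', glob="*.pdf", loader_cls=PyPDFLoader).load()
chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(documents)

# Steps 4-5: embed the chunks and index them in a FAISS vector store
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = FAISS.from_documents(chunks, embeddings)

# Steps 6-7: load the quantized Llama 2 model and wire it to the retriever with memory
llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama")
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=store.as_retriever(search_kwargs={"k": 2}),
    memory=memory,
)

# One question-answer turn; memory carries the history into later turns
print(chain({"question": "What are the common symptoms of allergies?"})["answer"])
```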

## Use of the Chatbot
- Provides a user-friendly web interface for interacting with a healthcare chatbot.
- Answers user questions based on the content of the loaded PDF documents.
- Maintains conversational context using conversation memory, enabling a continuous conversation flow.
- Runs as a Streamlit web application, accessible through a web browser and easy to deploy without extensive setup by end users.
- Handles user queries in real time, providing immediate responses.
- Useful for quick information retrieval and assistance based on the provided documents.

## To Run the Program

Install the dependencies:
```
pip install -r requirements.txt
```
Run the app from the Python environment in which you installed these dependencies.
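For example, using Python's built-in venv module (a minimal setup sketch; the activation command assumes Linux/macOS, and the repo's .gitignore already excludes env/):

```
python -m venv env
source env/bin/activate
pip install -r requirements.txt
```

Then start the Streamlit app: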

```
streamlit run app.py
```
Make sure to download the model file `llama-2-7b-chat.ggmlv3.q4_0.bin` from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main and place it in the project root before starting the app.
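
One way to fetch it (a sketch using the huggingface_hub package already listed in requirements.txt):

```
from huggingface_hub import hf_hub_download

# Download the quantized model file into the current directory
hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGML",
    filename="llama-2-7b-chat.ggmlv3.q4_0.bin",
    local_dir=".",
)
```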

## Step-by-Step Walkthrough
[Video walkthrough](https://www.youtube.com/watch?v=XNmFIkViEBU)

## Output
![Chatbot interface screenshot](image.png)
76 changes: 76 additions & 0 deletions llama2-medical-chatbot/app.py
@@ -0,0 +1,76 @@
import streamlit as st
from streamlit_chat import message
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory

# Load all PDF files from the data/ directory
loader = DirectoryLoader('data/', glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

# Split the documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
text_chunks = text_splitter.split_documents(documents)

# Create embeddings on CPU with a MiniLM sentence-transformer model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                   model_kwargs={'device': "cpu"})

# Index the chunks in a FAISS vector store for similarity search
vector_store = FAISS.from_documents(text_chunks, embeddings)

# Load the quantized Llama 2 chat model via CTransformers
llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama",
                    config={'max_new_tokens': 128, 'temperature': 0.01})

# Conversation memory keeps the chat history between turns
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Retrieval chain: answer each question from the top-2 most similar chunks
chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
                                              retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
                                              memory=memory)

st.title("HealthCare ChatBot 🧑🏽‍⚕️")

def conversation_chat(query):
    # Run the retrieval chain, then record the exchange in the session history
    result = chain({"question": query, "chat_history": st.session_state['history']})
    st.session_state['history'].append((query, result["answer"]))
    return result["answer"]

def initialize_session_state():
    # Seed default chat state on first page load
if 'history' not in st.session_state:
st.session_state['history'] = []

if 'generated' not in st.session_state:
st.session_state['generated'] = ["Hello! Ask me anything"]

if 'past' not in st.session_state:
st.session_state['past'] = ["Hey! 👋"]

def display_chat_history():
    # Messages render in reply_container, above the input form in container
    reply_container = st.container()
    container = st.container()

with container:
with st.form(key='my_form', clear_on_submit=True):
user_input = st.text_input("Question:", placeholder="Type your question here", key='input')
submit_button = st.form_submit_button(label='Send')

if submit_button and user_input:
output = conversation_chat(user_input)

st.session_state['past'].append(user_input)
st.session_state['generated'].append(output)

if st.session_state['generated']:
with reply_container:
for i in range(len(st.session_state['generated'])):
message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")

# Initialize session state
initialize_session_state()
# Display chat history
display_chat_history()
Binary file added llama2-medical-chatbot/data/Allergies.pdf
Binary file not shown.
Binary file added llama2-medical-chatbot/data/med1.pdf
Binary file not shown.
Binary file added llama2-medical-chatbot/data/med2.pdf
Binary file not shown.
Binary file not shown.
Binary file added llama2-medical-chatbot/image.png
16 changes: 16 additions & 0 deletions llama2-medical-chatbot/requirements.txt
@@ -0,0 +1,16 @@
langchain
langchain-community
langchain-huggingface
torch
accelerate
#bitsandbytes
transformers
sentence_transformers
streamlit
streamlit_chat
faiss-cpu
altair
tiktoken
huggingface-hub
ctransformers
pypdf