diff --git a/modules/CLIP-vectorizer/.dockerignore b/modules/CLIP-vectorizer/.dockerignore
deleted file mode 100644
index 42061c0..0000000
--- a/modules/CLIP-vectorizer/.dockerignore
+++ /dev/null
@@ -1 +0,0 @@
-README.md
\ No newline at end of file
diff --git a/modules/CLIP-vectorizer/Dockerfile b/modules/CLIP-vectorizer/Dockerfile
deleted file mode 100644
index 8c53dcf..0000000
--- a/modules/CLIP-vectorizer/Dockerfile
+++ /dev/null
@@ -1,15 +0,0 @@
-FROM python
-
-WORKDIR /src
-
-COPY  . /src
-
-RUN pip3 install torch --index-url https://download.pytorch.org/whl/cu124
-
-RUN pip3 install -r requirements.txt
-
-RUN apt-get update
-
-EXPOSE 8080
-
-CMD ["uvicorn","app.main:app","--host", "0.0.0.0", "--port", "8080"]
\ No newline at end of file
diff --git a/modules/CLIP-vectorizer/README.md b/modules/CLIP-vectorizer/README.md
deleted file mode 100644
index 9d5296e..0000000
--- a/modules/CLIP-vectorizer/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# ***CLIP-Vectorizer***
-This is openAI's CLIP model based API that creates text and image vector-embeddings to be stored and query a vector database.
-
-## ***Steps To run on localhost using Docker***
-- Make sure Docker is installed and running (and using WSL2 engine if in windows).
-
-- Follow the steps given in [Nvidia docs](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) to install nvidia drivers for your distribution (WSL for windows).
-
-- If CUDA drivers are not present or GPU access is not provided to the container, then it will automatically default to computing on CPU.
-
-- This command builds the image to be run inside a container  
-  > `docker build -t vectorizer .`
-
-- Run the program inside a container using
-  > `docker run -it --gpus all -p 5000:8080 vectorizer`  
-
-## ***API routes***
-- `/vectors`   
-
-  > Post route for sending text to be embedded in JSON format.   
-  > Example Input JSON:  
-  > {  
-  > &emsp;"text" : "Your text here",  
-  > }
-- `/vectors_img`  
-
-  > Post route for sending images to be embedded in form-data format.   
-  > Example Input Form-data:  
-  > Key : file | Value : (Your image file)  
-
diff --git a/modules/CLIP-vectorizer/app/main.py b/modules/CLIP-vectorizer/app/main.py
deleted file mode 100644
index 66ad465..0000000
--- a/modules/CLIP-vectorizer/app/main.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import time
-# import PIL
-from PIL import Image
-from io import BytesIO
-# import requests
-from fastapi import FastAPI , File ,Form , UploadFile 
-import torch
-from torch.nn.functional import normalize
-from transformers import CLIPProcessor, CLIPModel
-from pydantic import BaseModel
-
-model_id="openai/clip-vit-base-patch32"
-torch_dtype = torch.float16
-
-model = CLIPModel.from_pretrained(model_id,torch_dtype=torch_dtype,)
-processor = CLIPProcessor.from_pretrained(model_id,clean_up_tokenization_spaces=True)
-
-
-# if you have cuda set it to the active device
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(device)
-# to display CUDA device name
-# print(torch.cuda.get_device_name(torch.cuda.current_device()))  
-
-# move the model to the device
-model.to(device)
-
-app = FastAPI()
-
-class Text_input(BaseModel):
-    text: str
-
-@app.post("/vectors")
-async def generate_text_embedding(text_input: Text_input ):
-    phrase = text_input.text
-    # print("Input: ",phrase)
-    label_tokens = processor(
-        text=phrase,
-        padding=True,
-        images=None,
-        return_tensors='pt'
-    ).to(device)
-    # encode tokens to sentence embeddings
-    label_embeddings = model.get_text_features(**label_tokens)
-    
-    # normalize the vector embeddings
-    label_embeddings = normalize(label_embeddings, p=2, dim=1)
-    
-    # detach from pytorch gradient computation 
-    label_embeddings = label_embeddings.detach().cpu().tolist()
-    return {"result":label_embeddings[0]}
-
-
-
-@app.post("/vectors_img")
-async def generate_image_embedding(file: UploadFile = File(...)):
-    # demo_url = "https://www.androidauthority.com/wp-content/uploads/2022/11/twitter-1-scaled-1000w-563h.jpg.webp"
-    # img  = Image.open(requests.get(img_url, stream=True).raw)
-    
-    # Read the file contents as bytes
-    file_bytes = await file.read()
-    # Wrap the bytes in a BytesIO object
-    image_stream = BytesIO(file_bytes)
-    # Open the image using PIL
-    img = Image.open(image_stream)
-    image = processor(
-        text=None,
-        images=img,
-        return_tensors='pt'
-    ).to(device)['pixel_values']
-    # encode tokens to image embeddings
-    image_embeddings = model.get_image_features(image)
-
-    # normalize the vector embeddings    
-    image_embeddings = normalize(image_embeddings, p=2, dim=1)
-    
-    # detach from pytorch gradient computation 
-    image_embeddings = image_embeddings.detach().cpu().tolist()
-    return {"result":image_embeddings[0]}
-
-
-
-def benchmark_text():
-    sentences = ["List of test sentences"] # Add your test sentences
-    # print(len(sentences))
-    lap_times=[]
-    rounds=10
-    for j in range(rounds):
-        start_time = time.time()
-        for i in sentences:
-            generate_text_embedding(i)
-        total_time = time.time()-start_time
-        lap_times.append(total_time)
-    print("The program while running on",device,"took:"+str(sum(lap_times)/rounds))
-
-
-
-def benchmark_image():
-    images = ["URLs of images"] #Add your test URLS
-    lap_times=[]
-    rounds=10
-    for j in range(rounds):
-        start_time = time.time()
-        k=0
-        for i in images:
-            try:
-                generate_image_embedding(i)  # uncomment the img url line before running image benchmark and change the input parameter to a string img_url instead of file also comment all lines related to direct image input
-            except Exception as e:
-                return print("Can't convert: ",i,"at k = ",k,"\n",e)
-            k+=1
-        total_time = time.time()-start_time
-        lap_times.append(total_time)
-    print("The program while running on",device,"took:"+str(sum(lap_times)/rounds))
-
-
-
-# benchmark_text()
-# benchmark_image() 
-
-    
\ No newline at end of file
diff --git a/modules/CLIP-vectorizer/requirements.txt b/modules/CLIP-vectorizer/requirements.txt
deleted file mode 100644
index fe82c74..0000000
Binary files a/modules/CLIP-vectorizer/requirements.txt and /dev/null differ
diff --git a/modules/all-mpnet-base_v2/Dockerfile b/modules/all-mpnet-base_v2/Dockerfile
deleted file mode 100644
index 0cdb655..0000000
--- a/modules/all-mpnet-base_v2/Dockerfile
+++ /dev/null
@@ -1,19 +0,0 @@
-FROM nvcr.io/nvidia/cuda:12.4.0-base-ubuntu22.04
-
-RUN apt-get update && apt-get -y install sudo
-
-RUN sudo apt-get install -y python3 python3-pip
-
-RUN sudo apt-get install -y python3-venv
-
-RUN mkdir /vectordb && cd /vectordb && \
-    python3 -m venv myenv
-
-WORKDIR /vectordb
-
-RUN . myenv/bin/activate && \
-    pip install torch transformers uvicorn fastapi
-
-COPY . /vectordb
-
-ENTRYPOINT ["myenv/bin/python3", "testapi.py"]
diff --git a/modules/all-mpnet-base_v2/README.md b/modules/all-mpnet-base_v2/README.md
deleted file mode 100644
index 25229e7..0000000
--- a/modules/all-mpnet-base_v2/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Vectorizer
-
-This project aims to build an optimized inference in a containerized environment. It utilizes the `all-mpnet-base-v2` sentence vectorization model, which can be found [here](https://huggingface.co/sentence-transformers/all-mpnet-base-v2).
-
-The base image used for running CUDA and PyTorch is `nvcr.io/nvidia/cuda:12.4.0-base-ubuntu22.04`.
-
-## Setup
-
-Follow these steps to run the server inside a container:
-
-1. Build the Docker image:
-    ```sh
-    docker build -t hawkeye/vectorizer:v1 .
-    ```
-
-2. Run the Docker container:
-    ```sh
-    docker run -it --rm --gpus all hawkeye/vectorizer:v1
-    ```
\ No newline at end of file
diff --git a/modules/all-mpnet-base_v2/requirements.txt b/modules/all-mpnet-base_v2/requirements.txt
deleted file mode 100644
index cfdd0f8..0000000
--- a/modules/all-mpnet-base_v2/requirements.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-certifi==2024.7.4
-charset-normalizer==3.3.2
-filelock==3.15.4
-fsspec==2024.6.1
-huggingface-hub==0.24.6
-idna==3.8
-Jinja2==3.1.4
-MarkupSafe==2.1.5
-mpmath==1.3.0
-networkx==3.3
-numpy==2.1.0
-nvidia-cublas-cu12==12.1.3.1
-nvidia-cuda-cupti-cu12==12.1.105
-nvidia-cuda-nvrtc-cu12==12.1.105
-nvidia-cuda-runtime-cu12==12.1.105
-nvidia-cudnn-cu12==9.1.0.70
-nvidia-cufft-cu12==11.0.2.54
-nvidia-curand-cu12==10.3.2.106
-nvidia-cusolver-cu12==11.4.5.107
-nvidia-cusparse-cu12==12.1.0.106
-nvidia-nccl-cu12==2.20.5
-nvidia-nvjitlink-cu12==12.6.20
-nvidia-nvtx-cu12==12.1.105
-packaging==24.1
-PyYAML==6.0.2
-regex==2024.7.24
-requests==2.32.3
-safetensors==0.4.4
-sympy==1.13.2
-tokenizers==0.19.1
-torch==2.4.0
-tqdm==4.66.5
-transformers==4.44.2
-triton==3.0.0
-typing_extensions==4.12.2
-urllib3==2.2.2
\ No newline at end of file
diff --git a/modules/all-mpnet-base_v2/script.py b/modules/all-mpnet-base_v2/script.py
deleted file mode 100644
index bb0717d..0000000
--- a/modules/all-mpnet-base_v2/script.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from transformers import AutoTokenizer, AutoModel
-import torch
-import torch.nn.functional as F
-import time
-# from torch.cuda.amp import autocast
-
-
-# Mean Pooling - Take attention mask into account for correct averaging
-def mean_pooling(model_output, attention_mask):
-    token_embeddings = model_output[0] 
-    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-
-# Demo sentences
-sentences = [
-    "The cat slept peacefully on the windowsill.",
-    "She opened the door to a surprise party in her honor.",
-    "The sky turned orange as the sun set behind the mountains.",
-    "A mysterious letter arrived in the mail, with no return address.",
-    "He brewed a strong cup of coffee to start his day.",
-    "The sound of waves crashing on the shore was calming.",
-    "She found an old photograph album in the attic.",
-    "A bird perched on the fence, singing a cheerful tune.",
-    "The old clock in the hallway struck midnight.",
-    "He lost his keys again, just as he was about to leave.",
-    "The garden was full of blooming flowers in every color.",
-    "A soft breeze rustled the leaves on the trees.",
-    "They decided to go for a walk in the park after dinner.",
-    "The lights flickered during the thunderstorm.",
-    "She wrote a heartfelt letter to her best friend.",
-    "The children laughed as they played in the rain.",
-    "A gentle snow began to fall, covering the ground in white.",
-    "He couldn’t stop smiling after hearing the good news.",
-    "The aroma of freshly baked bread filled the kitchen.",
-    "She spotted a shooting star while gazing at the night sky.",
-    "The bus arrived just as he reached the stop.",
-    "A rainbow appeared after the heavy rain.",
-    "The dog barked excitedly when its owner came home.",
-    "She read a book by the fireplace on a chilly evening.",
-    "The city lights twinkled in the distance.",
-    "He found a seashell on the beach during his morning jog.",
-    "The cake she baked turned out perfectly golden.",
-    "They watched a movie under the stars in the backyard.",
-    "The sound of laughter echoed through the house.",
-    "A butterfly landed gently on her shoulder.",
-    "He carefully wrapped the gift with a bright ribbon.",
-    "The old bookstore had a musty, comforting smell.",
-    "They danced together in the living room to their favorite song.",
-    "The scent of fresh pine filled the air during their hike.",
-    "She wore a warm scarf to keep the winter chill away.",
-    "The cat purred contentedly as it curled up on the couch.",
-    "He painted the walls a vibrant shade of blue.",
-    "The ice cream truck jingled its tune down the street.",
-    "She planted a small herb garden on her windowsill.",
-    "The kids built a fort out of blankets and pillows.",
-    "He wrote his thoughts down in a leather-bound journal.",
-    "The rain tapped lightly against the windowpane.",
-    "She practiced her guitar late into the night.",
-    "The smell of popcorn filled the theater.",
-    "He admired the sunrise from the top of the hill.",
-    "The library was quiet, except for the turning of pages.",
-    "She dipped her toes into the cool, clear water.",
-    "The airplane soared above the clouds.",
-    "He arranged the flowers in a vase on the table.",
-    "The stars twinkled brightly in the night sky."
-]
-
-
-
-# Load model from HuggingFace Hub
-tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-mpnet-base-v2', torch_dtype=torch.float16, clean_up_tokenization_spaces = True)
-model = AutoModel.from_pretrained('sentence-transformers/all-mpnet-base-v2', torch_dtype=torch.float16)
-
-# Set the device to CUDA if available
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model = model.to(device)
-
-
-# this method is supposed to make the inference faster but it doesn't
-# model = torch.compile(model)
-
-
-#We are noting the time taken in tokenization and inference
-start_time = time.time()
-
-
-# Tokenize sentences
-encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(device)
-
-# Compute token embeddings, do pooling n normalize
-with torch.inference_mode():
-    model_output = model(**encoded_input)
-sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
-
-end_time = time.time()
-
-# Print the results and execution time
-print("Sentence embeddings:")
-print(sentence_embeddings)
-print(f"Execution time: {end_time - start_time:.4f} seconds")
diff --git a/modules/all-mpnet-base_v2/testapi.py b/modules/all-mpnet-base_v2/testapi.py
deleted file mode 100644
index 336edcf..0000000
--- a/modules/all-mpnet-base_v2/testapi.py
+++ /dev/null
@@ -1,81 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from typing import Optional
-
-
-
-from transformers import AutoTokenizer, AutoModel
-import torch
-import torch.nn.functional as F
-
-
-class VectorInputConfig(BaseModel):
-    pooling_strategy: str
-
-
-class VectorInput(BaseModel):
-    text: str
-    config: Optional[VectorInputConfig] = None
-
-
-# Mean Pooling - Take attention mask into account for correct averaging
-def mean_pooling(model_output, attention_mask):
-    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
-    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-
-
-# Load model from HuggingFace Hub
-tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-mpnet-base-v2', torch_dtype=torch.float16, clean_up_tokenization_spaces = True)
-model = AutoModel.from_pretrained('sentence-transformers/all-mpnet-base-v2', torch_dtype=torch.float16)
-
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-print(device)
-model = model.to(device)
-
-print("model ready for inference")
-
-def generate_embeddings_single(sentence, config: VectorInputConfig):
-    sentences = [sentence]
-    sentence_embeddings = generate_embeddings_batch(sentences, config)
-    return sentence_embeddings
-
-
-def generate_embeddings_batch(sentences, config: VectorInputConfig):
-    encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(device)
-    with torch.inference_mode():
-        model_output = model(**encoded_input)
-    
-    if config.pooling_strategy == 'mean':
-        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-    else:
-        raise ValueError(f"Pooling strategy {config.pooling_strategy} not supported.")
-    
-    sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
-    return sentence_embeddings
-
-
-app = FastAPI()
-
-class SentenceInput(BaseModel):
-    sentence: str
-
-@app.post("/vectorize/")
-@app.post("/vectorize")
-async def vectorize(input: VectorInput):
-    try:
-        SentenceInput = input.text
-        config = input.config
-        sentence_embeddings = generate_embeddings_single(SentenceInput, config)
-        
-        vectorize_handler = sentence_embeddings.cpu().tolist()
-        vectorize_handler = vectorize_handler[0]
-        
-        return {"text":input.text,"vector": vectorize_handler, "dim": len(vectorize_handler)}
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-        
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)