diff --git a/modules/CLIP-vectorizer/.dockerignore b/modules/CLIP-vectorizer/.dockerignore deleted file mode 100644 index 42061c0..0000000 --- a/modules/CLIP-vectorizer/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -README.md \ No newline at end of file diff --git a/modules/CLIP-vectorizer/Dockerfile b/modules/CLIP-vectorizer/Dockerfile deleted file mode 100644 index 8c53dcf..0000000 --- a/modules/CLIP-vectorizer/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM python - -WORKDIR /src - -COPY . /src - -RUN pip3 install torch --index-url https://download.pytorch.org/whl/cu124 - -RUN pip3 install -r requirements.txt - -RUN apt-get update - -EXPOSE 8080 - -CMD ["uvicorn","app.main:app","--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file diff --git a/modules/CLIP-vectorizer/README.md b/modules/CLIP-vectorizer/README.md deleted file mode 100644 index 9d5296e..0000000 --- a/modules/CLIP-vectorizer/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# ***CLIP-Vectorizer*** -This is openAI's CLIP model based API that creates text and image vector-embeddings to be stored and query a vector database. - -## ***Steps To run on localhost using Docker*** -- Make sure Docker is installed and running (and using WSL2 engine if in windows). - -- Follow the steps given in [Nvidia docs](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) to install nvidia drivers for your distribution (WSL for windows). - -- If CUDA drivers are not present or GPU access is not provided to the container, then it will automatically default to computing on CPU. - -- This command builds the image to be run inside a container - > `docker build -t vectorizer .` - -- Run the program inside a container using - > `docker run -it --gpus all -p 5000:8080 vectorizer` - -## ***API routes*** -- `/vectors` - - > Post route for sending text to be embedded in JSON format. - > Example Input JSON: - > { - >  "text" : "Your text here", - > } -- `/vectors_img` - - > Post route for sending images to be embedded in form-data format. - > Example Input Form-data: - > Key : file | Value : (Your image file) - diff --git a/modules/CLIP-vectorizer/app/main.py b/modules/CLIP-vectorizer/app/main.py deleted file mode 100644 index 66ad465..0000000 --- a/modules/CLIP-vectorizer/app/main.py +++ /dev/null @@ -1,120 +0,0 @@ -import time -# import PIL -from PIL import Image -from io import BytesIO -# import requests -from fastapi import FastAPI , File ,Form , UploadFile -import torch -from torch.nn.functional import normalize -from transformers import CLIPProcessor, CLIPModel -from pydantic import BaseModel - -model_id="openai/clip-vit-base-patch32" -torch_dtype = torch.float16 - -model = CLIPModel.from_pretrained(model_id,torch_dtype=torch_dtype,) -processor = CLIPProcessor.from_pretrained(model_id,clean_up_tokenization_spaces=True) - - -# if you have cuda set it to the active device -device = "cuda" if torch.cuda.is_available() else "cpu" -print(device) -# to display CUDA device name -# print(torch.cuda.get_device_name(torch.cuda.current_device())) - -# move the model to the device -model.to(device) - -app = FastAPI() - -class Text_input(BaseModel): - text: str - -@app.post("/vectors") -async def generate_text_embedding(text_input: Text_input ): - phrase = text_input.text - # print("Input: ",phrase) - label_tokens = processor( - text=phrase, - padding=True, - images=None, - return_tensors='pt' - ).to(device) - # encode tokens to sentence embeddings - label_embeddings = model.get_text_features(**label_tokens) - - # normalize the vector embeddings - label_embeddings = normalize(label_embeddings, p=2, dim=1) - - # detach from pytorch gradient computation - label_embeddings = label_embeddings.detach().cpu().tolist() - return {"result":label_embeddings[0]} - - - -@app.post("/vectors_img") -async def generate_image_embedding(file: UploadFile = File(...)): - # demo_url = "https://www.androidauthority.com/wp-content/uploads/2022/11/twitter-1-scaled-1000w-563h.jpg.webp" - # img = Image.open(requests.get(img_url, stream=True).raw) - - # Read the file contents as bytes - file_bytes = await file.read() - # Wrap the bytes in a BytesIO object - image_stream = BytesIO(file_bytes) - # Open the image using PIL - img = Image.open(image_stream) - image = processor( - text=None, - images=img, - return_tensors='pt' - ).to(device)['pixel_values'] - # encode tokens to image embeddings - image_embeddings = model.get_image_features(image) - - # normalize the vector embeddings - image_embeddings = normalize(image_embeddings, p=2, dim=1) - - # detach from pytorch gradient computation - image_embeddings = image_embeddings.detach().cpu().tolist() - return {"result":image_embeddings[0]} - - - -def benchmark_text(): - sentences = ["List of test sentences"] # Add your test sentences - # print(len(sentences)) - lap_times=[] - rounds=10 - for j in range(rounds): - start_time = time.time() - for i in sentences: - generate_text_embedding(i) - total_time = time.time()-start_time - lap_times.append(total_time) - print("The program while running on",device,"took:"+str(sum(lap_times)/rounds)) - - - -def benchmark_image(): - images = ["URLs of images"] #Add your test URLS - lap_times=[] - rounds=10 - for j in range(rounds): - start_time = time.time() - k=0 - for i in images: - try: - generate_image_embedding(i) # uncomment the img url line before running image benchmark and change the input parameter to a string img_url instead of file also comment all lines related to direct image input - except Exception as e: - return print("Can't convert: ",i,"at k = ",k,"\n",e) - k+=1 - total_time = time.time()-start_time - lap_times.append(total_time) - print("The program while running on",device,"took:"+str(sum(lap_times)/rounds)) - - - -# benchmark_text() -# benchmark_image() - - \ No newline at end of file diff --git a/modules/CLIP-vectorizer/requirements.txt b/modules/CLIP-vectorizer/requirements.txt deleted file mode 100644 index fe82c74..0000000 Binary files a/modules/CLIP-vectorizer/requirements.txt and /dev/null differ diff --git a/modules/all-mpnet-base_v2/Dockerfile b/modules/all-mpnet-base_v2/Dockerfile deleted file mode 100644 index 0cdb655..0000000 --- a/modules/all-mpnet-base_v2/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM nvcr.io/nvidia/cuda:12.4.0-base-ubuntu22.04 - -RUN apt-get update && apt-get -y install sudo - -RUN sudo apt-get install -y python3 python3-pip - -RUN sudo apt-get install -y python3-venv - -RUN mkdir /vectordb && cd /vectordb && \ - python3 -m venv myenv - -WORKDIR /vectordb - -RUN . myenv/bin/activate && \ - pip install torch transformers uvicorn fastapi - -COPY . /vectordb - -ENTRYPOINT ["myenv/bin/python3", "testapi.py"] diff --git a/modules/all-mpnet-base_v2/README.md b/modules/all-mpnet-base_v2/README.md deleted file mode 100644 index 25229e7..0000000 --- a/modules/all-mpnet-base_v2/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Vectorizer - -This project aims to build an optimized inference in a containerized environment. It utilizes the `all-mpnet-base-v2` sentence vectorization model, which can be found [here](https://huggingface.co/sentence-transformers/all-mpnet-base-v2). - -The base image used for running CUDA and PyTorch is `nvcr.io/nvidia/cuda:12.4.0-base-ubuntu22.04`. - -## Setup - -Follow these steps to run the server inside a container: - -1. Build the Docker image: - ```sh - docker build -t hawkeye/vectorizer:v1 . - ``` - -2. Run the Docker container: - ```sh - docker run -it --rm --gpus all hawkeye/vectorizer:v1 - ``` \ No newline at end of file diff --git a/modules/all-mpnet-base_v2/requirements.txt b/modules/all-mpnet-base_v2/requirements.txt deleted file mode 100644 index cfdd0f8..0000000 --- a/modules/all-mpnet-base_v2/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -certifi==2024.7.4 -charset-normalizer==3.3.2 -filelock==3.15.4 -fsspec==2024.6.1 -huggingface-hub==0.24.6 -idna==3.8 -Jinja2==3.1.4 -MarkupSafe==2.1.5 -mpmath==1.3.0 -networkx==3.3 -numpy==2.1.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==9.1.0.70 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.6.20 -nvidia-nvtx-cu12==12.1.105 -packaging==24.1 -PyYAML==6.0.2 -regex==2024.7.24 -requests==2.32.3 -safetensors==0.4.4 -sympy==1.13.2 -tokenizers==0.19.1 -torch==2.4.0 -tqdm==4.66.5 -transformers==4.44.2 -triton==3.0.0 -typing_extensions==4.12.2 -urllib3==2.2.2 \ No newline at end of file diff --git a/modules/all-mpnet-base_v2/script.py b/modules/all-mpnet-base_v2/script.py deleted file mode 100644 index bb0717d..0000000 --- a/modules/all-mpnet-base_v2/script.py +++ /dev/null @@ -1,101 +0,0 @@ -from transformers import AutoTokenizer, AutoModel -import torch -import torch.nn.functional as F -import time -# from torch.cuda.amp import autocast - - -# Mean Pooling - Take attention mask into account for correct averaging -def mean_pooling(model_output, attention_mask): - token_embeddings = model_output[0] - input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() - return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) - -# Demo sentences -sentences = [ - "The cat slept peacefully on the windowsill.", - "She opened the door to a surprise party in her honor.", - "The sky turned orange as the sun set behind the mountains.", - "A mysterious letter arrived in the mail, with no return address.", - "He brewed a strong cup of coffee to start his day.", - "The sound of waves crashing on the shore was calming.", - "She found an old photograph album in the attic.", - "A bird perched on the fence, singing a cheerful tune.", - "The old clock in the hallway struck midnight.", - "He lost his keys again, just as he was about to leave.", - "The garden was full of blooming flowers in every color.", - "A soft breeze rustled the leaves on the trees.", - "They decided to go for a walk in the park after dinner.", - "The lights flickered during the thunderstorm.", - "She wrote a heartfelt letter to her best friend.", - "The children laughed as they played in the rain.", - "A gentle snow began to fall, covering the ground in white.", - "He couldn’t stop smiling after hearing the good news.", - "The aroma of freshly baked bread filled the kitchen.", - "She spotted a shooting star while gazing at the night sky.", - "The bus arrived just as he reached the stop.", - "A rainbow appeared after the heavy rain.", - "The dog barked excitedly when its owner came home.", - "She read a book by the fireplace on a chilly evening.", - "The city lights twinkled in the distance.", - "He found a seashell on the beach during his morning jog.", - "The cake she baked turned out perfectly golden.", - "They watched a movie under the stars in the backyard.", - "The sound of laughter echoed through the house.", - "A butterfly landed gently on her shoulder.", - "He carefully wrapped the gift with a bright ribbon.", - "The old bookstore had a musty, comforting smell.", - "They danced together in the living room to their favorite song.", - "The scent of fresh pine filled the air during their hike.", - "She wore a warm scarf to keep the winter chill away.", - "The cat purred contentedly as it curled up on the couch.", - "He painted the walls a vibrant shade of blue.", - "The ice cream truck jingled its tune down the street.", - "She planted a small herb garden on her windowsill.", - "The kids built a fort out of blankets and pillows.", - "He wrote his thoughts down in a leather-bound journal.", - "The rain tapped lightly against the windowpane.", - "She practiced her guitar late into the night.", - "The smell of popcorn filled the theater.", - "He admired the sunrise from the top of the hill.", - "The library was quiet, except for the turning of pages.", - "She dipped her toes into the cool, clear water.", - "The airplane soared above the clouds.", - "He arranged the flowers in a vase on the table.", - "The stars twinkled brightly in the night sky." -] - - - -# Load model from HuggingFace Hub -tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-mpnet-base-v2', torch_dtype=torch.float16, clean_up_tokenization_spaces = True) -model = AutoModel.from_pretrained('sentence-transformers/all-mpnet-base-v2', torch_dtype=torch.float16) - -# Set the device to CUDA if available -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -model = model.to(device) - - -# this method is supposed to make the inference faster but it doesn't -# model = torch.compile(model) - - -#We are noting the time taken in tokenization and inference -start_time = time.time() - - -# Tokenize sentences -encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(device) - -# Compute token embeddings, do pooling n normalize -with torch.inference_mode(): - model_output = model(**encoded_input) -sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) -sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) - -end_time = time.time() - -# Print the results and execution time -print("Sentence embeddings:") -print(sentence_embeddings) -print(f"Execution time: {end_time - start_time:.4f} seconds") diff --git a/modules/all-mpnet-base_v2/testapi.py b/modules/all-mpnet-base_v2/testapi.py deleted file mode 100644 index 336edcf..0000000 --- a/modules/all-mpnet-base_v2/testapi.py +++ /dev/null @@ -1,81 +0,0 @@ -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel -from typing import Optional - - - -from transformers import AutoTokenizer, AutoModel -import torch -import torch.nn.functional as F - - -class VectorInputConfig(BaseModel): - pooling_strategy: str - - -class VectorInput(BaseModel): - text: str - config: Optional[VectorInputConfig] = None - - -# Mean Pooling - Take attention mask into account for correct averaging -def mean_pooling(model_output, attention_mask): - token_embeddings = model_output[0] # First element of model_output contains all token embeddings - input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() - return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) - - -# Load model from HuggingFace Hub -tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-mpnet-base-v2', torch_dtype=torch.float16, clean_up_tokenization_spaces = True) -model = AutoModel.from_pretrained('sentence-transformers/all-mpnet-base-v2', torch_dtype=torch.float16) - -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -print(device) -model = model.to(device) - -print("model ready for inference") - -def generate_embeddings_single(sentence, config: VectorInputConfig): - sentences = [sentence] - sentence_embeddings = generate_embeddings_batch(sentences, config) - return sentence_embeddings - - -def generate_embeddings_batch(sentences, config: VectorInputConfig): - encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(device) - with torch.inference_mode(): - model_output = model(**encoded_input) - - if config.pooling_strategy == 'mean': - sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) - else: - raise ValueError(f"Pooling strategy {config.pooling_strategy} not supported.") - - sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) - return sentence_embeddings - - -app = FastAPI() - -class SentenceInput(BaseModel): - sentence: str - -@app.post("/vectorize/") -@app.post("/vectorize") -async def vectorize(input: VectorInput): - try: - SentenceInput = input.text - config = input.config - sentence_embeddings = generate_embeddings_single(SentenceInput, config) - - vectorize_handler = sentence_embeddings.cpu().tolist() - vectorize_handler = vectorize_handler[0] - - return {"text":input.text,"vector": vectorize_handler, "dim": len(vectorize_handler)} - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -if __name__ == "__main__": - import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000)