# ranker.py
#
# Provenance: reconstructed from a three-part patch series by Adam El Youmi
# (Mon, 22 May 2023):
#   054f3e875a5184139525c48fdccfd9f3089070bc  [PATCH 1/3] Added a simple ranker file
#   c25f78708851dd643746cd3aaaebaae7e13212bc  [PATCH 2/3] Added a simple ranker file - removed comments
#   90607c0bd3efb2956e479de507fb0edf9d2b3f42  [PATCH 3/3] Added a simple ranker file - removed comments
# The code below corresponds to ranker.py after PATCH 3/3, with fixes applied.
"""Rank the files of a directory by similarity to a single query item.

Every supported file in the dataset directory is embedded with an ImageBind
model, the query item is embedded the same way, and the top N dataset files
are printed together with their similarity score.

Example:
    python c:/.../ImageBind/ranker.py -d ImageBind/.assets -i ImageBind/.assets/bird_audio.wav -n 3 > output.txt
"""

import argparse
import operator
import os
import pathlib

# Number of results printed when -n is not given.
DEFAULT_TOP_N = 5


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``d`` (dataset directory), ``i`` (query item),
        ``m`` (model path or ``None``) and ``n`` (int, number of results).
    """
    parser = argparse.ArgumentParser(
        description="Rank the files of a directory by similarity to an item."
    )
    parser.add_argument("-d", required=True, help="Directory of the dataset")
    parser.add_argument(
        "-i", required=True, help="Item to find the similarities with"
    )
    parser.add_argument("-m", default=None, help="Model path")
    parser.add_argument(
        "-n", type=int, default=DEFAULT_TOP_N, help="Top N items to return"
    )
    return parser.parse_args(argv)


def file_suffix(path):
    """Return the lower-cased extension of *path* (e.g. ``".jpg"``).

    PureWindowsPath is used only for parsing: it accepts both ``/`` and ``\\``
    as separators, so Windows-style paths are split correctly on any host.
    """
    return pathlib.PureWindowsPath(path).suffix.lower()


def read_text_item(path):
    """Read a text file and return its lines joined by single spaces.

    Only the trailing newline of each line is removed; an empty file yields
    the empty string.
    """
    with open(path, "r") as f:
        lines = [line.removesuffix("\n") for line in f]
    return " ".join(lines)


def collect_dataset(directory, modalities, text_modality):
    """Group the supported files of *directory* by modality.

    Args:
        directory: Directory whose direct children are scanned.
        modalities: Mapping from lower-case file extension to modality key.
        text_modality: The modality key whose files must be read as text.

    Returns:
        ``(inputs, files)``: two dicts keyed by modality. ``files[m]`` lists
        the file paths; ``inputs[m]`` lists, index-aligned with ``files[m]``,
        what is handed to the preprocessing function (the text content for
        text files, the path for everything else). Modalities without any
        file are absent from both dicts.
    """
    inputs = {}
    files = {}
    # Sorted so that the result does not depend on the OS listing order.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        modality = modalities.get(file_suffix(name))
        # Skip sub-directories and unsupported extensions instead of crashing.
        if modality is None or not os.path.isfile(path):
            continue

        if modality == text_modality:
            payload = read_text_item(path)
            if payload == "":
                # Empty text files carry nothing to embed.
                continue
        else:
            payload = path

        # Two independent lists per modality: they must never alias each
        # other, otherwise paths and payloads end up interleaved.
        files.setdefault(modality, []).append(path)
        inputs.setdefault(modality, []).append(payload)
    return inputs, files


def get_top_files(dictionary, N):
    """Return the N best-scored entries of *dictionary*.

    Args:
        dictionary: Mapping from file to numeric score.
        N: Maximum number of entries to return.

    Returns:
        List of ``(file, "<score * 100> %")`` tuples, highest score first.
    """
    ranked = sorted(
        dictionary.items(), key=operator.itemgetter(1), reverse=True
    )
    return [(name, f"{score * 100} %") for name, score in ranked[:N]]


def main(argv=None):
    """Embed the dataset and the query item, then print the top matches."""
    args = parse_args(argv)

    # Imported here so the helpers above stay importable (and testable)
    # without the model stack being installed.
    import torch
    from ImageBind.models import imagebind_model
    from ImageBind.models.imagebind_model import ModalityType
    import ImageBind.data as data

    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    modalities = {
        ".wav": ModalityType.AUDIO,
        ".mp3": ModalityType.AUDIO,
        ".png": ModalityType.VISION,
        ".jpeg": ModalityType.VISION,
        ".jpg": ModalityType.VISION,
        ".mp4": ModalityType.VISION,
        ".txt": ModalityType.TEXT,
    }

    # Each function is called with (list of items, device).
    preprocess_fn = {
        ModalityType.AUDIO: data.load_and_transform_audio_data,
        ModalityType.VISION: data.load_and_transform_vision_data,
        ModalityType.TEXT: data.load_and_transform_text,
    }

    query_suffix = file_suffix(args.i)
    if query_suffix not in modalities:
        raise SystemExit(
            f"Unsupported file type {query_suffix!r} for query item {args.i!r}"
        )
    query_modality = modalities[query_suffix]

    inputs, files = collect_dataset(args.d, modalities, ModalityType.TEXT)
    if not inputs:
        raise SystemExit(f"No supported files found in {args.d!r}")

    batches = {
        modality: preprocess_fn[modality](items, device)
        for modality, items in inputs.items()
    }

    if args.m is not None:
        # SECURITY: torch.load unpickles the file; only load trusted models.
        # map_location lets a model saved on GPU load on a CPU-only host.
        model = torch.load(args.m, map_location=device)
    else:
        model = imagebind_model.imagebind_huge(pretrained=True)
    model.eval()
    model.to(device)

    # The query goes through the same loading rules as the dataset: text
    # files are read, every other modality is passed as a plain path string.
    if query_modality == ModalityType.TEXT:
        query_item = read_text_item(args.i)
    else:
        query_item = args.i
    query_batch = {
        query_modality: preprocess_fn[query_modality]([query_item], device)
    }

    with torch.no_grad():
        embeddings = model(batches)
        query_embedding = model(query_batch)[query_modality]

    scores = {}
    for modality, paths in files.items():
        # NOTE(review): softmax is taken per modality, so scores of files
        # from different modalities are normalised independently - confirm
        # that ranking them in one list is intended.
        similarity = torch.softmax(
            query_embedding @ embeddings[modality].T, dim=-1
        )
        # Row 0 is the single query item; tolist() yields plain floats so the
        # printed percentages are numbers rather than tensor reprs.
        scores.update(zip(paths, similarity[0].tolist()))

    print(*get_top_files(scores, args.n), sep="\n")


if __name__ == "__main__":
    main()