Parallelize the COCO image downloads with joblib.

main() hands the per-image download() calls to joblib.Parallel, using one
worker per CPU core, instead of looping over the images serially. No
multiprocessing.Pool is created: nothing used it and it was never closed.
requirements.txt gains only joblib, because multiprocessing ships with
the Python standard library and must not be installed from PyPI.

The git "index" header lines are omitted: this patch was edited by hand,
so the recorded post-image blob hashes would no longer be accurate.

diff --git a/main.py b/main.py
--- a/main.py
+++ b/main.py
@@ -3,6 +3,8 @@
 from pycocotools.coco import COCO
 from tqdm import tqdm
 import os
+import multiprocessing
+from joblib import Parallel, delayed
 
 anns_path = './annotations/annotations.json'
 dest_path = './images/'
@@ -10,6 +12,7 @@
 
 
 def try_download(source: str, dest: str, name: str):
+    print(f'downloading {source}')
     r = requests.get(source, allow_redirects=True)
     if r.ok:
         with open(f'{dest}{name}', 'wb') as f:
@@ -25,9 +28,14 @@ def download(img: dict, dest_path: str):
 
 def main():
     dataset = COCO(anns_path)
-    for img in tqdm(dataset.imgs.values()):
-        download(img, dest_path)
+    # Each download is independent, so dispatch them through joblib with one
+    # worker per CPU core; tqdm wraps the inputs to show progress as tasks
+    # are handed to the workers.
+    num_cores = multiprocessing.cpu_count()
+    inputs = tqdm(dataset.imgs.values())
+    Parallel(n_jobs=num_cores)(
+        delayed(download)(img, dest_path) for img in inputs
+    )
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 requests
 pycocotools
 tqdm
+joblib