From 81e4f0420a4ab1e85931a3c7953fe7643f2a1609 Mon Sep 17 00:00:00 2001 From: Lain Date: Mon, 25 Mar 2024 12:14:23 +0100 Subject: [PATCH 1/4] inherit from PyTorchModelHubMixin --- image_classification/models/uniformer.py | 3 ++- image_classification/models/uniformer_light.py | 5 +++-- video_classification/slowfast/models/uniformer.py | 3 ++- video_classification/slowfast/models/uniformer_light.py | 3 ++- video_classification/slowfast/models/uniformer_light_fp32.py | 3 ++- .../slowfast/models/uniformer_light_ls_fp32.py | 3 ++- 6 files changed, 13 insertions(+), 7 deletions(-) diff --git a/image_classification/models/uniformer.py b/image_classification/models/uniformer.py index 7780eae..49e792f 100644 --- a/image_classification/models/uniformer.py +++ b/image_classification/models/uniformer.py @@ -8,6 +8,7 @@ from timm.models.vision_transformer import _cfg from timm.models.registry import register_model from timm.models.layers import trunc_normal_, DropPath, to_2tuple +from huggingface_hub import PyTorchModelHubMixin layer_scale = False init_value = 1e-6 @@ -196,7 +197,7 @@ def forward(self, x): return x -class UniFormer(nn.Module): +class UniFormer(nn.Module, PyTorchModelHubMixin): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/image_classification/models/uniformer_light.py b/image_classification/models/uniformer_light.py index 8683cb1..637d180 100644 --- a/image_classification/models/uniformer_light.py +++ b/image_classification/models/uniformer_light.py @@ -8,6 +8,7 @@ from timm.models.vision_transformer import _cfg from timm.models.registry import register_model from timm.models.layers import trunc_normal_, DropPath, to_2tuple +from huggingface_hub import PyTorchModelHubMixin layer_scale = False @@ -283,7 +284,7 @@ def forward(self, cls_token, x): return cls_token, x_patch -class PatchEmbed(nn.Module): +class PatchEmbed(nn.Module, ): """ Image to Patch Embedding """ def __init__(self, patch_size=16, in_chans=3, embed_dim=768): @@ -330,7 +331,7 @@ def forward(self, x): return x -class UniFormer_Light(nn.Module): +class UniFormer_Light(nn.Module, PyTorchModelHubMixin): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer.py b/video_classification/slowfast/models/uniformer.py index 242784b..db21c78 100644 --- a/video_classification/slowfast/models/uniformer.py +++ b/video_classification/slowfast/models/uniformer.py @@ -8,6 +8,7 @@ from timm.models.layers import trunc_normal_, DropPath, to_2tuple from .build import MODEL_REGISTRY import os +from huggingface_hub import PyTorchModelHubMixin import slowfast.utils.logging as logging @@ -258,7 +259,7 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer(nn.Module): +class Uniformer(nn.Module, PyTorchModelHubMixin): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light.py b/video_classification/slowfast/models/uniformer_light.py index d1e471d..ecc0841 100644 --- a/video_classification/slowfast/models/uniformer_light.py +++ b/video_classification/slowfast/models/uniformer_light.py @@ -8,6 +8,7 @@ from timm.models.layers import trunc_normal_, DropPath, to_2tuple from .build import MODEL_REGISTRY import os +from huggingface_hub import PyTorchModelHubMixin import slowfast.utils.logging as logging @@ -396,7 +397,7 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light(nn.Module): +class Uniformer_light(nn.Module, PyTorchModelHubMixin): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light_fp32.py b/video_classification/slowfast/models/uniformer_light_fp32.py index 079090d..ea773c9 100644 --- a/video_classification/slowfast/models/uniformer_light_fp32.py +++ b/video_classification/slowfast/models/uniformer_light_fp32.py @@ -8,6 +8,7 @@ from timm.models.layers import trunc_normal_, DropPath, to_2tuple from .build import MODEL_REGISTRY import os +from huggingface_hub import PyTorchModelHubMixin import slowfast.utils.logging as logging @@ -408,7 +409,7 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light_fp32(nn.Module): +class Uniformer_light_fp32(nn.Module, PyTorchModelHubMixin): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light_ls_fp32.py b/video_classification/slowfast/models/uniformer_light_ls_fp32.py index a8c4dc0..4651ad7 100644 --- a/video_classification/slowfast/models/uniformer_light_ls_fp32.py +++ b/video_classification/slowfast/models/uniformer_light_ls_fp32.py @@ -8,6 +8,7 @@ from timm.models.layers import trunc_normal_, DropPath, to_2tuple from .build import MODEL_REGISTRY import os +from huggingface_hub import PyTorchModelHubMixin import slowfast.utils.logging as logging @@ -420,7 +421,7 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light_ls_fp32(nn.Module): +class Uniformer_light_ls_fp32(nn.Module, PyTorchModelHubMixin): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 From 9fcd7eb1478f654098a23940adb92bc60a76f055 Mon Sep 17 00:00:00 2001 From: Lain Date: Thu, 28 Mar 2024 04:23:37 +0100 Subject: [PATCH 2/4] add metadata --- image_classification/models/uniformer.py | 5 ++++- image_classification/models/uniformer_light.py | 5 ++++- video_classification/slowfast/models/uniformer.py | 5 ++++- video_classification/slowfast/models/uniformer_light.py | 5 ++++- .../slowfast/models/uniformer_light_fp32.py | 5 ++++- .../slowfast/models/uniformer_light_ls_fp32.py | 5 ++++- video_classification/vis/model.py | 6 +++++- 7 files changed, 29 insertions(+), 7 deletions(-) diff --git a/image_classification/models/uniformer.py b/image_classification/models/uniformer.py index 49e792f..01c3d65 100644 --- a/image_classification/models/uniformer.py +++ b/image_classification/models/uniformer.py @@ -197,7 +197,10 @@ def forward(self, x): return x -class UniFormer(nn.Module, PyTorchModelHubMixin): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/image_classification/models/uniformer_light.py b/image_classification/models/uniformer_light.py index 637d180..6fbdc82 100644 --- a/image_classification/models/uniformer_light.py +++ b/image_classification/models/uniformer_light.py @@ -331,7 +331,10 @@ def forward(self, x): return x -class UniFormer_Light(nn.Module, PyTorchModelHubMixin): +class UniFormer_Light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer.py b/video_classification/slowfast/models/uniformer.py index db21c78..563068f 100644 --- a/video_classification/slowfast/models/uniformer.py +++ b/video_classification/slowfast/models/uniformer.py @@ -259,7 +259,10 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer(nn.Module, PyTorchModelHubMixin): +class Uniformer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light.py b/video_classification/slowfast/models/uniformer_light.py index ecc0841..d97c48d 100644 --- a/video_classification/slowfast/models/uniformer_light.py +++ b/video_classification/slowfast/models/uniformer_light.py @@ -397,7 +397,10 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light(nn.Module, PyTorchModelHubMixin): +class Uniformer_light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light_fp32.py b/video_classification/slowfast/models/uniformer_light_fp32.py index ea773c9..2854078 100644 --- a/video_classification/slowfast/models/uniformer_light_fp32.py +++ b/video_classification/slowfast/models/uniformer_light_fp32.py @@ -409,7 +409,10 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light_fp32(nn.Module, PyTorchModelHubMixin): +class Uniformer_light_fp32(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light_ls_fp32.py b/video_classification/slowfast/models/uniformer_light_ls_fp32.py index 4651ad7..734ce63 100644 --- a/video_classification/slowfast/models/uniformer_light_ls_fp32.py +++ b/video_classification/slowfast/models/uniformer_light_ls_fp32.py @@ -421,7 +421,10 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light_ls_fp32(nn.Module, PyTorchModelHubMixin): +class Uniformer_light_ls_fp32(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/vis/model.py b/video_classification/vis/model.py index d4d1060..6a4c53d 100644 --- a/video_classification/vis/model.py +++ b/video_classification/vis/model.py @@ -3,6 +3,7 @@ import torch.nn as nn from functools import partial from timm.models.layers import trunc_normal_, DropPath, to_2tuple +from huggingface_hub import PyTorchModelHubMixin def conv_3xnxn(inp, oup, kernel_size=3, stride=3, groups=1): @@ -236,7 +237,10 @@ def forward(self, x): return x -class Uniformer(nn.Module): +class Uniformer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 From 4fd530cbc67954d11d156f4443c8ebb32fd34023 Mon Sep 17 00:00:00 2001 From: Lain Date: Thu, 28 Mar 2024 04:33:03 +0100 Subject: [PATCH 3/4] fix requirements --- video_classification/setup.cfg | 2 +- video_classification/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/video_classification/setup.cfg b/video_classification/setup.cfg index bde7f4d..74ebfab 100644 --- a/video_classification/setup.cfg +++ b/video_classification/setup.cfg @@ -3,7 +3,7 @@ line_length=100 multi_line_output=4 known_standard_library=numpy,setuptools known_myself=slowfast -known_third_party=fvcore,iopath,av,torch,pycocotools,yacs,termcolor,scipy,simplejson,matplotlib,detectron2,torchvision,yaml,tqdm,psutil,opencv-python,pandas,tensorboard,moviepy,sklearn,cv2,PIL +known_third_party=fvcore,iopath,av,torch,pycocotools,yacs,termcolor,scipy,simplejson,matplotlib,detectron2,torchvision,yaml,tqdm,psutil,opencv-python,pandas,tensorboard,moviepy,scikit-learn,cv2,PIL no_lines_before=STDLIB,THIRDPARTY sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER default_section=FIRSTPARTY diff --git a/video_classification/setup.py b/video_classification/setup.py index 4937a48..68dafde 100644 --- a/video_classification/setup.py +++ b/video_classification/setup.py @@ -24,7 +24,7 @@ "pandas", "torchvision>=0.4.2", "pillow", - "sklearn", + "scikit-learn", "tensorboard", "timm" ], From be47397c73f12d2ed2f31a04235d911a08a29c39 Mon Sep 17 00:00:00 2001 From: Lain Date: Fri, 29 Mar 2024 18:02:26 +0100 Subject: [PATCH 4/4] update requirements and finish integrating PyTorchModelHubMixin --- image_classification/models/uniformer_light.py | 2 +- image_classification/requirements.txt | 3 ++- image_classification/token_labeling/tlt/models/uniformer.py | 6 +++++- object_detection/mmdet/models/backbones/uniformer.py | 6 +++++- object_detection/mmdet/models/backbones/uniformer_light.py | 6 +++++- pose_estimation/mmpose/models/backbones/uniformer.py | 6 +++++- semantic_segmentation/fpn_seg/uniformer.py | 6 +++++- semantic_segmentation/fpn_seg/uniformer_light.py | 6 +++++- semantic_segmentation/mmseg/models/backbones/uniformer.py | 6 +++++- .../mmseg/models/backbones/uniformer_light.py | 6 +++++- video_classification/setup.py | 3 ++- 11 files changed, 45 insertions(+), 11 deletions(-) diff --git a/image_classification/models/uniformer_light.py b/image_classification/models/uniformer_light.py index 6fbdc82..fdf1c7c 100644 --- a/image_classification/models/uniformer_light.py +++ b/image_classification/models/uniformer_light.py @@ -284,7 +284,7 @@ def forward(self, cls_token, x): return cls_token, x_patch -class PatchEmbed(nn.Module, ): +class PatchEmbed(nn.Module): """ Image to Patch Embedding """ def __init__(self, patch_size=16, in_chans=3, embed_dim=768): diff --git a/image_classification/requirements.txt b/image_classification/requirements.txt index 4d284d6..443c188 100644 --- a/image_classification/requirements.txt +++ b/image_classification/requirements.txt @@ -1,3 +1,4 @@ torch==1.7.0 torchvision==0.8.1 -timm==0.4.12 \ No newline at end of file +timm==0.4.12 +huggingface_hub>=0.22.0 \ No newline at end of file diff --git a/image_classification/token_labeling/tlt/models/uniformer.py b/image_classification/token_labeling/tlt/models/uniformer.py index ffce525..f05b00e 100644 --- a/image_classification/token_labeling/tlt/models/uniformer.py +++ b/image_classification/token_labeling/tlt/models/uniformer.py @@ -15,6 +15,7 @@ from timm.models.vision_transformer import _cfg from timm.models.registry import register_model from timm.models.layers import trunc_normal_, DropPath, to_2tuple +from huggingface_hub import PyTorchModelHubMixin layer_scale = False init_value = 1e-6 @@ -230,7 +231,10 @@ def forward(self, x): return x -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/object_detection/mmdet/models/backbones/uniformer.py b/object_detection/mmdet/models/backbones/uniformer.py index 5705a6d..32d2e59 100644 --- a/object_detection/mmdet/models/backbones/uniformer.py +++ b/object_detection/mmdet/models/backbones/uniformer.py @@ -15,6 +15,7 @@ import torch.utils.checkpoint as checkpoint import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmdet.utils import get_root_logger @@ -240,7 +241,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/object_detection/mmdet/models/backbones/uniformer_light.py b/object_detection/mmdet/models/backbones/uniformer_light.py index bd552b2..668a591 100644 --- a/object_detection/mmdet/models/backbones/uniformer_light.py +++ b/object_detection/mmdet/models/backbones/uniformer_light.py @@ -6,6 +6,7 @@ from timm.models.vision_transformer import _cfg from timm.models.layers import trunc_normal_, DropPath from torch.nn.modules.batchnorm import _BatchNorm +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmdet.utils import get_root_logger @@ -333,7 +334,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer_Light(nn.Module): +class UniFormer_Light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/pose_estimation/mmpose/models/backbones/uniformer.py b/pose_estimation/mmpose/models/backbones/uniformer.py index 00f4c97..eddd882 100644 --- a/pose_estimation/mmpose/models/backbones/uniformer.py +++ b/pose_estimation/mmpose/models/backbones/uniformer.py @@ -15,6 +15,7 @@ import torch.utils.checkpoint as checkpoint import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmpose.utils import get_root_logger @@ -240,7 +241,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/semantic_segmentation/fpn_seg/uniformer.py b/semantic_segmentation/fpn_seg/uniformer.py index a69e0e8..e1166f3 100644 --- a/semantic_segmentation/fpn_seg/uniformer.py +++ b/semantic_segmentation/fpn_seg/uniformer.py @@ -15,6 +15,7 @@ import torch.utils.checkpoint as checkpoint import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmseg.utils import get_root_logger @@ -240,7 +241,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/semantic_segmentation/fpn_seg/uniformer_light.py b/semantic_segmentation/fpn_seg/uniformer_light.py index 35b698b..85e55fd 100644 --- a/semantic_segmentation/fpn_seg/uniformer_light.py +++ b/semantic_segmentation/fpn_seg/uniformer_light.py @@ -5,6 +5,7 @@ from timm.models.vision_transformer import _cfg from timm.models.layers import trunc_normal_, DropPath from torch.nn.modules.batchnorm import _BatchNorm +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmseg.utils import get_root_logger @@ -332,7 +333,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer_Light(nn.Module): +class UniFormer_Light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/semantic_segmentation/mmseg/models/backbones/uniformer.py b/semantic_segmentation/mmseg/models/backbones/uniformer.py index f599e98..a56e8e2 100644 --- a/semantic_segmentation/mmseg/models/backbones/uniformer.py +++ b/semantic_segmentation/mmseg/models/backbones/uniformer.py @@ -15,6 +15,7 @@ import torch.utils.checkpoint as checkpoint import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmseg.utils import get_root_logger @@ -240,7 +241,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/semantic_segmentation/mmseg/models/backbones/uniformer_light.py b/semantic_segmentation/mmseg/models/backbones/uniformer_light.py index 35b698b..85e55fd 100644 --- a/semantic_segmentation/mmseg/models/backbones/uniformer_light.py +++ b/semantic_segmentation/mmseg/models/backbones/uniformer_light.py @@ -5,6 +5,7 @@ from timm.models.vision_transformer import _cfg from timm.models.layers import trunc_normal_, DropPath from torch.nn.modules.batchnorm import _BatchNorm +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmseg.utils import get_root_logger @@ -332,7 +333,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer_Light(nn.Module): +class UniFormer_Light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/setup.py b/video_classification/setup.py index 68dafde..0c32626 100644 --- a/video_classification/setup.py +++ b/video_classification/setup.py @@ -26,7 +26,8 @@ "pillow", "scikit-learn", "tensorboard", - "timm" + "timm", + "huggingface_hub>=0.22.0" ], extras_require={"tensorboard_video_visualization": ["moviepy"]}, packages=find_packages(exclude=("configs", "tests")),