diff --git a/image_classification/models/uniformer.py b/image_classification/models/uniformer.py index 7780eae..01c3d65 100644 --- a/image_classification/models/uniformer.py +++ b/image_classification/models/uniformer.py @@ -8,6 +8,7 @@ from timm.models.vision_transformer import _cfg from timm.models.registry import register_model from timm.models.layers import trunc_normal_, DropPath, to_2tuple +from huggingface_hub import PyTorchModelHubMixin layer_scale = False init_value = 1e-6 @@ -196,7 +197,10 @@ def forward(self, x): return x -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/image_classification/models/uniformer_light.py b/image_classification/models/uniformer_light.py index 8683cb1..fdf1c7c 100644 --- a/image_classification/models/uniformer_light.py +++ b/image_classification/models/uniformer_light.py @@ -8,6 +8,7 @@ from timm.models.vision_transformer import _cfg from timm.models.registry import register_model from timm.models.layers import trunc_normal_, DropPath, to_2tuple +from huggingface_hub import PyTorchModelHubMixin layer_scale = False @@ -330,7 +331,10 @@ def forward(self, x): return x -class UniFormer_Light(nn.Module): +class UniFormer_Light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/image_classification/requirements.txt b/image_classification/requirements.txt index 4d284d6..443c188 100644 --- a/image_classification/requirements.txt +++ b/image_classification/requirements.txt @@ -1,3 +1,4 @@ torch==1.7.0 torchvision==0.8.1 -timm==0.4.12 \ No newline at end of file +timm==0.4.12 +huggingface_hub>=0.22.0 \ No newline at end of file diff --git a/image_classification/token_labeling/tlt/models/uniformer.py b/image_classification/token_labeling/tlt/models/uniformer.py index ffce525..f05b00e 100644 --- a/image_classification/token_labeling/tlt/models/uniformer.py +++ b/image_classification/token_labeling/tlt/models/uniformer.py @@ -15,6 +15,7 @@ from timm.models.vision_transformer import _cfg from timm.models.registry import register_model from timm.models.layers import trunc_normal_, DropPath, to_2tuple +from huggingface_hub import PyTorchModelHubMixin layer_scale = False init_value = 1e-6 @@ -230,7 +231,10 @@ def forward(self, x): return x -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/object_detection/mmdet/models/backbones/uniformer.py b/object_detection/mmdet/models/backbones/uniformer.py index 5705a6d..32d2e59 100644 --- a/object_detection/mmdet/models/backbones/uniformer.py +++ b/object_detection/mmdet/models/backbones/uniformer.py @@ -15,6 +15,7 @@ import torch.utils.checkpoint as checkpoint import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmdet.utils import get_root_logger @@ -240,7 +241,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/object_detection/mmdet/models/backbones/uniformer_light.py b/object_detection/mmdet/models/backbones/uniformer_light.py index bd552b2..668a591 100644 --- a/object_detection/mmdet/models/backbones/uniformer_light.py +++ b/object_detection/mmdet/models/backbones/uniformer_light.py @@ -6,6 +6,7 @@ from timm.models.vision_transformer import _cfg from timm.models.layers import trunc_normal_, DropPath from torch.nn.modules.batchnorm import _BatchNorm +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmdet.utils import get_root_logger @@ -333,7 +334,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer_Light(nn.Module): +class UniFormer_Light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/pose_estimation/mmpose/models/backbones/uniformer.py b/pose_estimation/mmpose/models/backbones/uniformer.py index 00f4c97..eddd882 100644 --- a/pose_estimation/mmpose/models/backbones/uniformer.py +++ b/pose_estimation/mmpose/models/backbones/uniformer.py @@ -15,6 +15,7 @@ import torch.utils.checkpoint as checkpoint import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmpose.utils import get_root_logger @@ -240,7 +241,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/semantic_segmentation/fpn_seg/uniformer.py b/semantic_segmentation/fpn_seg/uniformer.py index a69e0e8..e1166f3 100644 --- a/semantic_segmentation/fpn_seg/uniformer.py +++ b/semantic_segmentation/fpn_seg/uniformer.py @@ -15,6 +15,7 @@ import torch.utils.checkpoint as checkpoint import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmseg.utils import get_root_logger @@ -240,7 +241,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/semantic_segmentation/fpn_seg/uniformer_light.py b/semantic_segmentation/fpn_seg/uniformer_light.py index 35b698b..85e55fd 100644 --- a/semantic_segmentation/fpn_seg/uniformer_light.py +++ b/semantic_segmentation/fpn_seg/uniformer_light.py @@ -5,6 +5,7 @@ from timm.models.vision_transformer import _cfg from timm.models.layers import trunc_normal_, DropPath from torch.nn.modules.batchnorm import _BatchNorm +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmseg.utils import get_root_logger @@ -332,7 +333,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer_Light(nn.Module): +class UniFormer_Light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/semantic_segmentation/mmseg/models/backbones/uniformer.py b/semantic_segmentation/mmseg/models/backbones/uniformer.py index f599e98..a56e8e2 100644 --- a/semantic_segmentation/mmseg/models/backbones/uniformer.py +++ b/semantic_segmentation/mmseg/models/backbones/uniformer.py @@ -15,6 +15,7 @@ import torch.utils.checkpoint as checkpoint import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmseg.utils import get_root_logger @@ -240,7 +241,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer(nn.Module): +class UniFormer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/semantic_segmentation/mmseg/models/backbones/uniformer_light.py b/semantic_segmentation/mmseg/models/backbones/uniformer_light.py index 35b698b..85e55fd 100644 --- a/semantic_segmentation/mmseg/models/backbones/uniformer_light.py +++ b/semantic_segmentation/mmseg/models/backbones/uniformer_light.py @@ -5,6 +5,7 @@ from timm.models.vision_transformer import _cfg from timm.models.layers import trunc_normal_, DropPath from torch.nn.modules.batchnorm import _BatchNorm +from huggingface_hub import PyTorchModelHubMixin from mmcv_custom import load_checkpoint from mmseg.utils import get_root_logger @@ -332,7 +333,10 @@ def forward(self, x): @BACKBONES.register_module() -class UniFormer_Light(nn.Module): +class UniFormer_Light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/setup.cfg b/video_classification/setup.cfg index bde7f4d..74ebfab 100644 --- a/video_classification/setup.cfg +++ b/video_classification/setup.cfg @@ -3,7 +3,7 @@ line_length=100 multi_line_output=4 known_standard_library=numpy,setuptools known_myself=slowfast -known_third_party=fvcore,iopath,av,torch,pycocotools,yacs,termcolor,scipy,simplejson,matplotlib,detectron2,torchvision,yaml,tqdm,psutil,opencv-python,pandas,tensorboard,moviepy,sklearn,cv2,PIL +known_third_party=fvcore,iopath,av,torch,pycocotools,yacs,termcolor,scipy,simplejson,matplotlib,detectron2,torchvision,yaml,tqdm,psutil,opencv-python,pandas,tensorboard,moviepy,scikit-learn,cv2,PIL no_lines_before=STDLIB,THIRDPARTY sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER default_section=FIRSTPARTY diff --git a/video_classification/setup.py b/video_classification/setup.py index 4937a48..0c32626 100644 --- a/video_classification/setup.py +++ b/video_classification/setup.py @@ -24,9 +24,10 @@ "pandas", "torchvision>=0.4.2", "pillow", - "sklearn", + "scikit-learn", "tensorboard", - "timm" + "timm", + "huggingface_hub>=0.22.0" ], extras_require={"tensorboard_video_visualization": ["moviepy"]}, packages=find_packages(exclude=("configs", "tests")), diff --git a/video_classification/slowfast/models/uniformer.py b/video_classification/slowfast/models/uniformer.py index 242784b..563068f 100644 --- a/video_classification/slowfast/models/uniformer.py +++ b/video_classification/slowfast/models/uniformer.py @@ -8,6 +8,7 @@ from timm.models.layers import trunc_normal_, DropPath, to_2tuple from .build import MODEL_REGISTRY import os +from huggingface_hub import PyTorchModelHubMixin import slowfast.utils.logging as logging @@ -258,7 +259,10 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer(nn.Module): +class Uniformer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light.py b/video_classification/slowfast/models/uniformer_light.py index d1e471d..d97c48d 100644 --- a/video_classification/slowfast/models/uniformer_light.py +++ b/video_classification/slowfast/models/uniformer_light.py @@ -8,6 +8,7 @@ from timm.models.layers import trunc_normal_, DropPath, to_2tuple from .build import MODEL_REGISTRY import os +from huggingface_hub import PyTorchModelHubMixin import slowfast.utils.logging as logging @@ -396,7 +397,10 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light(nn.Module): +class Uniformer_light(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light_fp32.py b/video_classification/slowfast/models/uniformer_light_fp32.py index 079090d..2854078 100644 --- a/video_classification/slowfast/models/uniformer_light_fp32.py +++ b/video_classification/slowfast/models/uniformer_light_fp32.py @@ -8,6 +8,7 @@ from timm.models.layers import trunc_normal_, DropPath, to_2tuple from .build import MODEL_REGISTRY import os +from huggingface_hub import PyTorchModelHubMixin import slowfast.utils.logging as logging @@ -408,7 +409,10 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light_fp32(nn.Module): +class Uniformer_light_fp32(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/slowfast/models/uniformer_light_ls_fp32.py b/video_classification/slowfast/models/uniformer_light_ls_fp32.py index a8c4dc0..734ce63 100644 --- a/video_classification/slowfast/models/uniformer_light_ls_fp32.py +++ b/video_classification/slowfast/models/uniformer_light_ls_fp32.py @@ -8,6 +8,7 @@ from timm.models.layers import trunc_normal_, DropPath, to_2tuple from .build import MODEL_REGISTRY import os +from huggingface_hub import PyTorchModelHubMixin import slowfast.utils.logging as logging @@ -420,7 +421,10 @@ def forward(self, x): @MODEL_REGISTRY.register() -class Uniformer_light_ls_fp32(nn.Module): +class Uniformer_light_ls_fp32(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 diff --git a/video_classification/vis/model.py b/video_classification/vis/model.py index d4d1060..6a4c53d 100644 --- a/video_classification/vis/model.py +++ b/video_classification/vis/model.py @@ -3,6 +3,7 @@ import torch.nn as nn from functools import partial from timm.models.layers import trunc_normal_, DropPath, to_2tuple +from huggingface_hub import PyTorchModelHubMixin def conv_3xnxn(inp, oup, kernel_size=3, stride=3, groups=1): @@ -236,7 +237,10 @@ def forward(self, x): return x -class Uniformer(nn.Module): +class Uniformer(nn.Module, + PyTorchModelHubMixin, + library_name = "UniFormer", + repo_url = "https://github.com/Sense-X/UniFormer"): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929