From 045bf628139099d0efe0b123ff82103ff21c6c29 Mon Sep 17 00:00:00 2001 From: Abhinav Ayalur Date: Thu, 21 Mar 2024 23:40:59 +0000 Subject: [PATCH 1/4] wip: init commit --- head_segmentation/face_parsing/face-parsing.PyTorch | 1 + 1 file changed, 1 insertion(+) create mode 160000 head_segmentation/face_parsing/face-parsing.PyTorch diff --git a/head_segmentation/face_parsing/face-parsing.PyTorch b/head_segmentation/face_parsing/face-parsing.PyTorch new file mode 160000 index 0000000..d2e684c --- /dev/null +++ b/head_segmentation/face_parsing/face-parsing.PyTorch @@ -0,0 +1 @@ +Subproject commit d2e684cf1588b46145635e8fe7bcc29544e5537e From be665c767ed8b672e2948f27edc28720a1d67700 Mon Sep 17 00:00:00 2001 From: Abhinav Ayalur Date: Thu, 21 Mar 2024 23:46:47 +0000 Subject: [PATCH 2/4] wip: cleanup --- head_segmentation/face_parsing/.gitignore | 3 + head_segmentation/face_parsing/LICENSE | 21 ++ head_segmentation/face_parsing/README.md | 68 ++++ head_segmentation/face_parsing/evaluate.py | 95 +++++ .../face_parsing/face-parsing.PyTorch | 1 - .../face_parsing/face_dataset.py | 106 ++++++ head_segmentation/face_parsing/logger.py | 23 ++ head_segmentation/face_parsing/loss.py | 75 ++++ head_segmentation/face_parsing/main.py | 178 ++++++++++ head_segmentation/face_parsing/makeup.py | 130 +++++++ head_segmentation/face_parsing/model.py | 283 +++++++++++++++ .../face_parsing/modules/__init__.py | 5 + head_segmentation/face_parsing/modules/bn.py | 130 +++++++ .../face_parsing/modules/deeplab.py | 84 +++++ .../face_parsing/modules/dense.py | 42 +++ .../face_parsing/modules/functions.py | 234 ++++++++++++ .../face_parsing/modules/misc.py | 21 ++ .../face_parsing/modules/residual.py | 88 +++++ .../face_parsing/modules/src/checks.h | 15 + .../face_parsing/modules/src/inplace_abn.cpp | 95 +++++ .../face_parsing/modules/src/inplace_abn.h | 88 +++++ .../modules/src/inplace_abn_cpu.cpp | 119 +++++++ .../modules/src/inplace_abn_cuda.cu | 333 ++++++++++++++++++ .../modules/src/inplace_abn_cuda_half.cu | 275 +++++++++++++++ .../face_parsing/modules/src/utils/checks.h | 15 + .../face_parsing/modules/src/utils/common.h | 49 +++ .../face_parsing/modules/src/utils/cuda.cuh | 71 ++++ head_segmentation/face_parsing/optimizer.py | 69 ++++ .../face_parsing/prepropess_data.py | 38 ++ head_segmentation/face_parsing/resnet.py | 109 ++++++ head_segmentation/face_parsing/test.py | 93 +++++ head_segmentation/face_parsing/train.py | 179 ++++++++++ head_segmentation/face_parsing/transform.py | 129 +++++++ 33 files changed, 3263 insertions(+), 1 deletion(-) create mode 100644 head_segmentation/face_parsing/.gitignore create mode 100644 head_segmentation/face_parsing/LICENSE create mode 100644 head_segmentation/face_parsing/README.md create mode 100644 head_segmentation/face_parsing/evaluate.py delete mode 160000 head_segmentation/face_parsing/face-parsing.PyTorch create mode 100644 head_segmentation/face_parsing/face_dataset.py create mode 100644 head_segmentation/face_parsing/logger.py create mode 100644 head_segmentation/face_parsing/loss.py create mode 100644 head_segmentation/face_parsing/main.py create mode 100644 head_segmentation/face_parsing/makeup.py create mode 100644 head_segmentation/face_parsing/model.py create mode 100644 head_segmentation/face_parsing/modules/__init__.py create mode 100644 head_segmentation/face_parsing/modules/bn.py create mode 100644 head_segmentation/face_parsing/modules/deeplab.py create mode 100644 head_segmentation/face_parsing/modules/dense.py create mode 100644 
head_segmentation/face_parsing/modules/functions.py create mode 100644 head_segmentation/face_parsing/modules/misc.py create mode 100644 head_segmentation/face_parsing/modules/residual.py create mode 100644 head_segmentation/face_parsing/modules/src/checks.h create mode 100644 head_segmentation/face_parsing/modules/src/inplace_abn.cpp create mode 100644 head_segmentation/face_parsing/modules/src/inplace_abn.h create mode 100644 head_segmentation/face_parsing/modules/src/inplace_abn_cpu.cpp create mode 100644 head_segmentation/face_parsing/modules/src/inplace_abn_cuda.cu create mode 100644 head_segmentation/face_parsing/modules/src/inplace_abn_cuda_half.cu create mode 100644 head_segmentation/face_parsing/modules/src/utils/checks.h create mode 100644 head_segmentation/face_parsing/modules/src/utils/common.h create mode 100644 head_segmentation/face_parsing/modules/src/utils/cuda.cuh create mode 100644 head_segmentation/face_parsing/optimizer.py create mode 100644 head_segmentation/face_parsing/prepropess_data.py create mode 100644 head_segmentation/face_parsing/resnet.py create mode 100644 head_segmentation/face_parsing/test.py create mode 100644 head_segmentation/face_parsing/train.py create mode 100644 head_segmentation/face_parsing/transform.py diff --git a/head_segmentation/face_parsing/.gitignore b/head_segmentation/face_parsing/.gitignore new file mode 100644 index 0000000..e92496a --- /dev/null +++ b/head_segmentation/face_parsing/.gitignore @@ -0,0 +1,3 @@ +*.mp4 +*.jpg +*.png diff --git a/head_segmentation/face_parsing/LICENSE b/head_segmentation/face_parsing/LICENSE new file mode 100644 index 0000000..bfae0b0 --- /dev/null +++ b/head_segmentation/face_parsing/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 zll + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/head_segmentation/face_parsing/README.md b/head_segmentation/face_parsing/README.md new file mode 100644 index 0000000..849d55e --- /dev/null +++ b/head_segmentation/face_parsing/README.md @@ -0,0 +1,68 @@ +# face-parsing.PyTorch + +


+### Contents
+- [Training](#training)
+- [Demo](#demo)
+- [References](#references)
+
+## Training
+
+1. Prepare the training data:
+
+   -- download the [CelebAMask-HQ dataset](https://github.com/switchablenorms/CelebAMask-HQ)
+
+   -- change the file paths in `prepropess_data.py`, then run
+```Shell
+python prepropess_data.py
+```
+
+2. Train the model on the CelebAMask-HQ dataset by running the training script:
+```
+ $ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 train.py
+```
+
+If you do not wish to train the model yourself, you can download [our pre-trained model](https://drive.google.com/open?id=154JgKpzCPW82qINcVieuPH3fZ2e0P812) and save it in `res/cp`.
+
+
+## Demo
+1. Evaluate the trained model using:
+```Shell
+# evaluate using GPU
+python test.py
+```
+
+## Face makeup using parsing maps
+[**face-makeup.PyTorch**](https://github.com/zllrunning/face-makeup.PyTorch)
+
+*(Demo table removed: the original showed, for hair and lip recoloring, a row of original input images and a row of color-edited results.)*
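+
+## Quick single-image inference
+
+A minimal, illustrative sketch of single-image parsing, mirroring the
+preprocessing in `evaluate.py`. The checkpoint path `res/cp/79999_iter.pth`
+and the input `face.jpg` are assumed placeholders; substitute your own.
+
+```Python
+# Minimal inference sketch; not part of the original repo. Paths are placeholders.
+import torch
+import torchvision.transforms as transforms
+from PIL import Image
+
+from model import BiSeNet
+
+net = BiSeNet(n_classes=19).cuda()
+net.load_state_dict(torch.load('res/cp/79999_iter.pth'))
+net.eval()
+
+to_tensor = transforms.Compose([
+    transforms.ToTensor(),
+    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+])
+
+with torch.no_grad():
+    image = Image.open('face.jpg').resize((512, 512), Image.BILINEAR)
+    out = net(to_tensor(image).unsqueeze(0).cuda())[0]  # main output head
+    parsing = out.squeeze(0).argmax(0).cpu().numpy()    # (512, 512) label map, values in [0, 18]
+```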
+ + +## References +- [BiSeNet](https://github.com/CoinCheung/BiSeNet) \ No newline at end of file diff --git a/head_segmentation/face_parsing/evaluate.py b/head_segmentation/face_parsing/evaluate.py new file mode 100644 index 0000000..cb0864d --- /dev/null +++ b/head_segmentation/face_parsing/evaluate.py @@ -0,0 +1,95 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +from logger import setup_logger +from model import BiSeNet +from face_dataset import FaceMask + +import torch +import torch.nn as nn +from torch.utils.data import DataLoader +import torch.nn.functional as F +import torch.distributed as dist + +import os +import os.path as osp +import logging +import time +import numpy as np +from tqdm import tqdm +import math +from PIL import Image +import torchvision.transforms as transforms +import cv2 + +def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'): + # Colors for all 20 parts + part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], + [255, 0, 85], [255, 0, 170], + [0, 255, 0], [85, 255, 0], [170, 255, 0], + [0, 255, 85], [0, 255, 170], + [0, 0, 255], [85, 0, 255], [170, 0, 255], + [0, 85, 255], [0, 170, 255], + [255, 255, 0], [255, 255, 85], [255, 255, 170], + [255, 0, 255], [255, 85, 255], [255, 170, 255], + [0, 255, 255], [85, 255, 255], [170, 255, 255]] + + im = np.array(im) + vis_im = im.copy().astype(np.uint8) + vis_parsing_anno = parsing_anno.copy().astype(np.uint8) + vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST) + vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255 + + num_of_class = np.max(vis_parsing_anno) + + for pi in range(1, num_of_class + 1): + index = np.where(vis_parsing_anno == pi) + vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi] + + vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8) + # print(vis_parsing_anno_color.shape, vis_im.shape) + vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0) + + # Save result or not + if save_im: + cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) + + # return vis_im + +def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'): + + if not os.path.exists(respth): + os.makedirs(respth) + + n_classes = 19 + net = BiSeNet(n_classes=n_classes) + net.cuda() + save_pth = osp.join('res/cp', cp) + net.load_state_dict(torch.load(save_pth)) + net.eval() + + to_tensor = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + with torch.no_grad(): + for image_path in os.listdir(dspth): + img = Image.open(osp.join(dspth, image_path)) + image = img.resize((512, 512), Image.BILINEAR) + img = to_tensor(image) + img = torch.unsqueeze(img, 0) + img = img.cuda() + out = net(img)[0] + parsing = out.squeeze(0).cpu().numpy().argmax(0) + + vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path)) + + + + + + + +if __name__ == "__main__": + setup_logger('./res') + evaluate() diff --git a/head_segmentation/face_parsing/face-parsing.PyTorch b/head_segmentation/face_parsing/face-parsing.PyTorch deleted file mode 160000 index d2e684c..0000000 --- a/head_segmentation/face_parsing/face-parsing.PyTorch +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d2e684cf1588b46145635e8fe7bcc29544e5537e diff --git a/head_segmentation/face_parsing/face_dataset.py 
b/head_segmentation/face_parsing/face_dataset.py new file mode 100644 index 0000000..a1ece7f --- /dev/null +++ b/head_segmentation/face_parsing/face_dataset.py @@ -0,0 +1,106 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +import torch +from torch.utils.data import Dataset +import torchvision.transforms as transforms + +import os.path as osp +import os +from PIL import Image +import numpy as np +import json +import cv2 + +from transform import * + + + +class FaceMask(Dataset): + def __init__(self, rootpth, cropsize=(640, 480), mode='train', *args, **kwargs): + super(FaceMask, self).__init__(*args, **kwargs) + assert mode in ('train', 'val', 'test') + self.mode = mode + self.ignore_lb = 255 + self.rootpth = rootpth + + self.imgs = os.listdir(os.path.join(self.rootpth, 'CelebA-HQ-img')) + + # pre-processing + self.to_tensor = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + self.trans_train = Compose([ + ColorJitter( + brightness=0.5, + contrast=0.5, + saturation=0.5), + HorizontalFlip(), + RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)), + RandomCrop(cropsize) + ]) + + def __getitem__(self, idx): + impth = self.imgs[idx] + img = Image.open(osp.join(self.rootpth, 'CelebA-HQ-img', impth)) + img = img.resize((512, 512), Image.BILINEAR) + label = Image.open(osp.join(self.rootpth, 'mask', impth[:-3]+'png')).convert('P') + # print(np.unique(np.array(label))) + if self.mode == 'train': + im_lb = dict(im=img, lb=label) + im_lb = self.trans_train(im_lb) + img, label = im_lb['im'], im_lb['lb'] + img = self.to_tensor(img) + label = np.array(label).astype(np.int64)[np.newaxis, :] + return img, label + + def __len__(self): + return len(self.imgs) + + +if __name__ == "__main__": + face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img' + face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno' + mask_path = '/home/zll/data/CelebAMask-HQ/mask' + counter = 0 + total = 0 + for i in range(15): + # files = os.listdir(osp.join(face_sep_mask, str(i))) + + atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r', + 'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat'] + + for j in range(i*2000, (i+1)*2000): + + mask = np.zeros((512, 512)) + + for l, att in enumerate(atts, 1): + total += 1 + file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png']) + path = osp.join(face_sep_mask, str(i), file_name) + + if os.path.exists(path): + counter += 1 + sep_mask = np.array(Image.open(path).convert('P')) + # print(np.unique(sep_mask)) + + mask[sep_mask == 225] = l + cv2.imwrite('{}/{}.png'.format(mask_path, j), mask) + print(j) + + print(counter, total) + + + + + + + + + + + + + + diff --git a/head_segmentation/face_parsing/logger.py b/head_segmentation/face_parsing/logger.py new file mode 100644 index 0000000..d3f9ddc --- /dev/null +++ b/head_segmentation/face_parsing/logger.py @@ -0,0 +1,23 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + + +import os.path as osp +import time +import sys +import logging + +import torch.distributed as dist + + +def setup_logger(logpth): + logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S')) + logfile = osp.join(logpth, logfile) + FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s' + log_level = logging.INFO + if dist.is_initialized() and not dist.get_rank()==0: + log_level = logging.ERROR + logging.basicConfig(level=log_level, format=FORMAT, filename=logfile) + logging.root.addHandler(logging.StreamHandler()) + 
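+
+# Usage sketch (illustrative; not part of the original module): call
+# setup_logger once per process before training or evaluation, as evaluate.py
+# does. When torch.distributed is initialized, non-zero ranks are capped at
+# ERROR level so multi-GPU runs do not write duplicate INFO lines.
+#
+#   from logger import setup_logger
+#   import logging
+#   setup_logger('./res')  # writes res/BiSeNet-<timestamp>.log and echoes to the console
+#   logging.info('training started')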
+ diff --git a/head_segmentation/face_parsing/loss.py b/head_segmentation/face_parsing/loss.py new file mode 100644 index 0000000..f8f65aa --- /dev/null +++ b/head_segmentation/face_parsing/loss.py @@ -0,0 +1,75 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import numpy as np + + +class OhemCELoss(nn.Module): + def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs): + super(OhemCELoss, self).__init__() + self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda() + self.n_min = n_min + self.ignore_lb = ignore_lb + self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none') + + def forward(self, logits, labels): + N, C, H, W = logits.size() + loss = self.criteria(logits, labels).view(-1) + loss, _ = torch.sort(loss, descending=True) + if loss[self.n_min] > self.thresh: + loss = loss[loss>self.thresh] + else: + loss = loss[:self.n_min] + return torch.mean(loss) + + +class SoftmaxFocalLoss(nn.Module): + def __init__(self, gamma, ignore_lb=255, *args, **kwargs): + super(SoftmaxFocalLoss, self).__init__() + self.gamma = gamma + self.nll = nn.NLLLoss(ignore_index=ignore_lb) + + def forward(self, logits, labels): + scores = F.softmax(logits, dim=1) + factor = torch.pow(1.-scores, self.gamma) + log_score = F.log_softmax(logits, dim=1) + log_score = factor * log_score + loss = self.nll(log_score, labels) + return loss + + +if __name__ == '__main__': + torch.manual_seed(15) + criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda() + criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda() + net1 = nn.Sequential( + nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), + ) + net1.cuda() + net1.train() + net2 = nn.Sequential( + nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), + ) + net2.cuda() + net2.train() + + with torch.no_grad(): + inten = torch.randn(16, 3, 20, 20).cuda() + lbs = torch.randint(0, 19, [16, 20, 20]).cuda() + lbs[1, :, :] = 255 + + logits1 = net1(inten) + logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear') + logits2 = net2(inten) + logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear') + + loss1 = criteria1(logits1, lbs) + loss2 = criteria2(logits2, lbs) + loss = loss1 + loss2 + print(loss.detach().cpu()) + loss.backward() diff --git a/head_segmentation/face_parsing/main.py b/head_segmentation/face_parsing/main.py new file mode 100644 index 0000000..6412fba --- /dev/null +++ b/head_segmentation/face_parsing/main.py @@ -0,0 +1,178 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +from model import BiSeNet + +import torch + +import os +import os.path as osp +import numpy as np +from PIL import Image +import torchvision.transforms as transforms +import cv2 + +import sieve + +@sieve.Model( + name="face-parsing-head-segmentation", + python_packages=[ + "torch", + "Pillow", + "numpy", + "opencv-python-headless", + "torchvision", + ], + system_packages=[ + "ffmpeg", + "libx264-dev", + ], + python_version="3.10", + cuda_version="11.8", + gpu=sieve.gpu.L4(), +) +class HeadSegmentationModel: + + def __setup__(self): + n_classes = 19 + net = BiSeNet(n_classes=n_classes) + net.cuda() + checkpoint_path = '79999_iter.pth' + net.load_state_dict(torch.load(checkpoint_path)) + net.eval() + + self.net = net + self.to_tensor = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + + def __predict__(self, file: sieve.File, debug_viz: bool = False) -> sieve.File: + 
file_path = file.path + + is_video = False + + import os + _, file_extension = os.path.splitext(file_path) + if file_extension in ['.jpg', '.jpeg', '.png']: + print("This is an image file.") + elif file_extension in ['.mp4', '.avi', '.mov']: + print("This is a video file.") + is_video = True + else: + raise ValueError("Unsupported file format, must be one of: jpg, jpeg, png, mp4, avi, mov") + + import cv2 + + if is_video: + # Process video + import numpy as np + import cv2 + + video = cv2.VideoCapture(file_path) + frame_width = int(video.get(3)) + frame_height = int(video.get(4)) + fps = video.get(cv2.CAP_PROP_FPS) + size = (frame_width, frame_height) + + if os.path.exists("temp.mp4"): + os.remove("temp.mp4") + out = cv2.VideoWriter('temp.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size) + + if debug_viz: + if os.path.exists("temp_viz.mp4"): + os.remove("temp_viz.mp4") + out_viz = cv2.VideoWriter('temp_viz.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size) + + while True: + ret, frame = video.read() + if not ret: + break + frame_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + vis_parsing_anno = self.get_parsing_anno(frame_image) + if debug_viz: + vis_im = vis_parsing_maps(frame_image, vis_parsing_anno) + out_viz.write(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR)) + + vis_parsing_anno_color = cv2.cvtColor(vis_parsing_anno, cv2.COLOR_GRAY2RGB) + out.write(vis_parsing_anno_color) + + import subprocess + + video.release() + out.release() + + import subprocess + command = "ffmpeg -loglevel error -y -i temp.mp4 -c:v libx264 -crf 23 segmentation_map.mp4" + subprocess.call(command, shell=True) + + if debug_viz: + out_viz.release() + command = "ffmpeg -loglevel error -y -i temp_viz.mp4 -c:v libx264 -crf 23 segmentation_map_viz.mp4" + subprocess.call(command, shell=True) + + return (sieve.File(path="segmentation_map.mp4"), sieve.File(path="segmentation_map_viz.mp4")) + + return sieve.File(path="segmentation_map.mp4") + else: + image = Image.open(file.path) + save_path = "save_path.jpg" + save_path_viz = "save_path_viz.jpg" + if os.path.exists(save_path): + os.remove(save_path) + if os.path.exists(save_path_viz): + os.remove(save_path_viz) + + vis_parsing_anno = self.get_parsing_anno(image) + cv2.imwrite(save_path, vis_parsing_anno) + + if debug_viz: + vis_im = vis_parsing_maps(image, vis_parsing_anno) + cv2.imwrite(save_path_viz, cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR)) + + return (sieve.File(path=save_path), sieve.File(path=save_path_viz)) + + return sieve.File(path=save_path) + + def get_parsing_anno(self, image): + img = self.to_tensor(image) + img = torch.unsqueeze(img, 0) + img = img.cuda() + with torch.no_grad(): + out = self.net(img)[0] + parsing = out.squeeze(0).argmax(0).cpu().numpy() + return parsing.astype(np.uint8) + + +def vis_parsing_maps(im, vis_parsing_anno): + # Colors for all 20 parts + part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], + [255, 0, 85], [255, 0, 170], + [0, 255, 0], [85, 255, 0], [170, 255, 0], + [0, 255, 85], [0, 255, 170], + [0, 0, 255], [85, 0, 255], [170, 0, 255], + [0, 85, 255], [0, 170, 255], + [255, 255, 0], [255, 255, 85], [255, 255, 170], + [255, 0, 255], [255, 85, 255], [255, 170, 255], + [0, 255, 255], [85, 255, 255], [170, 255, 255]] + + im = np.array(im) + vis_im = im.copy().astype(np.uint8) + vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255 + + num_of_class = np.max(vis_parsing_anno) + + for pi in range(1, num_of_class + 1): + index = np.where(vis_parsing_anno == pi) + 
vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi] + + vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8) + # print(vis_parsing_anno_color.shape, vis_im.shape) + vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0) + + return vis_im + + +if __name__ == "__main__": + a = HeadSegmentationModel() + a.__predict__(sieve.File(path="/home/abhinav_ayalur_gmail_com/examples/head_segmentation/face_parsing/face-parsing.PyTorch/hdtr.mp4"), debug_viz=False) diff --git a/head_segmentation/face_parsing/makeup.py b/head_segmentation/face_parsing/makeup.py new file mode 100644 index 0000000..b03f141 --- /dev/null +++ b/head_segmentation/face_parsing/makeup.py @@ -0,0 +1,130 @@ +import cv2 +import os +import numpy as np +from skimage.filters import gaussian + + +def sharpen(img): + img = img * 1.0 + gauss_out = gaussian(img, sigma=5, multichannel=True) + + alpha = 1.5 + img_out = (img - gauss_out) * alpha + img + + img_out = img_out / 255.0 + + mask_1 = img_out < 0 + mask_2 = img_out > 1 + + img_out = img_out * (1 - mask_1) + img_out = img_out * (1 - mask_2) + mask_2 + img_out = np.clip(img_out, 0, 1) + img_out = img_out * 255 + return np.array(img_out, dtype=np.uint8) + + +def hair(image, parsing, part=17, color=[230, 50, 20]): + b, g, r = color #[10, 50, 250] # [10, 250, 10] + tar_color = np.zeros_like(image) + tar_color[:, :, 0] = b + tar_color[:, :, 1] = g + tar_color[:, :, 2] = r + + image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + tar_hsv = cv2.cvtColor(tar_color, cv2.COLOR_BGR2HSV) + + if part == 12 or part == 13: + image_hsv[:, :, 0:2] = tar_hsv[:, :, 0:2] + else: + image_hsv[:, :, 0:1] = tar_hsv[:, :, 0:1] + + changed = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2BGR) + + if part == 17: + changed = sharpen(changed) + + changed[parsing != part] = image[parsing != part] + # changed = cv2.resize(changed, (512, 512)) + return changed + +# +# def lip(image, parsing, part=17, color=[230, 50, 20]): +# b, g, r = color #[10, 50, 250] # [10, 250, 10] +# tar_color = np.zeros_like(image) +# tar_color[:, :, 0] = b +# tar_color[:, :, 1] = g +# tar_color[:, :, 2] = r +# +# image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab) +# il, ia, ib = cv2.split(image_lab) +# +# tar_lab = cv2.cvtColor(tar_color, cv2.COLOR_BGR2Lab) +# tl, ta, tb = cv2.split(tar_lab) +# +# image_lab[:, :, 0] = np.clip(il - np.mean(il) + tl, 0, 100) +# image_lab[:, :, 1] = np.clip(ia - np.mean(ia) + ta, -127, 128) +# image_lab[:, :, 2] = np.clip(ib - np.mean(ib) + tb, -127, 128) +# +# +# changed = cv2.cvtColor(image_lab, cv2.COLOR_Lab2BGR) +# +# if part == 17: +# changed = sharpen(changed) +# +# changed[parsing != part] = image[parsing != part] +# # changed = cv2.resize(changed, (512, 512)) +# return changed + + +if __name__ == '__main__': + # 1 face + # 10 nose + # 11 teeth + # 12 upper lip + # 13 lower lip + # 17 hair + num = 116 + table = { + 'hair': 17, + 'upper_lip': 12, + 'lower_lip': 13 + } + image_path = '/home/zll/data/CelebAMask-HQ/test-img/{}.jpg'.format(num) + parsing_path = 'res/test_res/{}.png'.format(num) + + image = cv2.imread(image_path) + ori = image.copy() + parsing = np.array(cv2.imread(parsing_path, 0)) + parsing = cv2.resize(parsing, image.shape[0:2], interpolation=cv2.INTER_NEAREST) + + parts = [table['hair'], table['upper_lip'], table['lower_lip']] + # colors = [[20, 20, 200], [100, 100, 230], [100, 100, 230]] + colors = [[100, 200, 100]] + for part, color in zip(parts, colors): + image = hair(image, parsing, part, color) + 
cv2.imwrite('res/makeup/116_ori.png', cv2.resize(ori, (512, 512))) + cv2.imwrite('res/makeup/116_2.png', cv2.resize(image, (512, 512))) + + cv2.imshow('image', cv2.resize(ori, (512, 512))) + cv2.imshow('color', cv2.resize(image, (512, 512))) + + # cv2.imshow('image', ori) + # cv2.imshow('color', image) + + cv2.waitKey(0) + cv2.destroyAllWindows() + + + + + + + + + + + + + + + diff --git a/head_segmentation/face_parsing/model.py b/head_segmentation/face_parsing/model.py new file mode 100644 index 0000000..040f41f --- /dev/null +++ b/head_segmentation/face_parsing/model.py @@ -0,0 +1,283 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +from resnet import Resnet18 +# from modules.bn import InPlaceABNSync as BatchNorm2d + + +class ConvBNReLU(nn.Module): + def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs): + super(ConvBNReLU, self).__init__() + self.conv = nn.Conv2d(in_chan, + out_chan, + kernel_size = ks, + stride = stride, + padding = padding, + bias = False) + self.bn = nn.BatchNorm2d(out_chan) + self.init_weight() + + def forward(self, x): + x = self.conv(x) + x = F.relu(self.bn(x)) + return x + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + +class BiSeNetOutput(nn.Module): + def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs): + super(BiSeNetOutput, self).__init__() + self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1) + self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False) + self.init_weight() + + def forward(self, x): + x = self.conv(x) + x = self.conv_out(x) + return x + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class AttentionRefinementModule(nn.Module): + def __init__(self, in_chan, out_chan, *args, **kwargs): + super(AttentionRefinementModule, self).__init__() + self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1) + self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False) + self.bn_atten = nn.BatchNorm2d(out_chan) + self.sigmoid_atten = nn.Sigmoid() + self.init_weight() + + def forward(self, x): + feat = self.conv(x) + atten = F.avg_pool2d(feat, feat.size()[2:]) + atten = self.conv_atten(atten) + atten = self.bn_atten(atten) + atten = self.sigmoid_atten(atten) + out = torch.mul(feat, atten) + return out + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + +class ContextPath(nn.Module): + def __init__(self, *args, **kwargs): + super(ContextPath, self).__init__() + self.resnet = Resnet18() + self.arm16 = AttentionRefinementModule(256, 128) + self.arm32 = AttentionRefinementModule(512, 128) + self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) + self.conv_head16 = 
ConvBNReLU(128, 128, ks=3, stride=1, padding=1) + self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0) + + self.init_weight() + + def forward(self, x): + H0, W0 = x.size()[2:] + feat8, feat16, feat32 = self.resnet(x) + H8, W8 = feat8.size()[2:] + H16, W16 = feat16.size()[2:] + H32, W32 = feat32.size()[2:] + + avg = F.avg_pool2d(feat32, feat32.size()[2:]) + avg = self.conv_avg(avg) + avg_up = F.interpolate(avg, (H32, W32), mode='nearest') + + feat32_arm = self.arm32(feat32) + feat32_sum = feat32_arm + avg_up + feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest') + feat32_up = self.conv_head32(feat32_up) + + feat16_arm = self.arm16(feat16) + feat16_sum = feat16_arm + feat32_up + feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest') + feat16_up = self.conv_head16(feat16_up) + + return feat8, feat16_up, feat32_up # x8, x8, x16 + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, (nn.Linear, nn.Conv2d)): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +### This is not used, since I replace this with the resnet feature with the same size +class SpatialPath(nn.Module): + def __init__(self, *args, **kwargs): + super(SpatialPath, self).__init__() + self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3) + self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) + self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) + self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0) + self.init_weight() + + def forward(self, x): + feat = self.conv1(x) + feat = self.conv2(feat) + feat = self.conv3(feat) + feat = self.conv_out(feat) + return feat + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class FeatureFusionModule(nn.Module): + def __init__(self, in_chan, out_chan, *args, **kwargs): + super(FeatureFusionModule, self).__init__() + self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0) + self.conv1 = nn.Conv2d(out_chan, + out_chan//4, + kernel_size = 1, + stride = 1, + padding = 0, + bias = False) + self.conv2 = nn.Conv2d(out_chan//4, + out_chan, + kernel_size = 1, + stride = 1, + padding = 0, + bias = False) + self.relu = nn.ReLU(inplace=True) + self.sigmoid = nn.Sigmoid() + self.init_weight() + + def forward(self, fsp, fcp): + fcat = torch.cat([fsp, fcp], dim=1) + feat = self.convblk(fcat) + atten = F.avg_pool2d(feat, feat.size()[2:]) + atten = self.conv1(atten) + atten = self.relu(atten) + atten = self.conv2(atten) + atten = self.sigmoid(atten) + feat_atten = torch.mul(feat, atten) + feat_out = feat_atten + feat + return feat_out + + def init_weight(self): + for ly in self.children(): + if 
isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class BiSeNet(nn.Module): + def __init__(self, n_classes, *args, **kwargs): + super(BiSeNet, self).__init__() + self.cp = ContextPath() + ## here self.sp is deleted + self.ffm = FeatureFusionModule(256, 256) + self.conv_out = BiSeNetOutput(256, 256, n_classes) + self.conv_out16 = BiSeNetOutput(128, 64, n_classes) + self.conv_out32 = BiSeNetOutput(128, 64, n_classes) + self.init_weight() + + def forward(self, x): + H, W = x.size()[2:] + feat_res8, feat_cp8, feat_cp16 = self.cp(x) # here return res3b1 feature + feat_sp = feat_res8 # use res3b1 feature to replace spatial path feature + feat_fuse = self.ffm(feat_sp, feat_cp8) + + feat_out = self.conv_out(feat_fuse) + feat_out16 = self.conv_out16(feat_cp8) + feat_out32 = self.conv_out32(feat_cp16) + + feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True) + feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True) + feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True) + return feat_out, feat_out16, feat_out32 + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], [] + for name, child in self.named_children(): + child_wd_params, child_nowd_params = child.get_params() + if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput): + lr_mul_wd_params += child_wd_params + lr_mul_nowd_params += child_nowd_params + else: + wd_params += child_wd_params + nowd_params += child_nowd_params + return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params + + +if __name__ == "__main__": + net = BiSeNet(19) + net.cuda() + net.eval() + in_ten = torch.randn(16, 3, 640, 480).cuda() + out, out16, out32 = net(in_ten) + print(out.shape) + + net.get_params() diff --git a/head_segmentation/face_parsing/modules/__init__.py b/head_segmentation/face_parsing/modules/__init__.py new file mode 100644 index 0000000..8a098de --- /dev/null +++ b/head_segmentation/face_parsing/modules/__init__.py @@ -0,0 +1,5 @@ +from .bn import ABN, InPlaceABN, InPlaceABNSync +from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE +from .misc import GlobalAvgPool2d, SingleGPU +from .residual import IdentityResidualBlock +from .dense import DenseModule diff --git a/head_segmentation/face_parsing/modules/bn.py b/head_segmentation/face_parsing/modules/bn.py new file mode 100644 index 0000000..cd3928b --- /dev/null +++ b/head_segmentation/face_parsing/modules/bn.py @@ -0,0 +1,130 @@ +import torch +import torch.nn as nn +import torch.nn.functional as functional + +try: + from queue import Queue +except ImportError: + from Queue import Queue + +from .functions import * + + +class ABN(nn.Module): + """Activated Batch Normalization + + This gathers a `BatchNorm2d` and an activation function in a single module + """ + + def __init__(self, 
num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): + """Creates an Activated Batch Normalization module + + Parameters + ---------- + num_features : int + Number of feature channels in the input and output. + eps : float + Small constant to prevent numerical issues. + momentum : float + Momentum factor applied to compute running statistics as. + affine : bool + If `True` apply learned scale and shift transformation after normalization. + activation : str + Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. + slope : float + Negative slope for the `leaky_relu` activation. + """ + super(ABN, self).__init__() + self.num_features = num_features + self.affine = affine + self.eps = eps + self.momentum = momentum + self.activation = activation + self.slope = slope + if self.affine: + self.weight = nn.Parameter(torch.ones(num_features)) + self.bias = nn.Parameter(torch.zeros(num_features)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + self.register_buffer('running_mean', torch.zeros(num_features)) + self.register_buffer('running_var', torch.ones(num_features)) + self.reset_parameters() + + def reset_parameters(self): + nn.init.constant_(self.running_mean, 0) + nn.init.constant_(self.running_var, 1) + if self.affine: + nn.init.constant_(self.weight, 1) + nn.init.constant_(self.bias, 0) + + def forward(self, x): + x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, + self.training, self.momentum, self.eps) + + if self.activation == ACT_RELU: + return functional.relu(x, inplace=True) + elif self.activation == ACT_LEAKY_RELU: + return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) + elif self.activation == ACT_ELU: + return functional.elu(x, inplace=True) + else: + return x + + def __repr__(self): + rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ + ' affine={affine}, activation={activation}' + if self.activation == "leaky_relu": + rep += ', slope={slope})' + else: + rep += ')' + return rep.format(name=self.__class__.__name__, **self.__dict__) + + +class InPlaceABN(ABN): + """InPlace Activated Batch Normalization""" + + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): + """Creates an InPlace Activated Batch Normalization module + + Parameters + ---------- + num_features : int + Number of feature channels in the input and output. + eps : float + Small constant to prevent numerical issues. + momentum : float + Momentum factor applied to compute running statistics as. + affine : bool + If `True` apply learned scale and shift transformation after normalization. + activation : str + Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. + slope : float + Negative slope for the `leaky_relu` activation. + """ + super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) + + def forward(self, x): + return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, + self.training, self.momentum, self.eps, self.activation, self.slope) + + +class InPlaceABNSync(ABN): + """InPlace Activated Batch Normalization with cross-GPU synchronization + This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`. 
+ """ + + def forward(self, x): + return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, + self.training, self.momentum, self.eps, self.activation, self.slope) + + def __repr__(self): + rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ + ' affine={affine}, activation={activation}' + if self.activation == "leaky_relu": + rep += ', slope={slope})' + else: + rep += ')' + return rep.format(name=self.__class__.__name__, **self.__dict__) + + diff --git a/head_segmentation/face_parsing/modules/deeplab.py b/head_segmentation/face_parsing/modules/deeplab.py new file mode 100644 index 0000000..fd25b78 --- /dev/null +++ b/head_segmentation/face_parsing/modules/deeplab.py @@ -0,0 +1,84 @@ +import torch +import torch.nn as nn +import torch.nn.functional as functional + +from models._util import try_index +from .bn import ABN + + +class DeeplabV3(nn.Module): + def __init__(self, + in_channels, + out_channels, + hidden_channels=256, + dilations=(12, 24, 36), + norm_act=ABN, + pooling_size=None): + super(DeeplabV3, self).__init__() + self.pooling_size = pooling_size + + self.map_convs = nn.ModuleList([ + nn.Conv2d(in_channels, hidden_channels, 1, bias=False), + nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]), + nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]), + nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2]) + ]) + self.map_bn = norm_act(hidden_channels * 4) + + self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False) + self.global_pooling_bn = norm_act(hidden_channels) + + self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False) + self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False) + self.red_bn = norm_act(out_channels) + + self.reset_parameters(self.map_bn.activation, self.map_bn.slope) + + def reset_parameters(self, activation, slope): + gain = nn.init.calculate_gain(activation, slope) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight.data, gain) + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, ABN): + if hasattr(m, "weight") and m.weight is not None: + nn.init.constant_(m.weight, 1) + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + # Map convolutions + out = torch.cat([m(x) for m in self.map_convs], dim=1) + out = self.map_bn(out) + out = self.red_conv(out) + + # Global pooling + pool = self._global_pooling(x) + pool = self.global_pooling_conv(pool) + pool = self.global_pooling_bn(pool) + pool = self.pool_red_conv(pool) + if self.training or self.pooling_size is None: + pool = pool.repeat(1, 1, x.size(2), x.size(3)) + + out += pool + out = self.red_bn(out) + return out + + def _global_pooling(self, x): + if self.training or self.pooling_size is None: + pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1) + pool = pool.view(x.size(0), x.size(1), 1, 1) + else: + pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]), + min(try_index(self.pooling_size, 1), x.shape[3])) + padding = ( + (pooling_size[1] - 1) // 2, + (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1, + (pooling_size[0] - 1) // 2, + (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1 + ) + + pool = functional.avg_pool2d(x, pooling_size, 
stride=1) + pool = functional.pad(pool, pad=padding, mode="replicate") + return pool diff --git a/head_segmentation/face_parsing/modules/dense.py b/head_segmentation/face_parsing/modules/dense.py new file mode 100644 index 0000000..9638d6e --- /dev/null +++ b/head_segmentation/face_parsing/modules/dense.py @@ -0,0 +1,42 @@ +from collections import OrderedDict + +import torch +import torch.nn as nn + +from .bn import ABN + + +class DenseModule(nn.Module): + def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): + super(DenseModule, self).__init__() + self.in_channels = in_channels + self.growth = growth + self.layers = layers + + self.convs1 = nn.ModuleList() + self.convs3 = nn.ModuleList() + for i in range(self.layers): + self.convs1.append(nn.Sequential(OrderedDict([ + ("bn", norm_act(in_channels)), + ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) + ]))) + self.convs3.append(nn.Sequential(OrderedDict([ + ("bn", norm_act(self.growth * bottleneck_factor)), + ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, + dilation=dilation)) + ]))) + in_channels += self.growth + + @property + def out_channels(self): + return self.in_channels + self.growth * self.layers + + def forward(self, x): + inputs = [x] + for i in range(self.layers): + x = torch.cat(inputs, dim=1) + x = self.convs1[i](x) + x = self.convs3[i](x) + inputs += [x] + + return torch.cat(inputs, dim=1) diff --git a/head_segmentation/face_parsing/modules/functions.py b/head_segmentation/face_parsing/modules/functions.py new file mode 100644 index 0000000..093615f --- /dev/null +++ b/head_segmentation/face_parsing/modules/functions.py @@ -0,0 +1,234 @@ +from os import path +import torch +import torch.distributed as dist +import torch.autograd as autograd +import torch.cuda.comm as comm +from torch.autograd.function import once_differentiable +from torch.utils.cpp_extension import load + +_src_path = path.join(path.dirname(path.abspath(__file__)), "src") +_backend = load(name="inplace_abn", + extra_cflags=["-O3"], + sources=[path.join(_src_path, f) for f in [ + "inplace_abn.cpp", + "inplace_abn_cpu.cpp", + "inplace_abn_cuda.cu", + "inplace_abn_cuda_half.cu" + ]], + extra_cuda_cflags=["--expt-extended-lambda"]) + +# Activation names +ACT_RELU = "relu" +ACT_LEAKY_RELU = "leaky_relu" +ACT_ELU = "elu" +ACT_NONE = "none" + + +def _check(fn, *args, **kwargs): + success = fn(*args, **kwargs) + if not success: + raise RuntimeError("CUDA Error encountered in {}".format(fn)) + + +def _broadcast_shape(x): + out_size = [] + for i, s in enumerate(x.size()): + if i != 1: + out_size.append(1) + else: + out_size.append(s) + return out_size + + +def _reduce(x): + if len(x.size()) == 2: + return x.sum(dim=0) + else: + n, c = x.size()[0:2] + return x.contiguous().view((n, c, -1)).sum(2).sum(0) + + +def _count_samples(x): + count = 1 + for i, s in enumerate(x.size()): + if i != 1: + count *= s + return count + + +def _act_forward(ctx, x): + if ctx.activation == ACT_LEAKY_RELU: + _backend.leaky_relu_forward(x, ctx.slope) + elif ctx.activation == ACT_ELU: + _backend.elu_forward(x) + elif ctx.activation == ACT_NONE: + pass + + +def _act_backward(ctx, x, dx): + if ctx.activation == ACT_LEAKY_RELU: + _backend.leaky_relu_backward(x, dx, ctx.slope) + elif ctx.activation == ACT_ELU: + _backend.elu_backward(x, dx) + elif ctx.activation == ACT_NONE: + pass + + +class InPlaceABN(autograd.Function): + @staticmethod + def forward(ctx, x, weight, bias, 
running_mean, running_var, + training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): + # Save context + ctx.training = training + ctx.momentum = momentum + ctx.eps = eps + ctx.activation = activation + ctx.slope = slope + ctx.affine = weight is not None and bias is not None + + # Prepare inputs + count = _count_samples(x) + x = x.contiguous() + weight = weight.contiguous() if ctx.affine else x.new_empty(0) + bias = bias.contiguous() if ctx.affine else x.new_empty(0) + + if ctx.training: + mean, var = _backend.mean_var(x) + + # Update running stats + running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) + running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) + + # Mark in-place modified tensors + ctx.mark_dirty(x, running_mean, running_var) + else: + mean, var = running_mean.contiguous(), running_var.contiguous() + ctx.mark_dirty(x) + + # BN forward + activation + _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) + _act_forward(ctx, x) + + # Output + ctx.var = var + ctx.save_for_backward(x, var, weight, bias) + return x + + @staticmethod + @once_differentiable + def backward(ctx, dz): + z, var, weight, bias = ctx.saved_tensors + dz = dz.contiguous() + + # Undo activation + _act_backward(ctx, z, dz) + + if ctx.training: + edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) + else: + # TODO: implement simplified CUDA backward for inference mode + edz = dz.new_zeros(dz.size(1)) + eydz = dz.new_zeros(dz.size(1)) + + dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) + dweight = eydz * weight.sign() if ctx.affine else None + dbias = edz if ctx.affine else None + + return dx, dweight, dbias, None, None, None, None, None, None, None + +class InPlaceABNSync(autograd.Function): + @classmethod + def forward(cls, ctx, x, weight, bias, running_mean, running_var, + training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True): + # Save context + ctx.training = training + ctx.momentum = momentum + ctx.eps = eps + ctx.activation = activation + ctx.slope = slope + ctx.affine = weight is not None and bias is not None + + # Prepare inputs + ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1 + + #count = _count_samples(x) + batch_size = x.new_tensor([x.shape[0]],dtype=torch.long) + + x = x.contiguous() + weight = weight.contiguous() if ctx.affine else x.new_empty(0) + bias = bias.contiguous() if ctx.affine else x.new_empty(0) + + if ctx.training: + mean, var = _backend.mean_var(x) + if ctx.world_size>1: + # get global batch size + if equal_batches: + batch_size *= ctx.world_size + else: + dist.all_reduce(batch_size, dist.ReduceOp.SUM) + + ctx.factor = x.shape[0]/float(batch_size.item()) + + mean_all = mean.clone() * ctx.factor + dist.all_reduce(mean_all, dist.ReduceOp.SUM) + + var_all = (var + (mean - mean_all) ** 2) * ctx.factor + dist.all_reduce(var_all, dist.ReduceOp.SUM) + + mean = mean_all + var = var_all + + # Update running stats + running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) + count = batch_size.item() * x.view(x.shape[0],x.shape[1],-1).shape[-1] + running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1))) + + # Mark in-place modified tensors + ctx.mark_dirty(x, running_mean, running_var) + else: + mean, var = running_mean.contiguous(), running_var.contiguous() + ctx.mark_dirty(x) + + # BN forward + activation + _backend.forward(x, mean, var, weight, bias, 
ctx.affine, ctx.eps) + _act_forward(ctx, x) + + # Output + ctx.var = var + ctx.save_for_backward(x, var, weight, bias) + return x + + @staticmethod + @once_differentiable + def backward(ctx, dz): + z, var, weight, bias = ctx.saved_tensors + dz = dz.contiguous() + + # Undo activation + _act_backward(ctx, z, dz) + + if ctx.training: + edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) + edz_local = edz.clone() + eydz_local = eydz.clone() + + if ctx.world_size>1: + edz *= ctx.factor + dist.all_reduce(edz, dist.ReduceOp.SUM) + + eydz *= ctx.factor + dist.all_reduce(eydz, dist.ReduceOp.SUM) + else: + edz_local = edz = dz.new_zeros(dz.size(1)) + eydz_local = eydz = dz.new_zeros(dz.size(1)) + + dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) + dweight = eydz_local * weight.sign() if ctx.affine else None + dbias = edz_local if ctx.affine else None + + return dx, dweight, dbias, None, None, None, None, None, None, None + +inplace_abn = InPlaceABN.apply +inplace_abn_sync = InPlaceABNSync.apply + +__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] diff --git a/head_segmentation/face_parsing/modules/misc.py b/head_segmentation/face_parsing/modules/misc.py new file mode 100644 index 0000000..3c50b69 --- /dev/null +++ b/head_segmentation/face_parsing/modules/misc.py @@ -0,0 +1,21 @@ +import torch.nn as nn +import torch +import torch.distributed as dist + +class GlobalAvgPool2d(nn.Module): + def __init__(self): + """Global average pooling over the input's spatial dimensions""" + super(GlobalAvgPool2d, self).__init__() + + def forward(self, inputs): + in_size = inputs.size() + return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) + +class SingleGPU(nn.Module): + def __init__(self, module): + super(SingleGPU, self).__init__() + self.module=module + + def forward(self, input): + return self.module(input.cuda(non_blocking=True)) + diff --git a/head_segmentation/face_parsing/modules/residual.py b/head_segmentation/face_parsing/modules/residual.py new file mode 100644 index 0000000..b7d51ad --- /dev/null +++ b/head_segmentation/face_parsing/modules/residual.py @@ -0,0 +1,88 @@ +from collections import OrderedDict + +import torch.nn as nn + +from .bn import ABN + + +class IdentityResidualBlock(nn.Module): + def __init__(self, + in_channels, + channels, + stride=1, + dilation=1, + groups=1, + norm_act=ABN, + dropout=None): + """Configurable identity-mapping residual block + + Parameters + ---------- + in_channels : int + Number of input channels. + channels : list of int + Number of channels in the internal feature maps. Can either have two or three elements: if three construct + a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then + `3 x 3` then `1 x 1` convolutions. + stride : int + Stride of the first `3 x 3` convolution + dilation : int + Dilation to apply to the `3 x 3` convolutions. + groups : int + Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with + bottleneck blocks. + norm_act : callable + Function to create normalization / activation Module. + dropout: callable + Function to create Dropout Module. 
+ """ + super(IdentityResidualBlock, self).__init__() + + # Check parameters for inconsistencies + if len(channels) != 2 and len(channels) != 3: + raise ValueError("channels must contain either two or three values") + if len(channels) == 2 and groups != 1: + raise ValueError("groups > 1 are only valid if len(channels) == 3") + + is_bottleneck = len(channels) == 3 + need_proj_conv = stride != 1 or in_channels != channels[-1] + + self.bn1 = norm_act(in_channels) + if not is_bottleneck: + layers = [ + ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, + dilation=dilation)), + ("bn2", norm_act(channels[0])), + ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, + dilation=dilation)) + ] + if dropout is not None: + layers = layers[0:2] + [("dropout", dropout())] + layers[2:] + else: + layers = [ + ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), + ("bn2", norm_act(channels[0])), + ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, + groups=groups, dilation=dilation)), + ("bn3", norm_act(channels[1])), + ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) + ] + if dropout is not None: + layers = layers[0:4] + [("dropout", dropout())] + layers[4:] + self.convs = nn.Sequential(OrderedDict(layers)) + + if need_proj_conv: + self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) + + def forward(self, x): + if hasattr(self, "proj_conv"): + bn1 = self.bn1(x) + shortcut = self.proj_conv(bn1) + else: + shortcut = x.clone() + bn1 = self.bn1(x) + + out = self.convs(bn1) + out.add_(shortcut) + + return out diff --git a/head_segmentation/face_parsing/modules/src/checks.h b/head_segmentation/face_parsing/modules/src/checks.h new file mode 100644 index 0000000..e761a6f --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/checks.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT +#ifndef AT_CHECK +#define AT_CHECK AT_ASSERT +#endif + +#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") +#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") + +#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) +#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) \ No newline at end of file diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn.cpp b/head_segmentation/face_parsing/modules/src/inplace_abn.cpp new file mode 100644 index 0000000..0a6b112 --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/inplace_abn.cpp @@ -0,0 +1,95 @@ +#include + +#include + +#include "inplace_abn.h" + +std::vector mean_var(at::Tensor x) { + if (x.is_cuda()) { + if (x.type().scalarType() == at::ScalarType::Half) { + return mean_var_cuda_h(x); + } else { + return mean_var_cuda(x); + } + } else { + return mean_var_cpu(x); + } +} + +at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + if (x.is_cuda()) { + if (x.type().scalarType() == at::ScalarType::Half) { + return forward_cuda_h(x, mean, var, weight, bias, affine, eps); + } else { + return forward_cuda(x, mean, var, weight, bias, affine, eps); + } + } else { + return forward_cpu(x, mean, var, weight, bias, affine, 
eps); + } +} + +std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + if (z.is_cuda()) { + if (z.type().scalarType() == at::ScalarType::Half) { + return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps); + } else { + return edz_eydz_cuda(z, dz, weight, bias, affine, eps); + } + } else { + return edz_eydz_cpu(z, dz, weight, bias, affine, eps); + } +} + +at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + if (z.is_cuda()) { + if (z.type().scalarType() == at::ScalarType::Half) { + return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); + } else { + return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); + } + } else { + return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); + } +} + +void leaky_relu_forward(at::Tensor z, float slope) { + at::leaky_relu_(z, slope); +} + +void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { + if (z.is_cuda()) { + if (z.type().scalarType() == at::ScalarType::Half) { + return leaky_relu_backward_cuda_h(z, dz, slope); + } else { + return leaky_relu_backward_cuda(z, dz, slope); + } + } else { + return leaky_relu_backward_cpu(z, dz, slope); + } +} + +void elu_forward(at::Tensor z) { + at::elu_(z); +} + +void elu_backward(at::Tensor z, at::Tensor dz) { + if (z.is_cuda()) { + return elu_backward_cuda(z, dz); + } else { + return elu_backward_cpu(z, dz); + } +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("mean_var", &mean_var, "Mean and variance computation"); + m.def("forward", &forward, "In-place forward computation"); + m.def("edz_eydz", &edz_eydz, "First part of backward computation"); + m.def("backward", &backward, "Second part of backward computation"); + m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); + m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); + m.def("elu_forward", &elu_forward, "Elu forward computation"); + m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); +} diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn.h b/head_segmentation/face_parsing/modules/src/inplace_abn.h new file mode 100644 index 0000000..17afd11 --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/inplace_abn.h @@ -0,0 +1,88 @@ +#pragma once + +#include + +#include + +std::vector mean_var_cpu(at::Tensor x); +std::vector mean_var_cuda(at::Tensor x); +std::vector mean_var_cuda_h(at::Tensor x); + +at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps); + +std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps); + +at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, 
float eps); +at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps); +at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps); + +void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); +void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); +void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope); + +void elu_backward_cpu(at::Tensor z, at::Tensor dz); +void elu_backward_cuda(at::Tensor z, at::Tensor dz); + +static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { + num = x.size(0); + chn = x.size(1); + sp = 1; + for (int64_t i = 2; i < x.ndimension(); ++i) + sp *= x.size(i); +} + +/* + * Specialized CUDA reduction functions for BN + */ +#ifdef __CUDACC__ + +#include "utils/cuda.cuh" + +template +__device__ T reduce(Op op, int plane, int N, int S) { + T sum = (T)0; + for (int batch = 0; batch < N; ++batch) { + for (int x = threadIdx.x; x < S; x += blockDim.x) { + sum += op(batch, plane, x); + } + } + + // sum over NumThreads within a warp + sum = warpSum(sum); + + // 'transpose', and reduce within warp again + __shared__ T shared[32]; + __syncthreads(); + if (threadIdx.x % WARP_SIZE == 0) { + shared[threadIdx.x / WARP_SIZE] = sum; + } + if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { + // zero out the other entries in shared + shared[threadIdx.x] = (T)0; + } + __syncthreads(); + if (threadIdx.x / WARP_SIZE == 0) { + sum = warpSum(shared[threadIdx.x]); + if (threadIdx.x == 0) { + shared[0] = sum; + } + } + __syncthreads(); + + // Everyone picks it up, should be broadcast into the whole gradInput + return shared[0]; +} +#endif diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn_cpu.cpp b/head_segmentation/face_parsing/modules/src/inplace_abn_cpu.cpp new file mode 100644 index 0000000..ffc6d38 --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/inplace_abn_cpu.cpp @@ -0,0 +1,119 @@ +#include + +#include + +#include "utils/checks.h" +#include "inplace_abn.h" + +at::Tensor reduce_sum(at::Tensor x) { + if (x.ndimension() == 2) { + return x.sum(0); + } else { + auto x_view = x.view({x.size(0), x.size(1), -1}); + return x_view.sum(-1).sum(0); + } +} + +at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { + if (x.ndimension() == 2) { + return v; + } else { + std::vector broadcast_size = {1, -1}; + for (int64_t i = 2; i < x.ndimension(); ++i) + broadcast_size.push_back(1); + + return v.view(broadcast_size); + } +} + +int64_t count(at::Tensor x) { + int64_t count = x.size(0); + for (int64_t i = 2; i < x.ndimension(); ++i) + count *= x.size(i); + + return count; +} + +at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { + if (affine) { + return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); + } else { + return z; + } +} + +std::vector mean_var_cpu(at::Tensor x) { + auto num = count(x); + auto mean = reduce_sum(x) / num; + auto diff = x - broadcast_to(mean, x); + auto var = reduce_sum(diff.pow(2)) / num; + + return {mean, var}; +} + +at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + auto gamma = affine ? 
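+  // abs(weight) + eps keeps the effective scale strictly positive, so
+  // invert_affine() can divide by it when reconstructing y in the backward pass.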
at::abs(weight) + eps : at::ones_like(var); + auto mul = at::rsqrt(var + eps) * gamma; + + x.sub_(broadcast_to(mean, x)); + x.mul_(broadcast_to(mul, x)); + if (affine) x.add_(broadcast_to(bias, x)); + + return x; +} + +std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + auto edz = reduce_sum(dz); + auto y = invert_affine(z, weight, bias, affine, eps); + auto eydz = reduce_sum(y * dz); + + return {edz, eydz}; +} + +at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + auto y = invert_affine(z, weight, bias, affine, eps); + auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); + + auto num = count(z); + auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); + return dx; +} + +void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { + CHECK_CPU_INPUT(z); + CHECK_CPU_INPUT(dz); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { + int64_t count = z.numel(); + auto *_z = z.data(); + auto *_dz = dz.data(); + + for (int64_t i = 0; i < count; ++i) { + if (_z[i] < 0) { + _z[i] *= 1 / slope; + _dz[i] *= slope; + } + } + })); +} + +void elu_backward_cpu(at::Tensor z, at::Tensor dz) { + CHECK_CPU_INPUT(z); + CHECK_CPU_INPUT(dz); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { + int64_t count = z.numel(); + auto *_z = z.data(); + auto *_dz = dz.data(); + + for (int64_t i = 0; i < count; ++i) { + if (_z[i] < 0) { + _z[i] = log1p(_z[i]); + _dz[i] *= (_z[i] + 1.f); + } + } + })); +} diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn_cuda.cu b/head_segmentation/face_parsing/modules/src/inplace_abn_cuda.cu new file mode 100644 index 0000000..b157b06 --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/inplace_abn_cuda.cu @@ -0,0 +1,333 @@ +#include + +#include +#include + +#include + +#include "utils/checks.h" +#include "utils/cuda.cuh" +#include "inplace_abn.h" + +#include + +// Operations for reduce +template +struct SumOp { + __device__ SumOp(const T *t, int c, int s) + : tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ T operator()(int batch, int plane, int n) { + return tensor[(batch * chn + plane) * sp + n]; + } + const T *tensor; + const int chn; + const int sp; +}; + +template +struct VarOp { + __device__ VarOp(T m, const T *t, int c, int s) + : mean(m), tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ T operator()(int batch, int plane, int n) { + T val = tensor[(batch * chn + plane) * sp + n]; + return (val - mean) * (val - mean); + } + const T mean; + const T *tensor; + const int chn; + const int sp; +}; + +template +struct GradOp { + __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s) + : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} + __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { + T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight; + T _dz = dz[(batch * chn + plane) * sp + n]; + return Pair(_dz, _y * _dz); + } + const T weight; + const T bias; + const T *z; + const T *dz; + const int chn; + const int sp; +}; + +/*********** + * mean_var + ***********/ + +template +__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) { + int plane = blockIdx.x; + T norm = T(1) / T(num * sp); + + T _mean = reduce>(SumOp(x, chn, sp), 
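+      // reduce() (see inplace_abn.h) sums op(batch, plane, x) over the batch
+      // and spatial dimensions of one channel using warp-level reductions.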
plane, num, sp) * norm; + __syncthreads(); + T _var = reduce>(VarOp(_mean, x, chn, sp), plane, num, sp) * norm; + + if (threadIdx.x == 0) { + mean[plane] = _mean; + var[plane] = _var; + } +} + +std::vector mean_var_cuda(at::Tensor x) { + CHECK_CUDA_INPUT(x); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Prepare output tensors + auto mean = at::empty({chn}, x.options()); + auto var = at::empty({chn}, x.options()); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] { + mean_var_kernel<<>>( + x.data(), + mean.data(), + var.data(), + num, chn, sp); + })); + + return {mean, var}; +} + +/********** + * forward + **********/ + +template +__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias, + bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _mean = mean[plane]; + T _var = var[plane]; + T _weight = affine ? abs(weight[plane]) + eps : T(1); + T _bias = affine ? bias[plane] : T(0); + + T mul = rsqrt(_var + eps) * _weight; + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + T _x = x[(batch * chn + plane) * sp + n]; + T _y = (_x - _mean) * mul + _bias; + + x[(batch * chn + plane) * sp + n] = _y; + } + } +} + +at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_CUDA_INPUT(x); + CHECK_CUDA_INPUT(mean); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] { + forward_kernel<<>>( + x.data(), + mean.data(), + var.data(), + weight.data(), + bias.data(), + affine, eps, num, chn, sp); + })); + + return x; +} + +/*********** + * edz_eydz + ***********/ + +template +__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias, + T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _weight = affine ? abs(weight[plane]) + eps : 1.f; + T _bias = affine ? 
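+  // GradOp recovers y = (z - bias) / weight from the in-place output and
+  // accumulates the per-channel pair (dz, y * dz).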
bias[plane] : 0.f; + + Pair res = reduce, GradOp>(GradOp(_weight, _bias, z, dz, chn, sp), plane, num, sp); + __syncthreads(); + + if (threadIdx.x == 0) { + edz[plane] = res.v1; + eydz[plane] = res.v2; + } +} + +std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto edz = at::empty({chn}, z.options()); + auto eydz = at::empty({chn}, z.options()); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] { + edz_eydz_kernel<<>>( + z.data(), + dz.data(), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + affine, eps, num, chn, sp); + })); + + return {edz, eydz}; +} + +/*********** + * backward + ***********/ + +template +__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz, + const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _weight = affine ? abs(weight[plane]) + eps : 1.f; + T _bias = affine ? bias[plane] : 0.f; + T _var = var[plane]; + T _edz = edz[plane]; + T _eydz = eydz[plane]; + + T _mul = _weight * rsqrt(_var + eps); + T count = T(num * sp); + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + T _dz = dz[(batch * chn + plane) * sp + n]; + T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight; + + dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul; + } + } +} + +at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + CHECK_CUDA_INPUT(edz); + CHECK_CUDA_INPUT(eydz); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto dx = at::zeros_like(z); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] { + backward_kernel<<>>( + z.data(), + dz.data(), + var.data(), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + dx.data(), + affine, eps, num, chn, sp); + })); + + return dx; +} + +/************** + * activations + **************/ + +template +inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { + // Create thrust pointers + thrust::device_ptr th_z = thrust::device_pointer_cast(z); + thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); + + auto stream = at::cuda::getCurrentCUDAStream(); + thrust::transform_if(thrust::cuda::par.on(stream), + th_dz, th_dz + count, th_z, th_dz, + [slope] __device__ (const T& dz) { return dz * slope; }, + [] __device__ (const T& z) { return z < 0; }); + thrust::transform_if(thrust::cuda::par.on(stream), + th_z, th_z + count, th_z, + [slope] __device__ (const T& z) { return z / slope; }, + [] __device__ (const T& z) { return z < 0; }); +} + +void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + + int64_t count = z.numel(); + + AT_DISPATCH_FLOATING_TYPES(z.type(), 
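+  // Besides scaling dz, this inverts the activation in place: for z < 0 the
+  // stored output is divided by slope to recover the input and dz is multiplied
+  // by slope (see leaky_relu_backward_impl above).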
"leaky_relu_backward_cuda", ([&] { + leaky_relu_backward_impl(z.data(), dz.data(), slope, count); + })); +} + +template +inline void elu_backward_impl(T *z, T *dz, int64_t count) { + // Create thrust pointers + thrust::device_ptr th_z = thrust::device_pointer_cast(z); + thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); + + auto stream = at::cuda::getCurrentCUDAStream(); + thrust::transform_if(thrust::cuda::par.on(stream), + th_dz, th_dz + count, th_z, th_z, th_dz, + [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); }, + [] __device__ (const T& z) { return z < 0; }); + thrust::transform_if(thrust::cuda::par.on(stream), + th_z, th_z + count, th_z, + [] __device__ (const T& z) { return log1p(z); }, + [] __device__ (const T& z) { return z < 0; }); +} + +void elu_backward_cuda(at::Tensor z, at::Tensor dz) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + + int64_t count = z.numel(); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] { + elu_backward_impl(z.data(), dz.data(), count); + })); +} diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn_cuda_half.cu b/head_segmentation/face_parsing/modules/src/inplace_abn_cuda_half.cu new file mode 100644 index 0000000..bb63e73 --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/inplace_abn_cuda_half.cu @@ -0,0 +1,275 @@ +#include + +#include + +#include + +#include "utils/checks.h" +#include "utils/cuda.cuh" +#include "inplace_abn.h" + +#include + +// Operations for reduce +struct SumOpH { + __device__ SumOpH(const half *t, int c, int s) + : tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ float operator()(int batch, int plane, int n) { + return __half2float(tensor[(batch * chn + plane) * sp + n]); + } + const half *tensor; + const int chn; + const int sp; +}; + +struct VarOpH { + __device__ VarOpH(float m, const half *t, int c, int s) + : mean(m), tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ float operator()(int batch, int plane, int n) { + const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]); + return (t - mean) * (t - mean); + } + const float mean; + const half *tensor; + const int chn; + const int sp; +}; + +struct GradOpH { + __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s) + : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} + __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { + float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight; + float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); + return Pair(_dz, _y * _dz); + } + const float weight; + const float bias; + const half *z; + const half *dz; + const int chn; + const int sp; +}; + +/*********** + * mean_var + ***********/ + +__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) { + int plane = blockIdx.x; + float norm = 1.f / static_cast(num * sp); + + float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm; + __syncthreads(); + float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm; + + if (threadIdx.x == 0) { + mean[plane] = _mean; + var[plane] = _var; + } +} + +std::vector mean_var_cuda_h(at::Tensor x) { + CHECK_CUDA_INPUT(x); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Prepare output tensors + auto mean = at::empty({chn},x.options().dtype(at::kFloat)); + auto var = at::empty({chn},x.options().dtype(at::kFloat)); + + // Run kernel + dim3 blocks(chn); + dim3 
threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + mean_var_kernel_h<<>>( + reinterpret_cast(x.data()), + mean.data(), + var.data(), + num, chn, sp); + + return {mean, var}; +} + +/********** + * forward + **********/ + +__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias, + bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + const float _mean = mean[plane]; + const float _var = var[plane]; + const float _weight = affine ? abs(weight[plane]) + eps : 1.f; + const float _bias = affine ? bias[plane] : 0.f; + + const float mul = rsqrt(_var + eps) * _weight; + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + half *x_ptr = x + (batch * chn + plane) * sp + n; + float _x = __half2float(*x_ptr); + float _y = (_x - _mean) * mul + _bias; + + *x_ptr = __float2half(_y); + } + } +} + +at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_CUDA_INPUT(x); + CHECK_CUDA_INPUT(mean); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + forward_kernel_h<<>>( + reinterpret_cast(x.data()), + mean.data(), + var.data(), + weight.data(), + bias.data(), + affine, eps, num, chn, sp); + + return x; +} + +__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias, + float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + float _weight = affine ? abs(weight[plane]) + eps : 1.f; + float _bias = affine ? bias[plane] : 0.f; + + Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp); + __syncthreads(); + + if (threadIdx.x == 0) { + edz[plane] = res.v1; + eydz[plane] = res.v2; + } +} + +std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto edz = at::empty({chn},z.options().dtype(at::kFloat)); + auto eydz = at::empty({chn},z.options().dtype(at::kFloat)); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + edz_eydz_kernel_h<<>>( + reinterpret_cast(z.data()), + reinterpret_cast(dz.data()), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + affine, eps, num, chn, sp); + + return {edz, eydz}; +} + +__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz, + const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + float _weight = affine ? abs(weight[plane]) + eps : 1.f; + float _bias = affine ? 
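+  // The half-precision kernels mirror the float path but keep all arithmetic
+  // in float, converting at the boundaries with __half2float/__float2half.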
bias[plane] : 0.f; + float _var = var[plane]; + float _edz = edz[plane]; + float _eydz = eydz[plane]; + + float _mul = _weight * rsqrt(_var + eps); + float count = float(num * sp); + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); + float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight; + + dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul); + } + } +} + +at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + CHECK_CUDA_INPUT(edz); + CHECK_CUDA_INPUT(eydz); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto dx = at::zeros_like(z); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + backward_kernel_h<<>>( + reinterpret_cast(z.data()), + reinterpret_cast(dz.data()), + var.data(), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + reinterpret_cast(dx.data()), + affine, eps, num, chn, sp); + + return dx; +} + +__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){ + float _z = __half2float(z[i]); + if (_z < 0) { + dz[i] = __float2half(__half2float(dz[i]) * slope); + z[i] = __float2half(_z / slope); + } + } +} + +void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + + int64_t count = z.numel(); + dim3 threads(getNumThreads(count)); + dim3 blocks = (count + threads.x - 1) / threads.x; + auto stream = at::cuda::getCurrentCUDAStream(); + leaky_relu_backward_impl_h<<>>( + reinterpret_cast(z.data()), + reinterpret_cast(dz.data()), + slope, count); +} + diff --git a/head_segmentation/face_parsing/modules/src/utils/checks.h b/head_segmentation/face_parsing/modules/src/utils/checks.h new file mode 100644 index 0000000..e761a6f --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/utils/checks.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT +#ifndef AT_CHECK +#define AT_CHECK AT_ASSERT +#endif + +#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") +#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") + +#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) +#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) \ No newline at end of file diff --git a/head_segmentation/face_parsing/modules/src/utils/common.h b/head_segmentation/face_parsing/modules/src/utils/common.h new file mode 100644 index 0000000..e8403ee --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/utils/common.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +/* + * Functions to share code between CPU and GPU + */ + +#ifdef __CUDACC__ +// CUDA versions + +#define HOST_DEVICE __host__ __device__ +#define INLINE_HOST_DEVICE __host__ __device__ inline +#define FLOOR(x) floor(x) + +#if __CUDA_ARCH__ >= 600 +// Recent compute capabilities 
have block-level atomicAdd for all data types, so we use that +#define ACCUM(x,y) atomicAdd_block(&(x),(y)) +#else +// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float +// and use the known atomicCAS-based implementation for double +template +__device__ inline data_t atomic_add(data_t *address, data_t val) { + return atomicAdd(address, val); +} + +template<> +__device__ inline double atomic_add(double *address, double val) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} + +#define ACCUM(x,y) atomic_add(&(x),(y)) +#endif // #if __CUDA_ARCH__ >= 600 + +#else +// CPU versions + +#define HOST_DEVICE +#define INLINE_HOST_DEVICE inline +#define FLOOR(x) std::floor(x) +#define ACCUM(x,y) (x) += (y) + +#endif // #ifdef __CUDACC__ \ No newline at end of file diff --git a/head_segmentation/face_parsing/modules/src/utils/cuda.cuh b/head_segmentation/face_parsing/modules/src/utils/cuda.cuh new file mode 100644 index 0000000..60c0023 --- /dev/null +++ b/head_segmentation/face_parsing/modules/src/utils/cuda.cuh @@ -0,0 +1,71 @@ +#pragma once + +/* + * General settings and functions + */ +const int WARP_SIZE = 32; +const int MAX_BLOCK_SIZE = 1024; + +static int getNumThreads(int nElem) { + int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; + for (int i = 0; i < 6; ++i) { + if (nElem <= threadSizes[i]) { + return threadSizes[i]; + } + } + return MAX_BLOCK_SIZE; +} + +/* + * Reduction utilities + */ +template +__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, + unsigned int mask = 0xffffffff) { +#if CUDART_VERSION >= 9000 + return __shfl_xor_sync(mask, value, laneMask, width); +#else + return __shfl_xor(value, laneMask, width); +#endif +} + +__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } + +template +struct Pair { + T v1, v2; + __device__ Pair() {} + __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} + __device__ Pair(T v) : v1(v), v2(v) {} + __device__ Pair(int v) : v1(v), v2(v) {} + __device__ Pair &operator+=(const Pair &a) { + v1 += a.v1; + v2 += a.v2; + return *this; + } +}; + +template +static __device__ __forceinline__ T warpSum(T val) { +#if __CUDA_ARCH__ >= 300 + for (int i = 0; i < getMSB(WARP_SIZE); ++i) { + val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); + } +#else + __shared__ T values[MAX_BLOCK_SIZE]; + values[threadIdx.x] = val; + __threadfence_block(); + const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; + for (int i = 1; i < WARP_SIZE; i++) { + val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; + } +#endif + return val; +} + +template +static __device__ __forceinline__ Pair warpSum(Pair value) { + value.v1 = warpSum(value.v1); + value.v2 = warpSum(value.v2); + return value; +} \ No newline at end of file diff --git a/head_segmentation/face_parsing/optimizer.py b/head_segmentation/face_parsing/optimizer.py new file mode 100644 index 0000000..0c99e06 --- /dev/null +++ b/head_segmentation/face_parsing/optimizer.py @@ -0,0 +1,69 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + + +import torch +import logging + +logger = logging.getLogger() + +class Optimizer(object): + def __init__(self, + model, + lr0, + momentum, + wd, + warmup_steps, + warmup_start_lr, + 
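+                 # Schedule: geometric warmup from warmup_start_lr to lr0 over
+                 # warmup_steps, then polynomial decay with exponent `power`
+                 # until max_iter (see get_lr below); param groups tagged
+                 # lr_mul run at 10x the base LR.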
max_iter, + power, + *args, **kwargs): + self.warmup_steps = warmup_steps + self.warmup_start_lr = warmup_start_lr + self.lr0 = lr0 + self.lr = self.lr0 + self.max_iter = float(max_iter) + self.power = power + self.it = 0 + wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params() + param_list = [ + {'params': wd_params}, + {'params': nowd_params, 'weight_decay': 0}, + {'params': lr_mul_wd_params, 'lr_mul': True}, + {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr_mul': True}] + self.optim = torch.optim.SGD( + param_list, + lr = lr0, + momentum = momentum, + weight_decay = wd) + self.warmup_factor = (self.lr0/self.warmup_start_lr)**(1./self.warmup_steps) + + + def get_lr(self): + if self.it <= self.warmup_steps: + lr = self.warmup_start_lr*(self.warmup_factor**self.it) + else: + factor = (1-(self.it-self.warmup_steps)/(self.max_iter-self.warmup_steps))**self.power + lr = self.lr0 * factor + return lr + + + def step(self): + self.lr = self.get_lr() + for pg in self.optim.param_groups: + if pg.get('lr_mul', False): + pg['lr'] = self.lr * 10 + else: + pg['lr'] = self.lr + if self.optim.defaults.get('lr_mul', False): + self.optim.defaults['lr'] = self.lr * 10 + else: + self.optim.defaults['lr'] = self.lr + self.it += 1 + self.optim.step() + if self.it == self.warmup_steps+2: + logger.info('==> warmup done, start to implement poly lr strategy') + + def zero_grad(self): + self.optim.zero_grad() + diff --git a/head_segmentation/face_parsing/prepropess_data.py b/head_segmentation/face_parsing/prepropess_data.py new file mode 100644 index 0000000..ee7ed56 --- /dev/null +++ b/head_segmentation/face_parsing/prepropess_data.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +import os.path as osp +import os +import cv2 +from transform import * +from PIL import Image + +face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img' +face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno' +mask_path = '/home/zll/data/CelebAMask-HQ/mask' +counter = 0 +total = 0 +for i in range(15): + + atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r', + 'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat'] + + for j in range(i * 2000, (i + 1) * 2000): + + mask = np.zeros((512, 512)) + + for l, att in enumerate(atts, 1): + total += 1 + file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png']) + path = osp.join(face_sep_mask, str(i), file_name) + + if os.path.exists(path): + counter += 1 + sep_mask = np.array(Image.open(path).convert('P')) + # print(np.unique(sep_mask)) + + mask[sep_mask == 225] = l + cv2.imwrite('{}/{}.png'.format(mask_path, j), mask) + print(j) + +print(counter, total) \ No newline at end of file diff --git a/head_segmentation/face_parsing/resnet.py b/head_segmentation/face_parsing/resnet.py new file mode 100644 index 0000000..aa2bf95 --- /dev/null +++ b/head_segmentation/face_parsing/resnet.py @@ -0,0 +1,109 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as modelzoo + +# from modules.bn import InPlaceABNSync as BatchNorm2d + +resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth' + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + def __init__(self, in_chan, out_chan, stride=1): + 
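+        # Two 3x3 convs with BN; a strided 1x1 conv projects the shortcut
+        # whenever the channel count or stride changes.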
super(BasicBlock, self).__init__() + self.conv1 = conv3x3(in_chan, out_chan, stride) + self.bn1 = nn.BatchNorm2d(out_chan) + self.conv2 = conv3x3(out_chan, out_chan) + self.bn2 = nn.BatchNorm2d(out_chan) + self.relu = nn.ReLU(inplace=True) + self.downsample = None + if in_chan != out_chan or stride != 1: + self.downsample = nn.Sequential( + nn.Conv2d(in_chan, out_chan, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_chan), + ) + + def forward(self, x): + residual = self.conv1(x) + residual = F.relu(self.bn1(residual)) + residual = self.conv2(residual) + residual = self.bn2(residual) + + shortcut = x + if self.downsample is not None: + shortcut = self.downsample(x) + + out = shortcut + residual + out = self.relu(out) + return out + + +def create_layer_basic(in_chan, out_chan, bnum, stride=1): + layers = [BasicBlock(in_chan, out_chan, stride=stride)] + for i in range(bnum-1): + layers.append(BasicBlock(out_chan, out_chan, stride=1)) + return nn.Sequential(*layers) + + +class Resnet18(nn.Module): + def __init__(self): + super(Resnet18, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) + self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) + self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) + self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) + self.init_weight() + + def forward(self, x): + x = self.conv1(x) + x = F.relu(self.bn1(x)) + x = self.maxpool(x) + + x = self.layer1(x) + feat8 = self.layer2(x) # 1/8 + feat16 = self.layer3(feat8) # 1/16 + feat32 = self.layer4(feat16) # 1/32 + return feat8, feat16, feat32 + + def init_weight(self): + state_dict = modelzoo.load_url(resnet18_url) + self_state_dict = self.state_dict() + for k, v in state_dict.items(): + if 'fc' in k: continue + self_state_dict.update({k: v}) + self.load_state_dict(self_state_dict) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, (nn.Linear, nn.Conv2d)): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +if __name__ == "__main__": + net = Resnet18() + x = torch.randn(16, 3, 224, 224) + out = net(x) + print(out[0].size()) + print(out[1].size()) + print(out[2].size()) + net.get_params() diff --git a/head_segmentation/face_parsing/test.py b/head_segmentation/face_parsing/test.py new file mode 100644 index 0000000..ada286f --- /dev/null +++ b/head_segmentation/face_parsing/test.py @@ -0,0 +1,93 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +from logger import setup_logger +from model import BiSeNet + +import torch + +import os +import os.path as osp +import numpy as np +from PIL import Image +import torchvision.transforms as transforms +import cv2 + +def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'): + # Colors for all 20 parts + part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], + [255, 0, 85], [255, 0, 170], + [0, 255, 0], [85, 255, 0], [170, 255, 0], + [0, 255, 85], [0, 255, 170], + [0, 0, 255], [85, 0, 255], [170, 0, 255], + [0, 85, 255], [0, 170, 255], + [255, 255, 0], [255, 255, 85], [255, 255, 170], + [255, 0, 255], [255, 85, 255], [255, 170, 255], + [0, 255, 
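+                   # One RGB triple per parsing label; the loop below indexes
+                   # this list by class id when coloring the overlay.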
255], [85, 255, 255], [170, 255, 255]] + + im = np.array(im) + vis_im = im.copy().astype(np.uint8) + vis_parsing_anno = parsing_anno.copy().astype(np.uint8) + vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST) + vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255 + + num_of_class = np.max(vis_parsing_anno) + + for pi in range(1, num_of_class + 1): + index = np.where(vis_parsing_anno == pi) + vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi] + + vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8) + # print(vis_parsing_anno_color.shape, vis_im.shape) + vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0) + + # Save result or not + if save_im: + cv2.imwrite(save_path[:-4] +'.png', vis_parsing_anno) + cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) + + # return vis_im + +def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'): + + if not os.path.exists(respth): + os.makedirs(respth) + + n_classes = 19 + net = BiSeNet(n_classes=n_classes) + net.cuda() + save_pth = osp.join('res/cp', cp) + net.load_state_dict(torch.load(save_pth)) + net.eval() + + to_tensor = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + with torch.no_grad(): + for image_path in os.listdir(dspth): + image = Image.open(osp.join(dspth, image_path)) + # image = img.resize((512, 512), Image.BILINEAR) + img = to_tensor(image) + img = torch.unsqueeze(img, 0) + img = img.cuda() + import time + start = time.time() + out = net(img)[0] + print('Time: ', time.time() - start) + parsing = out.squeeze(0).cpu().numpy().argmax(0) + # print(parsing) + print(np.unique(parsing)) + + vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path)) + + + + + + + +if __name__ == "__main__": + evaluate(dspth='/home/abhinav_ayalur_gmail_com/examples/head_segmentation/face_parsing/face-parsing.PyTorch/hard', cp='79999_iter.pth') + + diff --git a/head_segmentation/face_parsing/train.py b/head_segmentation/face_parsing/train.py new file mode 100644 index 0000000..580a0ca --- /dev/null +++ b/head_segmentation/face_parsing/train.py @@ -0,0 +1,179 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +from logger import setup_logger +from model import BiSeNet +from face_dataset import FaceMask +from loss import OhemCELoss +from evaluate import evaluate +from optimizer import Optimizer +import cv2 +import numpy as np + +import torch +import torch.nn as nn +from torch.utils.data import DataLoader +import torch.nn.functional as F +import torch.distributed as dist + +import os +import os.path as osp +import logging +import time +import datetime +import argparse + + +respth = './res' +if not osp.exists(respth): + os.makedirs(respth) +logger = logging.getLogger() + + +def parse_args(): + parse = argparse.ArgumentParser() + parse.add_argument( + '--local_rank', + dest = 'local_rank', + type = int, + default = -1, + ) + return parse.parse_args() + + +def train(): + args = parse_args() + torch.cuda.set_device(args.local_rank) + dist.init_process_group( + backend = 'nccl', + init_method = 'tcp://127.0.0.1:33241', + world_size = torch.cuda.device_count(), + rank=args.local_rank + ) + setup_logger(respth) + + # dataset + n_classes = 19 + n_img_per_gpu = 16 + n_workers = 8 + cropsize = [448, 448] + data_root = '/home/zll/data/CelebAMask-HQ/' + 
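+    # One process per GPU over NCCL; the DistributedSampler shards the dataset
+    # so each rank draws a disjoint subset, reshuffled via set_epoch() per epoch.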
+ ds = FaceMask(data_root, cropsize=cropsize, mode='train') + sampler = torch.utils.data.distributed.DistributedSampler(ds) + dl = DataLoader(ds, + batch_size = n_img_per_gpu, + shuffle = False, + sampler = sampler, + num_workers = n_workers, + pin_memory = True, + drop_last = True) + + # model + ignore_idx = -100 + net = BiSeNet(n_classes=n_classes) + net.cuda() + net.train() + net = nn.parallel.DistributedDataParallel(net, + device_ids = [args.local_rank, ], + output_device = args.local_rank + ) + score_thres = 0.7 + n_min = n_img_per_gpu * cropsize[0] * cropsize[1]//16 + LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx) + Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx) + Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx) + + ## optimizer + momentum = 0.9 + weight_decay = 5e-4 + lr_start = 1e-2 + max_iter = 80000 + power = 0.9 + warmup_steps = 1000 + warmup_start_lr = 1e-5 + optim = Optimizer( + model = net.module, + lr0 = lr_start, + momentum = momentum, + wd = weight_decay, + warmup_steps = warmup_steps, + warmup_start_lr = warmup_start_lr, + max_iter = max_iter, + power = power) + + ## train loop + msg_iter = 50 + loss_avg = [] + st = glob_st = time.time() + diter = iter(dl) + epoch = 0 + for it in range(max_iter): + try: + im, lb = next(diter) + if not im.size()[0] == n_img_per_gpu: + raise StopIteration + except StopIteration: + epoch += 1 + sampler.set_epoch(epoch) + diter = iter(dl) + im, lb = next(diter) + im = im.cuda() + lb = lb.cuda() + H, W = im.size()[2:] + lb = torch.squeeze(lb, 1) + + optim.zero_grad() + out, out16, out32 = net(im) + lossp = LossP(out, lb) + loss2 = Loss2(out16, lb) + loss3 = Loss3(out32, lb) + loss = lossp + loss2 + loss3 + loss.backward() + optim.step() + + loss_avg.append(loss.item()) + + # print training log message + if (it+1) % msg_iter == 0: + loss_avg = sum(loss_avg) / len(loss_avg) + lr = optim.lr + ed = time.time() + t_intv, glob_t_intv = ed - st, ed - glob_st + eta = int((max_iter - it) * (glob_t_intv / it)) + eta = str(datetime.timedelta(seconds=eta)) + msg = ', '.join([ + 'it: {it}/{max_it}', + 'lr: {lr:4f}', + 'loss: {loss:.4f}', + 'eta: {eta}', + 'time: {time:.4f}', + ]).format( + it = it+1, + max_it = max_iter, + lr = lr, + loss = loss_avg, + time = t_intv, + eta = eta + ) + logger.info(msg) + loss_avg = [] + st = ed + if dist.get_rank() == 0: + if (it+1) % 5000 == 0: + state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict() + if dist.get_rank() == 0: + torch.save(state, './res/cp/{}_iter.pth'.format(it)) + evaluate(dspth='/home/zll/data/CelebAMask-HQ/test-img', cp='{}_iter.pth'.format(it)) + + # dump the final model + save_pth = osp.join(respth, 'model_final_diss.pth') + # net.cpu() + state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict() + if dist.get_rank() == 0: + torch.save(state, save_pth) + logger.info('training done, model saved to: {}'.format(save_pth)) + + +if __name__ == "__main__": + train() diff --git a/head_segmentation/face_parsing/transform.py b/head_segmentation/face_parsing/transform.py new file mode 100644 index 0000000..9479ae3 --- /dev/null +++ b/head_segmentation/face_parsing/transform.py @@ -0,0 +1,129 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + + +from PIL import Image +import PIL.ImageEnhance as ImageEnhance +import random +import numpy as np + +class RandomCrop(object): + def __init__(self, size, *args, **kwargs): + self.size = size + + def __call__(self, im_lb): + im = im_lb['im'] + lb = 
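+        # Upscale the pair first if either side is smaller than the target,
+        # then crop image and label at the same random offset.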
im_lb['lb'] + assert im.size == lb.size + W, H = self.size + w, h = im.size + + if (W, H) == (w, h): return dict(im=im, lb=lb) + if w < W or h < H: + scale = float(W) / w if w < h else float(H) / h + w, h = int(scale * w + 1), int(scale * h + 1) + im = im.resize((w, h), Image.BILINEAR) + lb = lb.resize((w, h), Image.NEAREST) + sw, sh = random.random() * (w - W), random.random() * (h - H) + crop = int(sw), int(sh), int(sw) + W, int(sh) + H + return dict( + im = im.crop(crop), + lb = lb.crop(crop) + ) + + +class HorizontalFlip(object): + def __init__(self, p=0.5, *args, **kwargs): + self.p = p + + def __call__(self, im_lb): + if random.random() > self.p: + return im_lb + else: + im = im_lb['im'] + lb = im_lb['lb'] + + # atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r', + # 10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat'] + + flip_lb = np.array(lb) + flip_lb[lb == 2] = 3 + flip_lb[lb == 3] = 2 + flip_lb[lb == 4] = 5 + flip_lb[lb == 5] = 4 + flip_lb[lb == 7] = 8 + flip_lb[lb == 8] = 7 + flip_lb = Image.fromarray(flip_lb) + return dict(im = im.transpose(Image.FLIP_LEFT_RIGHT), + lb = flip_lb.transpose(Image.FLIP_LEFT_RIGHT), + ) + + +class RandomScale(object): + def __init__(self, scales=(1, ), *args, **kwargs): + self.scales = scales + + def __call__(self, im_lb): + im = im_lb['im'] + lb = im_lb['lb'] + W, H = im.size + scale = random.choice(self.scales) + w, h = int(W * scale), int(H * scale) + return dict(im = im.resize((w, h), Image.BILINEAR), + lb = lb.resize((w, h), Image.NEAREST), + ) + + +class ColorJitter(object): + def __init__(self, brightness=None, contrast=None, saturation=None, *args, **kwargs): + if not brightness is None and brightness>0: + self.brightness = [max(1-brightness, 0), 1+brightness] + if not contrast is None and contrast>0: + self.contrast = [max(1-contrast, 0), 1+contrast] + if not saturation is None and saturation>0: + self.saturation = [max(1-saturation, 0), 1+saturation] + + def __call__(self, im_lb): + im = im_lb['im'] + lb = im_lb['lb'] + r_brightness = random.uniform(self.brightness[0], self.brightness[1]) + r_contrast = random.uniform(self.contrast[0], self.contrast[1]) + r_saturation = random.uniform(self.saturation[0], self.saturation[1]) + im = ImageEnhance.Brightness(im).enhance(r_brightness) + im = ImageEnhance.Contrast(im).enhance(r_contrast) + im = ImageEnhance.Color(im).enhance(r_saturation) + return dict(im = im, + lb = lb, + ) + + +class MultiScale(object): + def __init__(self, scales): + self.scales = scales + + def __call__(self, img): + W, H = img.size + sizes = [(int(W*ratio), int(H*ratio)) for ratio in self.scales] + imgs = [] + [imgs.append(img.resize(size, Image.BILINEAR)) for size in sizes] + return imgs + + +class Compose(object): + def __init__(self, do_list): + self.do_list = do_list + + def __call__(self, im_lb): + for comp in self.do_list: + im_lb = comp(im_lb) + return im_lb + + + + +if __name__ == '__main__': + flip = HorizontalFlip(p = 1) + crop = RandomCrop((321, 321)) + rscales = RandomScale((0.75, 1.0, 1.5, 1.75, 2.0)) + img = Image.open('data/img.jpg') + lb = Image.open('data/label.png') From f21b3c864a70e9be42c333a484b2f41830c2c8cd Mon Sep 17 00:00:00 2001 From: Abhinav Ayalur Date: Fri, 22 Mar 2024 21:18:31 +0000 Subject: [PATCH 3/4] wip: return zip --- head_segmentation/face_parsing/.gitignore | 2 + head_segmentation/face_parsing/main.py | 64 +++++++++++++++++------ 2 files changed, 49 insertions(+), 17 deletions(-) 
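[Note on PATCH 3] The change below stops encoding per-frame label maps into an mp4, where codec quantization can corrupt class ids, and instead writes one lossless PNG mask per frame into masks/ before bundling them as masks.zip. A minimal sketch of the same idea, using the standard-library zipfile module rather than the `zip` CLI the patch shells out to; `get_mask` is a hypothetical stand-in for the model call:

import os
import zipfile
import cv2

def write_mask_archive(frames, get_mask, masks_dir="masks", archive="masks.zip"):
    # One PNG label mask per frame; PNG is lossless, so class ids survive exactly.
    os.makedirs(masks_dir, exist_ok=True)
    for i, frame in enumerate(frames):
        cv2.imwrite(os.path.join(masks_dir, "%06d.png" % i), get_mask(frame))
    # Bundle the directory, mirroring the patch's `zip -r masks.zip masks`.
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as zf:
        for name in sorted(os.listdir(masks_dir)):
            zf.write(os.path.join(masks_dir, name))
    return archive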
diff --git a/head_segmentation/face_parsing/.gitignore b/head_segmentation/face_parsing/.gitignore index e92496a..0b46e1f 100644 --- a/head_segmentation/face_parsing/.gitignore +++ b/head_segmentation/face_parsing/.gitignore @@ -1,3 +1,5 @@ *.mp4 *.jpg *.png +*.zip +*.avi \ No newline at end of file diff --git a/head_segmentation/face_parsing/main.py b/head_segmentation/face_parsing/main.py index 6412fba..a3ae597 100644 --- a/head_segmentation/face_parsing/main.py +++ b/head_segmentation/face_parsing/main.py @@ -75,45 +75,75 @@ def __predict__(self, file: sieve.File, debug_viz: bool = False) -> sieve.File: fps = video.get(cv2.CAP_PROP_FPS) size = (frame_width, frame_height) - if os.path.exists("temp.mp4"): - os.remove("temp.mp4") - out = cv2.VideoWriter('temp.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size) - if debug_viz: - if os.path.exists("temp_viz.mp4"): - os.remove("temp_viz.mp4") - out_viz = cv2.VideoWriter('temp_viz.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size) + if os.path.exists("temp_viz.avi"): + os.remove("temp_viz.avi") + out_viz = cv2.VideoWriter('temp_viz.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, size) + + masks_dir = "masks" + + import shutil + shutil.rmtree(masks_dir, ignore_errors=True) + os.makedirs(masks_dir) + counter = 0 while True: + import time + + start_time = time.time() ret, frame = video.read() + read_time = time.time() - start_time + print(f"Time taken to read frame: {read_time} seconds") + if not ret: - break + break + + start_time = time.time() frame_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + convert_time = time.time() - start_time + print(f"Time taken to convert frame to RGB: {convert_time} seconds") + + start_time = time.time() vis_parsing_anno = self.get_parsing_anno(frame_image) + parsing_time = time.time() - start_time + print(f"Time taken for parsing annotation: {parsing_time} seconds") + if debug_viz: + start_time = time.time() vis_im = vis_parsing_maps(frame_image, vis_parsing_anno) + viz_time = time.time() - start_time + print(f"Time taken for visualization: {viz_time} seconds") + + start_time = time.time() out_viz.write(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR)) + write_viz_time = time.time() - start_time + print(f"Time taken to write visualization: {write_viz_time} seconds") - vis_parsing_anno_color = cv2.cvtColor(vis_parsing_anno, cv2.COLOR_GRAY2RGB) - out.write(vis_parsing_anno_color) + start_time = time.time() + cv2.imwrite(f"{masks_dir}/%06d.png" % counter, vis_parsing_anno) + write_mask_time = time.time() - start_time + print(f"Time taken to write mask: {write_mask_time} seconds") + + counter +=1 import subprocess video.release() - out.release() - import subprocess - command = "ffmpeg -loglevel error -y -i temp.mp4 -c:v libx264 -crf 23 segmentation_map.mp4" + if os.path.exists('masks.zip'): + os.remove('masks.zip') + + command = "zip -r masks.zip masks" subprocess.call(command, shell=True) if debug_viz: out_viz.release() - command = "ffmpeg -loglevel error -y -i temp_viz.mp4 -c:v libx264 -crf 23 segmentation_map_viz.mp4" + command = "ffmpeg -loglevel error -y -i temp_viz.avi -c:v libx264 -qp 0 segmentation_map_viz.avi" subprocess.call(command, shell=True) - return (sieve.File(path="segmentation_map.mp4"), sieve.File(path="segmentation_map_viz.mp4")) + return (sieve.File(path="masks.zip"), sieve.File(path="segmentation_map_viz.avi")) - return sieve.File(path="segmentation_map.mp4") + return sieve.File(path="masks.zip") else: image = Image.open(file.path) save_path = "save_path.jpg" @@ -175,4 +205,4 @@ def 
vis_parsing_maps(im, vis_parsing_anno): if __name__ == "__main__": a = HeadSegmentationModel() - a.__predict__(sieve.File(path="/home/abhinav_ayalur_gmail_com/examples/head_segmentation/face_parsing/face-parsing.PyTorch/hdtr.mp4"), debug_viz=False) + a.__predict__(sieve.File(path="/home/abhinav_ayalur_gmail_com/examples/head_segmentation/face_parsing/hdtr.mp4"), debug_viz=False) From 97b2f81d4c0876b88bf161b4aab0cddede378899 Mon Sep 17 00:00:00 2001 From: Abhinav Ayalur Date: Thu, 28 Mar 2024 23:56:51 +0000 Subject: [PATCH 4/4] mask gan --- head_segmentation/face_parsing/main.py | 47 ++++++++------------------ 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/head_segmentation/face_parsing/main.py b/head_segmentation/face_parsing/main.py index a3ae597..27532f0 100644 --- a/head_segmentation/face_parsing/main.py +++ b/head_segmentation/face_parsing/main.py @@ -26,6 +26,7 @@ system_packages=[ "ffmpeg", "libx264-dev", + "zip", ], python_version="3.10", cuda_version="11.8", @@ -69,6 +70,9 @@ def __predict__(self, file: sieve.File, debug_viz: bool = False) -> sieve.File: import numpy as np import cv2 + import time + st = time.time() + video = cv2.VideoCapture(file_path) frame_width = int(video.get(3)) frame_height = int(video.get(4)) @@ -76,9 +80,9 @@ def __predict__(self, file: sieve.File, debug_viz: bool = False) -> sieve.File: size = (frame_width, frame_height) if debug_viz: - if os.path.exists("temp_viz.avi"): - os.remove("temp_viz.avi") - out_viz = cv2.VideoWriter('temp_viz.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, size) + if os.path.exists("temp_viz.mp4"): + os.remove("temp_viz.mp4") + out_viz = cv2.VideoWriter('temp_viz.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size) masks_dir = "masks" @@ -88,42 +92,16 @@ def __predict__(self, file: sieve.File, debug_viz: bool = False) -> sieve.File: counter = 0 while True: - import time - - start_time = time.time() ret, frame = video.read() - read_time = time.time() - start_time - print(f"Time taken to read frame: {read_time} seconds") - if not ret: - break - - start_time = time.time() + break frame_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - convert_time = time.time() - start_time - print(f"Time taken to convert frame to RGB: {convert_time} seconds") - - start_time = time.time() vis_parsing_anno = self.get_parsing_anno(frame_image) - parsing_time = time.time() - start_time - print(f"Time taken for parsing annotation: {parsing_time} seconds") - if debug_viz: - start_time = time.time() vis_im = vis_parsing_maps(frame_image, vis_parsing_anno) - viz_time = time.time() - start_time - print(f"Time taken for visualization: {viz_time} seconds") - - start_time = time.time() out_viz.write(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR)) - write_viz_time = time.time() - start_time - print(f"Time taken to write visualization: {write_viz_time} seconds") - start_time = time.time() cv2.imwrite(f"{masks_dir}/%06d.png" % counter, vis_parsing_anno) - write_mask_time = time.time() - start_time - print(f"Time taken to write mask: {write_mask_time} seconds") - counter +=1 import subprocess @@ -134,14 +112,17 @@ def __predict__(self, file: sieve.File, debug_viz: bool = False) -> sieve.File: os.remove('masks.zip') command = "zip -r masks.zip masks" - subprocess.call(command, shell=True) + process = subprocess.Popen(command, shell=True, cwd=os.getcwd(), stdout=subprocess.PIPE) + output, error = process.communicate() + + print("time to process: ", time.time() - st) if debug_viz: out_viz.release() - command = "ffmpeg -loglevel error -y -i 
temp_viz.avi -c:v libx264 -qp 0 segmentation_map_viz.avi" + command = "ffmpeg -loglevel error -y -i temp_viz.mp4 -c:v libx264 -crf 17 segmentation_map_viz.mp4" subprocess.call(command, shell=True) - return (sieve.File(path="masks.zip"), sieve.File(path="segmentation_map_viz.avi")) + return (sieve.File(path="masks.zip"), sieve.File(path="segmentation_map_viz.mp4")) return sieve.File(path="masks.zip") else: