diff --git a/BDXJTUdata/640.jpeg b/BDXJTUdata/640.jpeg deleted file mode 100644 index e995a24..0000000 Binary files a/BDXJTUdata/640.jpeg and /dev/null differ diff --git a/BDXJTUdata/BDXJTUdataset.py b/BDXJTUdata/BDXJTUdataset.py deleted file mode 100644 index cb91c05..0000000 --- a/BDXJTUdata/BDXJTUdataset.py +++ /dev/null @@ -1,38 +0,0 @@ -# coding=utf8 -from __future__ import division -import os -import torch -import torch.utils.data as data -import PIL.Image as Image -class BDXJTUdata(data.Dataset): - def __init__(self, imgroot, anno_pd, transforms=None): - self.root_path = imgroot - self.paths = anno_pd['ImageName'].tolist() - self.labels = anno_pd['label'].tolist() - self.transforms = transforms - - def __len__(self): - return len(self.paths) - - def __getitem__(self, item): - img_path = os.path.join(self.root_path, self.paths[item]) - img = self.pil_loader(img_path) - if self.transforms is not None: - img = self.transforms(img) - label = self.labels[item]-1 - return img, label - - def pil_loader(self,imgpath): - with open(imgpath, 'rb') as f: - with Image.open(f) as img: - return img.convert('RGB') -def collate_fn(batch): - imgs = [] - label = [] - - for sample in batch: - imgs.append(sample[0]) - label.append(sample[1]) - return torch.stack(imgs, 0), \ - label - diff --git a/BDXJTUdata/__init__.pyc b/BDXJTUdata/__init__.pyc deleted file mode 100644 index 77f65a2..0000000 Binary files a/BDXJTUdata/__init__.pyc and /dev/null differ diff --git a/README.md b/README.md index ae4de44..290fdea 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,11 @@ -百度-西交大·大数据竞赛2018——商家招牌的分类与检测 +[百度-西交大·大数据竞赛2018——商家招牌的分类与检测](http://dianshi.baidu.com/gemstone/competitions/detail?raceId=17) ========================================= -这是一个很基的基模型,使用pytorch框架,训练15-30分钟,val准确率大概90.5-93.5.线上提交一次在93左右, ----------------------------------------------------------------------------------- -[比赛详情](http://dianshi.baidu.com/gemstone/competitions/detail?raceId=17) -另外面向西安交通大学在校生举办的第一届西安交通大学人工智能实践大赛(AI实践大赛,群号码:755925153)正在进行中,除了奖金奖励,还有入门培训会,以及交大校徽钥匙扣,“沙坡职业技术学院”帆布袋,交大校徽马克杯等纪念品赠送,有兴趣的同学可以了解一下. ------------------------------------------------------------------------ -[详情请看西安交通大学图书馆公众号推文](https://mp.weixin.qq.com/s?__biz=MjM5NTg3MzI0Mw==&mid=2652848539&idx=1&sn=4a9f295c8164e4cb5da6983ea309ca67&chksm=bd1a092f8a6d8039c35574d778dc44c74c64dbe616aeb33438ec6622324bd55faf700a1310c2&mpshare=1&scene=1&srcid=0502KfSIhXfGDLHsUViAuslj&pass_ticket=5zMXwPoYNK%2FyREdfSH%2Bz5xuDtoyjNkiJihUXz%2FgNqyq9sqwmcTSczejhoa8byIzN#rd) -![Image text](https://github.com/OdingdongO/BD_XJTU/blob/master/BDXJTUdata/640.jpeg)\
+相比于fine-tune,进行了数据增强(data_agu.py) +------------------------------------------- +基于pytorch框架的baseline,单模型线上0.99左右 +-------------------------------------------------------- +运行环境:Ubuntu系统 Python2 pytorch0.3 +-------------------------------------------------------- diff --git a/BDXJTUdata/__init__.py b/dataset/__init__.py similarity index 100% rename from BDXJTUdata/__init__.py rename to dataset/__init__.py diff --git a/dataset/data_aug.py b/dataset/data_aug.py new file mode 100644 index 0000000..013354c --- /dev/null +++ b/dataset/data_aug.py @@ -0,0 +1,691 @@ +from __future__ import division +import cv2 +import numpy as np +from numpy import random +import math +from sklearn.utils import shuffle + +__all__ = ['Compose','RandomHflip', 'RandomUpperCrop', 'Resize', 'UpperCrop', 'RandomBottomCrop',"RandomErasing", + 'BottomCrop', 'Normalize', 'RandomSwapChannels', 'RandomRotate', 'RandomHShift',"CenterCrop", + 'ExpandBorder', 'RandomResizedCrop','RandomDownCrop', 'DownCrop', 'ResizedCrop'] + +def rotate_nobound(image, angle, center=None, scale=1.): + (h, w) = image.shape[:2] + + + # if the center is None, initialize it as the center of + # the image + if center is None: + center = (w // 2, h // 2) + + # perform the rotation + M = cv2.getRotationMatrix2D(center, angle, scale) + rotated = cv2.warpAffine(image, M, (w, h)) + + return rotated + +def scale_down(src_size, size): + w, h = size + sw, sh = src_size + if sh < h: + w, h = float(w * sh) / h, sh + if sw < w: + w, h = sw, float(h * sw) / w + return int(w), int(h) + + +def fixed_crop(src, x0, y0, w, h, size=None): + out = src[y0:y0 + h, x0:x0 + w] + if size is not None and (w, h) != size: + out = cv2.resize(out, (size[0], size[1]), interpolation=cv2.INTER_CUBIC) + return out + + +def center_crop(src, size): + h, w = src.shape[0:2] + new_w, new_h = scale_down((w, h), size) + + x0 = int((w - new_w) / 2) + y0 = int((h - new_h) / 2) + + out = fixed_crop(src, x0, y0, new_w, new_h, size) + return out + + +def bottom_crop(src, size): + h, w = src.shape[0:2] + new_w, new_h = scale_down((w, h), size) + + x0 = int((w - new_w) / 2) + y0 = int((h - new_h) * 0.75) + + out = fixed_crop(src, x0, y0, new_w, new_h, size) + return out + +def rotate_bound(image, angle): + # grab the dimensions of the image and then determine the + # center + h, w = image.shape[:2] + + (cX, cY) = (w // 2, h // 2) + + M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0) + cos = np.abs(M[0, 0]) + sin = np.abs(M[0, 1]) + + # compute the new bounding dimensions of the image + nW = int((h * sin) + (w * cos)) + nH = int((h * cos) + (w * sin)) + + # adjust the rotation matrix to take into account translation + M[0, 2] += (nW / 2) - cX + M[1, 2] += (nH / 2) - cY + + rotated = cv2.warpAffine(image, M, (nW, nH)) + + return rotated + + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + def __call__(self, img): + for t in self.transforms: + img = t(img) + return img +class RandomRotate(object): + def __init__(self, angles, bound=False): + self.angles = angles + self.bound = bound + + def __call__(self,img): + do_rotate = random.randint(0, 2) + if do_rotate: + angle = np.random.uniform(self.angles[0], self.angles[1]) + if self.bound: + img = rotate_bound(img, angle) + else: + img = rotate_nobound(img, angle) + return img +class RandomBrightness(object): + def __init__(self, delta=10): + assert delta >= 0 + assert delta <= 255 + self.delta = delta + + def __call__(self, image): + if random.randint(2): + delta = random.uniform(-self.delta, self.delta) + image = (image + delta).clip(0.0, 255.0) + # print('RandomBrightness,delta ',delta) + return image + + +class RandomContrast(object): + def __init__(self, lower=0.9, upper=1.05): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." + + # expects float image + def __call__(self, image): + if random.randint(2): + alpha = random.uniform(self.lower, self.upper) + # print('contrast:', alpha) + image = (image * alpha).clip(0.0,255.0) + return image + + +class RandomSaturation(object): + def __init__(self, lower=0.8, upper=1.2): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." + + def __call__(self, image): + if random.randint(2): + alpha = random.uniform(self.lower, self.upper) + image[:, :, 1] *= alpha + # print('RandomSaturation,alpha',alpha) + return image + + +class RandomHue(object): + def __init__(self, delta=18.0): + assert delta >= 0.0 and delta <= 360.0 + self.delta = delta + + def __call__(self, image): + if random.randint(2): + alpha = random.uniform(-self.delta, self.delta) + image[:, :, 0] += alpha + image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 + image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 + # print('RandomHue,alpha:', alpha) + return image + + +class ConvertColor(object): + def __init__(self, current='BGR', transform='HSV'): + self.transform = transform + self.current = current + + def __call__(self, image): + if self.current == 'BGR' and self.transform == 'HSV': + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + elif self.current == 'HSV' and self.transform == 'BGR': + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + else: + raise NotImplementedError + return image + +class RandomSwapChannels(object): + def __call__(self, img): + if np.random.randint(2): + order = np.random.permutation(3) + return img[:,:,order] + return img + +class RandomCrop(object): + def __init__(self, size): + self.size = size + def __call__(self, image): + h, w, _ = image.shape + new_w, new_h = scale_down((w, h), self.size) + + if w == new_w: + x0 = 0 + else: + x0 = random.randint(0, w - new_w) + + if h == new_h: + y0 = 0 + else: + y0 = random.randint(0, h - new_h) + + out = fixed_crop(image, x0, y0, new_w, new_h, self.size) + return out + + + +class RandomResizedCrop(object): + def __init__(self, size,scale=(0.49, 1.0), ratio=(1., 1.)): + self.size = size + self.scale = scale + self.ratio = ratio + + def __call__(self,img): + if random.random() < 0.2: + return cv2.resize(img,self.size) + h, w, _ = img.shape + area = h * w + d=1 + for attempt in range(10): + target_area = random.uniform(self.scale[0], self.scale[1]) * area + aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) + + + new_w = int(round(math.sqrt(target_area * aspect_ratio))) + new_h = int(round(math.sqrt(target_area / aspect_ratio))) + + if random.random() < 0.5: + new_h, new_w = new_w, new_h + + if new_w < w and new_h < h: + x0 = random.randint(0, w - new_w) + y0 = (random.randint(0, h - new_h))//d + out = fixed_crop(img, x0, y0, new_w, new_h, self.size) + + return out + + # Fallback + return center_crop(img, self.size) + + +class DownCrop(): + def __init__(self, size, select, scale=(0.36,0.81)): + self.size = size + self.scale = scale + self.select = select + + def __call__(self,img, attr_idx): + if attr_idx not in self.select: + return img, attr_idx + if attr_idx == 0: + self.scale=(0.64,1.0) + h, w, _ = img.shape + area = h * w + + s = (self.scale[0]+self.scale[1])/2.0 + + target_area = s * area + + new_w = int(round(math.sqrt(target_area))) + new_h = int(round(math.sqrt(target_area))) + + if new_w < w and new_h < h: + dw = w-new_w + x0 = int(0.5*dw) + y0 = h-new_h + out = fixed_crop(img, x0, y0, new_w, new_h, self.size) + return out, attr_idx + + # Fallback + return center_crop(img, self.size), attr_idx + + +class ResizedCrop(object): + def __init__(self, size, select,scale=(0.64, 1.0), ratio=(3. / 4., 4. / 3.)): + self.size = size + self.scale = scale + self.ratio = ratio + self.select = select + + def __call__(self,img, attr_idx): + if attr_idx not in self.select: + return img, attr_idx + h, w, _ = img.shape + area = h * w + d=1 + if attr_idx == 2: + self.scale=(0.36,0.81) + d=2 + if attr_idx == 0: + self.scale=(0.81,1.0) + + target_area = (self.scale[0]+self.scale[1])/2.0 * area + # aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) + + + new_w = int(round(math.sqrt(target_area))) + new_h = int(round(math.sqrt(target_area))) + + # if random.random() < 0.5: + # new_h, new_w = new_w, new_h + + if new_w < w and new_h < h: + x0 = (w - new_w)//2 + y0 = (h - new_h)//d//2 + out = fixed_crop(img, x0, y0, new_w, new_h, self.size) + # cv2.imshow('{}_img'.format(idx2attr_map[attr_idx]), img) + # cv2.imshow('{}_crop'.format(idx2attr_map[attr_idx]), out) + # + # cv2.waitKey(0) + return out, attr_idx + + # Fallback + return center_crop(img, self.size), attr_idx + +class RandomHflip(object): + def __call__(self, image): + if random.randint(2): + return cv2.flip(image, 1) + else: + return image + + +class Hflip(object): + def __init__(self,doHflip): + self.doHflip = doHflip + + def __call__(self, image): + if self.doHflip: + return cv2.flip(image, 1) + else: + return image + + +class CenterCrop(object): + def __init__(self, size): + self.size = size + + def __call__(self, image): + return center_crop(image, self.size) + +class UpperCrop(): + def __init__(self, size, scale=(0.09, 0.64)): + self.size = size + self.scale = scale + + def __call__(self,img): + h, w, _ = img.shape + area = h * w + + s = (self.scale[0]+self.scale[1])/2.0 + + target_area = s * area + + new_w = int(round(math.sqrt(target_area))) + new_h = int(round(math.sqrt(target_area))) + + if new_w < w and new_h < h: + dw = w-new_w + x0 = int(0.5*dw) + y0 = 0 + out = fixed_crop(img, x0, y0, new_w, new_h, self.size) + return out + + # Fallback + return center_crop(img, self.size) + + + +class RandomUpperCrop(object): + def __init__(self, size, select, scale=(0.09, 0.64), ratio=(3. / 4., 4. / 3.)): + self.size = size + self.scale = scale + self.ratio = ratio + self.select = select + + def __call__(self,img, attr_idx): + if random.random() < 0.2: + return img, attr_idx + if attr_idx not in self.select: + return img, attr_idx + + h, w, _ = img.shape + area = h * w + for attempt in range(10): + s = random.uniform(self.scale[0], self.scale[1]) + d = 0.1 + (0.3 - 0.1) / (self.scale[1] - self.scale[0]) * (s - self.scale[0]) + target_area = s * area + aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) + new_w = int(round(math.sqrt(target_area * aspect_ratio))) + new_h = int(round(math.sqrt(target_area / aspect_ratio))) + + + # new_w = int(round(math.sqrt(target_area))) + # new_h = int(round(math.sqrt(target_area))) + + if new_w < w and new_h < h: + dw = w-new_w + x0 = random.randint(int((0.5-d)*dw), int((0.5+d)*dw)+1) + y0 = (random.randint(0, h - new_h))//10 + out = fixed_crop(img, x0, y0, new_w, new_h, self.size) + return out, attr_idx + + # Fallback + return center_crop(img, self.size), attr_idx +class RandomDownCrop(object): + def __init__(self, size, select, scale=(0.36, 0.81), ratio=(3. / 4., 4. / 3.)): + self.size = size + self.scale = scale + self.ratio = ratio + self.select = select + + def __call__(self,img, attr_idx): + if random.random() < 0.2: + return img, attr_idx + if attr_idx not in self.select: + return img, attr_idx + if attr_idx == 0: + self.scale=(0.64,1.0) + + h, w, _ = img.shape + area = h * w + for attempt in range(10): + s = random.uniform(self.scale[0], self.scale[1]) + d = 0.1 + (0.3 - 0.1) / (self.scale[1] - self.scale[0]) * (s - self.scale[0]) + target_area = s * area + aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) + new_w = int(round(math.sqrt(target_area * aspect_ratio))) + new_h = int(round(math.sqrt(target_area / aspect_ratio))) + # + # new_w = int(round(math.sqrt(target_area))) + # new_h = int(round(math.sqrt(target_area))) + + if new_w < w and new_h < h: + dw = w-new_w + x0 = random.randint(int((0.5-d)*dw), int((0.5+d)*dw)+1) + y0 = (random.randint((h - new_h)*9//10, h - new_h)) + out = fixed_crop(img, x0, y0, new_w, new_h, self.size) + + # cv2.imshow('{}_img'.format(idx2attr_map[attr_idx]), img) + # cv2.imshow('{}_crop'.format(idx2attr_map[attr_idx]), out) + # + # cv2.waitKey(0) + + return out, attr_idx + + # Fallback + return center_crop(img, self.size), attr_idx + +class RandomHShift(object): + def __init__(self, select, scale=(0.0, 0.2)): + self.scale = scale + self.select = select + + def __call__(self,img, attr_idx): + if attr_idx not in self.select: + return img, attr_idx + do_shift_crop = random.randint(0, 2) + if do_shift_crop: + h, w, _ = img.shape + min_shift = int(w*self.scale[0]) + max_shift = int(w*self.scale[1]) + shift_idx = random.randint(min_shift, max_shift) + direction = random.randint(0,2) + if direction: + right_part = img[:, -shift_idx:, :] + left_part = img[:, :-shift_idx, :] + else: + left_part = img[:, :shift_idx, :] + right_part = img[:, shift_idx:, :] + img = np.concatenate((right_part, left_part), axis=1) + + # Fallback + return img, attr_idx + + +class RandomBottomCrop(object): + def __init__(self, size, select, scale=(0.4, 0.8)): + self.size = size + self.scale = scale + self.select = select + + def __call__(self,img, attr_idx): + if attr_idx not in self.select: + return img, attr_idx + + h, w, _ = img.shape + area = h * w + for attempt in range(10): + s = random.uniform(self.scale[0], self.scale[1]) + d = 0.25 + (0.45 - 0.25) / (self.scale[1] - self.scale[0]) * (s - self.scale[0]) + target_area = s * area + + new_w = int(round(math.sqrt(target_area))) + new_h = int(round(math.sqrt(target_area))) + + if new_w < w and new_h < h: + dw = w-new_w + dh = h - new_h + x0 = random.randint(int((0.5-d)*dw), min(int((0.5+d)*dw)+1,dw)) + y0 = (random.randint(max(0,int(0.8*dh)-1), dh)) + out = fixed_crop(img, x0, y0, new_w, new_h, self.size) + return out, attr_idx + + # Fallback + return bottom_crop(img, self.size), attr_idx + + +class BottomCrop(): + def __init__(self, size, select, scale=(0.4, 0.8)): + self.size = size + self.scale = scale + self.select = select + + def __call__(self,img, attr_idx): + if attr_idx not in self.select: + return img, attr_idx + + h, w, _ = img.shape + area = h * w + + s = (self.scale[0]+self.scale[1])/3.*2. + + target_area = s * area + + new_w = int(round(math.sqrt(target_area))) + new_h = int(round(math.sqrt(target_area))) + + if new_w < w and new_h < h: + dw = w-new_w + dh = h-new_h + x0 = int(0.5*dw) + y0 = int(0.9*dh) + out = fixed_crop(img, x0, y0, new_w, new_h, self.size) + return out, attr_idx + + # Fallback + return bottom_crop(img, self.size), attr_idx + + + +class Resize(object): + def __init__(self, size, inter=cv2.INTER_LINEAR): + self.size = size + self.inter = inter + + def __call__(self, image): + return cv2.resize(image, (self.size[0], self.size[0]), interpolation=self.inter) + +class ExpandBorder(object): + def __init__(self, mode='constant', value=255, size=(336,336), resize=False): + self.mode = mode + self.value = value + self.resize = resize + self.size = size + + def __call__(self, image): + h, w, _ = image.shape + if h > w: + pad1 = (h-w)//2 + pad2 = h - w - pad1 + if self.mode == 'constant': + image = np.pad(image, ((0, 0), (pad1, pad2), (0, 0)), + self.mode, constant_values=self.value) + else: + image = np.pad(image,((0,0), (pad1, pad2),(0,0)), self.mode) + elif h < w: + pad1 = (w-h)//2 + pad2 = w-h - pad1 + if self.mode == 'constant': + image = np.pad(image, ((pad1, pad2),(0, 0), (0, 0)), + self.mode,constant_values=self.value) + else: + image = np.pad(image, ((pad1, pad2), (0, 0), (0, 0)),self.mode) + if self.resize: + image = cv2.resize(image, (self.size[0], self.size[0]),interpolation=cv2.INTER_LINEAR) + return image +class AstypeToInt(): + def __call__(self, image, attr_idx): + return image.clip(0,255.0).astype(np.uint8), attr_idx + +class AstypeToFloat(): + def __call__(self, image, attr_idx): + return image.astype(np.float32), attr_idx + +import matplotlib.pyplot as plt +class Normalize(object): + def __init__(self,mean, std): + ''' + :param mean: RGB order + :param std: RGB order + ''' + self.mean = np.array(mean).reshape(3,1,1) + self.std = np.array(std).reshape(3,1,1) + def __call__(self, image): + ''' + :param image: (H,W,3) RGB + :return: + ''' + # plt.figure(1) + # plt.imshow(image) + # plt.show() + return (image.transpose((2, 0, 1)) / 255. - self.mean) / self.std + +class RandomErasing(object): + def __init__(self, select,EPSILON=0.5,sl=0.02, sh=0.09, r1=0.3, mean=[0.485, 0.456, 0.406]): + self.EPSILON = EPSILON + self.mean = mean + self.sl = sl + self.sh = sh + self.r1 = r1 + self.select = select + + def __call__(self, img,attr_idx): + if attr_idx not in self.select: + return img,attr_idx + + if random.uniform(0, 1) > self.EPSILON: + return img,attr_idx + + for attempt in range(100): + area = img.shape[1] * img.shape[2] + + target_area = random.uniform(self.sl, self.sh) * area + aspect_ratio = random.uniform(self.r1, 1 / self.r1) + + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + + if w <= img.shape[2] and h <= img.shape[1]: + x1 = random.randint(0, img.shape[1] - h) + y1 = random.randint(0, img.shape[2] - w) + if img.shape[0] == 3: + # img[0, x1:x1+h, y1:y1+w] = random.uniform(0, 1) + # img[1, x1:x1+h, y1:y1+w] = random.uniform(0, 1) + # img[2, x1:x1+h, y1:y1+w] = random.uniform(0, 1) + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] + img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] + # img[:, x1:x1+h, y1:y1+w] = torch.from_numpy(np.random.rand(3, h, w)) + else: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[1] + # img[0, x1:x1+h, y1:y1+w] = torch.from_numpy(np.random.rand(1, h, w)) + return img,attr_idx + + return img,attr_idx + +if __name__ == '__main__': + import matplotlib.pyplot as plt + + + class FSAug(object): + def __init__(self): + self.augment = Compose([ + AstypeToFloat(), + # RandomHShift(scale=(0.,0.2),select=range(8)), + # RandomRotate(angles=(-20., 20.), bound=True), + ExpandBorder(select=range(8), mode='symmetric'),# symmetric + # Resize(size=(336, 336), select=[ 2, 7]), + AstypeToInt() + ]) + + def __call__(self, spct,attr_idx): + return self.augment(spct,attr_idx) + + + trans = FSAug() + + img_path = '/media/gserver/data/FashionAI/round2/train/Images/coat_length_labels/0b6b4a2146fc8616a19fcf2026d61d50.jpg' + img = cv2.cvtColor(cv2.imread(img_path),cv2.COLOR_BGR2RGB) + img_trans,_ = trans(img,5) + # img_trans2,_ = trans(img,6) + print img_trans.max(), img_trans.min() + print img_trans.dtype + + plt.figure() + plt.subplot(221) + plt.imshow(img) + + plt.subplot(222) + plt.imshow(img_trans) + + # plt.subplot(223) + # plt.imshow(img_trans2) + # plt.imshow(img_trans2) + plt.show() diff --git a/dataset/dataset.py b/dataset/dataset.py new file mode 100644 index 0000000..c33432f --- /dev/null +++ b/dataset/dataset.py @@ -0,0 +1,82 @@ +# coding=utf8 +from __future__ import division +import os +import torch +import torch.utils.data as data +import PIL.Image as Image +from data_aug import * +import cv2 +class dataset(data.Dataset): + def __init__(self, imgroot, anno_pd, transforms=None): + self.root_path = imgroot + self.paths = anno_pd['ImageName'].tolist() + self.labels = anno_pd['label'].tolist() + self.transforms = transforms + + def __len__(self): + return len(self.paths) + + def __getitem__(self, item): + img_path = os.path.join(self.root_path, self.paths[item]) + # img = self.pil_loader(img_path) + img =cv2.imread(img_path) + img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB) # [h,w,3] RGB + + if self.transforms is not None: + img = self.transforms(img) + label = self.labels[item]-1 + # return img, label + return torch.from_numpy(img).float(), label + + def pil_loader(self,imgpath): + with open(imgpath, 'rb') as f: + with Image.open(f) as img: + return img.convert('RGB') +def collate_fn(batch): + imgs = [] + label = [] + + for sample in batch: + imgs.append(sample[0]) + label.append(sample[1]) + + return torch.stack(imgs, 0), \ + label + +if __name__ == '__main__': + import matplotlib.pyplot as plt + import pandas as pd + rawdata_root = '/media/hszc/data/detao/data/baidu/datasets' + train_pd = pd.read_csv("/media/hszc/data/detao/data/baidu/datasets/train.txt", sep=" ", + header=None, names=['ImageName', 'label']) + val_pd = pd.read_csv("/media/hszc/data/detao/data/baidu/datasets/test_answer.txt", sep=" ", + header=None, names=['ImageName', "label"]) + val_pd['ImageName'] = val_pd['ImageName'].apply(lambda x: os.path.join('test', x)) + train_pd['ImageName'] = train_pd['ImageName'].apply(lambda x: os.path.join('train', x)) + data_transforms = { + 'train': Compose([ + RandomRotate(angles=(-15,15)), + ExpandBorder(size=(368, 368), resize=True), + RandomResizedCrop(size=(336, 336)), + # RandomHflip(), + # transforms.RandomResizedCrop(336,scale=(0.49,1.0)), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), # 0-255 to 0-1 + Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]), + 'val': Compose([ + ExpandBorder(size=(368, 368), resize=True), + # CenterCrop(size=(336, 336)), + Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]), + } + + data_set = {} + data_set['train'] = dataset(imgroot=rawdata_root, anno_pd=train_pd, + transforms=data_transforms["train"], + ) + data_set['val'] = dataset(imgroot=rawdata_root, anno_pd=val_pd, + transforms=data_transforms["val"], + ) + for data in data_set["val"]: + print("22222") \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/multiscale_resnet.py b/models/multiscale_resnet.py new file mode 100644 index 0000000..c4b73ad --- /dev/null +++ b/models/multiscale_resnet.py @@ -0,0 +1,33 @@ +from torch import nn +import torch +from torchvision import models,transforms,datasets +import torch.nn.functional as F +class multiscale_resnet(nn.Module): + def __init__(self,num_class): + super(multiscale_resnet,self).__init__() + resnet50 =models.resnet50(pretrained=True) + self.base_model =nn.Sequential(*list(resnet50.children())[:-2]) + self.avgpool = nn.AdaptiveAvgPool2d(output_size=1) + self.classifier = nn.Linear(resnet50.fc.in_features,num_class) + + def forward(self, x): + input_size = x.size()[2] + self.interp = nn.UpsamplingBilinear2d(size = (int(input_size*0.75)+1, int(input_size*0.75)+1)) + + x2 = self.interp(x) + x = self.base_model(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + + x2 = self.base_model(x2) + x2 = self.avgpool(x2) + x2 = x2.view(x2.size(0), -1) + + out =[] + out.append(self.classifier(x)) + out.append(self.classifier(x2)) + return out + + + + diff --git a/predict/pred_BDXJTU.py b/predict/pred.py similarity index 82% rename from predict/pred_BDXJTU.py rename to predict/pred.py index 97c634a..d27f618 100644 --- a/predict/pred_BDXJTU.py +++ b/predict/pred.py @@ -1,7 +1,7 @@ import os import numpy as np import pandas as pd -from BDXJTUdata.BDXJTUdataset import BDXJTUdata, collate_fn +from dataset.dataset import dataset, collate_fn import torch from torch.nn import CrossEntropyLoss import torch.utils.data as torchdata @@ -11,15 +11,13 @@ from torch.autograd import Variable from math import ceil from torch.nn.functional import softmax +from dataset.data_aug import * +test_transforms= Compose([ + ExpandBorder(size=(336,336),resize=True), + Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) -test_transforms= transforms.Compose([ - transforms.Resize(224), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) - ]) - -os.environ["CUDA_VISIBLE_DEVICES"] = "3" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" mode ="train" rawdata_root = '/media/hszc/data/detao/data/baidu/datasets/' @@ -36,7 +34,7 @@ print(test_pd.head()) data_set = {} -data_set['test'] = BDXJTUdata(imgroot=os.path.join(rawdata_root, mode), anno_pd=test_pd, +data_set['test'] = dataset(imgroot=os.path.join(rawdata_root, mode), anno_pd=test_pd, transforms=test_transforms, ) data_loader = {} @@ -44,7 +42,7 @@ shuffle=False, pin_memory=True, collate_fn=collate_fn) model_name = 'resnet50-out' -resume = '/media/hszc/model/detao/baidu_model/resnet50/weights-6-526-[0.9071].pth' +resume = '/media/hszc/model/detao/baidu_model/resnet/weights-20-360-[0.9870].pth' model =resnet50(pretrained=True) model.avgpool = torch.nn.AdaptiveAvgPool2d(output_size=1) @@ -54,6 +52,7 @@ model.load_state_dict(torch.load(resume)) model = model.cuda() model.eval() + criterion = CrossEntropyLoss() if not os.path.exists('./Baidu/csv'): @@ -75,8 +74,12 @@ outputs = model(inputs) # statistics - loss = criterion(outputs, labels) - outputs=softmax(outputs) + if isinstance(outputs, list): + loss = criterion(outputs[0], labels) + loss += criterion(outputs[1], labels) + outputs = (outputs[0]+outputs[1])/2 + else: + loss = criterion(outputs, labels) _, preds = torch.max(outputs, 1) test_loss += loss.data[0] diff --git a/train_baidu_xjtu.py b/train_main.py similarity index 56% rename from train_baidu_xjtu.py rename to train_main.py index 5b0b724..900f626 100644 --- a/train_baidu_xjtu.py +++ b/train_main.py @@ -2,7 +2,7 @@ import os import pandas as pd from sklearn.model_selection import train_test_split -from BDXJTUdata.BDXJTUdataset import collate_fn, BDXJTUdata +from dataset.dataset import collate_fn, dataset import torch import torch.utils.data as torchdata from torchvision import datasets, models, transforms @@ -12,41 +12,41 @@ from utils.train_util import train, trainlog from torch.nn import CrossEntropyLoss import logging +from dataset.data_aug import * +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +save_dir = '/media/hszc/model/detao/baidu_model/resnet50' +if not os.path.exists(save_dir): + os.makedirs(save_dir) +logfile = '%s/trainlog.log'%save_dir +trainlog(logfile) + rawdata_root = '/media/hszc/data/detao/data/baidu/datasets/train' all_pd = pd.read_csv("/media/hszc/data/detao/data/baidu/datasets/train.txt",sep=" ", - header=None, names=['ImageName', 'label']) + header=None, names=['ImageName', 'label']) train_pd, val_pd = train_test_split(all_pd, test_size=0.15, random_state=43, stratify=all_pd['label']) print(val_pd.shape) + '''数据扩增''' data_transforms = { - 'train': transforms.Compose([ - transforms.RandomRotation(degrees=15), - transforms.RandomResizedCrop(224), - # transforms.RandomHorizontalFlip(), - transforms.ToTensor(), # 0-255 to 0-1 - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + 'train': Compose([ + RandomRotate(angles=(-15,15)), + ExpandBorder(size=(368,368),resize=True), + RandomResizedCrop(size=(336, 336)), + Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), ]), - 'val': transforms.Compose([ - transforms.Resize(224), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + 'val': Compose([ + ExpandBorder(size=(336,336),resize=True), + Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]), } -os.environ["CUDA_VISIBLE_DEVICES"] = "3" - -save_dir = '/media/hszc/model/detao/baidu_model/resnet50' -if not os.path.exists(save_dir): - os.makedirs(save_dir) -logfile = '%s/trainlog.log'%save_dir -trainlog(logfile) data_set = {} -data_set['train'] = BDXJTUdata(imgroot=rawdata_root,anno_pd=train_pd, +data_set['train'] = dataset(imgroot=rawdata_root,anno_pd=train_pd, transforms=data_transforms["train"], ) -data_set['val'] = BDXJTUdata(imgroot=rawdata_root,anno_pd=val_pd, +data_set['val'] = dataset(imgroot=rawdata_root,anno_pd=val_pd, transforms=data_transforms["val"], ) dataloader = {} @@ -66,18 +66,18 @@ model.load_state_dict(torch.load(resume)) model = model.cuda() -optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5) +optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=1e-5) criterion = CrossEntropyLoss() exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.1) -best_acc,best_model_wts = train(model, - epoch_num=50, - start_epoch=0, - optimizer=optimizer, - criterion=criterion, - exp_lr_scheduler=exp_lr_scheduler, - data_set=data_set, - data_loader=dataloader, - save_dir=save_dir, - print_inter=50, - val_inter=400, - ) \ No newline at end of file + +train(model, + epoch_num=50, + start_epoch=0, + optimizer=optimizer, + criterion=criterion, + exp_lr_scheduler=exp_lr_scheduler, + data_set=data_set, + data_loader=dataloader, + save_dir=save_dir, + print_inter=50, + val_inter=400) \ No newline at end of file diff --git a/utils/train_util.py b/utils/train_util.py index ad767bc..41468a3 100644 --- a/utils/train_util.py +++ b/utils/train_util.py @@ -52,8 +52,14 @@ def train(model, optimizer.zero_grad() outputs = model(inputs) - loss = criterion(outputs, labels) - + if isinstance(outputs, list): + loss = criterion(outputs[0], labels) + loss += criterion(outputs[1], labels) + outputs=outputs[0] + else: + loss = criterion(outputs, labels) + + _, preds = torch.max(outputs, 1) loss.backward() optimizer.step() @@ -88,8 +94,13 @@ def train(model, # forward outputs = model(inputs) + if isinstance(outputs, list): + loss = criterion(outputs[0], labels) + loss += criterion(outputs[1], labels) + outputs = outputs[0] - loss = criterion(outputs, labels) + else: + loss = criterion(outputs, labels) _, preds = torch.max(outputs, 1) # statistics