diff --git a/README.md b/README.md
index 37c8b28..6f79531 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,7 @@ Papers:
 * [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
 * [SphereFace: Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
 * [CosFace: Large Margin Cosine Loss for Deep Face Recognition](https://arxiv.org/abs/1801.09414)
+* [Random Erasing Data Augmentation](https://arxiv.org/abs/1708.04896)
 
 ## Licensing
 
diff --git a/vectorizer/README.md b/vectorizer/README.md
index 0c55fa4..20a7b64 100644
--- a/vectorizer/README.md
+++ b/vectorizer/README.md
@@ -154,6 +154,7 @@ Papers:
 * [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
 * [SphereFace: Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
 * [CosFace: Large Margin Cosine Loss for Deep Face Recognition](https://arxiv.org/abs/1801.09414)
+* [Random Erasing Data Augmentation](https://arxiv.org/abs/1708.04896)
 
 ## Licensing
 
diff --git a/vectorizer/identification/dataloader.py b/vectorizer/identification/dataloader.py
index f981a88..d0db577 100644
--- a/vectorizer/identification/dataloader.py
+++ b/vectorizer/identification/dataloader.py
@@ -27,6 +27,7 @@ from torch.utils.data import Dataset
 from torch.utils.data.sampler import Sampler
 
 from PIL import Image, ImageEnhance, ImageFilter
+from torchvision import transforms as T
 
 
 class CSVDataset(Dataset):
@@ -402,6 +403,19 @@
         return {'img': new_image, 'annot': annots, 'scale': scale}
 
 
+class RandomEraser(object):
+    def __init__(self):
+        self.eraser = T.RandomErasing()
+
+    def __call__(self, sample):
+        image, annots, scales = sample['img'], sample['annot'], sample['scale']
+
+        image = self.eraser(image)
+
+        sample = {'img': image, 'annot': annots, 'scale': scales}
+        return sample
+
+
 class Augmenter(object):
     """Convert ndarrays in sample to Tensors."""
 
diff --git a/vectorizer/identification/losses.py b/vectorizer/identification/losses.py
index d4c1235..cf64e6c 100644
--- a/vectorizer/identification/losses.py
+++ b/vectorizer/identification/losses.py
@@ -23,14 +23,11 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 
-def memprint(a):
-    print(a.shape)
-    print(a.element_size() * a.nelement())
-
-
-def calc_iou(a, b):
+def calc_iou(a, b, is_cuda=False):
     step = 20
-    IoU = torch.zeros((len(a), len(b))).cuda()
+    IoU = torch.zeros((len(a), len(b)))
+    if is_cuda:
+        IoU = IoU.cuda()
     step_count = int(len(b) / step)
     if len(b) % step != 0:
         step_count += 1
@@ -127,7 +124,7 @@ class FocalLoss(nn.Module):
 
             classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
 
-            IoU = calc_iou(anchor, bbox_annotation[:, :4])  # num_anchors x num_annotations
+            IoU = calc_iou(anchor, bbox_annotation[:, :4], self.is_cuda)  # num_anchors x num_annotations
 
             IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1
 
diff --git a/vectorizer/identification/train.py b/vectorizer/identification/train.py
index b161668..e896903 100644
--- a/vectorizer/identification/train.py
+++ b/vectorizer/identification/train.py
@@ -32,7 +32,7 @@ from identification.model_level_attention import resnet18, resnet34, resnet50, r
 from torch.utils.data import DataLoader
 from identification.csv_eval import evaluate
 from identification.dataloader import WIDERDataset, AspectRatioBasedSampler, collater, Resizer, Augmenter, Normalizer, \
-    CSVDataset
+    CSVDataset, RandomEraser
 
 is_cuda = torch.cuda.is_available()
 print('CUDA available: {}'.format(is_cuda))
@@ -75,10 +75,10 @@ def main(args=None):
     # Create the data loaders
     if parser.wider_train is None:
         dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
-                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
+                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer(), RandomEraser()]))
     else:
         dataset_train = WIDERDataset(train_file=parser.wider_train, img_prefix=parser.wider_train_prefix,
-                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
+                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer(), RandomEraser()]))
 
     if parser.wider_val is None:
         if parser.csv_val is None:
@@ -175,9 +175,10 @@ def main(args=None):
                 img_data = img_data.cuda()
                 annot_data = annot_data.cuda()
 
-            print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (
-                torch.cuda.memory_allocated() / 1024 ** 2, torch.cuda.max_memory_allocated() / 1024 ** 2,
-                torch.cuda.memory_cached() / 1024 ** 2, torch.cuda.max_memory_cached() / 1024 ** 2))
+                print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (
+                    torch.cuda.memory_allocated() / 1024 ** 2, torch.cuda.max_memory_allocated() / 1024 ** 2,
+                    torch.cuda.memory_cached() / 1024 ** 2, torch.cuda.max_memory_cached() / 1024 ** 2))
+
             classification_loss, regression_loss, mask_loss = retinanet([img_data, annot_data])
 
             del img_data
diff --git a/vectorizer/recognition/train.py b/vectorizer/recognition/train.py
index d8e9ea6..76a6f00 100644
--- a/vectorizer/recognition/train.py
+++ b/vectorizer/recognition/train.py
@@ -50,7 +50,8 @@ class Dataset(torch.utils.data.Dataset):
             T.RandomResizedCrop(imagesize),
             T.RandomHorizontalFlip(),
             T.ToTensor(),
-            normalize
+            normalize,
+            T.RandomErasing()
         ])
 
     def __getitem__(self, index):
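
Note on transform ordering (not part of the patch itself): torchvision's `T.RandomErasing` operates on a tensor image of shape (C, H, W), which is why the patch appends it after `T.ToTensor()`/`normalize` in `recognition/train.py`, and after `Normalizer()` in the detection pipeline, where `sample['img']` is assumed to already be a tensor. A minimal standalone sketch; the parameter values are torchvision's documented defaults, written out only for illustration:

```python
import torch
from torchvision import transforms as T

# RandomErasing must see a (C, H, W) tensor, so it goes after ToTensor().
transform = T.Compose([
    T.RandomHorizontalFlip(),
    T.ToTensor(),            # PIL image -> float tensor in [0, 1], shape (C, H, W)
    T.RandomErasing(
        p=0.5,               # probability of erasing a patch at all
        scale=(0.02, 0.33),  # patch area as a fraction of the image area
        ratio=(0.3, 3.3),    # aspect-ratio range of the erased patch
        value=0,             # fill value for the erased region
    ),
])

img = torch.rand(3, 224, 224)      # stand-in for a ToTensor() output
out = T.RandomErasing(p=1.0)(img)  # p=1.0 forces an erasure for demonstration
assert out.shape == img.shape
```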
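
The `calc_iou` change makes the loss runnable on CPU-only machines by allocating the IoU matrix on the CPU and moving it to the GPU only when `is_cuda` is set; it assumes the enclosing `FocalLoss` exposes a matching `self.is_cuda` flag. A sketch of the same pattern with hypothetical helper names, alongside the equivalent `device=` form that torch also supports:

```python
import torch

def make_buffer(n, m, is_cuda=False):
    # The patch's pattern: allocate on CPU, then move if CUDA was requested.
    buf = torch.zeros((n, m))
    if is_cuda:
        buf = buf.cuda()
    return buf

def make_buffer_device(n, m, device="cpu"):
    # Same intent using torch's device argument, skipping the CPU-side copy.
    return torch.zeros((n, m), device=device)
```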