diff --git a/README.md b/README.md
index 37c8b28..6f79531 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,7 @@ Papers:
 * [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
 * [SphereFace: Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
 * [CosFace: Large Margin Cosine Loss for Deep Face Recognition](https://arxiv.org/abs/1801.09414)
+* [Random Erasing Data Augmentation](https://arxiv.org/abs/1708.04896)
 
 ## Licensing
 
diff --git a/vectorizer/README.md b/vectorizer/README.md
index 0c55fa4..20a7b64 100644
--- a/vectorizer/README.md
+++ b/vectorizer/README.md
@@ -154,6 +154,7 @@ Papers:
 * [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
 * [SphereFace: Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
 * [CosFace: Large Margin Cosine Loss for Deep Face Recognition](https://arxiv.org/abs/1801.09414)
+* [Random Erasing Data Augmentation](https://arxiv.org/abs/1708.04896)
 
 ## Licensing
 
diff --git a/vectorizer/identification/dataloader.py b/vectorizer/identification/dataloader.py
index f981a88..d0db577 100644
--- a/vectorizer/identification/dataloader.py
+++ b/vectorizer/identification/dataloader.py
@@ -27,6 +27,7 @@ from torch.utils.data import Dataset
 from torch.utils.data.sampler import Sampler
 
 from PIL import Image, ImageEnhance, ImageFilter
+from torchvision import transforms as T
 
 
 class CSVDataset(Dataset):
@@ -402,6 +403,19 @@
         return {'img': new_image, 'annot': annots, 'scale': scale}
 
 
+class RandomEraser(object):
+    def __init__(self):
+        self.eraser = T.RandomErasing()
+
+    def __call__(self, sample):
+        image, annots, scales = sample['img'], sample['annot'], sample['scale']
+
+        image = self.eraser(image)
+
+        sample = {'img': image, 'annot': annots, 'scale': scales}
+        return sample
+
+
 class Augmenter(object):
     """Convert ndarrays in sample to Tensors."""
 
diff --git a/vectorizer/identification/losses.py b/vectorizer/identification/losses.py
index d4c1235..cf64e6c 100644
--- a/vectorizer/identification/losses.py
+++ b/vectorizer/identification/losses.py
@@ -23,14 +23,11 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 
-def memprint(a):
-    print(a.shape)
-    print(a.element_size() * a.nelement())
-
-
-def calc_iou(a, b):
+def calc_iou(a, b, is_cuda=False):
     step = 20
-    IoU = torch.zeros((len(a), len(b))).cuda()
+    IoU = torch.zeros((len(a), len(b)))
+    if is_cuda:
+        IoU = IoU.cuda()
     step_count = int(len(b) / step)
     if len(b) % step != 0:
         step_count += 1
@@ -127,7 +124,7 @@ class FocalLoss(nn.Module):
 
             classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
 
-            IoU = calc_iou(anchor, bbox_annotation[:, :4])  # num_anchors x num_annotations
+            IoU = calc_iou(anchor, bbox_annotation[:, :4], self.is_cuda)  # num_anchors x num_annotations
 
             IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1
 
diff --git a/vectorizer/identification/train.py b/vectorizer/identification/train.py
index b161668..e896903 100644
--- a/vectorizer/identification/train.py
+++ b/vectorizer/identification/train.py
@@ -32,7 +32,7 @@ from identification.model_level_attention import resnet18, resnet34, resnet50, r
 from torch.utils.data import DataLoader
 from identification.csv_eval import evaluate
 from identification.dataloader import WIDERDataset, AspectRatioBasedSampler, collater, Resizer, Augmenter, Normalizer, \
-    CSVDataset
+    CSVDataset, RandomEraser
 
 is_cuda = torch.cuda.is_available()
 print('CUDA available: {}'.format(is_cuda))
@@ -75,10 +75,10 @@ def main(args=None):
     # Create the data loaders
     if parser.wider_train is None:
         dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
-                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
+                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer(), RandomEraser()]))
     else:
         dataset_train = WIDERDataset(train_file=parser.wider_train, img_prefix=parser.wider_train_prefix,
-                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
+                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer(), RandomEraser()]))
 
     if parser.wider_val is None:
         if parser.csv_val is None:
@@ -175,9 +175,10 @@ def main(args=None):
                 img_data = img_data.cuda()
                 annot_data = annot_data.cuda()
 
-            print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (
-                torch.cuda.memory_allocated() / 1024 ** 2, torch.cuda.max_memory_allocated() / 1024 ** 2,
-                torch.cuda.memory_cached() / 1024 ** 2, torch.cuda.max_memory_cached() / 1024 ** 2))
+                print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (
+                    torch.cuda.memory_allocated() / 1024 ** 2, torch.cuda.max_memory_allocated() / 1024 ** 2,
+                    torch.cuda.memory_cached() / 1024 ** 2, torch.cuda.max_memory_cached() / 1024 ** 2))
+
             classification_loss, regression_loss, mask_loss = retinanet([img_data, annot_data])
 
             del img_data
diff --git a/vectorizer/recognition/train.py b/vectorizer/recognition/train.py
index d8e9ea6..76a6f00 100644
--- a/vectorizer/recognition/train.py
+++ b/vectorizer/recognition/train.py
@@ -50,7 +50,8 @@ class Dataset(torch.utils.data.Dataset):
             T.RandomResizedCrop(imagesize),
             T.RandomHorizontalFlip(),
             T.ToTensor(),
-            normalize
+            normalize,
+            T.RandomErasing()
         ])
 
     def __getitem__(self, index):
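
Note on transform ordering (not part of the patch itself): torchvision's `T.RandomErasing` operates on a tensor image of shape (C, H, W), which is why the patch appends it after `T.ToTensor()`/`normalize` in `recognition/train.py`, and after `Normalizer()` in the detection pipeline, where `sample['img']` is assumed to already be a tensor. A minimal standalone sketch; the parameter values are torchvision's documented defaults, written out only for illustration:

```python
import torch
from torchvision import transforms as T

# RandomErasing must see a (C, H, W) tensor, so it goes after ToTensor().
transform = T.Compose([
    T.RandomHorizontalFlip(),
    T.ToTensor(),            # PIL image -> float tensor in [0, 1], shape (C, H, W)
    T.RandomErasing(
        p=0.5,               # probability of erasing a patch at all
        scale=(0.02, 0.33),  # patch area as a fraction of the image area
        ratio=(0.3, 3.3),    # aspect-ratio range of the erased patch
        value=0,             # fill value for the erased region
    ),
])

img = torch.rand(3, 224, 224)      # stand-in for a ToTensor() output
out = T.RandomErasing(p=1.0)(img)  # p=1.0 forces an erasure for demonstration
assert out.shape == img.shape
```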
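
The `calc_iou` change makes the loss runnable on CPU-only machines by allocating the IoU matrix on the CPU and moving it to the GPU only when `is_cuda` is set; it assumes the enclosing `FocalLoss` exposes a matching `self.is_cuda` flag. A sketch of the same pattern with hypothetical helper names, alongside the equivalent `device=` form that torch also supports:

```python
import torch

def make_buffer(n, m, is_cuda=False):
    # The patch's pattern: allocate on CPU, then move if CUDA was requested.
    buf = torch.zeros((n, m))
    if is_cuda:
        buf = buf.cuda()
    return buf

def make_buffer_device(n, m, device="cpu"):
    # Same intent using torch's device argument, skipping the CPU-side copy.
    return torch.zeros((n, m), device=device)
```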