commit
95d4f2ca7d
13 changed files with 1969 additions and 0 deletions
@@ -0,0 +1,71 @@
# Runtime directories
ckpt/
mAP_txt/
summary/
weight/

# IntelliJ IDEA
.idea/
*.iml

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
# lib is NOT ignored
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
@@ -0,0 +1,131 @@
import numpy as np
import torch
import torch.nn as nn


class Anchors(nn.Module):
    def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None, is_cuda=True):
        super(Anchors, self).__init__()

        self.is_cuda = is_cuda
        # Fall back to the RetinaNet defaults, but keep any value the caller
        # passes in (the original only assigned the attributes when the
        # argument was None, so explicitly provided values were silently dropped)
        self.pyramid_levels = [3, 4, 5, 6, 7] if pyramid_levels is None else pyramid_levels
        self.strides = [2 ** x for x in self.pyramid_levels] if strides is None else strides
        self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] if sizes is None else sizes
        # self.ratios = np.array([1., 1.5, 2., 2.5, 3.])
        self.ratios = np.array([0.5, 1., 2.]) if ratios is None else ratios
        self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) if scales is None else scales

    def forward(self, image):
        image_shape = image.shape[2:]
        image_shape = np.array(image_shape)
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        # compute anchors over all pyramid levels
        all_anchors = np.zeros((0, 4)).astype(np.float32)

        for idx, p in enumerate(self.pyramid_levels):
            anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
            shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
            all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

        all_anchors = np.expand_dims(all_anchors, axis=0)
        all_anchors = torch.from_numpy(all_anchors.astype(np.float32))
        if self.is_cuda:
            all_anchors = all_anchors.cuda()

        return all_anchors


def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales w.r.t. a reference window.
    """

    if ratios is None:
        ratios = np.array([0.5, 1, 2])

    if scales is None:
        scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    num_anchors = len(ratios) * len(scales)

    # initialize output anchors
    anchors = np.zeros((num_anchors, 4))

    # scale base_size
    anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T

    # compute areas of anchors
    areas = anchors[:, 2] * anchors[:, 3]

    # correct for ratios
    anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
    anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))

    # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
    anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
    anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T

    return anchors
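
# --- Editor's note: a minimal usage sketch (not part of the commit). With the
# default 3 ratios x 3 scales, generate_anchors() returns 9 origin-centered
# boxes in (x1, y1, x2, y2) form:
#
#     >>> a = generate_anchors(base_size=32)
#     >>> a.shape
#     (9, 4)
#     >>> np.allclose(a[:, :2], -a[:, 2:])  # centered at the origin
#     True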


def compute_shape(image_shape, pyramid_levels):
    """Compute shapes based on pyramid levels.

    :param image_shape:
    :param pyramid_levels:
    :return:
    """
    image_shape = np.array(image_shape[:2])
    image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
    return image_shapes


def anchors_for_shape(
        image_shape,
        pyramid_levels=None,
        ratios=None,
        scales=None,
        strides=None,
        sizes=None,
):
    image_shapes = compute_shape(image_shape, pyramid_levels)

    # compute anchors over all pyramid levels
    all_anchors = np.zeros((0, 4))
    for idx, p in enumerate(pyramid_levels):
        anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales)
        shifted_anchors = shift(image_shapes[idx], strides[idx], anchors)
        all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

    return all_anchors


def shift(shape, stride, anchors):
    shift_x = (np.arange(0, shape[1]) + 0.5) * stride
    shift_y = (np.arange(0, shape[0]) + 0.5) * stride

    shift_x, shift_y = np.meshgrid(shift_x, shift_y)

    shifts = np.vstack((
        shift_x.ravel(), shift_y.ravel(),
        shift_x.ravel(), shift_y.ravel()
    )).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = anchors.shape[0]
    K = shifts.shape[0]
    all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))

    return all_anchors
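
# --- Editor's note: a small shape sketch for shift() (illustrative only, not
# part of the commit). A 10x10 feature map at stride 8 has K = 100 grid
# positions, each tiled with the A = 9 default anchors:
#
#     >>> shifted = shift((10, 10), 8, generate_anchors(base_size=32))
#     >>> shifted.shape
#     (900, 4)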

@@ -0,0 +1,95 @@
import numpy as np
import torch
from PIL import Image

from torchvision import transforms


class Resizer(object):
    """Resize the image so the smallest side hits min_side, then zero-pad both sides up to multiples of 32."""

    def __call__(self, sample, min_side=800, max_side=1400):
        image, annots, scale = sample['img'], sample['annot'], sample['scale']

        # PIL's Image.size is (width, height), so unpack it as (cols, rows);
        # the original unpacked it as (rows, cols), which transposed the
        # resize target for non-square images
        cols, rows = image.size

        # scale = min_side / rows

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
        image = image / 255.0

        rows, cols, cns = image.shape

        # pad each side up to the next multiple of 32 (the original always
        # added a full extra 32 pixels when a side was already aligned)
        pad_w = (32 - rows % 32) % 32
        pad_h = (32 - cols % 32) % 32

        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': new_image, 'annot': annots, 'scale': scale}


class Normalizer(object):
    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']

        image = (image.astype(np.float32) - self.mean) / self.std

        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
        return sample


def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True):
    input_data = {'img': img_data, 'annot': np.zeros((0, 5)), 'scale': 1}
    transform = transforms.Compose([Resizer(), Normalizer()])
    transformed = transform(input_data)

    model.eval()
    with torch.no_grad():
        img_data = transformed['img'].permute(2, 0, 1).float().unsqueeze(dim=0)
        if is_cuda:
            img_data = img_data.cuda()
        scores, labels, boxes = model(img_data)
        if scores is None:
            # np.array() with no arguments raises a TypeError; return an
            # empty box array instead
            return np.zeros((0, 4))

    scores = scores.cpu().numpy()
    scale = transformed['scale']
    boxes = boxes.cpu().numpy() / scale

    indices = np.where(scores > threshold)[0]
    scores = scores[indices]
    scores_sort = np.argsort(-scores)[:max_detections]
    image_boxes = boxes[indices[scores_sort], :]

    return image_boxes
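
# --- Editor's note: a hedged end-to-end sketch (not part of the commit). The
# model path and image path below are placeholders; Resizer expects a PIL
# image, so the input is opened with PIL:
#
#     from PIL import Image
#     model = load_model('weight/model_final.pt', is_cuda=False)  # hypothetical path
#     img = Image.open('sample.jpg').convert('RGB')               # hypothetical image
#     boxes = fan_detect(model, img, threshold=0.9, is_cuda=False)
#     print(boxes)  # (N, 4) array of x1, y1, x2, y2 in original image coordinates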


def load_model(model_path, is_cuda=True):
    # load possible cuda model as cpu
    model = torch.load(model_path, map_location=lambda storage, location: storage)
    if is_cuda:
        model = model.cuda()

    model.anchors.is_cuda = is_cuda

    return model

@@ -0,0 +1,289 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


def memprint(a):
    # print a tensor's shape and its payload size in bytes
    print(a.shape)
    print(a.element_size() * a.nelement())


def calc_iou(a, b):
    # compute IoU in chunks of 20 annotation boxes to bound peak memory:
    # the (num_anchors x num_annotations) intermediates are built slice by
    # slice and written into the preallocated result
    step = 20
    # allocate the result on the same device as the inputs (the original
    # hard-coded .cuda(), which broke CPU-only runs)
    IoU = torch.zeros((len(a), len(b)), device=a.device)
    step_count = int(len(b) / step)
    if len(b) % step != 0:
        step_count += 1

    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    for i in range(step_count):
        iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[i * step:(i + 1) * step, 2])
        iw.sub_(torch.max(torch.unsqueeze(a[:, 0], 1), b[i * step:(i + 1) * step, 0]))

        ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[i * step:(i + 1) * step, 3])
        ih.sub_(torch.max(torch.unsqueeze(a[:, 1], 1), b[i * step:(i + 1) * step, 1]))

        iw.clamp_(min=0)
        ih.clamp_(min=0)

        iw.mul_(ih)
        del ih

        ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area[i * step:(i + 1) * step] - iw
        ua = torch.clamp(ua, min=1e-8)
        iw.div_(ua)
        del ua

        IoU[:, i * step:(i + 1) * step] = iw

    return IoU
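
# --- Editor's note: a tiny sanity check for calc_iou (illustrative only, not
# part of the commit). Two identical unit boxes have IoU 1, disjoint boxes 0:
#
#     >>> a = torch.tensor([[0., 0., 1., 1.]])
#     >>> b = torch.tensor([[0., 0., 1., 1.], [2., 2., 3., 3.]])
#     >>> calc_iou(a, b)
#     tensor([[1., 0.]])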


def calc_iou_vis(a, b):
    # note: this divides the intersection by the area of b only
    # (intersection-over-annotation), not by the full union
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
    ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])

    iw = torch.clamp(iw, min=0)
    ih = torch.clamp(ih, min=0)

    intersection = iw * ih

    IoU = intersection / area

    return IoU


def IoG(box_a, box_b):
    inter_xmin = torch.max(box_a[:, 0], box_b[:, 0])
    inter_ymin = torch.max(box_a[:, 1], box_b[:, 1])
    inter_xmax = torch.min(box_a[:, 2], box_b[:, 2])
    inter_ymax = torch.min(box_a[:, 3], box_b[:, 3])
    Iw = torch.clamp(inter_xmax - inter_xmin, min=0)
    Ih = torch.clamp(inter_ymax - inter_ymin, min=0)
    I = Iw * Ih
    G = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    return I / G


class FocalLoss(nn.Module):
    def __init__(self, is_cuda=True):
        super(FocalLoss, self).__init__()
        self.is_cuda = is_cuda

    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    regression_losses.append(torch.tensor(0).float().cuda())
                    classification_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())
                    classification_losses.append(torch.tensor(0).float())

                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(anchor, bbox_annotation[:, :4])  # num_anchors x num_annotations

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1
            if self.is_cuda:
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_ful = torch.ge(IoU_max, 0.5)
            positive_indices = positive_ful

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            # (the original wrapped this allocation in a bare try/except that
            # only printed the targets tensor; creating a ones tensor cannot
            # meaningfully fail, so the debug scaffolding is dropped)
            alpha_factor = torch.ones(targets.shape)
            if self.is_cuda:
                alpha_factor = alpha_factor.cuda()
            alpha_factor *= alpha

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            cls_zeros = torch.zeros(cls_loss.shape)
            if self.is_cuda:
                cls_zeros = cls_zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, cls_zeros)

            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                if self.is_cuda:
                    targets = targets.cuda() / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
                else:
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])

                regression_diff = torch.abs(targets - regression[positive_indices, :])

                # smooth L1 loss with beta = 1/9
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                if self.is_cuda:
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
            torch.stack(regression_losses).mean(dim=0, keepdim=True)


class LevelAttentionLoss(nn.Module):
    def __init__(self, is_cuda=True):
        super(LevelAttentionLoss, self).__init__()
        self.is_cuda = is_cuda

    def forward(self, img_batch_shape, attention_mask, bboxs):
        h, w = img_batch_shape[2], img_batch_shape[3]

        mask_losses = []

        batch_size = bboxs.shape[0]
        for j in range(batch_size):

            bbox_annotation = bboxs[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    mask_losses.append(torch.tensor(0).float().cuda())
                else:
                    mask_losses.append(torch.tensor(0).float())
                continue

            # keep only boxes that lie inside the image
            cond1 = torch.le(bbox_annotation[:, 0], w)
            cond2 = torch.le(bbox_annotation[:, 1], h)
            cond3 = torch.le(bbox_annotation[:, 2], w)
            cond4 = torch.le(bbox_annotation[:, 3], h)
            cond = cond1 * cond2 * cond3 * cond4

            bbox_annotation = bbox_annotation[cond, :]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    mask_losses.append(torch.tensor(0).float().cuda())
                else:
                    mask_losses.append(torch.tensor(0).float())
                continue

            bbox_area = (bbox_annotation[:, 2] - bbox_annotation[:, 0]) * (
                    bbox_annotation[:, 3] - bbox_annotation[:, 1])

            mask_loss = []
            for id in range(len(attention_mask)):

                attention_map = attention_mask[id][j, 0, :, :]

                # area band that assigns each GT box to one pyramid level
                min_area = (2 ** (id + 5)) ** 2 * 0.5
                max_area = (2 ** (id + 5) * 1.58) ** 2 * 2

                level_bbox_indice1 = torch.ge(bbox_area, min_area)
                level_bbox_indice2 = torch.le(bbox_area, max_area)

                level_bbox_indice = level_bbox_indice1 * level_bbox_indice2

                level_bbox_annotation = bbox_annotation[level_bbox_indice, :].clone()

                # level_bbox_annotation = bbox_annotation.clone()

                attention_h, attention_w = attention_map.shape

                if level_bbox_annotation.shape[0]:
                    level_bbox_annotation[:, 0] *= attention_w / w
                    level_bbox_annotation[:, 1] *= attention_h / h
                    level_bbox_annotation[:, 2] *= attention_w / w
                    level_bbox_annotation[:, 3] *= attention_h / h

                mask_gt = torch.zeros(attention_map.shape)
                if self.is_cuda:
                    mask_gt = mask_gt.cuda()

                for i in range(level_bbox_annotation.shape[0]):
                    x1 = max(int(level_bbox_annotation[i, 0]), 0)
                    y1 = max(int(level_bbox_annotation[i, 1]), 0)
                    x2 = min(math.ceil(level_bbox_annotation[i, 2]) + 1, attention_w)
                    y2 = min(math.ceil(level_bbox_annotation[i, 3]) + 1, attention_h)

                    mask_gt[y1:y2, x1:x2] = 1

                mask_gt = mask_gt[mask_gt >= 0]
                mask_predict = attention_map[attention_map >= 0]

                mask_loss.append(F.binary_cross_entropy(mask_predict, mask_gt))
            mask_losses.append(torch.stack(mask_loss).mean())

        return torch.stack(mask_losses).mean(dim=0, keepdim=True)

@@ -0,0 +1,385 @@
import torch.nn as nn
import torch
import math
from identification.utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes
from identification.anchors import Anchors
from identification.losses import LevelAttentionLoss, FocalLoss
from torchvision.ops.boxes import nms as tv_nms


def nms(dets, thresh):
    """Dispatch to torchvision's NMS implementation. Accepts dets as a (N, 5) tensor of boxes plus scores."""
    return tv_nms(dets[:, :4], dets[:, 4], thresh)
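
# --- Editor's note: an illustrative call (not part of the commit). Rows are
# (x1, y1, x2, y2, score); the second box overlaps the first with IoU > 0.3
# and a lower score, so only indices 0 and 2 survive:
#
#     >>> dets = torch.tensor([[0., 0., 10., 10., 0.9],
#     ...                      [1., 1., 10., 10., 0.8],
#     ...                      [20., 20., 30., 30., 0.7]])
#     >>> nms(dets, 0.3)
#     tensor([0, 2])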


class PyramidFeatures(nn.Module):
    def __init__(self, c3_size, c4_size, c5_size, feature_size=256):
        super(PyramidFeatures, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.p5_1 = nn.Conv2d(c5_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.p5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P5 elementwise to C4
        self.p4_1 = nn.Conv2d(c4_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.p4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P4 elementwise to C3
        self.p3_1 = nn.Conv2d(c3_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # "P6 is obtained via a 3x3 stride-2 conv on C5"
        self.p6 = nn.Conv2d(c5_size, feature_size, kernel_size=3, stride=2, padding=1)

        # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
        self.p7_1 = nn.ReLU()
        self.p7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1)

    def forward(self, inputs):
        c3, c4, c5 = inputs

        # TODO hack for old model: checkpoints pickled before Conv2d grew the
        # padding_mode attribute deserialize without it, so restore it here
        self.p5_1.padding_mode = 'zeros'
        self.p5_2.padding_mode = 'zeros'
        self.p4_1.padding_mode = 'zeros'
        self.p4_2.padding_mode = 'zeros'
        self.p3_1.padding_mode = 'zeros'
        self.p3_2.padding_mode = 'zeros'
        self.p6.padding_mode = 'zeros'
        self.p7_2.padding_mode = 'zeros'

        p5_x = self.p5_1(c5)
        p5_upsampled_x = self.p5_upsampled(p5_x)
        p5_x = self.p5_2(p5_x)

        p4_x = self.p4_1(c4)
        p4_x = p5_upsampled_x + p4_x
        p4_upsampled_x = self.p4_upsampled(p4_x)
        p4_x = self.p4_2(p4_x)

        p3_x = self.p3_1(c3)
        p3_x = p3_x + p4_upsampled_x
        p3_x = self.p3_2(p3_x)

        p6_x = self.p6(c5)

        p7_x = self.p7_1(p6_x)
        p7_x = self.p7_2(p7_x)

        return [p3_x, p4_x, p5_x, p6_x, p7_x]


class RegressionModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, feature_size=256):
        super(RegressionModel, self).__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1)

    def forward(self, x):
        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.output.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.act3(out)

        out = self.conv4(out)
        out = self.act4(out)

        out = self.output(out)

        # out is B x C x W x H, with C = 4*num_anchors
        out = out.permute(0, 2, 3, 1)

        return out.contiguous().view(out.shape[0], -1, 4)


class ClassificationModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, num_classes=80, feature_size=256):
        super(ClassificationModel, self).__init__()

        self.num_classes = num_classes
        self.num_anchors = num_anchors

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    def forward(self, x):
        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.output.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.act3(out)

        out = self.conv4(out)
        out = self.act4(out)

        out = self.output(out)
        out = self.output_act(out)

        # out is B x C x W x H, with C = num_anchors * num_classes
        out1 = out.permute(0, 2, 3, 1)

        batch_size, width, height, channels = out1.shape

        out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes)

        return out2.contiguous().view(x.shape[0], -1, self.num_classes)


class LevelAttentionModel(nn.Module):
    def __init__(self, num_features_in, feature_size=256):
        super(LevelAttentionModel, self).__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        self.conv5 = nn.Conv2d(feature_size, 1, kernel_size=3, padding=1)

        self.output_act = nn.Sigmoid()

    def forward(self, x):
        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.conv5.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.act3(out)

        out = self.conv4(out)
        out = self.act4(out)

        out = self.conv5(out)
        out_attention = self.output_act(out)

        return out_attention


class ResNet(nn.Module):
    def __init__(self, num_classes, block, layers, is_cuda=True):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels, self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels, self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError("Invalid block type")

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.levelattentionModel = LevelAttentionModel(256)

        self.anchors = Anchors(is_cuda=is_cuda)

        self.regressBoxes = BBoxTransform(is_cuda=is_cuda)

        self.clipBoxes = ClipBoxes()

        self.levelattentionLoss = LevelAttentionLoss(is_cuda=is_cuda)

        self.focalLoss = FocalLoss(is_cuda=is_cuda)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # init.xavier_normal(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.levelattentionModel.conv5.weight.data.fill_(0)
        self.levelattentionModel.conv5.bias.data.fill_(0)

        self.freeze_bn()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def freeze_bn(self):
        """Freeze BatchNorm layers."""
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def forward(self, inputs):
        if self.training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs
            annotations = None

        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'

        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        features = self.fpn([x2, x3, x4])

        attention = [self.levelattentionModel(feature) for feature in features]

        # i = 1
        # for level in attention:
        #     i += 1
        #     level = level.squeeze(0)
        #     level = np.array(255 * unnormalize(level)).copy()
        #     level = np.transpose(level, (1, 2, 0))
        #     plt.imsave(os.path.join('./output', str(i) + '.jpg'), level[:,:,0])

        features = [features[i] * torch.exp(attention[i]) for i in range(len(features))]

        regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)

        classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)

        anchors = self.anchors(img_batch)

        if self.training:
            clc_loss, reg_loss = self.focalLoss(classification, regression, anchors, annotations)
            mask_loss = self.levelattentionLoss(img_batch.shape, attention, annotations)
            return clc_loss, reg_loss, mask_loss
        else:
            # note: the regression deltas are left unapplied here (the
            # regressBoxes call is commented out), so the returned boxes are
            # the clipped anchors themselves
            # transformed_anchors = self.regressBoxes(anchors, regression)
            transformed_anchors = self.clipBoxes(anchors, img_batch)

            scores = torch.max(classification, dim=2, keepdim=True)[0]
            scores_over_thresh = (scores > 0.05)[0, :, 0]

            if scores_over_thresh.sum() == 0:
                # no boxes to NMS, just return
                # return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
                return [None, None, None]

            classification = classification[:, scores_over_thresh, :]
            transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
            scores = scores[:, scores_over_thresh, :]

            anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.3)
            nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
            return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]


def resnet18(num_classes, is_cuda=True):
    return ResNet(num_classes, BasicBlock, [2, 2, 2, 2], is_cuda=is_cuda)


def resnet34(num_classes, is_cuda=True):
    return ResNet(num_classes, BasicBlock, [3, 4, 6, 3], is_cuda=is_cuda)


def resnet50(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 4, 6, 3], is_cuda=is_cuda)


def resnet101(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 4, 23, 3], is_cuda=is_cuda)


def resnet152(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 8, 36, 3], is_cuda=is_cuda)
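
# --- Editor's note: a hedged construction sketch (not part of the commit).
# At inference the network takes a normalized, 32-aligned image batch and
# returns [scores, classes, boxes], or [None, None, None] when nothing clears
# the 0.05 score floor (which is the expected outcome for untrained weights,
# given the 0.01 prior used to initialize the classification bias):
#
#     net = resnet50(num_classes=1, is_cuda=False)
#     net.eval()
#     with torch.no_grad():
#         scores, classes, boxes = net(torch.randn(1, 3, 224, 224))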

@@ -0,0 +1,282 @@
import torch
import torch.nn as nn
import numpy as np


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        if self.downsample is not None:
            self.downsample[0].padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y


class BottleneckSE(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=16):
        super(BottleneckSE, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.se = SELayer(planes * 4, reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = self.se(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class CBAMModule(nn.Module):
    def __init__(self, channels, reduction):
        super(CBAMModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
                             padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
                             padding=0)
        self.sigmoid_channel = nn.Sigmoid()
        self.conv_after_concat = nn.Conv2d(2, 1, kernel_size=7, stride=1, padding=3)
        self.sigmoid_spatial = nn.Sigmoid()

    def forward(self, x):
        # channel attention: shared MLP over avg- and max-pooled descriptors
        module_input = x
        avg = self.avg_pool(x)
        mx = self.max_pool(x)
        avg = self.fc1(avg)
        mx = self.fc1(mx)
        avg = self.relu(avg)
        mx = self.relu(mx)
        avg = self.fc2(avg)
        mx = self.fc2(mx)
        x = avg + mx
        x = self.sigmoid_channel(x)
        x = module_input * x
        # spatial attention: 7x7 conv over stacked channel-wise mean and max
        module_input = x
        avg = torch.mean(x, 1, True)
        mx, _ = torch.max(x, 1, True)
        x = torch.cat((avg, mx), 1)
        x = self.conv_after_concat(x)
        x = self.sigmoid_spatial(x)
        x = module_input * x
        return x


class BottleneckCBAM(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=16):
        super(BottleneckCBAM, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.se = CBAMModule(planes * 4, reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = self.se(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class BBoxTransform(nn.Module):
    def __init__(self, mean=None, std=None, is_cuda=True):
        super(BBoxTransform, self).__init__()
        if mean is None:
            self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32))
            if is_cuda:
                self.mean = self.mean.cuda()
        else:
            self.mean = mean
        if std is None:
            self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32))
            if is_cuda:
                self.std = self.std.cuda()
        else:
            self.std = std

    def forward(self, boxes, deltas):
        widths = boxes[:, :, 2] - boxes[:, :, 0]
        heights = boxes[:, :, 3] - boxes[:, :, 1]
        ctr_x = boxes[:, :, 0] + 0.5 * widths
        ctr_y = boxes[:, :, 1] + 0.5 * heights

        dx = deltas[:, :, 0] * self.std[0] + self.mean[0]
        dy = deltas[:, :, 1] * self.std[1] + self.mean[1]
        dw = deltas[:, :, 2] * self.std[2] + self.mean[2]
        dh = deltas[:, :, 3] * self.std[3] + self.mean[3]

        pred_ctr_x = ctr_x + dx * widths
        pred_ctr_y = ctr_y + dy * heights
        pred_w = torch.exp(dw) * widths
        pred_h = torch.exp(dh) * heights

        pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
        pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
        pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
        pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h

        pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)

        return pred_boxes


class ClipBoxes(nn.Module):
    def __init__(self):
        super(ClipBoxes, self).__init__()

    def forward(self, boxes, img):
        batch_size, num_channels, height, width = img.shape

        boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0)
        boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0)

        boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width)
        boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height)

        return boxes
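
# --- Editor's note: a worked decoding example for BBoxTransform (illustrative
# only, not part of the commit). With zero deltas the anchor is returned
# unchanged, since dx = dy = 0 and exp(0) = 1:
#
#     >>> bt = BBoxTransform(is_cuda=False)
#     >>> boxes = torch.tensor([[[0., 0., 10., 10.]]])
#     >>> bt(boxes, torch.zeros(1, 1, 4))
#     tensor([[[ 0.,  0., 10., 10.]]])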

@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-

import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter


class AngleLinear(nn.Module):
    def __init__(self, in_features, out_features):
        super(AngleLinear, self).__init__()
        self.W = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.W)

    def forward(self, input):
        x = F.normalize(input)
        W = F.normalize(self.W)
        return F.linear(x, W)


class AdaCos(nn.Module):
    def __init__(self, num_classes, m=0.50, is_cuda=True):
        super(AdaCos, self).__init__()
        self.n_classes = num_classes
        self.s = math.sqrt(2) * math.log(num_classes - 1)
        self.base_s = self.s
        self.m = m
        self.criterion = nn.CrossEntropyLoss()
        if is_cuda:
            self.criterion = self.criterion.cuda()

    def forward(self, input, label):
        # changed to fixed adacos
        # theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
        # one_hot = torch.zeros_like(input)
        # one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # with torch.no_grad():
        #     B_avg = torch.where(one_hot < 1, torch.exp(self.s * input), torch.zeros_like(input))
        #     B_avg = torch.sum(B_avg) / input.size(0)
        #     theta_med = torch.median(theta)
        #     self.s = torch.log(B_avg) / torch.cos(torch.min(math.pi/4 * torch.ones_like(theta_med), theta_med))
        #     # TODO why converge to infinity ?
        #     self.s = torch.clamp(self.s, self.base_s / 2, self.base_s * 2)
        # print(self.s)
        output = self.s * input

        return self.criterion(output, label)


class ArcFace(nn.Module):
    def __init__(self, s=30.0, m=0.50, is_cuda=True):
        super(ArcFace, self).__init__()
        self.s = s
        self.m = m
        self.criterion = nn.CrossEntropyLoss()
        if is_cuda:
            self.criterion = self.criterion.cuda()

    def forward(self, input, label):
        theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
        target_logits = torch.cos(theta + self.m)
        one_hot = torch.zeros_like(input)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = input * (1 - one_hot) + target_logits * one_hot
        output *= self.s

        return self.criterion(output, label)
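
# --- Editor's note: a hedged usage sketch (not part of the commit). The margin
# heads consume cosine similarities, so they pair with AngleLinear above:
#
#     head = AngleLinear(in_features=512, out_features=10)
#     loss_fn = ArcFace(s=30.0, m=0.5, is_cuda=False)
#     cosine = head(torch.randn(4, 512))   # (4, 10) cosine logits in [-1, 1]
#     loss = loss_fn(cosine, torch.tensor([1, 2, 3, 4]))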


class SphereFace(nn.Module):
    def __init__(self, s=30.0, m=1.35, is_cuda=True):
        super(SphereFace, self).__init__()
        self.s = s
        self.m = m
        self.criterion = nn.CrossEntropyLoss()
        if is_cuda:
            self.criterion = self.criterion.cuda()

    def forward(self, input, label):
        theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
        target_logits = torch.cos(self.m * theta)
        one_hot = torch.zeros_like(input)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = input * (1 - one_hot) + target_logits * one_hot
        output *= self.s

        return self.criterion(output, label)


class CosFace(nn.Module):
    def __init__(self, s=30.0, m=0.35, is_cuda=True):
        super(CosFace, self).__init__()
        self.s = s
        self.m = m
        self.criterion = nn.CrossEntropyLoss()
        if is_cuda:
            self.criterion = self.criterion.cuda()

    def forward(self, input, label):
        target_logits = input - self.m
        one_hot = torch.zeros_like(input)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = input * (1 - one_hot) + target_logits * one_hot
        output *= self.s

        return self.criterion(output, label)
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
"""
Created on 18-6-7 at 10:11 AM

@author: ronghuaiyang
"""

import torch
import torch.nn as nn


class FocalLoss(nn.Module):
    def __init__(self, gamma=0, eps=1e-7, is_cuda=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        # eps is kept for API compatibility; it is not used below
        self.eps = eps
        self.ce = nn.CrossEntropyLoss()
        if is_cuda:
            self.ce = self.ce.cuda()

    def forward(self, inp, target):
        logp = self.ce(inp, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()
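
# --- Editor's note: quick sanity check (illustrative only, not part of the
# commit). With gamma = 0 the focal term vanishes and the module reduces to
# plain cross-entropy:
#
#     logits = torch.randn(8, 5)
#     target = torch.randint(0, 5, (8,))
#     fl = FocalLoss(gamma=0, is_cuda=False)
#     assert torch.allclose(fl(logits, target), nn.CrossEntropyLoss()(logits, target))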

@@ -0,0 +1,129 @@
import torchvision.models as models
from torch import nn


# NOTE: the `pretrained` flag below is accepted for API compatibility but is
# not forwarded to torchvision, so these constructors always start from
# randomly initialized weights.
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet18(num_classes=512, **kwargs)
    return model


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet34(num_classes=512, **kwargs)
    return model


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet50(num_classes=512, **kwargs)
    return model


def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet101(num_classes=512, **kwargs)
    return model


def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet152(num_classes=512, **kwargs)
    return model


def sphere20():
    return sphere20a()


class sphere20a(nn.Module):
    def __init__(self):
        super(sphere20a, self).__init__()
        # input = B*3*112*96 in the original SphereFace; this variant expects
        # 224x224 inputs (see the enlarged fc5 below)
        self.conv1_1 = nn.Conv2d(3, 64, 3, 2, 1)  # => B*64*56*48
        self.relu1_1 = nn.PReLU(64)
        self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1)
        self.relu1_2 = nn.PReLU(64)
        self.conv1_3 = nn.Conv2d(64, 64, 3, 1, 1)
        self.relu1_3 = nn.PReLU(64)

        self.conv2_1 = nn.Conv2d(64, 128, 3, 2, 1)  # => B*128*28*24
        self.relu2_1 = nn.PReLU(128)
        self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1)
        self.relu2_2 = nn.PReLU(128)
        self.conv2_3 = nn.Conv2d(128, 128, 3, 1, 1)
        self.relu2_3 = nn.PReLU(128)

        self.conv2_4 = nn.Conv2d(128, 128, 3, 1, 1)  # => B*128*28*24
        self.relu2_4 = nn.PReLU(128)
        self.conv2_5 = nn.Conv2d(128, 128, 3, 1, 1)
        self.relu2_5 = nn.PReLU(128)

        self.conv3_1 = nn.Conv2d(128, 256, 3, 2, 1)  # => B*256*14*12
        self.relu3_1 = nn.PReLU(256)
        self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_2 = nn.PReLU(256)
        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_3 = nn.PReLU(256)

        self.conv3_4 = nn.Conv2d(256, 256, 3, 1, 1)  # => B*256*14*12
        self.relu3_4 = nn.PReLU(256)
        self.conv3_5 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_5 = nn.PReLU(256)

        self.conv3_6 = nn.Conv2d(256, 256, 3, 1, 1)  # => B*256*14*12
        self.relu3_6 = nn.PReLU(256)
        self.conv3_7 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_7 = nn.PReLU(256)

        self.conv3_8 = nn.Conv2d(256, 256, 3, 1, 1)  # => B*256*14*12
        self.relu3_8 = nn.PReLU(256)
        self.conv3_9 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_9 = nn.PReLU(256)

        self.conv4_1 = nn.Conv2d(256, 512, 3, 2, 1)  # => B*512*7*6
        self.relu4_1 = nn.PReLU(512)
        self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_2 = nn.PReLU(512)
        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_3 = nn.PReLU(512)

        self.fc5 = nn.Linear(512 * 14 * 14, 512)
        # ORIGINAL for 112x96: self.fc5 = nn.Linear(512*7*6,512)

    def forward(self, x):
        x = self.relu1_1(self.conv1_1(x))
        x = x + self.relu1_3(self.conv1_3(self.relu1_2(self.conv1_2(x))))

        x = self.relu2_1(self.conv2_1(x))
        x = x + self.relu2_3(self.conv2_3(self.relu2_2(self.conv2_2(x))))
        x = x + self.relu2_5(self.conv2_5(self.relu2_4(self.conv2_4(x))))

        x = self.relu3_1(self.conv3_1(x))
        x = x + self.relu3_3(self.conv3_3(self.relu3_2(self.conv3_2(x))))
        x = x + self.relu3_5(self.conv3_5(self.relu3_4(self.conv3_4(x))))
        x = x + self.relu3_7(self.conv3_7(self.relu3_6(self.conv3_6(x))))
        x = x + self.relu3_9(self.conv3_9(self.relu3_8(self.conv3_8(x))))

        x = self.relu4_1(self.conv4_1(x))
        x = x + self.relu4_3(self.conv4_3(self.relu4_2(self.conv4_2(x))))

        x = x.view(x.size(0), -1)
        x = self.fc5(x)
        return x
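
# --- Editor's note: shape sanity check for the modified sphere20a
# (illustrative only, not part of the commit). Four stride-2 convs take a
# 224x224 input down to 14x14, matching the enlarged fc5 above:
#
#     import torch
#     net = sphere20a()
#     feat = net(torch.randn(1, 3, 224, 224))
#     assert feat.shape == (1, 512)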

@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
"""
Created on 18-5-30 at 4:55 PM

@author: ronghuaiyang
"""
import os
import argparse

from torch.utils.data import DataLoader

from recognition.nets import resnet18, resnet34, resnet50, resnet101, resnet152, sphere20
import torch
import numpy as np
from torch.nn import DataParallel
from PIL import Image
from torchvision import transforms as T


imagesize = 224
batch_size = 20


class Dataset(torch.utils.data.Dataset):
    def __init__(self, identity_list, root_path):
        self.identity_list = identity_list
        self.root_path = root_path

        normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

        self.transforms = T.Compose([
            T.Resize(imagesize),
            T.ToTensor(),
            normalize
        ])

    def __getitem__(self, index):
        a, b, label = self.identity_list[index]
        a_data = self.load_image(a)
        b_data = self.load_image(b)
        return a_data, b_data, label

    def load_image(self, p):
        img_path = os.path.join(self.root_path, p)
        data = Image.open(img_path)
        if data is None:
            return None
        data = data.convert(mode="RGB")
        data = self.transforms(data)
        return data

    def __len__(self):
        return len(self.identity_list)


def get_pair_list(pair_list):
    print('Loading pair list')
    with open(pair_list, 'r') as fd:
        pairs = fd.readlines()
    return [line.split() for line in pairs]


def load_img_data(identity_list, root_path):
    dataset = Dataset(identity_list, root_path)
    loader = DataLoader(dataset,
                        batch_size=batch_size,
                        shuffle=False,
                        # pin_memory=True,
                        num_workers=0)
    return loader


def lfw_test2(model, identity_list, img_data, is_cuda=True):
    print('Converting to features')
    sims = []
    labels = []
    max_size = len(img_data) * batch_size
    for i, sample in enumerate(img_data):
        if i % 10 == 0:
            print('%d of %d' % (i * batch_size, max_size))
        a_data, b_data, label = sample
        if is_cuda:
            a_data = a_data.cuda()
            b_data = b_data.cuda()

        a_output = model(a_data).detach().cpu().numpy()
        b_output = model(b_data).detach().cpu().numpy()

        # iterate over the actual batch, which may be smaller than batch_size
        # for the final batch (the original indexed up to batch_size and could
        # raise an IndexError there)
        for idx in range(a_output.shape[0]):
            sim = cosin_metric(a_output[idx], b_output[idx])
            sims.append(sim)
            # np.bool was removed in NumPy 1.24; the builtin bool suffices here
            labels.append(bool(label[idx] == '1'))

    acc, th = cal_accuracy(sims, labels)
    print('lfw face verification accuracy: ', acc, 'threshold: ', th)
    return acc


def cosin_metric(x1, x2):
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))


def cal_accuracy(y_score, y_true):
    y_score = np.asarray(y_score)
    y_true = np.asarray(y_true)
    best_acc = 0
    best_th = 0
    for i in range(len(y_score)):
        th = y_score[i]
        y_test = (y_score >= th)
        acc = np.mean((y_test == y_true).astype(int))
        if acc > best_acc:
            best_acc = acc
            best_th = th

    return best_acc, best_th
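
# --- Editor's note: a tiny worked example for cal_accuracy (illustrative
# only, not part of the commit). Each score is tried as a threshold and the
# one with the best agreement wins; here 0.8 separates the classes perfectly:
#
#     >>> cal_accuracy([0.9, 0.8, 0.4, 0.3], [True, True, False, False])
#     (1.0, 0.8)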
||||
|
||||
|
||||
def main(args=None): |
||||
parser = argparse.ArgumentParser(description='Testing script for face identification.') |
||||
|
||||
parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, default=50) |
||||
parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel', |
||||
default=False, action='store_true') |
||||
parser.add_argument('--model', help='Path to model') |
||||
parser.add_argument('--batch_size', help='Batch size (default 50)', type=int, default=50) |
||||
parser.add_argument('--lfw_root', help='Path to LFW dataset') |
||||
parser.add_argument('--lfw_pair_list', help='Path to LFW pair list file') |
||||
|
||||
parser = parser.parse_args(args) |
||||
|
||||
is_cuda = torch.cuda.is_available() |
||||
print('CUDA available: {}'.format(is_cuda)) |
||||
|
||||
if parser.depth == 18: |
||||
model = resnet18() |
||||
elif parser.depth == 20: |
||||
model = sphere20() |
||||
elif parser.depth == 34: |
||||
model = resnet34() |
||||
elif parser.depth == 50: |
||||
model = resnet50() |
||||
elif parser.depth == 101: |
||||
model = resnet101() |
||||
elif parser.depth == 152: |
||||
model = resnet152() |
||||
else: |
||||
raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') |
||||
|
||||
if parser.parallel: |
||||
model = DataParallel(model) |
||||
|
||||
# load_model(model, opt.test_model_path) |
||||
model.load_state_dict(torch.load(parser.model)) |
||||
if is_cuda: |
||||
model.cuda() |
||||
|
||||
identity_list = get_pair_list(parser.lfw_pair_list) |
||||
img_data = load_img_data(identity_list, parser.lfw_root) |
||||
|
||||
model.eval() |
||||
lfw_test2(model, identity_list, img_data, is_cuda=is_cuda) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
main() |
||||
@ -0,0 +1,201 @@ |
import argparse
import os
import time

import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torch.optim.lr_scheduler import StepLR
from torchvision import transforms as T

from recognition.angle import AngleLinear, CosFace, SphereFace, ArcFace, AdaCos
from recognition.focal_loss import FocalLoss
from recognition.nets import resnet18, resnet34, resnet50, resnet101, resnet152, sphere20
from recognition.test import lfw_test2, get_pair_list, load_img_data


class Dataset(torch.utils.data.Dataset):
    def __init__(self, root, data_list_file, imagesize):
        with open(data_list_file, 'r') as fd:
            imgs = fd.readlines()

        # each line is '<relative path> <label>'; strip the newline before joining
        imgs = [os.path.join(root, img.rstrip('\n')) for img in imgs]
        # sort so the label -> class-index mapping is deterministic across runs
        self.labels = sorted(set(img.split()[1] for img in imgs))
        self.imgs = np.random.permutation(imgs)

        normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

        self.transforms = T.Compose([
            T.RandomResizedCrop(imagesize),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            normalize
        ])

    def __getitem__(self, index):
        sample = self.imgs[index]
        splits = sample.split()
        img_path = splits[0]
        data = Image.open(img_path)
        data = data.convert(mode="RGB")
        data = self.transforms(data)
        cls = self.label_to_class(splits[1])
        return data.float(), cls

    def __len__(self):
        return len(self.imgs)

    def label_to_class(self, label):
        for idx, v in enumerate(self.labels):
            if v == label:
                return idx
        raise Exception("Unknown label %s" % label)

    def num_labels(self):
        return len(self.labels)
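
    # NOTE: label_to_class above is a linear scan, O(num_labels) per sample;
    # a {label: index} dict built once in __init__ would be an easy speedup.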


def main(args=None):
    parser = argparse.ArgumentParser(description='Training script for face identification.')

    parser.add_argument('--print_freq', help='Print every N batch (default 100)', type=int, default=100)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, default=50)
    parser.add_argument('--lr_step', help='Learning rate step (default 10)', type=int, default=10)
    parser.add_argument('--lr', help='Learning rate (default 0.1)', type=float, default=0.1)
    parser.add_argument('--weight_decay', help='Weight decay (default 0.0005)', type=float, default=0.0005)
    # TODO: easy_margin is parsed but not yet passed on to ArcFace below
    parser.add_argument('--easy_margin', help='Use easy margin (default false)', dest='easy_margin', default=False, action='store_true')
    parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel',
                        default=False, action='store_true')
    parser.add_argument('--loss', help='One of focal_loss, cross_entropy, arcface, cosface, sphereface, adacos (default cross_entropy)',
                        type=str, default='cross_entropy')
    parser.add_argument('--optimizer', help='One of sgd, adam (default sgd)',
                        type=str, default='sgd')
    parser.add_argument('--batch_size', help='Batch size (default 16)', type=int, default=16)
    parser.add_argument('--casia_list', help='Path to CASIA dataset file list (training)')
    parser.add_argument('--casia_root', help='Path to CASIA images (training)')
    parser.add_argument('--lfw_root', help='Path to LFW dataset (testing)')
    parser.add_argument('--lfw_pair_list', help='Path to LFW pair list file (testing)')
    parser.add_argument('--model_name', help='Name of the model to save')

    parser = parser.parse_args(args)

    is_cuda = torch.cuda.is_available()
    print('CUDA available: {}'.format(is_cuda))

    imagesize = 224
    if parser.depth == 18:
        model = resnet18()
    elif parser.depth == 20:
        model = sphere20()
    elif parser.depth == 34:
        model = resnet34()
    elif parser.depth == 50:
        model = resnet50()
    elif parser.depth == 101:
        model = resnet101()
    elif parser.depth == 152:
        model = resnet152()
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 20, 34, 50, 101, 152')

    # TODO split training dataset to train/validation and stop using test dataset for acc
    train_dataset = Dataset(parser.casia_root, parser.casia_list, imagesize)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=parser.batch_size,
                                              shuffle=True,
                                              # pin_memory=True,
                                              num_workers=0)
    num_classes = train_dataset.num_labels()

    # the margin-based losses (cosface/arcface/sphereface/adacos) pair with
    # the angular head AngleLinear; focal loss and cross entropy use a plain
    # linear classifier over the 512-dim features
    if parser.loss == 'focal_loss':
        metric_fc = nn.Linear(512, num_classes)
        criterion = FocalLoss(gamma=2, is_cuda=is_cuda)
    elif parser.loss == 'cross_entropy':
        metric_fc = nn.Linear(512, num_classes)
        criterion = torch.nn.CrossEntropyLoss()
        if is_cuda:
            criterion = criterion.cuda()
    elif parser.loss == 'cosface':
        metric_fc = AngleLinear(512, num_classes)
        criterion = CosFace(is_cuda=is_cuda)
    elif parser.loss == 'arcface':
        metric_fc = AngleLinear(512, num_classes)
        criterion = ArcFace(is_cuda=is_cuda)
    elif parser.loss == 'sphereface':
        metric_fc = AngleLinear(512, num_classes)
        criterion = SphereFace(is_cuda=is_cuda)
    elif parser.loss == 'adacos':
        metric_fc = AngleLinear(512, num_classes)
        criterion = AdaCos(num_classes, is_cuda=is_cuda)
    else:
        raise ValueError('Unknown loss %s' % parser.loss)
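
    # for reference: with L2-normalized features x and class weights W these
    # heads work on cos(theta) = W^T x. CosFace uses s * (cos(theta_y) - m)
    # for the target class y, ArcFace uses s * cos(theta_y + m), and AdaCos
    # tunes the scale s adaptively; the implementations (and their s/m
    # defaults) live in recognition/angle.py.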

    if parser.optimizer == 'sgd':
        optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
                                    lr=parser.lr, weight_decay=parser.weight_decay)
    elif parser.optimizer == 'adam':
        optimizer = torch.optim.Adam([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
                                     lr=parser.lr, weight_decay=parser.weight_decay)
    else:
        raise ValueError('Unknown optimizer %s' % parser.optimizer)

    scheduler = StepLR(optimizer, step_size=parser.lr_step, gamma=0.1)

    if parser.parallel:
        model = nn.DataParallel(model)
        metric_fc = nn.DataParallel(metric_fc)

    if is_cuda:
        model.cuda()
        metric_fc.cuda()

    print(model)
    print(metric_fc)

    identity_list = get_pair_list(parser.lfw_pair_list)
    img_data = load_img_data(identity_list, parser.lfw_root)

    print('{} train iters per epoch'.format(len(trainloader)))

    start = time.time()
    last_acc = 0.0
    for i in range(parser.epochs):
        model.train()
        for ii, data in enumerate(trainloader):
            data_input, label = data
            if is_cuda:
                data_input = data_input.cuda()
                label = label.cuda().long()
            feature = model(data_input)
            output = metric_fc(feature)
            loss = criterion(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iters = i * len(trainloader) + ii

            if iters % parser.print_freq == 0:
                speed = parser.print_freq / (time.time() - start)
                time_str = time.asctime(time.localtime(time.time()))
                print('{} train epoch {} iter {} {} iters/s loss {}'.format(time_str, i, ii, speed, loss.item()))

                start = time.time()

        # PyTorch >= 1.1 expects the LR scheduler to step after optimizer.step(),
        # so step once at the end of each epoch instead of at the start
        scheduler.step()

        model.eval()
        acc = lfw_test2(model, identity_list, img_data, is_cuda=is_cuda)
        print('Accuracy: %f' % acc)
        if last_acc < acc:
            # TODO remove makedir
            os.makedirs('./ckpt', exist_ok=True)
            torch.save(model.state_dict(), './ckpt/' + parser.model_name + '_{}.pt'.format(i))
            torch.save(metric_fc.state_dict(), './ckpt/' + parser.model_name + '_metric_{}.pt'.format(i))
            # remember the best accuracy so far; without this update every
            # epoch would be checkpointed
            last_acc = acc


if __name__ == '__main__':
    main()
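
# Example invocation (assuming this file is saved as recognition/train.py;
# paths and the model name are placeholders):
#   python -m recognition.train --depth 50 --loss arcface \
#       --casia_root /data/casia --casia_list /data/casia_list.txt \
#       --lfw_root /data/lfw --lfw_pair_list /data/lfw_test_pair.txt \
#       --model_name arcface_r50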
@ -0,0 +1,4 @@
Flask
Pillow
https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl
https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp37-cp37m-linux_x86_64.whl
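# the wheel URLs above pin torch 1.1.0 and torchvision 0.3.0 built for
# CUDA 10.0 and CPython 3.7 on linux x86_64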
@ -0,0 +1,82 @@
import logging
import os
import tempfile

from flask import Flask, request, abort, jsonify
from werkzeug.utils import secure_filename

import torch
from recognition.nets import resnet50
from torchvision import transforms as T
from PIL import Image
import identification.detector as fan

is_cuda = torch.cuda.is_available()
# load face detection model
fan_model = fan.load_model('ckpt/wider6_10.pt', is_cuda=is_cuda)

# load recognition model
rec_model = resnet50()
rec_model.load_state_dict(torch.load('ckpt/recongition3_37.pt', map_location=lambda storage, location: storage))
rec_model.eval()
if is_cuda:
    rec_model = rec_model.cuda()

# preprocessing for the recognition network
normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])

imagesize = 224
transforms = T.Compose([
    T.Resize((imagesize, imagesize)),
    T.ToTensor(),
    normalize
])

app = Flask(__name__)
UPLOAD_FOLDER = tempfile.gettempdir()
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

def compute_vector(data):
    with torch.no_grad():
        data = transforms(data)
        if is_cuda:
            data = data.cuda()
        mo = rec_model(data.unsqueeze(dim=0))
    return mo.detach().cpu().numpy()
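
# compute_vector returns a (1, 512) numpy array (512 matching the feature
# width of the training heads); callers squeeze it to a flat vector below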


@app.route('/vectorize', methods=['GET', 'POST'])
def upload_file():
    if request.method == 'GET':
        return 'OK'

    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            abort(400)
        f = request.files['file']
        if f:
            filename = secure_filename(f.filename)
            filepath = os.path.join(UPLOAD_FOLDER, filename)
            f.save(filepath)

            img = Image.open(filepath)
            data = img.convert(mode="RGB")

            with torch.no_grad():
                boxes = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda).astype(int)
            # boxes are (x1, y1, x2, y2), as the img.crop call below assumes;
            # keep only faces at least half the network input size in both
            # width and height
            boxes = [b for b in boxes if b[2] - b[0] >= imagesize / 2 and b[3] - b[1] >= imagesize / 2]

            if len(boxes) == 0:
                abort(404)

            extracted = [{'box': arr.tolist(),
                          'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist()}
                         for arr in boxes]
            return jsonify(extracted)
        else:
            abort(400)


if __name__ == '__main__':
    logging.basicConfig()
    app.run()
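
# Example request once the server is running (default Flask port 5000):
#   curl -F 'file=@face.jpg' http://localhost:5000/vectorize
# responds with a JSON list of {"box": [x1, y1, x2, y2], "vector": [...]}
# entries, one per detected face, or 404 if no large-enough face is found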