
Faceserver vectorizer training

Branch: master
Author: Petr Masopust, 6 years ago
Parent: 95d4f2ca7d
Commit: aa6f45fc8b
Changed files (lines changed):

1. vectorizer/identification/csv_eval.py (238)
2. vectorizer/identification/dataloader.py (566)
3. vectorizer/identification/detector.py (104)
4. vectorizer/identification/train.py (207)
5. vectorizer/lfw_test_pair.txt (6000)
6. vectorizer/train-rec.sh (2)
7. vectorizer/train.sh (7)
8. vectorizer/vectorizer/server.py (18)

@@ -0,0 +1,238 @@
import numpy as np
import torch


def compute_overlap(a, b):
    """
    Compute the IoU overlap between two sets of boxes.

    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float

    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])

    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih
    ua = np.maximum(ua, np.finfo(float).eps)

    intersection = iw * ih

    return intersection / ua


def _compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, is_cuda=True):
    """ Get the detections from the retinanet using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]

    # Arguments
        dataset         : The generator used to run images through the retinanet.
        retinanet       : The retinanet to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        is_cuda         : Whether CUDA is available.
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))]

    retinanet.eval()

    with torch.no_grad():
        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            # run network
            img_data = data['img'].permute(2, 0, 1).float().unsqueeze(dim=0)
            if is_cuda:
                img_data = img_data.cuda()
            scores, labels, boxes = retinanet(img_data)
            if isinstance(scores, torch.Tensor):
                scores = scores.cpu().numpy()
                labels = labels.cpu().numpy()
                boxes = boxes.cpu().numpy()

                # correct boxes for image scale
                boxes /= scale

                # select indices which have a score above the threshold
                indices = np.where(scores > score_threshold)[0]

                # select those scores
                scores = scores[indices]

                # find the order with which to sort the scores
                scores_sort = np.argsort(-scores)[:max_detections]

                # select detections
                image_boxes = boxes[indices[scores_sort], :]
                image_scores = scores[scores_sort]
                image_labels = labels[indices[scores_sort]]
                image_detections = np.concatenate(
                    [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)

                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1]
            else:
                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = np.zeros((0, 5))

            print('{}/{}'.format(index + 1, len(dataset)), end='\r')

    return all_detections


def _get_annotations(generator):
    """ Get the ground truth annotations from the generator.

    The result is a list of lists such that the size is:
        all_annotations[num_images][num_classes] = annotations[num_detections, 5]

    # Arguments
        generator : The generator used to retrieve ground truth annotations.
    # Returns
        A list of lists containing the annotations for each image in the generator.
    """
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))]

    for i in range(len(generator)):
        # load the annotations
        annotations = generator.load_annotations(i)

        # copy detections to all_annotations
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

        print('{}/{}'.format(i + 1, len(generator)), end='\r')

    return all_annotations


def evaluate(
        generator,
        retinanet,
        iou_threshold=0.5,
        score_threshold=0.05,
        max_detections=100,
        is_cuda=True,
        save_path=None
):
    """ Evaluate a given dataset using a given retinanet.

    # Arguments
        generator       : The generator that represents the dataset to evaluate.
        retinanet       : The retinanet to evaluate.
        iou_threshold   : The threshold used to consider when a detection is positive or negative.
        score_threshold : The score confidence threshold to use for detections.
        max_detections  : The maximum number of detections to use per image.
        is_cuda         : Whether CUDA is available.
        save_path       : The path to save images with visualized detections to.
    # Returns
        A dict mapping class names to mAP scores.
    """
    # gather all detections and annotations
    all_detections = _get_detections(generator, retinanet, score_threshold=score_threshold,
                                     max_detections=max_detections, is_cuda=is_cuda)
    all_annotations = _get_annotations(generator)
    average_precisions = {}

    for label in range(generator.num_classes()):
        false_positives = np.zeros((0,))
        true_positives = np.zeros((0,))
        scores = np.zeros((0,))
        num_annotations = 0.0

        for i in range(len(generator)):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)
                    continue

                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap = overlaps[0, assigned_annotation]

                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue

        # sort by score
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations

    print('\nmAP:')
    for label in range(generator.num_classes()):
        label_name = generator.label_to_name(label)
        print('{}: {}'.format(label_name, average_precisions[label][0]))

    return average_precisions
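
The IoU and AP helpers above are pure numpy and can be sanity-checked on toy boxes. A minimal sketch (values invented, assuming the functions above are importable):

import numpy as np

det = np.array([[0., 0., 10., 10.]])   # one detection, (x1, y1, x2, y2)
gt = np.array([[0., 0., 10., 10.],     # perfect match -> IoU 1.0
               [5., 5., 15., 15.]])    # 25 px intersection / 175 px union -> IoU ~0.143
print(compute_overlap(det, gt))        # [[1.0, 0.1428...]]

# a flat precision of 1.0 over the whole recall range integrates to AP = 1.0
print(_compute_ap(np.array([0.5, 1.0]), np.array([1.0, 1.0])))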

@@ -0,0 +1,566 @@
import torch
import numpy as np
import random
import csv
import os

from torch.utils.data import Dataset
from torch.utils.data.sampler import Sampler
from PIL import Image, ImageEnhance, ImageFilter


class CSVDataset(Dataset):
    """CSV dataset."""

    def __init__(self, train_file, class_list, transform=None):
        """
        Args:
            train_file (string): CSV file with training annotations
            class_list (string): CSV file with class list
            transform (optional): Transformation function
        """
        self.train_file = train_file
        self.class_list = class_list
        self.transform = transform

        # parse the provided class file
        try:
            with open(self.class_list, 'r', newline='') as file:
                self.classes = CSVDataset.load_classes(csv.reader(file, delimiter=' '))
        except ValueError as e:
            raise ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)) from None

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with open(self.train_file, 'r', newline='') as file:
                self.image_data = CSVDataset._read_annotations(csv.reader(file, delimiter=' '), self.classes)
        except ValueError as e:
            raise ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)) from None

        self.image_names = list(self.image_data.keys())
    @staticmethod
    def _parse(value, function, fmt):
        """
        Parse a string into a value, and format a nice ValueError if it fails.

        Returns `function(value)`.
        Any `ValueError` raised is caught and a new `ValueError` is raised
        with message `fmt.format(e)`, where `e` is the caught `ValueError`.
        """
        try:
            return function(value)
        except ValueError as e:
            raise ValueError(fmt.format(e)) from None

    @staticmethod
    def load_classes(csv_reader):
        result = {}

        for line, row in enumerate(csv_reader):
            line += 1

            try:
                class_name, class_id = row
            except ValueError:
                raise ValueError("line {}: format should be 'class_name,class_id'".format(line)) from None
            class_id = CSVDataset._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))

            if class_name in result:
                raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
            result[class_name] = class_id
        return result
    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot, 'scale': 1}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        img = Image.open(self.image_names[image_index])
        img = img.convert(mode="RGB")
        return img
    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']

            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))

            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2
            annotation[0, 4] = self.name_to_label(a['class'])

            annotations = np.append(annotations, annotation, axis=0)

        return annotations
    @staticmethod
    def _read_annotations(csv_reader, classes):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1

            try:
                img_file, x1, y1, x2, y2, class_name = row[:6]
            except ValueError:
                raise ValueError(
                    "line {}: format should be 'img_file,x1,y1,x2,y2,class_name' or 'img_file,,,,,'".format(
                        line)) from None

            if img_file not in result:
                result[img_file] = []

            # If a row contains only an image path, it's an image without annotations.
            if (x1, y1, x2, y2, class_name) == ('.', '.', '.', '.', '.'):
                continue

            x1 = CSVDataset._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
            y1 = CSVDataset._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
            x2 = CSVDataset._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
            y2 = CSVDataset._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))

            if class_name != 'ignore':
                # Check that the bounding box is valid.
                if x2 <= x1:
                    raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
                if y2 <= y1:
                    raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

                # check if the current class name is correctly present
                if class_name not in classes:
                    raise ValueError(
                        'line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))

            result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(self.image_names[image_index])
        return float(image.width) / float(image.height)
class WIDERDataset(Dataset):
    """WIDER dataset."""

    def __init__(self, train_file, img_prefix='', transform=None):
        """
        Args:
            train_file (string): WIDER txt file with training annotations
            img_prefix (string, optional): Prefix for images location
            transform (optional): Transformation function
        """
        self.train_file = train_file
        self.transform = transform
        self.img_prefix = img_prefix

        # The WIDER dataset has only faces. Extend for additional face properties (see below).
        self.classes = {'face': 0}

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # WIDER file definition example:
        # image name, number of faces, then per face: x y w h blur expression illumination invalid occlusion pose
        # 0--Parade/0_Parade_marchingband_1_117.jpg
        # 9
        # 69 359 50 36 1 0 0 0 0 1
        # 227 382 56 43 1 0 1 0 0 1
        # 296 305 44 26 1 0 0 0 0 1
        # 353 280 40 36 2 0 0 0 2 1
        # 885 377 63 41 1 0 0 0 0 1
        # 819 391 34 43 2 0 0 0 1 0
        # 727 342 37 31 2 0 0 0 0 1
        # 598 246 33 29 2 0 0 0 0 1
        # 740 308 45 33 1 0 0 0 2 1
        try:
            with open(self.train_file, 'r') as file:
                self.image_data = WIDERDataset._read_data(file)
        except ValueError as e:
            raise ValueError('invalid WIDER annotations file: {}: {}'.format(self.train_file, e)) from None

        self.image_names = list(self.image_data.keys())

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot, 'scale': 1, 'img_name': self.image_names[idx]}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        print('Loading image %s' % self.image_names[image_index])
        img = Image.open(os.path.join(self.img_prefix, self.image_names[image_index]))
        img = img.convert(mode="RGB")
        return img
    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']

            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))

            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2
            annotation[0, 4] = self.name_to_label(a['class'])

            annotations = np.append(annotations, annotation, axis=0)

        return annotations
    @staticmethod
    def _read_data(reader):
        result = {}
        counter = 0
        img_file = None
        for line in reader:
            line = line.strip()
            if counter == 0:
                # file name or number of faces
                try:
                    counter = int(line)
                except ValueError:
                    if img_file and len(result.get(img_file, [])) == 0:
                        print("Warning - no faces: %s" % img_file)
                    img_file = line
            else:
                counter -= 1
                # coordinates e.g. 370 170 9 13 2 0 0 0 2 0
                nums = [int(x) for x in line.split()]
                result.setdefault(img_file, []).append({'x1': nums[0], 'x2': nums[0] + nums[2],
                                                        'y1': nums[1], 'y2': nums[1] + nums[3],
                                                        'class': 'face'})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(os.path.join(self.img_prefix, self.image_names[image_index]))
        return float(image.width) / float(image.height)
def collater(data):
    imgs = [s['img'] for s in data]
    annots = [s['annot'] for s in data]
    scales = [s['scale'] for s in data]

    widths = [int(s.shape[0]) for s in imgs]
    heights = [int(s.shape[1]) for s in imgs]
    batch_size = len(imgs)

    max_width = np.array(widths).max()
    max_height = np.array(heights).max()

    padded_imgs = torch.zeros(batch_size, max_width, max_height, 3)

    for i in range(batch_size):
        img = imgs[i]
        padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img

    max_num_annots = max(annot.shape[0] for annot in annots)

    if max_num_annots > 0:
        annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1
        # print(annot_padded.shape)
        for idx, annot in enumerate(annots):
            # print(annot.shape)
            if annot.shape[0] > 0:
                annot_padded[idx, :annot.shape[0], :] = annot
    else:
        annot_padded = torch.ones((len(annots), 1, 5)) * -1

    padded_imgs = padded_imgs.permute(0, 3, 1, 2)

    return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales}
class Resizer(object):
    """Resize the image so its smallest side is min_side (capped by max_side) and pad to a multiple of 32."""

    def __call__(self, sample, min_side=800, max_side=1400):
        image, annots, scale = sample['img'], sample['annot'], sample['scale']
        cols, rows = image.size

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
        image = image / 255.0

        rows, cols, cns = image.shape

        pad_w = 32 - rows % 32
        pad_h = 32 - cols % 32

        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': new_image, 'annot': annots, 'scale': scale}
class Augmenter(object):
    """Randomly flip the image and its annotations horizontally."""

    def __call__(self, sample, flip_x=0.5):
        if np.random.rand() < flip_x:
            image, annots, scales = sample['img'], sample['annot'], sample['scale']
            image = image[:, ::-1, :]

            rows, cols, channels = image.shape

            x1 = annots[:, 0].copy()
            x2 = annots[:, 2].copy()

            x_tmp = x1.copy()

            annots[:, 0] = cols - x2
            annots[:, 2] = cols - x_tmp

            sample = {'img': image, 'annot': annots, 'scale': scales}

        return sample
class RandomCrop(object):

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']

        if not annots.shape[0]:
            return {'img': image, 'annot': annots, 'scale': scales}

        if random.choice([0, 1]):
            return {'img': image, 'annot': annots, 'scale': scales}
        else:
            rows, cols, cns = image.shape

            flag = 0
            while True:
                flag += 1
                if flag > 10:
                    return {'img': image, 'annot': annots, 'scale': scales}

                crop_ratio = random.uniform(0.5, 1)
                rows_zero = int(rows * random.uniform(0, 1 - crop_ratio))
                cols_zero = int(cols * random.uniform(0, 1 - crop_ratio))
                crop_rows = int(rows * crop_ratio)
                crop_cols = int(cols * crop_ratio)
                '''
                new_image = image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :]
                new_image = cv2.resize(new_image, (cols, rows))
                #new_image = skimage.transform.resize(new_image, (rows, cols))

                new_annots = np.zeros((0, 5))
                for i in range(annots.shape[0]):
                    x1 = max(annots[i, 0] - cols_zero, 0)
                    y1 = max(annots[i, 1] - rows_zero, 0)
                    x2 = min(annots[i, 2] - cols_zero, crop_cols)
                    y2 = min(annots[i, 3] - rows_zero, crop_rows)
                    label = annots[i, 4]
                    if x1 + 10 < x2 and y1 + 10 < y2:
                        x1 /= crop_ratio
                        y1 /= crop_ratio
                        x2 /= crop_ratio
                        y2 /= crop_ratio
                        new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)

                if not new_annots.shape[0]:
                    continue
                '''
                new_image = np.zeros((rows, cols, cns))
                new_image[rows_zero:rows_zero + crop_rows, cols_zero:cols_zero + crop_cols, :] = \
                    image[rows_zero:rows_zero + crop_rows, cols_zero:cols_zero + crop_cols, :]

                new_annots = np.zeros((0, 5))
                for i in range(annots.shape[0]):
                    x1 = max(cols_zero, annots[i, 0])
                    y1 = max(rows_zero, annots[i, 1])
                    x2 = min(cols_zero + crop_cols, annots[i, 2])
                    y2 = min(rows_zero + crop_rows, annots[i, 3])
                    label = annots[i, 4]
                    if x1 + 10 < x2 and y1 + 10 < y2:
                        new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)

                if not new_annots.shape[0]:
                    continue

                return {'img': new_image, 'annot': new_annots, 'scale': scales}
class Color(object):

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']
        image = Image.fromarray(image)
        ratio = [0.5, 0.8, 1.2, 1.5]

        if random.choice([0, 1]):
            enh_bri = ImageEnhance.Brightness(image)
            brightness = random.choice(ratio)
            image = enh_bri.enhance(brightness)
        if random.choice([0, 1]):
            enh_col = ImageEnhance.Color(image)
            color = random.choice(ratio)
            image = enh_col.enhance(color)
        if random.choice([0, 1]):
            enh_con = ImageEnhance.Contrast(image)
            contrast = random.choice(ratio)
            image = enh_con.enhance(contrast)
        if random.choice([0, 1]):
            enh_sha = ImageEnhance.Sharpness(image)
            sharpness = random.choice(ratio)
            image = enh_sha.enhance(sharpness)
        if random.choice([0, 1]):
            image = image.filter(ImageFilter.BLUR)

        image = np.asarray(image)
        return {'img': image, 'annot': annots, 'scale': scales}
class Normalizer(object):

    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']

        image = (image.astype(np.float32) - self.mean) / self.std
        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
        return sample


class UnNormalizer(object):

    def __init__(self, mean=None, std=None):
        if mean is None:
            self.mean = [0.485, 0.456, 0.406]
        else:
            self.mean = mean
        if std is None:
            self.std = [0.229, 0.224, 0.225]
        else:
            self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        Returns:
            Tensor: Normalized image.
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor
class AspectRatioBasedSampler(Sampler):

    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        # determine the order of the images
        order = list(range(len(self.data_source)))
        order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))

        # divide into groups, one group = one batch
        return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in
                range(0, len(order), self.batch_size)]
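
The dataset, sampler, and collater above slot into a standard PyTorch DataLoader. A minimal wiring sketch (the file names here are hypothetical; the annotation files are space-delimited, matching the csv.reader calls above):

from torch.utils.data import DataLoader
from torchvision import transforms

# annotations.csv rows: img_path x1 y1 x2 y2 class_name
# classes.csv rows:     class_name class_id
dataset = CSVDataset(train_file='annotations.csv', class_list='classes.csv',
                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
sampler = AspectRatioBasedSampler(dataset, batch_size=2, drop_last=False)
loader = DataLoader(dataset, collate_fn=collater, batch_sampler=sampler)

for batch in loader:
    # images are zero-padded to a common size, annotations padded with -1 rows
    print(batch['img'].shape, batch['annot'].shape)  # (B, 3, H, W) and (B, max_annots, 5)
    break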

@@ -1,63 +1,13 @@
 import numpy as np
 import torch
-from PIL import Image
+import argparse
+import json
+from PIL import Image, ImageDraw
+from identification.dataloader import Normalizer, Resizer
 from torchvision import transforms
-class Resizer(object):
-    """Convert ndarrays in sample to Tensors."""
-
-    def __call__(self, sample, min_side=800, max_side=1400):
-        image, annots, scale = sample['img'], sample['annot'], sample['scale']
-        rows, cols = image.size
-
-        # scale = min_side / rows
-        smallest_side = min(rows, cols)
-
-        # rescale the image so the smallest side is min_side
-        scale = min_side / smallest_side
-
-        # check if the largest side is now greater than max_side, which can happen
-        # when images have a large aspect ratio
-        largest_side = max(rows, cols)
-
-        if largest_side * scale > max_side:
-            scale = max_side / largest_side
-
-        # resize the image with the computed scale
-        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
-        image = image / 255.0
-
-        rows, cols, cns = image.shape
-
-        pad_w = 32 - rows % 32
-        pad_h = 32 - cols % 32
-
-        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
-        new_image[:rows, :cols, :] = image.astype(np.float32)
-
-        annots[:, :4] *= scale
-
-        return {'img': new_image, 'annot': annots, 'scale': scale}
-
-
-class Normalizer(object):
-
-    def __init__(self):
-        self.mean = np.array([[[0.485, 0.456, 0.406]]])
-        self.std = np.array([[[0.229, 0.224, 0.225]]])
-
-    def __call__(self, sample):
-        image, annots, scales = sample['img'], sample['annot'], sample['scale']
-
-        image = (image.astype(np.float32) - self.mean) / self.std
-        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
-        return sample
-
 def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True):
     input_data = {'img': img_data, 'annot': np.zeros((0, 5)), 'scale': 1}
     transform = transforms.Compose([Resizer(), Normalizer()])
@@ -70,7 +20,7 @@ def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True)
         img_data = img_data.cuda()
     scores, labels, boxes = model(img_data)
     if scores is None:
-        return np.array()
+        return np.empty((0, 0)), np.empty((0, 0))
     scores = scores.cpu().numpy()
     scale = transformed['scale']
@@ -81,7 +31,16 @@ def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True)
     scores_sort = np.argsort(-scores)[:max_detections]
     image_boxes = boxes[indices[scores_sort], :]
-    return image_boxes
+    return image_boxes, scores[:max_detections]
+
+
+def img_rectangles(img, output_path, boxes=None):
+    if boxes is not None:
+        draw = ImageDraw.Draw(img)
+        for arr in boxes:
+            draw.rectangle(((arr[0], arr[1]), (arr[2], arr[3])), outline="black", width=1)
+    img.save(output_path)


 def load_model(model_path, is_cuda=True):
@@ -93,3 +52,36 @@ def load_model(model_path, is_cuda=True):
     model.anchors.is_cuda = is_cuda

     return model
+
+
+def load_image(filepath):
+    img = Image.open(filepath)
+    img = img.convert(mode="RGB")
+    return img
+
+
+def main(args=None):
+    parser = argparse.ArgumentParser(description='Simple detection script for a trained RetinaNet network.')
+
+    parser.add_argument('--model', help='Path to model')
+    parser.add_argument('--image', help='Path to image')
+    parser.add_argument('--rect', help='Output image with rectangles')
+    parser.add_argument('--threshold', help='Probability threshold (default 0.9)', type=float, default=0.9)
+    parser.add_argument('--force-cpu', help='Force CPU for detection (default false)', dest='force_cpu',
+                        default=False, action='store_true')
+
+    parser = parser.parse_args(args)
+
+    is_cuda = torch.cuda.is_available() and not parser.force_cpu
+
+    model = load_model(parser.model, is_cuda=is_cuda)
+    img = load_image(parser.image)
+    boxes, scores = fan_detect(model, img, threshold=parser.threshold, is_cuda=is_cuda)
+    print(json.dumps({'boxes': boxes.tolist(), 'scores': scores.tolist()}))
+
+    if parser.rect:
+        img = load_image(parser.image)
+        img_rectangles(img, parser.rect, boxes)
+
+
+if __name__ == '__main__':
+    main()
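
Besides the CLI entry point, the detector can be driven from Python. A sketch (the checkpoint and image paths are hypothetical):

import torch
import identification.detector as fan

is_cuda = torch.cuda.is_available()
model = fan.load_model('ckpt/widernew1_29.pt', is_cuda=is_cuda)
img = fan.load_image('group_photo.jpg')
with torch.no_grad():
    boxes, scores = fan.fan_detect(model, img, threshold=0.9, is_cuda=is_cuda)
print(boxes)  # (N, 4) array of x1, y1, x2, y2 boxes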

@@ -0,0 +1,207 @@
import argparse
import collections
import os

import numpy as np
import torch
import torch.optim as optim
from torchvision import transforms
import torch.utils.model_zoo as model_zoo

from identification.model_level_attention import resnet18, resnet34, resnet50, resnet101, resnet152
from torch.utils.data import DataLoader
from identification.csv_eval import evaluate
from identification.dataloader import WIDERDataset, AspectRatioBasedSampler, collater, Resizer, Augmenter, Normalizer, CSVDataset

is_cuda = torch.cuda.is_available()
print('CUDA available: {}'.format(is_cuda))

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}

ckpt = False


def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--wider_train', help='Path to file containing WIDER training annotations (see readme)')
    parser.add_argument('--wider_val',
                        help='Path to file containing WIDER validation annotations (optional, see readme)')
    parser.add_argument('--wider_train_prefix', help='Prefix path to WIDER train images')
    parser.add_argument('--wider_val_prefix', help='Prefix path to WIDER validation images')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--batch_size', help='Batch size (default 2)', type=int, default=2)
    parser.add_argument('--model_name', help='Name of the model to save')
    parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel',
                        default=False, action='store_true')
    parser.add_argument('--pretrained', help='Pretrained model name in weight directory')

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.wider_train is None:
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
    else:
        dataset_train = WIDERDataset(train_file=parser.wider_train, img_prefix=parser.wider_train_prefix,
                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))

    if parser.wider_val is None:
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            print('Loading CSV validation dataset')
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Resizer(), Normalizer()]))
    else:
        print('Loading WIDER validation dataset')
        dataset_val = WIDERDataset(train_file=parser.wider_val, img_prefix=parser.wider_val_prefix,
                                   transform=transforms.Compose([Resizer(), Normalizer()]))

    print('Loading training dataset')
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    if parser.parallel:
        dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler)
    else:
        dataloader_train = DataLoader(dataset_train, collate_fn=collater, batch_sampler=sampler)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = resnet18(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 34:
        retinanet = resnet34(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 50:
        retinanet = resnet50(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 101:
        retinanet = resnet101(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 152:
        retinanet = resnet152(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if ckpt:
        retinanet = torch.load('')
        print('Loading checkpoint')
    else:
        print('Loading pretrained model')
        retinanet_dict = retinanet.state_dict()
        if parser.pretrained is None:
            pretrained_dict = model_zoo.load_url(model_urls['resnet' + str(parser.depth)])
        else:
            pretrained_dict = torch.load(parser.pretrained)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in retinanet_dict}
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)

    if parser.parallel:
        retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])

    if is_cuda:
        retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.parallel:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    iters = 0

    for epoch_num in range(0, parser.epochs):
        retinanet.train()
        if parser.parallel:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            iters += 1
            optimizer.zero_grad()

            img_data = data['img'].float()
            annot_data = data['annot']
            if is_cuda:
                img_data = img_data.cuda()
                annot_data = annot_data.cuda()
                print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (
                    torch.cuda.memory_allocated() / 1024 ** 2, torch.cuda.max_memory_allocated() / 1024 ** 2,
                    torch.cuda.memory_cached() / 1024 ** 2, torch.cuda.max_memory_cached() / 1024 ** 2))

            classification_loss, regression_loss, mask_loss = retinanet([img_data, annot_data])
            del img_data
            del annot_data

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            mask_loss = mask_loss.mean()
            loss = classification_loss + regression_loss + mask_loss
            if bool(loss == 0):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            loss_hist.append(float(loss.item()))
            epoch_loss.append(float(loss.item()))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | '
                'mask_loss {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), float(mask_loss),
                    np.mean(loss_hist)))

            del classification_loss
            del regression_loss
            del loss

        if parser.wider_val is not None:
            print('Evaluating dataset')
            evaluate(dataset_val, retinanet, is_cuda=is_cuda)

        scheduler.step(np.mean(epoch_loss))

        # TODO remove makedir
        os.makedirs('./ckpt', exist_ok=True)
        if parser.parallel:
            torch.save(retinanet.module, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))
        else:
            torch.save(retinanet, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))


if __name__ == '__main__':
    main()
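
Note that the training loop saves the whole module with torch.save rather than a state_dict, so a saved epoch can be reloaded in one call. A sketch (the checkpoint name is hypothetical, following the --model_name pattern above):

import torch

retinanet = torch.load('./ckpt/widernew1_29.pt', map_location='cpu')  # full model, not a state_dict
retinanet.eval()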

vectorizer/lfw_test_pair.txt: file diff suppressed because it is too large.

@@ -0,0 +1,2 @@
python3 -m recognition.train --casia_list /home/ehp/tmp/datasets/CASIA-maxpy-clean/train.txt --casia_root /home/ehp/tmp/datasets/CASIA-maxpy-clean --lfw_root /home/ehp/tmp/datasets/lfw \
--lfw_pair_list /home/ehp/git/arcface/lfw_test_pair.txt --model_name recongition3 --batch_size 20 --loss adacos --print_freq 20 --depth 50

@@ -0,0 +1,7 @@
#python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/sample.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/sample/images \
#--wider_val /home/ehp/tmp/datasets/wider/sample_val.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/sample_val/images \
#--depth 50 --epochs 30 --batch_size 1 --model_name wider_sample1
python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/wider_face_train_bbx_gt.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/WIDER_train/images \
--wider_val /home/ehp/tmp/datasets/wider/wider_face_val_bbx_gt.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/WIDER_val/images \
--depth 50 --epochs 30 --batch_size 1 --model_name widernew1

@@ -13,6 +13,7 @@ from PIL import Image
 import identification.detector as fan

 is_cuda = torch.cuda.is_available()
+print('CUDA: %s' % is_cuda)

 fan_model = fan.load_model('ckpt/wider6_10.pt', is_cuda=is_cuda)

 # load recognition model
@@ -61,18 +62,25 @@ def upload_file():
         filepath = os.path.join(UPLOAD_FOLDER, filename)
         f.save(filepath)

-        img = Image.open(filepath)
-        data = img.convert(mode="RGB")
-        with torch.no_grad():
-            boxes = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda).astype(int)
-        if boxes is None or len(boxes) == 0:
-            abort(404)
-        extracted = [{'box': arr.tolist(), 'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist()} for arr in boxes]
-        return jsonify(extracted)
+        try:
+            img = Image.open(filepath)
+            data = img.convert(mode="RGB")
+            with torch.no_grad():
+                boxes, scores = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda)
+            if boxes is None or len(boxes) == 0:
+                return jsonify([])
+            # drop detections that are too small, filtering boxes and scores together so they stay aligned
+            keep = [i for i, b in enumerate(boxes)
+                    if abs(b[1] - b[0]) >= imagesize / 2 and abs(b[2] - b[0]) >= imagesize / 2]
+            boxes = boxes[keep].astype(int)
+            scores = scores[keep].astype(float)
+            extracted = [{'box': arr.tolist(),
+                          'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist(),
+                          'scores': score.tolist()
+                          } for arr, score in zip(boxes, scores)]
+            return jsonify(extracted)
+        finally:
+            os.remove(filepath)
     else:
         abort(500)
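
With this change the endpoint returns one JSON object per kept detection instead of aborting with 404. The payload looks roughly like this (values invented, vector truncated):

[
    {"box": [123, 45, 321, 243], "scores": 0.9971, "vector": [0.0123, -0.0456, ...]}
]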
