
Faceserver vectorizer training

master
Petr Masopust 6 years ago
parent 95d4f2ca7d
commit aa6f45fc8b
1. vectorizer/identification/csv_eval.py (238 lines changed)
2. vectorizer/identification/dataloader.py (566 lines changed)
3. vectorizer/identification/detector.py (104 lines changed)
4. vectorizer/identification/train.py (207 lines changed)
5. vectorizer/lfw_test_pair.txt (6000 lines changed)
6. vectorizer/train-rec.sh (2 lines changed)
7. vectorizer/train.sh (7 lines changed)
8. vectorizer/vectorizer/server.py (32 lines changed)

@@ -0,0 +1,238 @@ vectorizer/identification/csv_eval.py
import numpy as np
import torch


def compute_overlap(a, b):
    """
    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])
    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)
    ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih
    ua = np.maximum(ua, np.finfo(float).eps)
    intersection = iw * ih
    return intersection / ua
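

# A quick worked example for compute_overlap (a hedged sketch; the box values
# are made up for illustration): a 2x2 box against the same box shifted right
# by 1 overlaps in a 1x2 strip, so IoU = 2 / (4 + 4 - 2) = 1/3.
def _demo_compute_overlap():
    a = np.array([[0.0, 0.0, 2.0, 2.0]])
    b = np.array([[1.0, 0.0, 3.0, 2.0]])
    iou = compute_overlap(a, b)  # shape (1, 1)
    assert np.isclose(iou[0, 0], 1.0 / 3.0)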


def _compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
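

# A hedged worked example for _compute_ap (values made up): with recall
# [0.5, 1.0] and precision [1.0, 0.5], the precision envelope becomes
# [1.0, 1.0, 0.5, 0.0] over recall [0.0, 0.5, 1.0, 1.0], so
# AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.
def _demo_compute_ap():
    ap = _compute_ap(np.array([0.5, 1.0]), np.array([1.0, 0.5]))
    assert np.isclose(ap, 0.75)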


def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, is_cuda=True):
    """ Get the detections from the retinanet using the generator.
    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]
    # Arguments
        dataset         : The generator used to run images through the retinanet.
        retinanet       : The retinanet to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        is_cuda         : CUDA available
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))]

    retinanet.eval()
    with torch.no_grad():
        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            # run network
            img_data = data['img'].permute(2, 0, 1).float().unsqueeze(dim=0)
            if is_cuda:
                img_data = img_data.cuda()
            scores, labels, boxes = retinanet(img_data)

            if isinstance(scores, torch.Tensor):
                scores = scores.cpu().numpy()
                labels = labels.cpu().numpy()
                boxes = boxes.cpu().numpy()

                # correct boxes for image scale
                boxes /= scale

                # select indices which have a score above the threshold
                indices = np.where(scores > score_threshold)[0]

                # select those scores
                scores = scores[indices]

                # find the order with which to sort the scores
                scores_sort = np.argsort(-scores)[:max_detections]

                # select detections
                image_boxes = boxes[indices[scores_sort], :]
                image_scores = scores[scores_sort]
                image_labels = labels[indices[scores_sort]]
                image_detections = np.concatenate(
                    [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)

                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1]
            else:
                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = np.zeros((0, 5))

            print('{}/{}'.format(index + 1, len(dataset)), end='\r')

    return all_detections


def _get_annotations(generator):
    """ Get the ground truth annotations from the generator.
    The result is a list of lists such that the size is:
        all_annotations[num_images][num_classes] = annotations[num_annotations, 5]
    # Arguments
        generator : The generator used to retrieve ground truth annotations.
    # Returns
        A list of lists containing the annotations for each image in the generator.
    """
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))]

    for i in range(len(generator)):
        # load the annotations
        annotations = generator.load_annotations(i)

        # copy annotations to all_annotations
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

        print('{}/{}'.format(i + 1, len(generator)), end='\r')

    return all_annotations


def evaluate(
        generator,
        retinanet,
        iou_threshold=0.5,
        score_threshold=0.05,
        max_detections=100,
        is_cuda=True,
        save_path=None
):
    """ Evaluate a given dataset using a given retinanet.
    # Arguments
        generator       : The generator that represents the dataset to evaluate.
        retinanet       : The retinanet to evaluate.
        iou_threshold   : The threshold used to consider when a detection is positive or negative.
        score_threshold : The score confidence threshold to use for detections.
        max_detections  : The maximum number of detections to use per image.
        is_cuda         : CUDA available
        save_path       : The path to save images with visualized detections to.
    # Returns
        A dict mapping class names to mAP scores.
    """
    # gather all detections and annotations
    all_detections = _get_detections(generator, retinanet, score_threshold=score_threshold,
                                     max_detections=max_detections, is_cuda=is_cuda)
    all_annotations = _get_annotations(generator)
    average_precisions = {}

    for label in range(generator.num_classes()):
        false_positives = np.zeros((0,))
        true_positives = np.zeros((0,))
        scores = np.zeros((0,))
        num_annotations = 0.0

        for i in range(len(generator)):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)
                    continue

                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap = overlaps[0, assigned_annotation]

                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue

        # sort by score
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations

    print('\nmAP:')
    for label in range(generator.num_classes()):
        label_name = generator.label_to_name(label)
        print('{}: {}'.format(label_name, average_precisions[label][0]))

    return average_precisions
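

# A hedged usage sketch (dataset paths and the checkpoint name are
# illustrative, not from this commit): evaluate() only needs a dataset with
# num_classes()/load_annotations() and a model returning (scores, labels,
# boxes), which is how train.py below calls it.
#
#   from identification.dataloader import WIDERDataset, Resizer, Normalizer
#   from torchvision import transforms
#
#   dataset_val = WIDERDataset('wider_face_val_bbx_gt.txt', img_prefix='WIDER_val/images',
#                              transform=transforms.Compose([Resizer(), Normalizer()]))
#   retinanet = torch.load('ckpt/widernew1_29.pt')
#   evaluate(dataset_val, retinanet, iou_threshold=0.5, is_cuda=torch.cuda.is_available())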

@@ -0,0 +1,566 @@ vectorizer/identification/dataloader.py
import torch
import numpy as np
import random
import csv
import os

from torch.utils.data import Dataset
from torch.utils.data.sampler import Sampler
from PIL import Image, ImageEnhance, ImageFilter


class CSVDataset(Dataset):
    """CSV dataset."""

    def __init__(self, train_file, class_list, transform=None):
        """
        Args:
            train_file (string): CSV file with training annotations
            class_list (string): CSV file with class list
            transform (optional): Transformation function
        """
        self.train_file = train_file
        self.class_list = class_list
        self.transform = transform

        # parse the provided class file
        try:
            with open(self.class_list, 'r', newline='') as file:
                self.classes = CSVDataset.load_classes(csv.reader(file, delimiter=' '))
        except ValueError as e:
            raise ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)) from None

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with open(self.train_file, 'r', newline='') as file:
                self.image_data = CSVDataset._read_annotations(csv.reader(file, delimiter=' '), self.classes)
        except ValueError as e:
            raise ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)) from None

        self.image_names = list(self.image_data.keys())

    @staticmethod
    def _parse(value, function, fmt):
        """
        Parse a string into a value, and format a nice ValueError if it fails.
        Returns `function(value)`.
        Any `ValueError` raised is caught and a new `ValueError` is raised
        with message `fmt.format(e)`, where `e` is the caught `ValueError`.
        """
        try:
            return function(value)
        except ValueError as e:
            raise ValueError(fmt.format(e)) from None

    @staticmethod
    def load_classes(csv_reader):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1
            try:
                class_name, class_id = row
            except ValueError:
                raise ValueError("line {}: format should be 'class_name,class_id'".format(line)) from None
            class_id = CSVDataset._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))
            if class_name in result:
                raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
            result[class_name] = class_id
        return result

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot, 'scale': 1}
        if self.transform:
            sample = self.transform(sample)
        return sample

    def load_image(self, image_index):
        img = Image.open(self.image_names[image_index])
        img = img.convert(mode="RGB")
        return img

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']
            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))
            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2
            annotation[0, 4] = self.name_to_label(a['class'])
            annotations = np.append(annotations, annotation, axis=0)

        return annotations

    @staticmethod
    def _read_annotations(csv_reader, classes):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1
            try:
                img_file, x1, y1, x2, y2, class_name = row[:6]
            except ValueError:
                raise ValueError(
                    "line {}: format should be 'img_file,x1,y1,x2,y2,class_name' or 'img_file,,,,,'".format(
                        line)) from None

            if img_file not in result:
                result[img_file] = []

            # If a row contains only an image path, it's an image without annotations.
            if (x1, y1, x2, y2, class_name) == ('.', '.', '.', '.', '.'):
                continue

            x1 = CSVDataset._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
            y1 = CSVDataset._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
            x2 = CSVDataset._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
            y2 = CSVDataset._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))

            if class_name != 'ignore':
                # Check that the bounding box is valid.
                if x2 <= x1:
                    raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
                if y2 <= y1:
                    raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

                # check if the current class name is correctly present
                if class_name not in classes:
                    raise ValueError(
                        'line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))

            result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
        return result
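
    # A hedged sketch of the annotations file this reader expects
    # (space-delimited, since the csv.reader above is built with
    # delimiter=' '; paths and coordinates are illustrative):
    #
    #   imgs/0001.jpg 69 359 119 395 face
    #   imgs/0002.jpg . . . . .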

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(self.image_names[image_index])
        return float(image.width) / float(image.height)


class WIDERDataset(Dataset):
    """Wider dataset."""

    def __init__(self, train_file, img_prefix='', transform=None):
        """
        Args:
            train_file (string): Wider txt file with training annotations
            img_prefix (string, optional): Prefix for images location
            transform (optional): Transformation function
        """
        self.train_file = train_file
        self.transform = transform
        self.img_prefix = img_prefix

        # WIDER dataset has only faces. Enhance for additional face properties (see below).
        self.classes = {'face': 0}

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # Wider file definition example:
        # file with image name, number of faces, x y w h blur expression illumination invalid occlusion pose
        # 0--Parade/0_Parade_marchingband_1_117.jpg
        # 9
        # 69 359 50 36 1 0 0 0 0 1
        # 227 382 56 43 1 0 1 0 0 1
        # 296 305 44 26 1 0 0 0 0 1
        # 353 280 40 36 2 0 0 0 2 1
        # 885 377 63 41 1 0 0 0 0 1
        # 819 391 34 43 2 0 0 0 1 0
        # 727 342 37 31 2 0 0 0 0 1
        # 598 246 33 29 2 0 0 0 0 1
        # 740 308 45 33 1 0 0 0 2 1
        try:
            with open(self.train_file, 'r') as file:
                self.image_data = WIDERDataset._read_data(file)
        except ValueError as e:
            raise ValueError('invalid WIDER annotations file: {}: {}'.format(self.train_file, e)) from None

        self.image_names = list(self.image_data.keys())

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot, 'scale': 1, 'img_name': self.image_names[idx]}
        if self.transform:
            sample = self.transform(sample)
        return sample

    def load_image(self, image_index):
        print('Loading image %s' % self.image_names[image_index])
        img = Image.open(os.path.join(self.img_prefix, self.image_names[image_index]))
        img = img.convert(mode="RGB")
        return img

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']
            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))
            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2
            annotation[0, 4] = self.name_to_label(a['class'])
            annotations = np.append(annotations, annotation, axis=0)

        return annotations

    @staticmethod
    def _read_data(reader):
        result = {}
        counter = 0
        img_file = None
        for line in reader:
            line = line.strip()
            if counter == 0:
                # file name or number of faces
                try:
                    counter = int(line)
                except ValueError:
                    if img_file and len(result[img_file]) == 0:
                        print("Warning - no faces: %s" % img_file)
                    img_file = line
            else:
                counter -= 1
                # coordinates e.g. 370 170 9 13 2 0 0 0 2 0
                nums = [int(x) for x in line.split()]
                result.setdefault(img_file, []).append({'x1': nums[0], 'x2': nums[0] + nums[2],
                                                        'y1': nums[1], 'y2': nums[1] + nums[3],
                                                        'class': 'face'})
        return result
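
    # A hedged sketch of what _read_data produces for the file format shown
    # above (same example annotation row; x2 = x + w, y2 = y + h):
    #
    #   import io
    #   sample = "0--Parade/0_Parade_marchingband_1_117.jpg\n1\n69 359 50 36 1 0 0 0 0 1\n"
    #   WIDERDataset._read_data(io.StringIO(sample))
    #   # -> {'0--Parade/0_Parade_marchingband_1_117.jpg':
    #   #     [{'x1': 69, 'x2': 119, 'y1': 359, 'y2': 395, 'class': 'face'}]}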

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(os.path.join(self.img_prefix, self.image_names[image_index]))
        return float(image.width) / float(image.height)


def collater(data):
    imgs = [s['img'] for s in data]
    annots = [s['annot'] for s in data]
    scales = [s['scale'] for s in data]

    widths = [int(s.shape[0]) for s in imgs]
    heights = [int(s.shape[1]) for s in imgs]
    batch_size = len(imgs)

    max_width = np.array(widths).max()
    max_height = np.array(heights).max()

    padded_imgs = torch.zeros(batch_size, max_width, max_height, 3)
    for i in range(batch_size):
        img = imgs[i]
        padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img

    max_num_annots = max(annot.shape[0] for annot in annots)
    # print(annot_padded.shape)
    if max_num_annots > 0:
        annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1
        for idx, annot in enumerate(annots):
            # print(annot.shape)
            if annot.shape[0] > 0:
                annot_padded[idx, :annot.shape[0], :] = annot
    else:
        annot_padded = torch.ones((len(annots), 1, 5)) * -1

    padded_imgs = padded_imgs.permute(0, 3, 1, 2)

    return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales}
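

# A hedged shape check for collater (dummy tensors; sizes are made up): two
# HWC images are zero-padded to the batch maximum per axis, annotations are
# padded with -1 rows, and the images come back NCHW.
def _demo_collater():
    a = {'img': torch.zeros(100, 120, 3), 'annot': torch.zeros(2, 5), 'scale': 1}
    b = {'img': torch.zeros(90, 140, 3), 'annot': torch.zeros(0, 5), 'scale': 1}
    batch = collater([a, b])
    assert batch['img'].shape == (2, 3, 100, 140)
    assert batch['annot'].shape == (2, 2, 5)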


class Resizer(object):
    """Resize the image so its smallest side equals min_side, cap the scale so
    the largest side stays within max_side, then pad to a multiple of 32."""

    def __call__(self, sample, min_side=800, max_side=1400):
        image, annots, scale = sample['img'], sample['annot'], sample['scale']
        cols, rows = image.size

        smallest_side = min(rows, cols)
        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)
        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
        image = image / 255.0

        rows, cols, cns = image.shape
        pad_w = 32 - rows % 32
        pad_h = 32 - cols % 32

        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': new_image, 'annot': annots, 'scale': scale}
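

# A worked example for Resizer (numbers illustrative): a 1000x600 image gives
# scale = 800 / 600 = 1.333 and a 1333x800 resize (1333 < max_side 1400). The
# padding then adds 32 - side % 32 to each side, so 800 becomes 832 (a full
# extra block, since 800 % 32 == 0) and 1333 becomes 1344.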


class Augmenter(object):
    """Randomly flip the image and its annotations horizontally."""

    def __call__(self, sample, flip_x=0.5):
        if np.random.rand() < flip_x:
            image, annots, scales = sample['img'], sample['annot'], sample['scale']
            image = image[:, ::-1, :]

            rows, cols, channels = image.shape

            x1 = annots[:, 0].copy()
            x2 = annots[:, 2].copy()

            x_tmp = x1.copy()

            annots[:, 0] = cols - x2
            annots[:, 2] = cols - x_tmp

            sample = {'img': image, 'annot': annots, 'scale': scales}

        return sample


class RandomCrop(object):
    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']
        if not annots.shape[0]:
            return {'img': image, 'annot': annots, 'scale': scales}
        if random.choice([0, 1]):
            return {'img': image, 'annot': annots, 'scale': scales}
        else:
            rows, cols, cns = image.shape
            flag = 0
            while True:
                flag += 1
                if flag > 10:
                    return {'img': image, 'annot': annots, 'scale': scales}

                crop_ratio = random.uniform(0.5, 1)
                rows_zero = int(rows * random.uniform(0, 1 - crop_ratio))
                cols_zero = int(cols * random.uniform(0, 1 - crop_ratio))
                crop_rows = int(rows * crop_ratio)
                crop_cols = int(cols * crop_ratio)
                '''
                new_image = image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :]
                new_image = cv2.resize(new_image, (cols, rows))
                #new_image = skimage.transform.resize(new_image, (rows, cols))
                new_annots = np.zeros((0, 5))
                for i in range(annots.shape[0]):
                    x1 = max(annots[i, 0] - cols_zero, 0)
                    y1 = max(annots[i, 1] - rows_zero, 0)
                    x2 = min(annots[i, 2] - cols_zero, crop_cols)
                    y2 = min(annots[i, 3] - rows_zero, crop_rows)
                    label = annots[i, 4]
                    if x1 + 10 < x2 and y1 + 10 < y2:
                        x1 /= crop_ratio
                        y1 /= crop_ratio
                        x2 /= crop_ratio
                        y2 /= crop_ratio
                        new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)
                if not new_annots.shape[0]:
                    continue
                '''
                new_image = np.zeros((rows, cols, cns))
                new_image[rows_zero:rows_zero + crop_rows, cols_zero:cols_zero + crop_cols, :] = \
                    image[rows_zero:rows_zero + crop_rows, cols_zero:cols_zero + crop_cols, :]
                new_annots = np.zeros((0, 5))
                for i in range(annots.shape[0]):
                    x1 = max(cols_zero, annots[i, 0])
                    y1 = max(rows_zero, annots[i, 1])
                    x2 = min(cols_zero + crop_cols, annots[i, 2])
                    y2 = min(rows_zero + crop_rows, annots[i, 3])
                    label = annots[i, 4]
                    if x1 + 10 < x2 and y1 + 10 < y2:
                        new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)
                if not new_annots.shape[0]:
                    continue
                return {'img': new_image, 'annot': new_annots, 'scale': scales}


class Color(object):
    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']
        image = Image.fromarray(image)
        ratio = [0.5, 0.8, 1.2, 1.5]
        if random.choice([0, 1]):
            enh_bri = ImageEnhance.Brightness(image)
            brightness = random.choice(ratio)
            image = enh_bri.enhance(brightness)
        if random.choice([0, 1]):
            enh_col = ImageEnhance.Color(image)
            color = random.choice(ratio)
            image = enh_col.enhance(color)
        if random.choice([0, 1]):
            enh_con = ImageEnhance.Contrast(image)
            contrast = random.choice(ratio)
            image = enh_con.enhance(contrast)
        if random.choice([0, 1]):
            enh_sha = ImageEnhance.Sharpness(image)
            sharpness = random.choice(ratio)
            image = enh_sha.enhance(sharpness)
        if random.choice([0, 1]):
            image = image.filter(ImageFilter.BLUR)
        image = np.asarray(image)
        return {'img': image, 'annot': annots, 'scale': scales}


class Normalizer(object):
    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']
        image = (image.astype(np.float32) - self.mean) / self.std
        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
        return sample


class UnNormalizer(object):
    def __init__(self, mean=None, std=None):
        if mean is None:
            self.mean = [0.485, 0.456, 0.406]
        else:
            self.mean = mean
        if std is None:
            self.std = [0.229, 0.224, 0.225]
        else:
            self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        Returns:
            Tensor: Normalized image.
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor


class AspectRatioBasedSampler(Sampler):
    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        # determine the order of the images
        order = list(range(len(self.data_source)))
        order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))
        # divide into groups, one group = one batch
        return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in
                range(0, len(order), self.batch_size)]
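

# A hedged end-to-end sketch (file paths are illustrative; see train.sh for
# the real ones): batches are grouped by aspect ratio and padded by collater,
# which is how train.py below wires it up.
def _demo_dataloader():
    from torch.utils.data import DataLoader
    from torchvision import transforms
    dataset = WIDERDataset('wider_face_train_bbx_gt.txt', img_prefix='WIDER_train/images',
                           transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
    sampler = AspectRatioBasedSampler(dataset, batch_size=2, drop_last=False)
    loader = DataLoader(dataset, collate_fn=collater, batch_sampler=sampler)
    for batch in loader:
        print(batch['img'].shape, batch['annot'].shape)
        break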

@@ -1,63 +1,13 @@ vectorizer/identification/detector.py
 import numpy as np
 import torch
-from PIL import Image
+import argparse
+import json
+from PIL import Image, ImageDraw
+from identification.dataloader import Normalizer, Resizer
 from torchvision import transforms
-
-
-class Resizer(object):
-    """Convert ndarrays in sample to Tensors."""
-
-    def __call__(self, sample, min_side=800, max_side=1400):
-        image, annots, scale = sample['img'], sample['annot'], sample['scale']
-        rows, cols = image.size
-        # scale = min_side / rows
-        smallest_side = min(rows, cols)
-        # rescale the image so the smallest side is min_side
-        scale = min_side / smallest_side
-        # check if the largest side is now greater than max_side, which can happen
-        # when images have a large aspect ratio
-        largest_side = max(rows, cols)
-        if largest_side * scale > max_side:
-            scale = max_side / largest_side
-        # resize the image with the computed scale
-        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
-        image = image / 255.0
-        rows, cols, cns = image.shape
-        pad_w = 32 - rows % 32
-        pad_h = 32 - cols % 32
-        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
-        new_image[:rows, :cols, :] = image.astype(np.float32)
-        annots[:, :4] *= scale
-        return {'img': new_image, 'annot': annots, 'scale': scale}
-
-
-class Normalizer(object):
-    def __init__(self):
-        self.mean = np.array([[[0.485, 0.456, 0.406]]])
-        self.std = np.array([[[0.229, 0.224, 0.225]]])
-
-    def __call__(self, sample):
-        image, annots, scales = sample['img'], sample['annot'], sample['scale']
-        image = (image.astype(np.float32) - self.mean) / self.std
-        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
-        return sample
-
-
 def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True):
     input_data = {'img': img_data, 'annot': np.zeros((0, 5)), 'scale': 1}
     transform = transforms.Compose([Resizer(), Normalizer()])
@@ -70,7 +20,7 @@ def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True)
         img_data = img_data.cuda()
     scores, labels, boxes = model(img_data)
     if scores is None:
-        return np.array()
+        return np.empty((0, 0)), np.empty((0, 0))
     scores = scores.cpu().numpy()
     scale = transformed['scale']
@@ -81,7 +31,16 @@ def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True)
     scores_sort = np.argsort(-scores)[:max_detections]
     image_boxes = boxes[indices[scores_sort], :]
-    return image_boxes
+    return image_boxes, scores[:max_detections]
+
+
+def img_rectangles(img, output_path, boxes=None):
+    if boxes is not None:
+        draw = ImageDraw.Draw(img)
+        for arr in boxes:
+            draw.rectangle(((arr[0], arr[1]), (arr[2], arr[3])), outline="black", width=1)
+    img.save(output_path)
+
+
 def load_model(model_path, is_cuda=True):
@@ -93,3 +52,36 @@ def load_model(model_path, is_cuda=True):
     model.anchors.is_cuda = is_cuda
     return model
+
+
+def load_image(filepath):
+    img = Image.open(filepath)
+    img = img.convert(mode="RGB")
+    return img
+
+
+def main(args=None):
+    parser = argparse.ArgumentParser(description='Simple detection script for a trained RetinaNet network.')
+    parser.add_argument('--model', help='Path to model')
+    parser.add_argument('--image', help='Path to image')
+    parser.add_argument('--rect', help='Output image with rectangles')
+    parser.add_argument('--threshold', help='Probability threshold (default 0.9)', type=float, default=0.9)
+    parser.add_argument('--force-cpu', help='Force CPU for detection (default false)', dest='force_cpu',
+                        default=False, action='store_true')
+    parser = parser.parse_args(args)
+
+    is_cuda = torch.cuda.is_available() and not parser.force_cpu
+
+    model = load_model(parser.model, is_cuda=is_cuda)
+    img = load_image(parser.image)
+    boxes, scores = fan_detect(model, img, threshold=parser.threshold, is_cuda=is_cuda)
+    print(json.dumps({'boxes': boxes.tolist(), 'scores': scores.tolist()}))
+
+    if parser.rect:
+        img = load_image(parser.image)
+        img_rectangles(img, parser.rect, boxes)
+
+
+if __name__ == '__main__':
+    main()
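
# A hedged invocation sketch (paths and the checkpoint name are illustrative):
#
#   python3 -m identification.detector --model ckpt/widernew1_29.pt \
#       --image group_photo.jpg --rect group_photo_boxes.jpg --threshold 0.9
#
# This prints a JSON object with 'boxes' and 'scores' and, with --rect, saves
# a copy of the image with the detected boxes drawn on it.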

@@ -0,0 +1,207 @@ vectorizer/identification/train.py
import argparse
import collections
import os
import numpy as np
import torch
import torch.optim as optim
from torchvision import transforms
import torch.utils.model_zoo as model_zoo
from identification.model_level_attention import resnet18, resnet34, resnet50, resnet101, resnet152
from torch.utils.data import DataLoader
from identification.csv_eval import evaluate
from identification.dataloader import WIDERDataset, AspectRatioBasedSampler, collater, Resizer, Augmenter, Normalizer, CSVDataset
is_cuda = torch.cuda.is_available()
print('CUDA available: {}'.format(is_cuda))
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
ckpt = False


def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--wider_train', help='Path to file containing WIDER training annotations (see readme)')
    parser.add_argument('--wider_val',
                        help='Path to file containing WIDER validation annotations (optional, see readme)')
    parser.add_argument('--wider_train_prefix', help='Prefix path to WIDER train images')
    parser.add_argument('--wider_val_prefix', help='Prefix path to WIDER validation images')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--batch_size', help='Batch size (default 2)', type=int, default=2)
    parser.add_argument('--model_name', help='Name of the model to save')
    parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel',
                        default=False, action='store_true')
    parser.add_argument('--pretrained', help='Pretrained model name in weight directory')
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.wider_train is None:
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
    else:
        dataset_train = WIDERDataset(train_file=parser.wider_train, img_prefix=parser.wider_train_prefix,
                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))

    if parser.wider_val is None:
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            print('Loading CSV validation dataset')
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Resizer(), Normalizer()]))
    else:
        print('Loading WIDER validation dataset')
        dataset_val = WIDERDataset(train_file=parser.wider_val, img_prefix=parser.wider_val_prefix,
                                   transform=transforms.Compose([Resizer(), Normalizer()]))

    print('Loading training dataset')
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    if parser.parallel:
        dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler)
    else:
        dataloader_train = DataLoader(dataset_train, collate_fn=collater, batch_sampler=sampler)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = resnet18(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 34:
        retinanet = resnet34(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 50:
        retinanet = resnet50(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 101:
        retinanet = resnet101(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 152:
        retinanet = resnet152(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if ckpt:
        retinanet = torch.load('')
        print('Loading checkpoint')
    else:
        print('Loading pretrained model')
        retinanet_dict = retinanet.state_dict()
        if parser.pretrained is None:
            pretrained_dict = model_zoo.load_url(model_urls['resnet' + str(parser.depth)])
        else:
            pretrained_dict = torch.load(parser.pretrained)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in retinanet_dict}
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)

    if parser.parallel:
        retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])
    if is_cuda:
        retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.parallel:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    iters = 0
    for epoch_num in range(0, parser.epochs):
        retinanet.train()
        if parser.parallel:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()

        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            iters += 1
            optimizer.zero_grad()

            img_data = data['img'].float()
            annot_data = data['annot']
            if is_cuda:
                img_data = img_data.cuda()
                annot_data = annot_data.cuda()
                print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (
                    torch.cuda.memory_allocated() / 1024 ** 2,
                    torch.cuda.max_memory_allocated() / 1024 ** 2,
                    torch.cuda.memory_cached() / 1024 ** 2,
                    torch.cuda.max_memory_cached() / 1024 ** 2))

            classification_loss, regression_loss, mask_loss = retinanet([img_data, annot_data])
            del img_data
            del annot_data

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            mask_loss = mask_loss.mean()
            loss = classification_loss + regression_loss + mask_loss
            if bool(loss == 0):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()
            loss_hist.append(float(loss.item()))
            epoch_loss.append(float(loss.item()))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | '
                'mask_loss {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), float(mask_loss),
                    np.mean(loss_hist)))

            del classification_loss
            del regression_loss
            del loss

        if dataset_val is not None:
            print('Evaluating dataset')
            evaluate(dataset_val, retinanet, is_cuda=is_cuda)

        scheduler.step(np.mean(epoch_loss))

        # TODO remove makedir
        os.makedirs('./ckpt', exist_ok=True)
        if parser.parallel:
            torch.save(retinanet.module, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))
        else:
            torch.save(retinanet, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))


if __name__ == '__main__':
    main()

vectorizer/lfw_test_pair.txt: file diff suppressed because it is too large.

@@ -0,0 +1,2 @@ vectorizer/train-rec.sh
python3 -m recognition.train --casia_list /home/ehp/tmp/datasets/CASIA-maxpy-clean/train.txt --casia_root /home/ehp/tmp/datasets/CASIA-maxpy-clean --lfw_root /home/ehp/tmp/datasets/lfw \
--lfw_pair_list /home/ehp/git/arcface/lfw_test_pair.txt --model_name recongition3 --batch_size 20 --loss adacos --print_freq 20 --depth 50

@@ -0,0 +1,7 @@ vectorizer/train.sh
#python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/sample.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/sample/images \
#--wider_val /home/ehp/tmp/datasets/wider/sample_val.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/sample_val/images \
#--depth 50 --epochs 30 --batch_size 1 --model_name wider_sample1
python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/wider_face_train_bbx_gt.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/WIDER_train/images \
--wider_val /home/ehp/tmp/datasets/wider/wider_face_val_bbx_gt.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/WIDER_val/images \
--depth 50 --epochs 30 --batch_size 1 --model_name widernew1

@@ -13,6 +13,7 @@ from PIL import Image
 import identification.detector as fan

 is_cuda = torch.cuda.is_available()
 print('CUDA: %s' % is_cuda)
+fan_model = fan.load_model('ckpt/wider6_10.pt', is_cuda=is_cuda)

 # load recognition model
@@ -61,18 +62,25 @@ def upload_file():
         filepath = os.path.join(UPLOAD_FOLDER, filename)
         f.save(filepath)
-        img = Image.open(filepath)
-        data = img.convert(mode="RGB")
-        with torch.no_grad():
-            boxes = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda).astype(int)
-        boxes = [b for b in boxes if abs(b[1] - b[0]) >= imagesize / 2 and abs(b[2] - b[0]) >= imagesize / 2]
-        if boxes is None or len(boxes) == 0:
-            abort(404)
-        extracted = [{'box': arr.tolist(), 'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist()} for arr in boxes]
-        return jsonify(extracted)
+        try:
+            img = Image.open(filepath)
+            data = img.convert(mode="RGB")
+            with torch.no_grad():
+                boxes, scores = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda)
+                if boxes is None or len(boxes) == 0:
+                    return jsonify([])
+                boxes = boxes.astype(int)
+                scores = scores.astype(float)
+                extracted = [{'box': arr.tolist(),
+                              'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist(),
+                              'scores': score.tolist()
+                              } for arr, score in zip(boxes, scores)]
+                return jsonify(extracted)
+        finally:
+            os.remove(filepath)
     else:
         abort(500)
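
# A hedged client sketch (the route and form field name are assumptions, not
# shown in this hunk): the endpoint now returns a JSON list with one entry per
# detected face instead of aborting with 404 when nothing is found.
#
#   import requests
#   with open('photo.jpg', 'rb') as fh:
#       r = requests.post('http://localhost:5000/upload', files={'file': fh})
#   for face in r.json():
#       print(face['box'], face['scores'], len(face['vector']))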
