
Faceserver vectorizer training

Branch: master
Author: Petr Masopust, 6 years ago
Parent: 95d4f2ca7d
Commit: aa6f45fc8b
Changed files (lines changed):

1. vectorizer/identification/csv_eval.py (238)
2. vectorizer/identification/dataloader.py (566)
3. vectorizer/identification/detector.py (104)
4. vectorizer/identification/train.py (207)
5. vectorizer/lfw_test_pair.txt (6000)
6. vectorizer/train-rec.sh (2)
7. vectorizer/train.sh (7)
8. vectorizer/vectorizer/server.py (18)

@@ -0,0 +1,238 @@
import numpy as np
import torch


def compute_overlap(a, b):
    """
    Compute the IoU overlap between two sets of boxes.

    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float

    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])

    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih
    ua = np.maximum(ua, np.finfo(float).eps)

    intersection = iw * ih

    return intersection / ua


def _compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, is_cuda=True):
    """ Get the detections from the retinanet using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]

    # Arguments
        dataset         : The generator used to run images through the retinanet.
        retinanet       : The retinanet to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        is_cuda         : Whether CUDA is available.
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))]

    retinanet.eval()

    with torch.no_grad():
        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            # run network
            img_data = data['img'].permute(2, 0, 1).float().unsqueeze(dim=0)
            if is_cuda:
                img_data = img_data.cuda()
            scores, labels, boxes = retinanet(img_data)
            if isinstance(scores, torch.Tensor):
                scores = scores.cpu().numpy()
                labels = labels.cpu().numpy()
                boxes = boxes.cpu().numpy()

                # correct boxes for image scale
                boxes /= scale

                # select indices which have a score above the threshold
                indices = np.where(scores > score_threshold)[0]

                # select those scores
                scores = scores[indices]

                # find the order with which to sort the scores
                scores_sort = np.argsort(-scores)[:max_detections]

                # select detections
                image_boxes = boxes[indices[scores_sort], :]
                image_scores = scores[scores_sort]
                image_labels = labels[indices[scores_sort]]
                image_detections = np.concatenate(
                    [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)

                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1]
            else:
                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = np.zeros((0, 5))

            print('{}/{}'.format(index + 1, len(dataset)), end='\r')

    return all_detections


def _get_annotations(generator):
    """ Get the ground truth annotations from the generator.

    The result is a list of lists such that the size is:
        all_annotations[num_images][num_classes] = annotations[num_detections, 5]

    # Arguments
        generator : The generator used to retrieve ground truth annotations.
    # Returns
        A list of lists containing the annotations for each image in the generator.
    """
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))]

    for i in range(len(generator)):
        # load the annotations
        annotations = generator.load_annotations(i)

        # copy detections to all_annotations
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

        print('{}/{}'.format(i + 1, len(generator)), end='\r')

    return all_annotations


def evaluate(
        generator,
        retinanet,
        iou_threshold=0.5,
        score_threshold=0.05,
        max_detections=100,
        is_cuda=True,
        save_path=None
):
    """ Evaluate a given dataset using a given retinanet.

    # Arguments
        generator       : The generator that represents the dataset to evaluate.
        retinanet       : The retinanet to evaluate.
        iou_threshold   : The threshold used to consider when a detection is positive or negative.
        score_threshold : The score confidence threshold to use for detections.
        max_detections  : The maximum number of detections to use per image.
        is_cuda         : Whether CUDA is available.
        save_path       : The path to save images with visualized detections to.
    # Returns
        A dict mapping class names to mAP scores.
    """
    # gather all detections and annotations
    all_detections = _get_detections(generator, retinanet, score_threshold=score_threshold,
                                     max_detections=max_detections, is_cuda=is_cuda)
    all_annotations = _get_annotations(generator)
    average_precisions = {}

    for label in range(generator.num_classes()):
        false_positives = np.zeros((0,))
        true_positives = np.zeros((0,))
        scores = np.zeros((0,))
        num_annotations = 0.0

        for i in range(len(generator)):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)
                    continue

                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap = overlaps[0, assigned_annotation]

                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue

        # sort by score
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations

    print('\nmAP:')
    for label in range(generator.num_classes()):
        label_name = generator.label_to_name(label)
        print('{}: {}'.format(label_name, average_precisions[label][0]))

    return average_precisions
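
The IoU and AP helpers above are pure numpy and can be sanity-checked on toy boxes. A minimal sketch (values invented, assuming the functions above are importable):

import numpy as np

det = np.array([[0., 0., 10., 10.]])   # one detection, (x1, y1, x2, y2)
gt = np.array([[0., 0., 10., 10.],     # perfect match -> IoU 1.0
               [5., 5., 15., 15.]])    # 25 px intersection / 175 px union -> IoU ~0.143
print(compute_overlap(det, gt))        # [[1.0, 0.1428...]]

# a flat precision of 1.0 over the whole recall range integrates to AP = 1.0
print(_compute_ap(np.array([0.5, 1.0]), np.array([1.0, 1.0])))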

@@ -0,0 +1,566 @@
import torch
import numpy as np
import random
import csv
import os

from torch.utils.data import Dataset
from torch.utils.data.sampler import Sampler
from PIL import Image, ImageEnhance, ImageFilter


class CSVDataset(Dataset):
    """CSV dataset."""

    def __init__(self, train_file, class_list, transform=None):
        """
        Args:
            train_file (string): CSV file with training annotations
            class_list (string): CSV file with class list
            transform (optional): Transformation function
        """
        self.train_file = train_file
        self.class_list = class_list
        self.transform = transform

        # parse the provided class file
        try:
            with open(self.class_list, 'r', newline='') as file:
                self.classes = CSVDataset.load_classes(csv.reader(file, delimiter=' '))
        except ValueError as e:
            raise ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)) from None

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with open(self.train_file, 'r', newline='') as file:
                self.image_data = CSVDataset._read_annotations(csv.reader(file, delimiter=' '), self.classes)
        except ValueError as e:
            raise ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)) from None

        self.image_names = list(self.image_data.keys())
    @staticmethod
    def _parse(value, function, fmt):
        """
        Parse a string into a value, and format a nice ValueError if it fails.

        Returns `function(value)`.
        Any `ValueError` raised is caught and a new `ValueError` is raised
        with message `fmt.format(e)`, where `e` is the caught `ValueError`.
        """
        try:
            return function(value)
        except ValueError as e:
            raise ValueError(fmt.format(e)) from None

    @staticmethod
    def load_classes(csv_reader):
        result = {}

        for line, row in enumerate(csv_reader):
            line += 1

            try:
                class_name, class_id = row
            except ValueError:
                raise ValueError("line {}: format should be 'class_name,class_id'".format(line)) from None
            class_id = CSVDataset._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))

            if class_name in result:
                raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
            result[class_name] = class_id
        return result
    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot, 'scale': 1}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        img = Image.open(self.image_names[image_index])
        img = img.convert(mode="RGB")
        return img
    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']

            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))

            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2
            annotation[0, 4] = self.name_to_label(a['class'])

            annotations = np.append(annotations, annotation, axis=0)

        return annotations
    @staticmethod
    def _read_annotations(csv_reader, classes):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1

            try:
                img_file, x1, y1, x2, y2, class_name = row[:6]
            except ValueError:
                raise ValueError(
                    "line {}: format should be 'img_file,x1,y1,x2,y2,class_name' or 'img_file,,,,,'".format(
                        line)) from None

            if img_file not in result:
                result[img_file] = []

            # If a row contains only an image path, it's an image without annotations.
            if (x1, y1, x2, y2, class_name) == ('.', '.', '.', '.', '.'):
                continue

            x1 = CSVDataset._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
            y1 = CSVDataset._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
            x2 = CSVDataset._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
            y2 = CSVDataset._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))

            if class_name != 'ignore':
                # Check that the bounding box is valid.
                if x2 <= x1:
                    raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
                if y2 <= y1:
                    raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

                # check if the current class name is correctly present
                if class_name not in classes:
                    raise ValueError(
                        'line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))

            result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(self.image_names[image_index])
        return float(image.width) / float(image.height)
class WIDERDataset(Dataset):
    """WIDER dataset."""

    def __init__(self, train_file, img_prefix='', transform=None):
        """
        Args:
            train_file (string): WIDER txt file with training annotations
            img_prefix (string, optional): Prefix for images location
            transform (optional): Transformation function
        """
        self.train_file = train_file
        self.transform = transform
        self.img_prefix = img_prefix

        # The WIDER dataset has only faces. Extend for additional face properties (see below).
        self.classes = {'face': 0}

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # WIDER file definition example:
        # image name, number of faces, then per face: x y w h blur expression illumination invalid occlusion pose
        # 0--Parade/0_Parade_marchingband_1_117.jpg
        # 9
        # 69 359 50 36 1 0 0 0 0 1
        # 227 382 56 43 1 0 1 0 0 1
        # 296 305 44 26 1 0 0 0 0 1
        # 353 280 40 36 2 0 0 0 2 1
        # 885 377 63 41 1 0 0 0 0 1
        # 819 391 34 43 2 0 0 0 1 0
        # 727 342 37 31 2 0 0 0 0 1
        # 598 246 33 29 2 0 0 0 0 1
        # 740 308 45 33 1 0 0 0 2 1
        try:
            with open(self.train_file, 'r') as file:
                self.image_data = WIDERDataset._read_data(file)
        except ValueError as e:
            raise ValueError('invalid WIDER annotations file: {}: {}'.format(self.train_file, e)) from None

        self.image_names = list(self.image_data.keys())

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot, 'scale': 1, 'img_name': self.image_names[idx]}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        print('Loading image %s' % self.image_names[image_index])
        img = Image.open(os.path.join(self.img_prefix, self.image_names[image_index]))
        img = img.convert(mode="RGB")
        return img
    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']

            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))

            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2
            annotation[0, 4] = self.name_to_label(a['class'])

            annotations = np.append(annotations, annotation, axis=0)

        return annotations
    @staticmethod
    def _read_data(reader):
        result = {}
        counter = 0
        img_file = None
        for line in reader:
            line = line.strip()
            if counter == 0:
                # file name or number of faces
                try:
                    counter = int(line)
                except ValueError:
                    if img_file and len(result.get(img_file, [])) == 0:
                        print("Warning - no faces: %s" % img_file)
                    img_file = line
            else:
                counter -= 1
                # coordinates e.g. 370 170 9 13 2 0 0 0 2 0
                nums = [int(x) for x in line.split()]
                result.setdefault(img_file, []).append({'x1': nums[0], 'x2': nums[0] + nums[2],
                                                        'y1': nums[1], 'y2': nums[1] + nums[3],
                                                        'class': 'face'})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(os.path.join(self.img_prefix, self.image_names[image_index]))
        return float(image.width) / float(image.height)
def collater(data):
    imgs = [s['img'] for s in data]
    annots = [s['annot'] for s in data]
    scales = [s['scale'] for s in data]

    widths = [int(s.shape[0]) for s in imgs]
    heights = [int(s.shape[1]) for s in imgs]
    batch_size = len(imgs)

    max_width = np.array(widths).max()
    max_height = np.array(heights).max()

    padded_imgs = torch.zeros(batch_size, max_width, max_height, 3)

    for i in range(batch_size):
        img = imgs[i]
        padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img

    max_num_annots = max(annot.shape[0] for annot in annots)

    if max_num_annots > 0:
        annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1
        # print(annot_padded.shape)
        for idx, annot in enumerate(annots):
            # print(annot.shape)
            if annot.shape[0] > 0:
                annot_padded[idx, :annot.shape[0], :] = annot
    else:
        annot_padded = torch.ones((len(annots), 1, 5)) * -1

    padded_imgs = padded_imgs.permute(0, 3, 1, 2)

    return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales}
class Resizer(object):
    """Resize the image so its smallest side is min_side (capped by max_side) and pad to a multiple of 32."""

    def __call__(self, sample, min_side=800, max_side=1400):
        image, annots, scale = sample['img'], sample['annot'], sample['scale']
        cols, rows = image.size

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
        image = image / 255.0

        rows, cols, cns = image.shape

        pad_w = 32 - rows % 32
        pad_h = 32 - cols % 32

        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': new_image, 'annot': annots, 'scale': scale}
class Augmenter(object):
    """Randomly flip the image and its annotations horizontally."""

    def __call__(self, sample, flip_x=0.5):
        if np.random.rand() < flip_x:
            image, annots, scales = sample['img'], sample['annot'], sample['scale']
            image = image[:, ::-1, :]

            rows, cols, channels = image.shape

            x1 = annots[:, 0].copy()
            x2 = annots[:, 2].copy()

            x_tmp = x1.copy()

            annots[:, 0] = cols - x2
            annots[:, 2] = cols - x_tmp

            sample = {'img': image, 'annot': annots, 'scale': scales}

        return sample
class RandomCrop(object):

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']

        if not annots.shape[0]:
            return {'img': image, 'annot': annots, 'scale': scales}

        if random.choice([0, 1]):
            return {'img': image, 'annot': annots, 'scale': scales}
        else:
            rows, cols, cns = image.shape

            flag = 0
            while True:
                flag += 1
                if flag > 10:
                    return {'img': image, 'annot': annots, 'scale': scales}

                crop_ratio = random.uniform(0.5, 1)
                rows_zero = int(rows * random.uniform(0, 1 - crop_ratio))
                cols_zero = int(cols * random.uniform(0, 1 - crop_ratio))
                crop_rows = int(rows * crop_ratio)
                crop_cols = int(cols * crop_ratio)
                '''
                new_image = image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :]
                new_image = cv2.resize(new_image, (cols, rows))
                #new_image = skimage.transform.resize(new_image, (rows, cols))

                new_annots = np.zeros((0, 5))
                for i in range(annots.shape[0]):
                    x1 = max(annots[i, 0] - cols_zero, 0)
                    y1 = max(annots[i, 1] - rows_zero, 0)
                    x2 = min(annots[i, 2] - cols_zero, crop_cols)
                    y2 = min(annots[i, 3] - rows_zero, crop_rows)
                    label = annots[i, 4]
                    if x1 + 10 < x2 and y1 + 10 < y2:
                        x1 /= crop_ratio
                        y1 /= crop_ratio
                        x2 /= crop_ratio
                        y2 /= crop_ratio
                        new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)

                if not new_annots.shape[0]:
                    continue
                '''
                new_image = np.zeros((rows, cols, cns))
                new_image[rows_zero:rows_zero + crop_rows, cols_zero:cols_zero + crop_cols, :] = \
                    image[rows_zero:rows_zero + crop_rows, cols_zero:cols_zero + crop_cols, :]

                new_annots = np.zeros((0, 5))
                for i in range(annots.shape[0]):
                    x1 = max(cols_zero, annots[i, 0])
                    y1 = max(rows_zero, annots[i, 1])
                    x2 = min(cols_zero + crop_cols, annots[i, 2])
                    y2 = min(rows_zero + crop_rows, annots[i, 3])
                    label = annots[i, 4]
                    if x1 + 10 < x2 and y1 + 10 < y2:
                        new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)

                if not new_annots.shape[0]:
                    continue

                return {'img': new_image, 'annot': new_annots, 'scale': scales}
class Color(object):

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']
        image = Image.fromarray(image)
        ratio = [0.5, 0.8, 1.2, 1.5]

        if random.choice([0, 1]):
            enh_bri = ImageEnhance.Brightness(image)
            brightness = random.choice(ratio)
            image = enh_bri.enhance(brightness)
        if random.choice([0, 1]):
            enh_col = ImageEnhance.Color(image)
            color = random.choice(ratio)
            image = enh_col.enhance(color)
        if random.choice([0, 1]):
            enh_con = ImageEnhance.Contrast(image)
            contrast = random.choice(ratio)
            image = enh_con.enhance(contrast)
        if random.choice([0, 1]):
            enh_sha = ImageEnhance.Sharpness(image)
            sharpness = random.choice(ratio)
            image = enh_sha.enhance(sharpness)
        if random.choice([0, 1]):
            image = image.filter(ImageFilter.BLUR)

        image = np.asarray(image)
        return {'img': image, 'annot': annots, 'scale': scales}
class Normalizer(object):

    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']

        image = (image.astype(np.float32) - self.mean) / self.std
        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
        return sample


class UnNormalizer(object):

    def __init__(self, mean=None, std=None):
        if mean is None:
            self.mean = [0.485, 0.456, 0.406]
        else:
            self.mean = mean
        if std is None:
            self.std = [0.229, 0.224, 0.225]
        else:
            self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        Returns:
            Tensor: Normalized image.
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor
class AspectRatioBasedSampler(Sampler):

    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        # determine the order of the images
        order = list(range(len(self.data_source)))
        order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))

        # divide into groups, one group = one batch
        return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in
                range(0, len(order), self.batch_size)]
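
The dataset, sampler, and collater above slot into a standard PyTorch DataLoader. A minimal wiring sketch (the file names here are hypothetical; the annotation files are space-delimited, matching the csv.reader calls above):

from torch.utils.data import DataLoader
from torchvision import transforms

# annotations.csv rows: img_path x1 y1 x2 y2 class_name
# classes.csv rows:     class_name class_id
dataset = CSVDataset(train_file='annotations.csv', class_list='classes.csv',
                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
sampler = AspectRatioBasedSampler(dataset, batch_size=2, drop_last=False)
loader = DataLoader(dataset, collate_fn=collater, batch_sampler=sampler)

for batch in loader:
    # images are zero-padded to a common size, annotations padded with -1 rows
    print(batch['img'].shape, batch['annot'].shape)  # (B, 3, H, W) and (B, max_annots, 5)
    break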

@@ -1,63 +1,13 @@
 import numpy as np
 import torch
-from PIL import Image
+import argparse
+import json
+from PIL import Image, ImageDraw
+from identification.dataloader import Normalizer, Resizer
 from torchvision import transforms
-class Resizer(object):
-    """Convert ndarrays in sample to Tensors."""
-
-    def __call__(self, sample, min_side=800, max_side=1400):
-        image, annots, scale = sample['img'], sample['annot'], sample['scale']
-        rows, cols = image.size
-
-        # scale = min_side / rows
-        smallest_side = min(rows, cols)
-
-        # rescale the image so the smallest side is min_side
-        scale = min_side / smallest_side
-
-        # check if the largest side is now greater than max_side, which can happen
-        # when images have a large aspect ratio
-        largest_side = max(rows, cols)
-
-        if largest_side * scale > max_side:
-            scale = max_side / largest_side
-
-        # resize the image with the computed scale
-        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
-        image = image / 255.0
-
-        rows, cols, cns = image.shape
-
-        pad_w = 32 - rows % 32
-        pad_h = 32 - cols % 32
-
-        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
-        new_image[:rows, :cols, :] = image.astype(np.float32)
-
-        annots[:, :4] *= scale
-
-        return {'img': new_image, 'annot': annots, 'scale': scale}
-
-
-class Normalizer(object):
-
-    def __init__(self):
-        self.mean = np.array([[[0.485, 0.456, 0.406]]])
-        self.std = np.array([[[0.229, 0.224, 0.225]]])
-
-    def __call__(self, sample):
-        image, annots, scales = sample['img'], sample['annot'], sample['scale']
-
-        image = (image.astype(np.float32) - self.mean) / self.std
-        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
-        return sample
-
 def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True):
     input_data = {'img': img_data, 'annot': np.zeros((0, 5)), 'scale': 1}
     transform = transforms.Compose([Resizer(), Normalizer()])
@@ -70,7 +20,7 @@ def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True)
         img_data = img_data.cuda()
     scores, labels, boxes = model(img_data)
     if scores is None:
-        return np.array()
+        return np.empty((0, 0)), np.empty((0, 0))
     scores = scores.cpu().numpy()
     scale = transformed['scale']
@@ -81,7 +31,16 @@ def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True)
     scores_sort = np.argsort(-scores)[:max_detections]
     image_boxes = boxes[indices[scores_sort], :]
-    return image_boxes
+    return image_boxes, scores[:max_detections]
+
+
+def img_rectangles(img, output_path, boxes=None):
+    if boxes is not None:
+        draw = ImageDraw.Draw(img)
+        for arr in boxes:
+            draw.rectangle(((arr[0], arr[1]), (arr[2], arr[3])), outline="black", width=1)
+    img.save(output_path)


 def load_model(model_path, is_cuda=True):
@@ -93,3 +52,36 @@ def load_model(model_path, is_cuda=True):
     model.anchors.is_cuda = is_cuda

     return model
+
+
+def load_image(filepath):
+    img = Image.open(filepath)
+    img = img.convert(mode="RGB")
+    return img
+
+
+def main(args=None):
+    parser = argparse.ArgumentParser(description='Simple detection script for a trained RetinaNet network.')
+
+    parser.add_argument('--model', help='Path to model')
+    parser.add_argument('--image', help='Path to image')
+    parser.add_argument('--rect', help='Output image with rectangles')
+    parser.add_argument('--threshold', help='Probability threshold (default 0.9)', type=float, default=0.9)
+    parser.add_argument('--force-cpu', help='Force CPU for detection (default false)', dest='force_cpu',
+                        default=False, action='store_true')
+
+    parser = parser.parse_args(args)
+
+    is_cuda = torch.cuda.is_available() and not parser.force_cpu
+
+    model = load_model(parser.model, is_cuda=is_cuda)
+    img = load_image(parser.image)
+    boxes, scores = fan_detect(model, img, threshold=parser.threshold, is_cuda=is_cuda)
+    print(json.dumps({'boxes': boxes.tolist(), 'scores': scores.tolist()}))
+
+    if parser.rect:
+        img = load_image(parser.image)
+        img_rectangles(img, parser.rect, boxes)
+
+
+if __name__ == '__main__':
+    main()
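
Besides the CLI entry point, the detector can be driven from Python. A sketch (the checkpoint and image paths are hypothetical):

import torch
import identification.detector as fan

is_cuda = torch.cuda.is_available()
model = fan.load_model('ckpt/widernew1_29.pt', is_cuda=is_cuda)
img = fan.load_image('group_photo.jpg')
with torch.no_grad():
    boxes, scores = fan.fan_detect(model, img, threshold=0.9, is_cuda=is_cuda)
print(boxes)  # (N, 4) array of x1, y1, x2, y2 boxes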

@@ -0,0 +1,207 @@
import argparse
import collections
import os

import numpy as np
import torch
import torch.optim as optim
from torchvision import transforms
import torch.utils.model_zoo as model_zoo

from identification.model_level_attention import resnet18, resnet34, resnet50, resnet101, resnet152
from torch.utils.data import DataLoader
from identification.csv_eval import evaluate
from identification.dataloader import WIDERDataset, AspectRatioBasedSampler, collater, Resizer, Augmenter, Normalizer, CSVDataset

is_cuda = torch.cuda.is_available()
print('CUDA available: {}'.format(is_cuda))

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}

ckpt = False


def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--wider_train', help='Path to file containing WIDER training annotations (see readme)')
    parser.add_argument('--wider_val',
                        help='Path to file containing WIDER validation annotations (optional, see readme)')
    parser.add_argument('--wider_train_prefix', help='Prefix path to WIDER train images')
    parser.add_argument('--wider_val_prefix', help='Prefix path to WIDER validation images')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--batch_size', help='Batch size (default 2)', type=int, default=2)
    parser.add_argument('--model_name', help='Name of the model to save')
    parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel',
                        default=False, action='store_true')
    parser.add_argument('--pretrained', help='Pretrained model name in weight directory')

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.wider_train is None:
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
    else:
        dataset_train = WIDERDataset(train_file=parser.wider_train, img_prefix=parser.wider_train_prefix,
                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))

    if parser.wider_val is None:
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            print('Loading CSV validation dataset')
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Resizer(), Normalizer()]))
    else:
        print('Loading WIDER validation dataset')
        dataset_val = WIDERDataset(train_file=parser.wider_val, img_prefix=parser.wider_val_prefix,
                                   transform=transforms.Compose([Resizer(), Normalizer()]))

    print('Loading training dataset')
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    if parser.parallel:
        dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler)
    else:
        dataloader_train = DataLoader(dataset_train, collate_fn=collater, batch_sampler=sampler)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = resnet18(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 34:
        retinanet = resnet34(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 50:
        retinanet = resnet50(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 101:
        retinanet = resnet101(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 152:
        retinanet = resnet152(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if ckpt:
        retinanet = torch.load('')
        print('Loading checkpoint')
    else:
        print('Loading pretrained model')
        retinanet_dict = retinanet.state_dict()
        if parser.pretrained is None:
            pretrained_dict = model_zoo.load_url(model_urls['resnet' + str(parser.depth)])
        else:
            pretrained_dict = torch.load(parser.pretrained)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in retinanet_dict}
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)

    if parser.parallel:
        retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])

    if is_cuda:
        retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.parallel:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    iters = 0

    for epoch_num in range(0, parser.epochs):
        retinanet.train()
        if parser.parallel:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            iters += 1
            optimizer.zero_grad()

            img_data = data['img'].float()
            annot_data = data['annot']
            if is_cuda:
                img_data = img_data.cuda()
                annot_data = annot_data.cuda()
                print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (
                    torch.cuda.memory_allocated() / 1024 ** 2, torch.cuda.max_memory_allocated() / 1024 ** 2,
                    torch.cuda.memory_cached() / 1024 ** 2, torch.cuda.max_memory_cached() / 1024 ** 2))

            classification_loss, regression_loss, mask_loss = retinanet([img_data, annot_data])
            del img_data
            del annot_data

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            mask_loss = mask_loss.mean()
            loss = classification_loss + regression_loss + mask_loss
            if bool(loss == 0):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            loss_hist.append(float(loss.item()))
            epoch_loss.append(float(loss.item()))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | '
                'mask_loss {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), float(mask_loss),
                    np.mean(loss_hist)))

            del classification_loss
            del regression_loss
            del loss

        if parser.wider_val is not None:
            print('Evaluating dataset')
            evaluate(dataset_val, retinanet, is_cuda=is_cuda)

        scheduler.step(np.mean(epoch_loss))

        # TODO remove makedir
        os.makedirs('./ckpt', exist_ok=True)
        if parser.parallel:
            torch.save(retinanet.module, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))
        else:
            torch.save(retinanet, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))


if __name__ == '__main__':
    main()
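
Note that the training loop saves the whole module with torch.save rather than a state_dict, so a saved epoch can be reloaded in one call. A sketch (the checkpoint name is hypothetical, following the --model_name pattern above):

import torch

retinanet = torch.load('./ckpt/widernew1_29.pt', map_location='cpu')  # full model, not a state_dict
retinanet.eval()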

vectorizer/lfw_test_pair.txt: file diff suppressed because it is too large.

@@ -0,0 +1,2 @@
python3 -m recognition.train --casia_list /home/ehp/tmp/datasets/CASIA-maxpy-clean/train.txt --casia_root /home/ehp/tmp/datasets/CASIA-maxpy-clean --lfw_root /home/ehp/tmp/datasets/lfw \
--lfw_pair_list /home/ehp/git/arcface/lfw_test_pair.txt --model_name recongition3 --batch_size 20 --loss adacos --print_freq 20 --depth 50

@@ -0,0 +1,7 @@
#python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/sample.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/sample/images \
#--wider_val /home/ehp/tmp/datasets/wider/sample_val.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/sample_val/images \
#--depth 50 --epochs 30 --batch_size 1 --model_name wider_sample1
python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/wider_face_train_bbx_gt.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/WIDER_train/images \
--wider_val /home/ehp/tmp/datasets/wider/wider_face_val_bbx_gt.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/WIDER_val/images \
--depth 50 --epochs 30 --batch_size 1 --model_name widernew1

@@ -13,6 +13,7 @@ from PIL import Image
 import identification.detector as fan

 is_cuda = torch.cuda.is_available()
+print('CUDA: %s' % is_cuda)

 fan_model = fan.load_model('ckpt/wider6_10.pt', is_cuda=is_cuda)

 # load recognition model
@@ -61,18 +62,25 @@ def upload_file():
         filepath = os.path.join(UPLOAD_FOLDER, filename)
         f.save(filepath)

-        img = Image.open(filepath)
-        data = img.convert(mode="RGB")
-        with torch.no_grad():
-            boxes = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda).astype(int)
-        if boxes is None or len(boxes) == 0:
-            abort(404)
-        extracted = [{'box': arr.tolist(), 'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist()} for arr in boxes]
-        return jsonify(extracted)
+        try:
+            img = Image.open(filepath)
+            data = img.convert(mode="RGB")
+            with torch.no_grad():
+                boxes, scores = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda)
+            if boxes is None or len(boxes) == 0:
+                return jsonify([])
+            # drop detections that are too small, filtering boxes and scores together so they stay aligned
+            keep = [i for i, b in enumerate(boxes)
+                    if abs(b[1] - b[0]) >= imagesize / 2 and abs(b[2] - b[0]) >= imagesize / 2]
+            boxes = boxes[keep].astype(int)
+            scores = scores[keep].astype(float)
+            extracted = [{'box': arr.tolist(),
+                          'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist(),
+                          'scores': score.tolist()
+                          } for arr, score in zip(boxes, scores)]
+            return jsonify(extracted)
+        finally:
+            os.remove(filepath)
     else:
         abort(500)
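
With this change the endpoint returns one JSON object per kept detection instead of aborting with 404. The payload looks roughly like this (values invented, vector truncated):

[
    {"box": [123, 45, 321, 243], "scores": 0.9971, "vector": [0.0123, -0.0456, ...]}
]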
