parent 95d4f2ca7d
commit aa6f45fc8b
8 changed files with 7088 additions and 68 deletions
identification/csv_eval.py
@@ -0,0 +1,238 @@
import numpy as np

import torch


def compute_overlap(a, b):
    """
    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])

    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih

    ua = np.maximum(ua, np.finfo(float).eps)

    intersection = iw * ih

    return intersection / ua
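
def _example_compute_overlap():
    # Illustrative sketch only (box values invented): the IoU of a box with
    # itself is 1, and with a partially overlapping box it is intersection / union.
    boxes = np.array([[0., 0., 10., 10.]])                       # (N=1, 4) as x1, y1, x2, y2
    query = np.array([[0., 0., 10., 10.], [5., 5., 15., 15.]])   # (K=2, 4)
    print(compute_overlap(boxes, query))  # [[1.0  0.142857]] -- 25 / (100 + 100 - 25)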


def _compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall: The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
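
def _example_compute_ap():
    # Worked toy example (values invented): with recall [0.5, 1.0] and precision
    # [1.0, 0.5], the sentinels give mrec = [0, 0.5, 1.0, 1.0], the precision
    # envelope gives mpre = [1.0, 1.0, 0.5, 0.0], so
    # AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.
    ap = _compute_ap(np.array([0.5, 1.0]), np.array([1.0, 0.5]))
    assert abs(ap - 0.75) < 1e-9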


def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, is_cuda=True):
    """ Get the detections from the retinanet using the generator.
    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]
    # Arguments
        dataset         : The generator used to run images through the retinanet.
        retinanet       : The retinanet to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        is_cuda         : Whether to run the model on CUDA.
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))]

    retinanet.eval()

    with torch.no_grad():

        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            # run network
            img_data = data['img'].permute(2, 0, 1).float().unsqueeze(dim=0)
            if is_cuda:
                img_data = img_data.cuda()
            scores, labels, boxes = retinanet(img_data)
            if isinstance(scores, torch.Tensor):
                scores = scores.cpu().numpy()
                labels = labels.cpu().numpy()
                boxes = boxes.cpu().numpy()

                # correct boxes for image scale
                boxes /= scale

                # select indices which have a score above the threshold
                indices = np.where(scores > score_threshold)[0]

                # select those scores
                scores = scores[indices]

                # find the order with which to sort the scores
                scores_sort = np.argsort(-scores)[:max_detections]

                # select detections
                image_boxes = boxes[indices[scores_sort], :]
                image_scores = scores[scores_sort]
                image_labels = labels[indices[scores_sort]]
                image_detections = np.concatenate(
                    [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)

                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1]
            else:
                # no detections for this image
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = np.zeros((0, 5))

            print('{}/{}'.format(index + 1, len(dataset)), end='\r')

    return all_detections


def _get_annotations(generator):
    """ Get the ground truth annotations from the generator.
    The result is a list of lists such that the size is:
        all_annotations[num_images][num_classes] = annotations[num_annotations, 5]
    # Arguments
        generator : The generator used to retrieve ground truth annotations.
    # Returns
        A list of lists containing the annotations for each image in the generator.
    """
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))]

    for i in range(len(generator)):
        # load the annotations
        annotations = generator.load_annotations(i)

        # copy annotations to all_annotations
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

        print('{}/{}'.format(i + 1, len(generator)), end='\r')

    return all_annotations


def evaluate(
        generator,
        retinanet,
        iou_threshold=0.5,
        score_threshold=0.05,
        max_detections=100,
        is_cuda=True,
        save_path=None
):
    """ Evaluate a given dataset using a given retinanet.
    # Arguments
        generator       : The generator that represents the dataset to evaluate.
        retinanet       : The retinanet to evaluate.
        iou_threshold   : The IoU threshold above which a detection counts as a true positive.
        score_threshold : The score confidence threshold to use for detections.
        max_detections  : The maximum number of detections to use per image.
        is_cuda         : Whether to run the model on CUDA.
        save_path       : The path to save images with visualized detections to.
    # Returns
        A dict mapping class labels to (average precision, number of annotations) pairs.
    """

    # gather all detections and annotations

    all_detections = _get_detections(generator, retinanet, score_threshold=score_threshold,
                                     max_detections=max_detections, is_cuda=is_cuda)
    all_annotations = _get_annotations(generator)

    average_precisions = {}

    for label in range(generator.num_classes()):
        false_positives = np.zeros((0,))
        true_positives = np.zeros((0,))
        scores = np.zeros((0,))
        num_annotations = 0.0

        for i in range(len(generator)):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)
                    continue

                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap = overlaps[0, assigned_annotation]

                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue

        # sort by score
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute cumulative false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations

    print('\nmAP:')
    for label in range(generator.num_classes()):
        label_name = generator.label_to_name(label)
        print('{}: {}'.format(label_name, average_precisions[label][0]))

    return average_precisions
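
def _example_evaluate():
    # Minimal usage sketch; the file paths and checkpoint name are hypothetical.
    # CSVDataset, Resizer and Normalizer come from the dataloader module, as in train.py.
    from torchvision import transforms
    from identification.dataloader import CSVDataset, Resizer, Normalizer

    dataset = CSVDataset(train_file='val_annotations.csv', class_list='classes.csv',
                         transform=transforms.Compose([Resizer(), Normalizer()]))
    retinanet = torch.load('./ckpt/widernew1_0.pt')  # a model saved by train.py
    # prints per-class AP and returns {label: (AP, num_annotations)}
    return evaluate(dataset, retinanet, iou_threshold=0.5, is_cuda=torch.cuda.is_available())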
identification/dataloader.py
@@ -0,0 +1,566 @@
import torch
import numpy as np
import random
import csv
import os

from torch.utils.data import Dataset
from torch.utils.data.sampler import Sampler

from PIL import Image, ImageEnhance, ImageFilter


class CSVDataset(Dataset):
    """CSV dataset."""

    def __init__(self, train_file, class_list, transform=None):
        """
        Args:
            train_file (string): CSV file with training annotations
            class_list (string): CSV file with class list
            transform (optional): Transformation function
        """
        self.train_file = train_file
        self.class_list = class_list
        self.transform = transform

        # parse the provided class file
        try:
            with open(self.class_list, 'r', newline='') as file:
                self.classes = CSVDataset.load_classes(csv.reader(file, delimiter=' '))
        except ValueError as e:
            raise ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)) from None

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with open(self.train_file, 'r', newline='') as file:
                self.image_data = CSVDataset._read_annotations(csv.reader(file, delimiter=' '), self.classes)
        except ValueError as e:
            raise ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)) from None
        self.image_names = list(self.image_data.keys())

    @staticmethod
    def _parse(value, function, fmt):
        """
        Parse a string into a value, and format a nice ValueError if it fails.
        Returns `function(value)`.
        Any `ValueError` raised is caught and a new `ValueError` is raised
        with message `fmt.format(e)`, where `e` is the caught `ValueError`.
        """
        try:
            return function(value)
        except ValueError as e:
            raise ValueError(fmt.format(e)) from None

    @staticmethod
    def load_classes(csv_reader):
        result = {}

        for line, row in enumerate(csv_reader):
            line += 1

            try:
                class_name, class_id = row
            except ValueError:
                raise ValueError("line {}: format should be 'class_name,class_id'".format(line)) from None
            class_id = CSVDataset._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))

            if class_name in result:
                raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
            result[class_name] = class_id
        return result

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot, 'scale': 1}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        # image_names holds the image paths from the first CSV column
        img = Image.open(self.image_names[image_index])
        img = img.convert(mode="RGB")

        return img

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']

            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))

            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2

            annotation[0, 4] = self.name_to_label(a['class'])
            annotations = np.append(annotations, annotation, axis=0)

        return annotations

    @staticmethod
    def _read_annotations(csv_reader, classes):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1

            try:
                img_file, x1, y1, x2, y2, class_name = row[:6]
            except ValueError:
                raise ValueError(
                    "line {}: format should be 'img_file,x1,y1,x2,y2,class_name' or 'img_file,,,,,'".format(
                        line)) from None

            if img_file not in result:
                result[img_file] = []

            # If a row contains only an image path, it's an image without annotations.
            if (x1, y1, x2, y2, class_name) == ('.', '.', '.', '.', '.'):
                continue

            x1 = CSVDataset._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
            y1 = CSVDataset._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
            x2 = CSVDataset._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
            y2 = CSVDataset._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))

            if class_name != 'ignore':
                # Check that the bounding box is valid.
                if x2 <= x1:
                    raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
                if y2 <= y1:
                    raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

                # check that the current class name is actually known
                if class_name not in classes:
                    raise ValueError(
                        'line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))

            result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(self.image_names[image_index])
        return float(image.width) / float(image.height)

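
def _example_csv_dataset():
    # Usage sketch with hypothetical file names and contents. Note both readers
    # use a space delimiter, so the expected rows look like:
    #   classes.csv:      face 0
    #   annotations.csv:  images/img_001.jpg 42 17 180 200 face
    #                     images/img_002.jpg . . . . .     <- image without boxes
    dataset = CSVDataset(train_file='annotations.csv', class_list='classes.csv')
    print(len(dataset), dataset.num_classes())  # number of images, number of classes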


class WIDERDataset(Dataset):
    """WIDER dataset."""

    def __init__(self, train_file, img_prefix='', transform=None):
        """
        Args:
            train_file (string): WIDER txt file with training annotations
            img_prefix (string, optional): Prefix for images location
            transform (optional): Transformation function
        """
        self.train_file = train_file
        self.transform = transform
        self.img_prefix = img_prefix

        # The WIDER dataset has only faces. Extend for additional face properties (see below).
        self.classes = {'face': 0}

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # WIDER file definition example:
        # image name, number of faces, then per face: x y w h blur expression illumination invalid occlusion pose
        # 0--Parade/0_Parade_marchingband_1_117.jpg
        # 9
        # 69 359 50 36 1 0 0 0 0 1
        # 227 382 56 43 1 0 1 0 0 1
        # 296 305 44 26 1 0 0 0 0 1
        # 353 280 40 36 2 0 0 0 2 1
        # 885 377 63 41 1 0 0 0 0 1
        # 819 391 34 43 2 0 0 0 1 0
        # 727 342 37 31 2 0 0 0 0 1
        # 598 246 33 29 2 0 0 0 0 1
        # 740 308 45 33 1 0 0 0 2 1
        try:
            with open(self.train_file, 'r') as file:
                self.image_data = WIDERDataset._read_data(file)
        except ValueError as e:
            raise ValueError('invalid WIDER annotations file: {}: {}'.format(self.train_file, e)) from None
        self.image_names = list(self.image_data.keys())

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot, 'scale': 1, 'img_name': self.image_names[idx]}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        print('Loading image %s' % self.image_names[image_index])
        img = Image.open(os.path.join(self.img_prefix, self.image_names[image_index]))
        img = img.convert(mode="RGB")

        return img

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']

            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))

            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2

            annotation[0, 4] = self.name_to_label(a['class'])
            annotations = np.append(annotations, annotation, axis=0)

        return annotations

    @staticmethod
    def _read_data(reader):
        result = {}
        counter = 0
        img_file = None
        for line in reader:
            line = line.strip()
            if counter == 0:
                # this line is either a file name or a face count
                try:
                    counter = int(line)
                except ValueError:
                    if img_file and len(result[img_file]) == 0:
                        print("Warning - no faces: %s" % img_file)
                    img_file = line
            else:
                counter -= 1
                # coordinates, e.g. '370 170 9 13 2 0 0 0 2 0'; convert x y w h to corners
                nums = [int(x) for x in line.split()]
                result.setdefault(img_file, []).append({'x1': nums[0], 'x2': nums[0] + nums[2],
                                                        'y1': nums[1], 'y2': nums[1] + nums[3],
                                                        'class': 'face'})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(os.path.join(self.img_prefix, self.image_names[image_index]))
        return float(image.width) / float(image.height)

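
def _example_wider_parsing():
    # Parsing sketch for the format documented in WIDERDataset.__init__, using
    # just the first two faces of the sample above; x y w h becomes x1 y1 x2 y2.
    import io
    sample = io.StringIO('0--Parade/0_Parade_marchingband_1_117.jpg\n'
                         '2\n'
                         '69 359 50 36 1 0 0 0 0 1\n'
                         '227 382 56 43 1 0 1 0 0 1\n')
    parsed = WIDERDataset._read_data(sample)
    print(parsed['0--Parade/0_Parade_marchingband_1_117.jpg'][0])
    # {'x1': 69, 'x2': 119, 'y1': 359, 'y2': 395, 'class': 'face'}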


def collater(data):
    imgs = [s['img'] for s in data]
    annots = [s['annot'] for s in data]
    scales = [s['scale'] for s in data]

    # images arrive as (rows, cols, channels); pad every image in the batch
    # up to the largest height and width so they can be stacked
    heights = [int(s.shape[0]) for s in imgs]
    widths = [int(s.shape[1]) for s in imgs]
    batch_size = len(imgs)

    max_height = np.array(heights).max()
    max_width = np.array(widths).max()

    padded_imgs = torch.zeros(batch_size, max_height, max_width, 3)

    for i in range(batch_size):
        img = imgs[i]
        padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img

    max_num_annots = max(annot.shape[0] for annot in annots)
    if max_num_annots > 0:
        # pad annotation lists with -1 rows so every sample has the same count
        annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1
        for idx, annot in enumerate(annots):
            if annot.shape[0] > 0:
                annot_padded[idx, :annot.shape[0], :] = annot
    else:
        annot_padded = torch.ones((len(annots), 1, 5)) * -1

    # (B, H, W, C) -> (B, C, H, W)
    padded_imgs = padded_imgs.permute(0, 3, 1, 2)

    return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales}

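
def _example_collater():
    # Padding sketch on two invented samples of different sizes: images are
    # padded to the largest height/width, annotation lists are padded with -1 rows.
    batch = [
        {'img': torch.zeros(100, 80, 3), 'annot': torch.tensor([[10., 10., 50., 50., 0.]]), 'scale': 1.0},
        {'img': torch.zeros(120, 60, 3), 'annot': torch.zeros(0, 5), 'scale': 1.0},
    ]
    out = collater(batch)
    print(out['img'].shape)  # torch.Size([2, 3, 120, 80])
    print(out['annot'][1])   # a single all -1 row: the second sample has no boxes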


class Resizer(object):
    """Rescale the image so its smallest side is min_side (capped by max_side), then pad to a multiple of 32."""

    def __call__(self, sample, min_side=800, max_side=1400):
        image, annots, scale = sample['img'], sample['annot'], sample['scale']

        cols, rows = image.size

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
        image = image / 255.0

        rows, cols, cns = image.shape

        # pad both spatial dims up to the next multiple of 32
        pad_rows = 32 - rows % 32
        pad_cols = 32 - cols % 32

        new_image = np.zeros((rows + pad_rows, cols + pad_cols, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': new_image, 'annot': annots, 'scale': scale}

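
def _example_resizer():
    # Scale arithmetic on an invented 1000x500 image: 800/500 = 1.6 would push the
    # long side to 1600 > 1400, so the cap gives scale = 1400/1000 = 1.4, and both
    # sides of the resized 700x1400 image are padded up to the next multiple of 32.
    img = Image.new('RGB', (1000, 500))  # (cols, rows)
    out = Resizer()({'img': img, 'annot': np.zeros((0, 5)), 'scale': 1})
    print(out['scale'])      # 1.4
    print(out['img'].shape)  # (704, 1408, 3)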


class Augmenter(object):
    """Randomly flip the image and its boxes horizontally."""

    def __call__(self, sample, flip_x=0.5):
        if np.random.rand() < flip_x:
            image, annots, scales = sample['img'], sample['annot'], sample['scale']
            image = image[:, ::-1, :]

            rows, cols, channels = image.shape

            x1 = annots[:, 0].copy()
            x2 = annots[:, 2].copy()

            x_tmp = x1.copy()

            # mirror the x coordinates around the image width
            annots[:, 0] = cols - x2
            annots[:, 2] = cols - x_tmp

            sample = {'img': image, 'annot': annots, 'scale': scales}

        return sample



class RandomCrop(object):
    """Randomly zero out everything outside a crop window, keeping boxes that stay mostly inside it."""

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']

        if not annots.shape[0]:
            return {'img': image, 'annot': annots, 'scale': scales}
        if random.choice([0, 1]):
            return {'img': image, 'annot': annots, 'scale': scales}
        else:
            rows, cols, cns = image.shape
            flag = 0
            while True:
                # give up after 10 attempts that keep no annotation
                flag += 1
                if flag > 10:
                    return {'img': image, 'annot': annots, 'scale': scales}

                crop_ratio = random.uniform(0.5, 1)
                rows_zero = int(rows * random.uniform(0, 1 - crop_ratio))
                cols_zero = int(cols * random.uniform(0, 1 - crop_ratio))
                crop_rows = int(rows * crop_ratio)
                crop_cols = int(cols * crop_ratio)
                '''
                # alternative: actually crop and resize back (needs cv2)
                new_image = image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :]
                new_image = cv2.resize(new_image, (cols, rows))
                #new_image = skimage.transform.resize(new_image, (rows, cols))

                new_annots = np.zeros((0, 5))
                for i in range(annots.shape[0]):
                    x1 = max(annots[i, 0] - cols_zero, 0)
                    y1 = max(annots[i, 1] - rows_zero, 0)
                    x2 = min(annots[i, 2] - cols_zero, crop_cols)
                    y2 = min(annots[i, 3] - rows_zero, crop_rows)
                    label = annots[i, 4]
                    if x1 + 10 < x2 and y1 + 10 < y2:
                        x1 /= crop_ratio
                        y1 /= crop_ratio
                        x2 /= crop_ratio
                        y2 /= crop_ratio
                        new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)

                if not new_annots.shape[0]:
                    continue
                '''
                # zero out everything outside the crop window, keeping the image size
                new_image = np.zeros((rows, cols, cns))
                new_image[rows_zero:rows_zero + crop_rows, cols_zero:cols_zero + crop_cols, :] = \
                    image[rows_zero:rows_zero + crop_rows,
                          cols_zero:cols_zero + crop_cols,
                          :]

                # clip boxes to the crop window and drop those that become too small
                new_annots = np.zeros((0, 5))
                for i in range(annots.shape[0]):
                    x1 = max(cols_zero, annots[i, 0])
                    y1 = max(rows_zero, annots[i, 1])
                    x2 = min(cols_zero + crop_cols, annots[i, 2])
                    y2 = min(rows_zero + crop_rows, annots[i, 3])
                    label = annots[i, 4]
                    if x1 + 10 < x2 and y1 + 10 < y2:
                        new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)

                if not new_annots.shape[0]:
                    continue

                return {'img': new_image, 'annot': new_annots, 'scale': scales}



class Color(object):
    """Randomly jitter brightness, color, contrast and sharpness, and occasionally blur."""

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']
        image = Image.fromarray(image)

        ratio = [0.5, 0.8, 1.2, 1.5]

        if random.choice([0, 1]):
            enh_bri = ImageEnhance.Brightness(image)
            brightness = random.choice(ratio)
            image = enh_bri.enhance(brightness)
        if random.choice([0, 1]):
            enh_col = ImageEnhance.Color(image)
            color = random.choice(ratio)
            image = enh_col.enhance(color)
        if random.choice([0, 1]):
            enh_con = ImageEnhance.Contrast(image)
            contrast = random.choice(ratio)
            image = enh_con.enhance(contrast)
        if random.choice([0, 1]):
            enh_sha = ImageEnhance.Sharpness(image)
            sharpness = random.choice(ratio)
            image = enh_sha.enhance(sharpness)
        if random.choice([0, 1]):
            image = image.filter(ImageFilter.BLUR)

        image = np.asarray(image)
        return {'img': image, 'annot': annots, 'scale': scales}



class Normalizer(object):
    """Normalize the image with ImageNet mean and std and convert the sample to tensors."""

    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']

        image = (image.astype(np.float32) - self.mean) / self.std

        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
        return sample


class UnNormalizer(object):
    def __init__(self, mean=None, std=None):
        if mean is None:
            self.mean = [0.485, 0.456, 0.406]
        else:
            self.mean = mean
        if std is None:
            self.std = [0.229, 0.224, 0.225]
        else:
            self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Normalized image tensor of size (C, H, W).
        Returns:
            Tensor: Image with the normalization undone (modified in place).
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor

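
def _example_unnormalize():
    # Round-trip sketch: UnNormalizer undoes Normalizer on one (C, H, W) image,
    # e.g. before drawing detections; it modifies the tensor in place, so clone first.
    img = torch.rand(3, 64, 64)  # stand-in for a normalized image from a batch
    restored = UnNormalizer()(img.clone())
    print(restored.shape)  # torch.Size([3, 64, 64])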


class AspectRatioBasedSampler(Sampler):
    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        # determine the order of the images, sorted by aspect ratio
        order = list(range(len(self.data_source)))
        order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))

        # divide into groups, one group = one batch (wrapping around at the end)
        return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in
                range(0, len(order), self.batch_size)]
identification/train.py
@@ -0,0 +1,207 @@
import argparse
import collections
import os

import numpy as np

import torch
import torch.optim as optim
from torchvision import transforms
import torch.utils.model_zoo as model_zoo

from identification.model_level_attention import resnet18, resnet34, resnet50, resnet101, resnet152
from torch.utils.data import DataLoader
from identification.csv_eval import evaluate
from identification.dataloader import WIDERDataset, AspectRatioBasedSampler, collater, Resizer, Augmenter, Normalizer, CSVDataset

is_cuda = torch.cuda.is_available()
print('CUDA available: {}'.format(is_cuda))

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}

ckpt = False


def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--wider_train', help='Path to file containing WIDER training annotations (see readme)')
    parser.add_argument('--wider_val',
                        help='Path to file containing WIDER validation annotations (optional, see readme)')
    parser.add_argument('--wider_train_prefix', help='Prefix path to WIDER train images')
    parser.add_argument('--wider_val_prefix', help='Prefix path to WIDER validation images')

    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--batch_size', help='Batch size (default 2)', type=int, default=2)

    parser.add_argument('--model_name', help='Name of the model to save')
    parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel',
                        default=False, action='store_true')
    parser.add_argument('--pretrained', help='Pretrained model name in weight directory')

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.wider_train is None:
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
    else:
        dataset_train = WIDERDataset(train_file=parser.wider_train, img_prefix=parser.wider_train_prefix,
                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))

    if parser.wider_val is None:
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            print('Loading CSV validation dataset')
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Resizer(), Normalizer()]))
    else:
        print('Loading WIDER validation dataset')
        dataset_val = WIDERDataset(train_file=parser.wider_val, img_prefix=parser.wider_val_prefix,
                                   transform=transforms.Compose([Resizer(), Normalizer()]))

    print('Loading training dataset')
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    if parser.parallel:
        dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler)
    else:
        dataloader_train = DataLoader(dataset_train, collate_fn=collater, batch_sampler=sampler)

    # Create the model (model_level_attention backbone)
    if parser.depth == 18:
        retinanet = resnet18(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 34:
        retinanet = resnet34(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 50:
        retinanet = resnet50(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 101:
        retinanet = resnet101(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    elif parser.depth == 152:
        retinanet = resnet152(num_classes=dataset_train.num_classes(), is_cuda=is_cuda)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if ckpt:
        retinanet = torch.load('')
        print('Loaded checkpoint')
    else:
        print('Loading pretrained model')
        retinanet_dict = retinanet.state_dict()
        if parser.pretrained is None:
            pretrained_dict = model_zoo.load_url(model_urls['resnet' + str(parser.depth)])
        else:
            pretrained_dict = torch.load(parser.pretrained)
        # keep only the backbone weights that exist in the RetinaNet state dict
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in retinanet_dict}
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('Loaded pretrained backbone')

    print(retinanet)
    if parser.parallel:
        retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])
    if is_cuda:
        retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.parallel:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    iters = 0
    for epoch_num in range(0, parser.epochs):

        retinanet.train()
        if parser.parallel:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):

            iters += 1

            optimizer.zero_grad()

            img_data = data['img'].float()
            annot_data = data['annot']
            if is_cuda:
                img_data = img_data.cuda()
                annot_data = annot_data.cuda()

                # log GPU memory usage (values in MiB)
                print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (
                    torch.cuda.memory_allocated() / 1024 ** 2, torch.cuda.max_memory_allocated() / 1024 ** 2,
                    torch.cuda.memory_cached() / 1024 ** 2, torch.cuda.max_memory_cached() / 1024 ** 2))

            classification_loss, regression_loss, mask_loss = retinanet([img_data, annot_data])

            del img_data
            del annot_data

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            mask_loss = mask_loss.mean()

            loss = classification_loss + regression_loss + mask_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss.item()))

            epoch_loss.append(float(loss.item()))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | '
                'Mask loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), float(mask_loss),
                    np.mean(loss_hist)))

            del classification_loss
            del regression_loss
            del loss

        if parser.wider_val is not None:
            print('Evaluating dataset')
            evaluate(dataset_val, retinanet, is_cuda=is_cuda)

        scheduler.step(np.mean(epoch_loss))

        # TODO: remove makedirs
        os.makedirs('./ckpt', exist_ok=True)
        if parser.parallel:
            torch.save(retinanet.module, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))
        else:
            torch.save(retinanet, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))


if __name__ == '__main__':
    main()
File diff suppressed because it is too large
@@ -0,0 +1,2 @@
python3 -m recognition.train --casia_list /home/ehp/tmp/datasets/CASIA-maxpy-clean/train.txt --casia_root /home/ehp/tmp/datasets/CASIA-maxpy-clean --lfw_root /home/ehp/tmp/datasets/lfw \
--lfw_pair_list /home/ehp/git/arcface/lfw_test_pair.txt --model_name recongition3 --batch_size 20 --loss adacos --print_freq 20 --depth 50
@@ -0,0 +1,7 @@
#python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/sample.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/sample/images \
#--wider_val /home/ehp/tmp/datasets/wider/sample_val.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/sample_val/images \
#--depth 50 --epochs 30 --batch_size 1 --model_name wider_sample1

python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/wider_face_train_bbx_gt.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/WIDER_train/images \
--wider_val /home/ehp/tmp/datasets/wider/wider_face_val_bbx_gt.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/WIDER_val/images \
--depth 50 --epochs 30 --batch_size 1 --model_name widernew1