commit
95d4f2ca7d
13 changed files with 1969 additions and 0 deletions
@@ -0,0 +1,71 @@
# Runtime directories
ckpt/
mAP_txt/
summary/
weight/

# IntelliJ IDEA
.idea/
*.iml

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
# lib is NOT ignored
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
@@ -0,0 +1,131 @@
import numpy as np
import torch
import torch.nn as nn


class Anchors(nn.Module):
    def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None, is_cuda=True):
        super(Anchors, self).__init__()

        self.is_cuda = is_cuda
        # Fall back to the RetinaNet defaults, but keep any value the caller
        # passes in (the original only assigned the attributes when the
        # argument was None, so explicitly provided values were silently dropped)
        self.pyramid_levels = [3, 4, 5, 6, 7] if pyramid_levels is None else pyramid_levels
        self.strides = [2 ** x for x in self.pyramid_levels] if strides is None else strides
        self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] if sizes is None else sizes
        # self.ratios = np.array([1., 1.5, 2., 2.5, 3.])
        self.ratios = np.array([0.5, 1., 2.]) if ratios is None else ratios
        self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) if scales is None else scales

    def forward(self, image):
        image_shape = image.shape[2:]
        image_shape = np.array(image_shape)
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        # compute anchors over all pyramid levels
        all_anchors = np.zeros((0, 4)).astype(np.float32)

        for idx, p in enumerate(self.pyramid_levels):
            anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
            shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
            all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

        all_anchors = np.expand_dims(all_anchors, axis=0)
        all_anchors = torch.from_numpy(all_anchors.astype(np.float32))
        if self.is_cuda:
            all_anchors = all_anchors.cuda()

        return all_anchors


def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales w.r.t. a reference window.
    """

    if ratios is None:
        ratios = np.array([0.5, 1, 2])

    if scales is None:
        scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    num_anchors = len(ratios) * len(scales)

    # initialize output anchors
    anchors = np.zeros((num_anchors, 4))

    # scale base_size
    anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T

    # compute areas of anchors
    areas = anchors[:, 2] * anchors[:, 3]

    # correct for ratios
    anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
    anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))

    # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
    anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
    anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T

    return anchors
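
# --- Editor's note: a minimal usage sketch (not part of the commit). With the
# default 3 ratios x 3 scales, generate_anchors() returns 9 origin-centered
# boxes in (x1, y1, x2, y2) form:
#
#     >>> a = generate_anchors(base_size=32)
#     >>> a.shape
#     (9, 4)
#     >>> np.allclose(a[:, :2], -a[:, 2:])  # centered at the origin
#     True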


def compute_shape(image_shape, pyramid_levels):
    """Compute shapes based on pyramid levels.

    :param image_shape:
    :param pyramid_levels:
    :return:
    """
    image_shape = np.array(image_shape[:2])
    image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
    return image_shapes


def anchors_for_shape(
        image_shape,
        pyramid_levels=None,
        ratios=None,
        scales=None,
        strides=None,
        sizes=None,
):
    image_shapes = compute_shape(image_shape, pyramid_levels)

    # compute anchors over all pyramid levels
    all_anchors = np.zeros((0, 4))
    for idx, p in enumerate(pyramid_levels):
        anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales)
        shifted_anchors = shift(image_shapes[idx], strides[idx], anchors)
        all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

    return all_anchors


def shift(shape, stride, anchors):
    shift_x = (np.arange(0, shape[1]) + 0.5) * stride
    shift_y = (np.arange(0, shape[0]) + 0.5) * stride

    shift_x, shift_y = np.meshgrid(shift_x, shift_y)

    shifts = np.vstack((
        shift_x.ravel(), shift_y.ravel(),
        shift_x.ravel(), shift_y.ravel()
    )).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = anchors.shape[0]
    K = shifts.shape[0]
    all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))

    return all_anchors
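
# --- Editor's note: a small shape sketch for shift() (illustrative only, not
# part of the commit). A 10x10 feature map at stride 8 has K = 100 grid
# positions, each tiled with the A = 9 default anchors:
#
#     >>> shifted = shift((10, 10), 8, generate_anchors(base_size=32))
#     >>> shifted.shape
#     (900, 4)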

@@ -0,0 +1,95 @@
import numpy as np
import torch
from PIL import Image

from torchvision import transforms


class Resizer(object):
    """Resize the image so the smallest side hits min_side, then zero-pad both sides up to multiples of 32."""

    def __call__(self, sample, min_side=800, max_side=1400):
        image, annots, scale = sample['img'], sample['annot'], sample['scale']

        # PIL's Image.size is (width, height), so unpack it as (cols, rows);
        # the original unpacked it as (rows, cols), which transposed the
        # resize target for non-square images
        cols, rows = image.size

        # scale = min_side / rows

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = np.array(image.resize((int(round(cols * scale)), int(round(rows * scale))), resample=Image.BILINEAR))
        image = image / 255.0

        rows, cols, cns = image.shape

        # pad each side up to the next multiple of 32 (the original always
        # added a full extra 32 pixels when a side was already aligned)
        pad_w = (32 - rows % 32) % 32
        pad_h = (32 - cols % 32) % 32

        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': new_image, 'annot': annots, 'scale': scale}


class Normalizer(object):
    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        image, annots, scales = sample['img'], sample['annot'], sample['scale']

        image = (image.astype(np.float32) - self.mean) / self.std

        sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
        return sample


def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True):
    input_data = {'img': img_data, 'annot': np.zeros((0, 5)), 'scale': 1}
    transform = transforms.Compose([Resizer(), Normalizer()])
    transformed = transform(input_data)

    model.eval()
    with torch.no_grad():
        img_data = transformed['img'].permute(2, 0, 1).float().unsqueeze(dim=0)
        if is_cuda:
            img_data = img_data.cuda()
        scores, labels, boxes = model(img_data)
        if scores is None:
            # np.array() with no arguments raises a TypeError; return an
            # empty box array instead
            return np.zeros((0, 4))

    scores = scores.cpu().numpy()
    scale = transformed['scale']
    boxes = boxes.cpu().numpy() / scale

    indices = np.where(scores > threshold)[0]
    scores = scores[indices]
    scores_sort = np.argsort(-scores)[:max_detections]
    image_boxes = boxes[indices[scores_sort], :]

    return image_boxes
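
# --- Editor's note: a hedged end-to-end sketch (not part of the commit). The
# model path and image path below are placeholders; Resizer expects a PIL
# image, so the input is opened with PIL:
#
#     from PIL import Image
#     model = load_model('weight/model_final.pt', is_cuda=False)  # hypothetical path
#     img = Image.open('sample.jpg').convert('RGB')               # hypothetical image
#     boxes = fan_detect(model, img, threshold=0.9, is_cuda=False)
#     print(boxes)  # (N, 4) array of x1, y1, x2, y2 in original image coordinates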


def load_model(model_path, is_cuda=True):
    # load possible cuda model as cpu
    model = torch.load(model_path, map_location=lambda storage, location: storage)
    if is_cuda:
        model = model.cuda()

    model.anchors.is_cuda = is_cuda

    return model

@@ -0,0 +1,289 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


def memprint(a):
    # print a tensor's shape and its payload size in bytes
    print(a.shape)
    print(a.element_size() * a.nelement())


def calc_iou(a, b):
    # compute IoU in chunks of 20 annotation boxes to bound peak memory:
    # the (num_anchors x num_annotations) intermediates are built slice by
    # slice and written into the preallocated result
    step = 20
    # allocate the result on the same device as the inputs (the original
    # hard-coded .cuda(), which broke CPU-only runs)
    IoU = torch.zeros((len(a), len(b)), device=a.device)
    step_count = int(len(b) / step)
    if len(b) % step != 0:
        step_count += 1

    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    for i in range(step_count):
        iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[i * step:(i + 1) * step, 2])
        iw.sub_(torch.max(torch.unsqueeze(a[:, 0], 1), b[i * step:(i + 1) * step, 0]))

        ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[i * step:(i + 1) * step, 3])
        ih.sub_(torch.max(torch.unsqueeze(a[:, 1], 1), b[i * step:(i + 1) * step, 1]))

        iw.clamp_(min=0)
        ih.clamp_(min=0)

        iw.mul_(ih)
        del ih

        ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area[i * step:(i + 1) * step] - iw
        ua = torch.clamp(ua, min=1e-8)
        iw.div_(ua)
        del ua

        IoU[:, i * step:(i + 1) * step] = iw

    return IoU
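
# --- Editor's note: a tiny sanity check for calc_iou (illustrative only, not
# part of the commit). Two identical unit boxes have IoU 1, disjoint boxes 0:
#
#     >>> a = torch.tensor([[0., 0., 1., 1.]])
#     >>> b = torch.tensor([[0., 0., 1., 1.], [2., 2., 3., 3.]])
#     >>> calc_iou(a, b)
#     tensor([[1., 0.]])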


def calc_iou_vis(a, b):
    # note: this divides the intersection by the area of b only
    # (intersection-over-annotation), not by the full union
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
    ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])

    iw = torch.clamp(iw, min=0)
    ih = torch.clamp(ih, min=0)

    intersection = iw * ih

    IoU = intersection / area

    return IoU


def IoG(box_a, box_b):
    inter_xmin = torch.max(box_a[:, 0], box_b[:, 0])
    inter_ymin = torch.max(box_a[:, 1], box_b[:, 1])
    inter_xmax = torch.min(box_a[:, 2], box_b[:, 2])
    inter_ymax = torch.min(box_a[:, 3], box_b[:, 3])
    Iw = torch.clamp(inter_xmax - inter_xmin, min=0)
    Ih = torch.clamp(inter_ymax - inter_ymin, min=0)
    I = Iw * Ih
    G = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    return I / G


class FocalLoss(nn.Module):
    def __init__(self, is_cuda=True):
        super(FocalLoss, self).__init__()
        self.is_cuda = is_cuda

    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    regression_losses.append(torch.tensor(0).float().cuda())
                    classification_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())
                    classification_losses.append(torch.tensor(0).float())

                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(anchor, bbox_annotation[:, :4])  # num_anchors x num_annotations

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1
            if self.is_cuda:
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_ful = torch.ge(IoU_max, 0.5)
            positive_indices = positive_ful

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            # (the original wrapped this allocation in a bare try/except that
            # only printed the targets tensor; creating a ones tensor cannot
            # meaningfully fail, so the debug scaffolding is dropped)
            alpha_factor = torch.ones(targets.shape)
            if self.is_cuda:
                alpha_factor = alpha_factor.cuda()
            alpha_factor *= alpha

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            cls_zeros = torch.zeros(cls_loss.shape)
            if self.is_cuda:
                cls_zeros = cls_zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, cls_zeros)

            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                if self.is_cuda:
                    targets = targets.cuda() / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
                else:
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])

                regression_diff = torch.abs(targets - regression[positive_indices, :])

                # smooth L1 loss with beta = 1/9
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                if self.is_cuda:
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
            torch.stack(regression_losses).mean(dim=0, keepdim=True)


class LevelAttentionLoss(nn.Module):
    def __init__(self, is_cuda=True):
        super(LevelAttentionLoss, self).__init__()
        self.is_cuda = is_cuda

    def forward(self, img_batch_shape, attention_mask, bboxs):
        h, w = img_batch_shape[2], img_batch_shape[3]

        mask_losses = []

        batch_size = bboxs.shape[0]
        for j in range(batch_size):

            bbox_annotation = bboxs[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    mask_losses.append(torch.tensor(0).float().cuda())
                else:
                    mask_losses.append(torch.tensor(0).float())
                continue

            # keep only boxes that lie inside the image
            cond1 = torch.le(bbox_annotation[:, 0], w)
            cond2 = torch.le(bbox_annotation[:, 1], h)
            cond3 = torch.le(bbox_annotation[:, 2], w)
            cond4 = torch.le(bbox_annotation[:, 3], h)
            cond = cond1 * cond2 * cond3 * cond4

            bbox_annotation = bbox_annotation[cond, :]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    mask_losses.append(torch.tensor(0).float().cuda())
                else:
                    mask_losses.append(torch.tensor(0).float())
                continue

            bbox_area = (bbox_annotation[:, 2] - bbox_annotation[:, 0]) * (
                    bbox_annotation[:, 3] - bbox_annotation[:, 1])

            mask_loss = []
            for id in range(len(attention_mask)):

                attention_map = attention_mask[id][j, 0, :, :]

                # area band that assigns each GT box to one pyramid level
                min_area = (2 ** (id + 5)) ** 2 * 0.5
                max_area = (2 ** (id + 5) * 1.58) ** 2 * 2

                level_bbox_indice1 = torch.ge(bbox_area, min_area)
                level_bbox_indice2 = torch.le(bbox_area, max_area)

                level_bbox_indice = level_bbox_indice1 * level_bbox_indice2

                level_bbox_annotation = bbox_annotation[level_bbox_indice, :].clone()

                # level_bbox_annotation = bbox_annotation.clone()

                attention_h, attention_w = attention_map.shape

                if level_bbox_annotation.shape[0]:
                    level_bbox_annotation[:, 0] *= attention_w / w
                    level_bbox_annotation[:, 1] *= attention_h / h
                    level_bbox_annotation[:, 2] *= attention_w / w
                    level_bbox_annotation[:, 3] *= attention_h / h

                mask_gt = torch.zeros(attention_map.shape)
                if self.is_cuda:
                    mask_gt = mask_gt.cuda()

                for i in range(level_bbox_annotation.shape[0]):
                    x1 = max(int(level_bbox_annotation[i, 0]), 0)
                    y1 = max(int(level_bbox_annotation[i, 1]), 0)
                    x2 = min(math.ceil(level_bbox_annotation[i, 2]) + 1, attention_w)
                    y2 = min(math.ceil(level_bbox_annotation[i, 3]) + 1, attention_h)

                    mask_gt[y1:y2, x1:x2] = 1

                mask_gt = mask_gt[mask_gt >= 0]
                mask_predict = attention_map[attention_map >= 0]

                mask_loss.append(F.binary_cross_entropy(mask_predict, mask_gt))
            mask_losses.append(torch.stack(mask_loss).mean())

        return torch.stack(mask_losses).mean(dim=0, keepdim=True)

@@ -0,0 +1,385 @@
import torch.nn as nn
import torch
import math
from identification.utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes
from identification.anchors import Anchors
from identification.losses import LevelAttentionLoss, FocalLoss
from torchvision.ops.boxes import nms as tv_nms


def nms(dets, thresh):
    """Dispatch to torchvision's NMS implementation. Accepts dets as a (N, 5) tensor of boxes plus scores."""
    return tv_nms(dets[:, :4], dets[:, 4], thresh)
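
# --- Editor's note: an illustrative call (not part of the commit). Rows are
# (x1, y1, x2, y2, score); the second box overlaps the first with IoU > 0.3
# and a lower score, so only indices 0 and 2 survive:
#
#     >>> dets = torch.tensor([[0., 0., 10., 10., 0.9],
#     ...                      [1., 1., 10., 10., 0.8],
#     ...                      [20., 20., 30., 30., 0.7]])
#     >>> nms(dets, 0.3)
#     tensor([0, 2])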


class PyramidFeatures(nn.Module):
    def __init__(self, c3_size, c4_size, c5_size, feature_size=256):
        super(PyramidFeatures, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.p5_1 = nn.Conv2d(c5_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.p5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P5 elementwise to C4
        self.p4_1 = nn.Conv2d(c4_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.p4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P4 elementwise to C3
        self.p3_1 = nn.Conv2d(c3_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # "P6 is obtained via a 3x3 stride-2 conv on C5"
        self.p6 = nn.Conv2d(c5_size, feature_size, kernel_size=3, stride=2, padding=1)

        # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
        self.p7_1 = nn.ReLU()
        self.p7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1)

    def forward(self, inputs):
        c3, c4, c5 = inputs

        # TODO hack for old model: checkpoints pickled before Conv2d grew the
        # padding_mode attribute deserialize without it, so restore it here
        self.p5_1.padding_mode = 'zeros'
        self.p5_2.padding_mode = 'zeros'
        self.p4_1.padding_mode = 'zeros'
        self.p4_2.padding_mode = 'zeros'
        self.p3_1.padding_mode = 'zeros'
        self.p3_2.padding_mode = 'zeros'
        self.p6.padding_mode = 'zeros'
        self.p7_2.padding_mode = 'zeros'

        p5_x = self.p5_1(c5)
        p5_upsampled_x = self.p5_upsampled(p5_x)
        p5_x = self.p5_2(p5_x)

        p4_x = self.p4_1(c4)
        p4_x = p5_upsampled_x + p4_x
        p4_upsampled_x = self.p4_upsampled(p4_x)
        p4_x = self.p4_2(p4_x)

        p3_x = self.p3_1(c3)
        p3_x = p3_x + p4_upsampled_x
        p3_x = self.p3_2(p3_x)

        p6_x = self.p6(c5)

        p7_x = self.p7_1(p6_x)
        p7_x = self.p7_2(p7_x)

        return [p3_x, p4_x, p5_x, p6_x, p7_x]


class RegressionModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, feature_size=256):
        super(RegressionModel, self).__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1)

    def forward(self, x):
        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.output.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.act3(out)

        out = self.conv4(out)
        out = self.act4(out)

        out = self.output(out)

        # out is B x C x W x H, with C = 4*num_anchors
        out = out.permute(0, 2, 3, 1)

        return out.contiguous().view(out.shape[0], -1, 4)


class ClassificationModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, num_classes=80, feature_size=256):
        super(ClassificationModel, self).__init__()

        self.num_classes = num_classes
        self.num_anchors = num_anchors

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    def forward(self, x):
        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.output.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.act3(out)

        out = self.conv4(out)
        out = self.act4(out)

        out = self.output(out)
        out = self.output_act(out)

        # out is B x C x W x H, with C = num_anchors * num_classes
        out1 = out.permute(0, 2, 3, 1)

        batch_size, width, height, channels = out1.shape

        out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes)

        return out2.contiguous().view(x.shape[0], -1, self.num_classes)


class LevelAttentionModel(nn.Module):
    def __init__(self, num_features_in, feature_size=256):
        super(LevelAttentionModel, self).__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        self.conv5 = nn.Conv2d(feature_size, 1, kernel_size=3, padding=1)

        self.output_act = nn.Sigmoid()

    def forward(self, x):
        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.conv5.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.act3(out)

        out = self.conv4(out)
        out = self.act4(out)

        out = self.conv5(out)
        out_attention = self.output_act(out)

        return out_attention


class ResNet(nn.Module):
    def __init__(self, num_classes, block, layers, is_cuda=True):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels, self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels, self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError("Invalid block type")

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.levelattentionModel = LevelAttentionModel(256)

        self.anchors = Anchors(is_cuda=is_cuda)

        self.regressBoxes = BBoxTransform(is_cuda=is_cuda)

        self.clipBoxes = ClipBoxes()

        self.levelattentionLoss = LevelAttentionLoss(is_cuda=is_cuda)

        self.focalLoss = FocalLoss(is_cuda=is_cuda)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # init.xavier_normal(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.levelattentionModel.conv5.weight.data.fill_(0)
        self.levelattentionModel.conv5.bias.data.fill_(0)

        self.freeze_bn()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def freeze_bn(self):
        """Freeze BatchNorm layers."""
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def forward(self, inputs):
        if self.training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs
            annotations = None

        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'

        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        features = self.fpn([x2, x3, x4])

        attention = [self.levelattentionModel(feature) for feature in features]

        # i = 1
        # for level in attention:
        #     i += 1
        #     level = level.squeeze(0)
        #     level = np.array(255 * unnormalize(level)).copy()
        #     level = np.transpose(level, (1, 2, 0))
        #     plt.imsave(os.path.join('./output', str(i) + '.jpg'), level[:,:,0])

        features = [features[i] * torch.exp(attention[i]) for i in range(len(features))]

        regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)

        classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)

        anchors = self.anchors(img_batch)

        if self.training:
            clc_loss, reg_loss = self.focalLoss(classification, regression, anchors, annotations)
            mask_loss = self.levelattentionLoss(img_batch.shape, attention, annotations)
            return clc_loss, reg_loss, mask_loss
        else:
            # note: the regression deltas are left unapplied here (the
            # regressBoxes call is commented out), so the returned boxes are
            # the clipped anchors themselves
            # transformed_anchors = self.regressBoxes(anchors, regression)
            transformed_anchors = self.clipBoxes(anchors, img_batch)

            scores = torch.max(classification, dim=2, keepdim=True)[0]
            scores_over_thresh = (scores > 0.05)[0, :, 0]

            if scores_over_thresh.sum() == 0:
                # no boxes to NMS, just return
                # return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
                return [None, None, None]

            classification = classification[:, scores_over_thresh, :]
            transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
            scores = scores[:, scores_over_thresh, :]

            anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.3)
            nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
            return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]


def resnet18(num_classes, is_cuda=True):
    return ResNet(num_classes, BasicBlock, [2, 2, 2, 2], is_cuda=is_cuda)


def resnet34(num_classes, is_cuda=True):
    return ResNet(num_classes, BasicBlock, [3, 4, 6, 3], is_cuda=is_cuda)


def resnet50(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 4, 6, 3], is_cuda=is_cuda)


def resnet101(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 4, 23, 3], is_cuda=is_cuda)


def resnet152(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 8, 36, 3], is_cuda=is_cuda)
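
# --- Editor's note: a hedged construction sketch (not part of the commit).
# At inference the network takes a normalized, 32-aligned image batch and
# returns [scores, classes, boxes], or [None, None, None] when nothing clears
# the 0.05 score floor (which is the expected outcome for untrained weights,
# given the 0.01 prior used to initialize the classification bias):
#
#     net = resnet50(num_classes=1, is_cuda=False)
#     net.eval()
#     with torch.no_grad():
#         scores, classes, boxes = net(torch.randn(1, 3, 224, 224))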

@@ -0,0 +1,282 @@
import torch
import torch.nn as nn
import numpy as np


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        # TODO hack for old model
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        if self.downsample is not None:
            self.downsample[0].padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y


class BottleneckSE(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=16):
        super(BottleneckSE, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.se = SELayer(planes * 4, reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = self.se(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class CBAMModule(nn.Module):
    def __init__(self, channels, reduction):
        super(CBAMModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
                             padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
                             padding=0)
        self.sigmoid_channel = nn.Sigmoid()
        self.conv_after_concat = nn.Conv2d(2, 1, kernel_size=7, stride=1, padding=3)
        self.sigmoid_spatial = nn.Sigmoid()

    def forward(self, x):
        # channel attention: shared MLP over avg- and max-pooled descriptors
        module_input = x
        avg = self.avg_pool(x)
        mx = self.max_pool(x)
        avg = self.fc1(avg)
        mx = self.fc1(mx)
        avg = self.relu(avg)
        mx = self.relu(mx)
        avg = self.fc2(avg)
        mx = self.fc2(mx)
        x = avg + mx
        x = self.sigmoid_channel(x)
        x = module_input * x
        # spatial attention: 7x7 conv over stacked channel-wise mean and max
        module_input = x
        avg = torch.mean(x, 1, True)
        mx, _ = torch.max(x, 1, True)
        x = torch.cat((avg, mx), 1)
        x = self.conv_after_concat(x)
        x = self.sigmoid_spatial(x)
        x = module_input * x
        return x


class BottleneckCBAM(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=16):
        super(BottleneckCBAM, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.se = CBAMModule(planes * 4, reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = self.se(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class BBoxTransform(nn.Module):
    def __init__(self, mean=None, std=None, is_cuda=True):
        super(BBoxTransform, self).__init__()
        if mean is None:
            self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32))
            if is_cuda:
                self.mean = self.mean.cuda()
        else:
            self.mean = mean
        if std is None:
            self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32))
            if is_cuda:
                self.std = self.std.cuda()
        else:
            self.std = std

    def forward(self, boxes, deltas):
        widths = boxes[:, :, 2] - boxes[:, :, 0]
        heights = boxes[:, :, 3] - boxes[:, :, 1]
        ctr_x = boxes[:, :, 0] + 0.5 * widths
        ctr_y = boxes[:, :, 1] + 0.5 * heights

        dx = deltas[:, :, 0] * self.std[0] + self.mean[0]
        dy = deltas[:, :, 1] * self.std[1] + self.mean[1]
        dw = deltas[:, :, 2] * self.std[2] + self.mean[2]
        dh = deltas[:, :, 3] * self.std[3] + self.mean[3]

        pred_ctr_x = ctr_x + dx * widths
        pred_ctr_y = ctr_y + dy * heights
        pred_w = torch.exp(dw) * widths
        pred_h = torch.exp(dh) * heights

        pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
        pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
        pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
        pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h

        pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)

        return pred_boxes


class ClipBoxes(nn.Module):
    def __init__(self):
        super(ClipBoxes, self).__init__()

    def forward(self, boxes, img):
        batch_size, num_channels, height, width = img.shape

        boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0)
        boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0)

        boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width)
        boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height)

        return boxes
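
# --- Editor's note: a worked decoding example for BBoxTransform (illustrative
# only, not part of the commit). With zero deltas the anchor is returned
# unchanged, since dx = dy = 0 and exp(0) = 1:
#
#     >>> bt = BBoxTransform(is_cuda=False)
#     >>> boxes = torch.tensor([[[0., 0., 10., 10.]]])
#     >>> bt(boxes, torch.zeros(1, 1, 4))
#     tensor([[[ 0.,  0., 10., 10.]]])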

@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-

import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter


class AngleLinear(nn.Module):
    def __init__(self, in_features, out_features):
        super(AngleLinear, self).__init__()
        self.W = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.W)

    def forward(self, input):
        x = F.normalize(input)
        W = F.normalize(self.W)
        return F.linear(x, W)


class AdaCos(nn.Module):
    def __init__(self, num_classes, m=0.50, is_cuda=True):
        super(AdaCos, self).__init__()
        self.n_classes = num_classes
        self.s = math.sqrt(2) * math.log(num_classes - 1)
        self.base_s = self.s
        self.m = m
        self.criterion = nn.CrossEntropyLoss()
        if is_cuda:
            self.criterion = self.criterion.cuda()

    def forward(self, input, label):
        # changed to fixed adacos
        # theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
        # one_hot = torch.zeros_like(input)
        # one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # with torch.no_grad():
        #     B_avg = torch.where(one_hot < 1, torch.exp(self.s * input), torch.zeros_like(input))
        #     B_avg = torch.sum(B_avg) / input.size(0)
        #     theta_med = torch.median(theta)
        #     self.s = torch.log(B_avg) / torch.cos(torch.min(math.pi/4 * torch.ones_like(theta_med), theta_med))
        #     # TODO why converge to infinity ?
        #     self.s = torch.clamp(self.s, self.base_s / 2, self.base_s * 2)
        # print(self.s)
        output = self.s * input

        return self.criterion(output, label)


class ArcFace(nn.Module):
    def __init__(self, s=30.0, m=0.50, is_cuda=True):
        super(ArcFace, self).__init__()
        self.s = s
        self.m = m
        self.criterion = nn.CrossEntropyLoss()
        if is_cuda:
            self.criterion = self.criterion.cuda()

    def forward(self, input, label):
        theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
        target_logits = torch.cos(theta + self.m)
        one_hot = torch.zeros_like(input)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = input * (1 - one_hot) + target_logits * one_hot
        output *= self.s

        return self.criterion(output, label)
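
# --- Editor's note: a hedged usage sketch (not part of the commit). The margin
# heads consume cosine similarities, so they pair with AngleLinear above:
#
#     head = AngleLinear(in_features=512, out_features=10)
#     loss_fn = ArcFace(s=30.0, m=0.5, is_cuda=False)
#     cosine = head(torch.randn(4, 512))   # (4, 10) cosine logits in [-1, 1]
#     loss = loss_fn(cosine, torch.tensor([1, 2, 3, 4]))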


class SphereFace(nn.Module):
    def __init__(self, s=30.0, m=1.35, is_cuda=True):
        super(SphereFace, self).__init__()
        self.s = s
        self.m = m
        self.criterion = nn.CrossEntropyLoss()
        if is_cuda:
            self.criterion = self.criterion.cuda()

    def forward(self, input, label):
        theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
        target_logits = torch.cos(self.m * theta)
        one_hot = torch.zeros_like(input)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = input * (1 - one_hot) + target_logits * one_hot
        output *= self.s

        return self.criterion(output, label)


class CosFace(nn.Module):
    def __init__(self, s=30.0, m=0.35, is_cuda=True):
        super(CosFace, self).__init__()
        self.s = s
        self.m = m
        self.criterion = nn.CrossEntropyLoss()
        if is_cuda:
            self.criterion = self.criterion.cuda()

    def forward(self, input, label):
        target_logits = input - self.m
        one_hot = torch.zeros_like(input)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = input * (1 - one_hot) + target_logits * one_hot
        output *= self.s

        return self.criterion(output, label)
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
"""
Created on 18-6-7 at 10:11 AM

@author: ronghuaiyang
"""

import torch
import torch.nn as nn


class FocalLoss(nn.Module):
    def __init__(self, gamma=0, eps=1e-7, is_cuda=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        # eps is kept for API compatibility; it is not used below
        self.eps = eps
        self.ce = nn.CrossEntropyLoss()
        if is_cuda:
            self.ce = self.ce.cuda()

    def forward(self, inp, target):
        logp = self.ce(inp, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()
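
# --- Editor's note: quick sanity check (illustrative only, not part of the
# commit). With gamma = 0 the focal term vanishes and the module reduces to
# plain cross-entropy:
#
#     logits = torch.randn(8, 5)
#     target = torch.randint(0, 5, (8,))
#     fl = FocalLoss(gamma=0, is_cuda=False)
#     assert torch.allclose(fl(logits, target), nn.CrossEntropyLoss()(logits, target))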

@@ -0,0 +1,129 @@
import torchvision.models as models
from torch import nn


# NOTE: the `pretrained` flag below is accepted for API compatibility but is
# not forwarded to torchvision, so these constructors always start from
# randomly initialized weights.
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet18(num_classes=512, **kwargs)
    return model


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet34(num_classes=512, **kwargs)
    return model


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet50(num_classes=512, **kwargs)
    return model


def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet101(num_classes=512, **kwargs)
    return model


def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet152(num_classes=512, **kwargs)
    return model


def sphere20():
    return sphere20a()


class sphere20a(nn.Module):
    def __init__(self):
        super(sphere20a, self).__init__()
        # input = B*3*112*96 in the original SphereFace; this variant expects
        # 224x224 inputs (see the enlarged fc5 below)
        self.conv1_1 = nn.Conv2d(3, 64, 3, 2, 1)  # => B*64*56*48
        self.relu1_1 = nn.PReLU(64)
        self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1)
        self.relu1_2 = nn.PReLU(64)
        self.conv1_3 = nn.Conv2d(64, 64, 3, 1, 1)
        self.relu1_3 = nn.PReLU(64)

        self.conv2_1 = nn.Conv2d(64, 128, 3, 2, 1)  # => B*128*28*24
        self.relu2_1 = nn.PReLU(128)
        self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1)
        self.relu2_2 = nn.PReLU(128)
        self.conv2_3 = nn.Conv2d(128, 128, 3, 1, 1)
        self.relu2_3 = nn.PReLU(128)

        self.conv2_4 = nn.Conv2d(128, 128, 3, 1, 1)  # => B*128*28*24
        self.relu2_4 = nn.PReLU(128)
        self.conv2_5 = nn.Conv2d(128, 128, 3, 1, 1)
        self.relu2_5 = nn.PReLU(128)

        self.conv3_1 = nn.Conv2d(128, 256, 3, 2, 1)  # => B*256*14*12
        self.relu3_1 = nn.PReLU(256)
        self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_2 = nn.PReLU(256)
        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_3 = nn.PReLU(256)

        self.conv3_4 = nn.Conv2d(256, 256, 3, 1, 1)  # => B*256*14*12
        self.relu3_4 = nn.PReLU(256)
        self.conv3_5 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_5 = nn.PReLU(256)

        self.conv3_6 = nn.Conv2d(256, 256, 3, 1, 1)  # => B*256*14*12
        self.relu3_6 = nn.PReLU(256)
        self.conv3_7 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_7 = nn.PReLU(256)

        self.conv3_8 = nn.Conv2d(256, 256, 3, 1, 1)  # => B*256*14*12
        self.relu3_8 = nn.PReLU(256)
        self.conv3_9 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_9 = nn.PReLU(256)

        self.conv4_1 = nn.Conv2d(256, 512, 3, 2, 1)  # => B*512*7*6
        self.relu4_1 = nn.PReLU(512)
        self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_2 = nn.PReLU(512)
        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_3 = nn.PReLU(512)

        self.fc5 = nn.Linear(512 * 14 * 14, 512)
        # ORIGINAL for 112x96: self.fc5 = nn.Linear(512*7*6,512)

    def forward(self, x):
        x = self.relu1_1(self.conv1_1(x))
        x = x + self.relu1_3(self.conv1_3(self.relu1_2(self.conv1_2(x))))

        x = self.relu2_1(self.conv2_1(x))
        x = x + self.relu2_3(self.conv2_3(self.relu2_2(self.conv2_2(x))))
        x = x + self.relu2_5(self.conv2_5(self.relu2_4(self.conv2_4(x))))

        x = self.relu3_1(self.conv3_1(x))
        x = x + self.relu3_3(self.conv3_3(self.relu3_2(self.conv3_2(x))))
        x = x + self.relu3_5(self.conv3_5(self.relu3_4(self.conv3_4(x))))
        x = x + self.relu3_7(self.conv3_7(self.relu3_6(self.conv3_6(x))))
        x = x + self.relu3_9(self.conv3_9(self.relu3_8(self.conv3_8(x))))

        x = self.relu4_1(self.conv4_1(x))
        x = x + self.relu4_3(self.conv4_3(self.relu4_2(self.conv4_2(x))))

        x = x.view(x.size(0), -1)
        x = self.fc5(x)
        return x
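
# --- Editor's note: shape sanity check for the modified sphere20a
# (illustrative only, not part of the commit). Four stride-2 convs take a
# 224x224 input down to 14x14, matching the enlarged fc5 above:
#
#     import torch
#     net = sphere20a()
#     feat = net(torch.randn(1, 3, 224, 224))
#     assert feat.shape == (1, 512)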

@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
"""
Created on 18-5-30 at 4:55 PM

@author: ronghuaiyang
"""
import os
import argparse

from torch.utils.data import DataLoader

from recognition.nets import resnet18, resnet34, resnet50, resnet101, resnet152, sphere20
import torch
import numpy as np
from torch.nn import DataParallel
from PIL import Image
from torchvision import transforms as T


imagesize = 224
batch_size = 20


class Dataset(torch.utils.data.Dataset):
    def __init__(self, identity_list, root_path):
        self.identity_list = identity_list
        self.root_path = root_path

        normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

        self.transforms = T.Compose([
            T.Resize(imagesize),
            T.ToTensor(),
            normalize
        ])

    def __getitem__(self, index):
        a, b, label = self.identity_list[index]
        a_data = self.load_image(a)
        b_data = self.load_image(b)
        return a_data, b_data, label

    def load_image(self, p):
        img_path = os.path.join(self.root_path, p)
        data = Image.open(img_path)
        if data is None:
            return None
        data = data.convert(mode="RGB")
        data = self.transforms(data)
        return data

    def __len__(self):
        return len(self.identity_list)


def get_pair_list(pair_list):
    print('Loading pair list')
    with open(pair_list, 'r') as fd:
        pairs = fd.readlines()
    return [line.split() for line in pairs]


def load_img_data(identity_list, root_path):
    dataset = Dataset(identity_list, root_path)
    loader = DataLoader(dataset,
                        batch_size=batch_size,
                        shuffle=False,
                        # pin_memory=True,
                        num_workers=0)
    return loader


def lfw_test2(model, identity_list, img_data, is_cuda=True):
    print('Converting to features')
    sims = []
    labels = []
    max_size = len(img_data) * batch_size
    for i, sample in enumerate(img_data):
        if i % 10 == 0:
            print('%d of %d' % (i * batch_size, max_size))
        a_data, b_data, label = sample
        if is_cuda:
            a_data = a_data.cuda()
            b_data = b_data.cuda()

        a_output = model(a_data).detach().cpu().numpy()
        b_output = model(b_data).detach().cpu().numpy()

        # iterate over the actual batch, which may be smaller than batch_size
        # for the final batch (the original indexed up to batch_size and could
        # raise an IndexError there)
        for idx in range(a_output.shape[0]):
            sim = cosin_metric(a_output[idx], b_output[idx])
            sims.append(sim)
            # np.bool was removed in NumPy 1.24; the builtin bool suffices here
            labels.append(bool(label[idx] == '1'))

    acc, th = cal_accuracy(sims, labels)
    print('lfw face verification accuracy: ', acc, 'threshold: ', th)
    return acc


def cosin_metric(x1, x2):
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))


def cal_accuracy(y_score, y_true):
    y_score = np.asarray(y_score)
    y_true = np.asarray(y_true)
    best_acc = 0
    best_th = 0
    for i in range(len(y_score)):
        th = y_score[i]
        y_test = (y_score >= th)
        acc = np.mean((y_test == y_true).astype(int))
        if acc > best_acc:
            best_acc = acc
            best_th = th

    return best_acc, best_th
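
# --- Editor's note: a tiny worked example for cal_accuracy (illustrative
# only, not part of the commit). Each score is tried as a threshold and the
# one with the best agreement wins; here 0.8 separates the classes perfectly:
#
#     >>> cal_accuracy([0.9, 0.8, 0.4, 0.3], [True, True, False, False])
#     (1.0, 0.8)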
||||
|
||||
|
||||
def main(args=None): |
||||
parser = argparse.ArgumentParser(description='Testing script for face identification.') |
||||
|
||||
parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, default=50) |
||||
parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel', |
||||
default=False, action='store_true') |
||||
parser.add_argument('--model', help='Path to model') |
||||
parser.add_argument('--batch_size', help='Batch size (default 50)', type=int, default=50) |
||||
parser.add_argument('--lfw_root', help='Path to LFW dataset') |
||||
parser.add_argument('--lfw_pair_list', help='Path to LFW pair list file') |
||||
|
||||
parser = parser.parse_args(args) |
||||
|
||||
is_cuda = torch.cuda.is_available() |
||||
print('CUDA available: {}'.format(is_cuda)) |
||||
|
||||
if parser.depth == 18: |
||||
model = resnet18() |
||||
elif parser.depth == 20: |
||||
model = sphere20() |
||||
elif parser.depth == 34: |
||||
model = resnet34() |
||||
elif parser.depth == 50: |
||||
model = resnet50() |
||||
elif parser.depth == 101: |
||||
model = resnet101() |
||||
elif parser.depth == 152: |
||||
model = resnet152() |
||||
else: |
||||
raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') |
||||
|
||||
if parser.parallel: |
||||
model = DataParallel(model) |
||||
|
||||
# load_model(model, opt.test_model_path) |
||||
model.load_state_dict(torch.load(parser.model)) |
||||
if is_cuda: |
||||
model.cuda() |
||||
|
||||
identity_list = get_pair_list(parser.lfw_pair_list) |
||||
img_data = load_img_data(identity_list, parser.lfw_root) |
||||
|
||||
model.eval() |
||||
lfw_test2(model, identity_list, img_data, is_cuda=is_cuda) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
main() |
||||
@ -0,0 +1,201 @@ |
import argparse
import os
import time

import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torch.optim.lr_scheduler import StepLR
from torchvision import transforms as T

from recognition.angle import AngleLinear, CosFace, SphereFace, ArcFace, AdaCos
from recognition.focal_loss import FocalLoss
from recognition.nets import resnet18, resnet34, resnet50, resnet101, resnet152, sphere20
from recognition.test import lfw_test2, get_pair_list, load_img_data


class Dataset(torch.utils.data.Dataset):
    def __init__(self, root, data_list_file, imagesize):
        with open(data_list_file, 'r') as fd:
            imgs = fd.readlines()

        # each line is '<relative path> <label>'; strip the newline before joining
        imgs = [os.path.join(root, img.rstrip('\n')) for img in imgs]
        # sort so the label -> class-index mapping is deterministic across runs
        self.labels = sorted(set(img.split()[1] for img in imgs))
        self.imgs = np.random.permutation(imgs)

        normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

        self.transforms = T.Compose([
            T.RandomResizedCrop(imagesize),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            normalize
        ])

    def __getitem__(self, index):
        sample = self.imgs[index]
        splits = sample.split()
        img_path = splits[0]
        data = Image.open(img_path)
        data = data.convert(mode="RGB")
        data = self.transforms(data)
        cls = self.label_to_class(splits[1])
        return data.float(), cls

    def __len__(self):
        return len(self.imgs)

    def label_to_class(self, label):
        for idx, v in enumerate(self.labels):
            if v == label:
                return idx
        raise Exception("Unknown label %s" % label)

    def num_labels(self):
        return len(self.labels)
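
    # NOTE: label_to_class above is a linear scan, O(num_labels) per sample;
    # a {label: index} dict built once in __init__ would be an easy speedup.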


def main(args=None):
    parser = argparse.ArgumentParser(description='Training script for face identification.')

    parser.add_argument('--print_freq', help='Print every N batch (default 100)', type=int, default=100)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, default=50)
    parser.add_argument('--lr_step', help='Learning rate step (default 10)', type=int, default=10)
    parser.add_argument('--lr', help='Learning rate (default 0.1)', type=float, default=0.1)
    parser.add_argument('--weight_decay', help='Weight decay (default 0.0005)', type=float, default=0.0005)
    # TODO: easy_margin is parsed but not yet passed on to ArcFace below
    parser.add_argument('--easy_margin', help='Use easy margin (default false)', dest='easy_margin', default=False, action='store_true')
    parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel',
                        default=False, action='store_true')
    parser.add_argument('--loss', help='One of focal_loss, cross_entropy, arcface, cosface, sphereface, adacos (default cross_entropy)',
                        type=str, default='cross_entropy')
    parser.add_argument('--optimizer', help='One of sgd, adam (default sgd)',
                        type=str, default='sgd')
    parser.add_argument('--batch_size', help='Batch size (default 16)', type=int, default=16)
    parser.add_argument('--casia_list', help='Path to CASIA dataset file list (training)')
    parser.add_argument('--casia_root', help='Path to CASIA images (training)')
    parser.add_argument('--lfw_root', help='Path to LFW dataset (testing)')
    parser.add_argument('--lfw_pair_list', help='Path to LFW pair list file (testing)')
    parser.add_argument('--model_name', help='Name of the model to save')

    parser = parser.parse_args(args)

    is_cuda = torch.cuda.is_available()
    print('CUDA available: {}'.format(is_cuda))

    imagesize = 224
    if parser.depth == 18:
        model = resnet18()
    elif parser.depth == 20:
        model = sphere20()
    elif parser.depth == 34:
        model = resnet34()
    elif parser.depth == 50:
        model = resnet50()
    elif parser.depth == 101:
        model = resnet101()
    elif parser.depth == 152:
        model = resnet152()
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 20, 34, 50, 101, 152')

    # TODO split training dataset to train/validation and stop using test dataset for acc
    train_dataset = Dataset(parser.casia_root, parser.casia_list, imagesize)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=parser.batch_size,
                                              shuffle=True,
                                              # pin_memory=True,
                                              num_workers=0)
    num_classes = train_dataset.num_labels()

    # the margin-based losses (cosface/arcface/sphereface/adacos) pair with
    # the angular head AngleLinear; focal loss and cross entropy use a plain
    # linear classifier over the 512-dim features
    if parser.loss == 'focal_loss':
        metric_fc = nn.Linear(512, num_classes)
        criterion = FocalLoss(gamma=2, is_cuda=is_cuda)
    elif parser.loss == 'cross_entropy':
        metric_fc = nn.Linear(512, num_classes)
        criterion = torch.nn.CrossEntropyLoss()
        if is_cuda:
            criterion = criterion.cuda()
    elif parser.loss == 'cosface':
        metric_fc = AngleLinear(512, num_classes)
        criterion = CosFace(is_cuda=is_cuda)
    elif parser.loss == 'arcface':
        metric_fc = AngleLinear(512, num_classes)
        criterion = ArcFace(is_cuda=is_cuda)
    elif parser.loss == 'sphereface':
        metric_fc = AngleLinear(512, num_classes)
        criterion = SphereFace(is_cuda=is_cuda)
    elif parser.loss == 'adacos':
        metric_fc = AngleLinear(512, num_classes)
        criterion = AdaCos(num_classes, is_cuda=is_cuda)
    else:
        raise ValueError('Unknown loss %s' % parser.loss)
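
    # for reference: with L2-normalized features x and class weights W these
    # heads work on cos(theta) = W^T x. CosFace uses s * (cos(theta_y) - m)
    # for the target class y, ArcFace uses s * cos(theta_y + m), and AdaCos
    # tunes the scale s adaptively; the implementations (and their s/m
    # defaults) live in recognition/angle.py.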

    if parser.optimizer == 'sgd':
        optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
                                    lr=parser.lr, weight_decay=parser.weight_decay)
    elif parser.optimizer == 'adam':
        optimizer = torch.optim.Adam([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
                                     lr=parser.lr, weight_decay=parser.weight_decay)
    else:
        raise ValueError('Unknown optimizer %s' % parser.optimizer)

    scheduler = StepLR(optimizer, step_size=parser.lr_step, gamma=0.1)

    if parser.parallel:
        model = nn.DataParallel(model)
        metric_fc = nn.DataParallel(metric_fc)

    if is_cuda:
        model.cuda()
        metric_fc.cuda()

    print(model)
    print(metric_fc)

    identity_list = get_pair_list(parser.lfw_pair_list)
    img_data = load_img_data(identity_list, parser.lfw_root)

    print('{} train iters per epoch'.format(len(trainloader)))

    start = time.time()
    last_acc = 0.0
    for i in range(parser.epochs):
        model.train()
        for ii, data in enumerate(trainloader):
            data_input, label = data
            if is_cuda:
                data_input = data_input.cuda()
                label = label.cuda().long()
            feature = model(data_input)
            output = metric_fc(feature)
            loss = criterion(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iters = i * len(trainloader) + ii

            if iters % parser.print_freq == 0:
                speed = parser.print_freq / (time.time() - start)
                time_str = time.asctime(time.localtime(time.time()))
                print('{} train epoch {} iter {} {} iters/s loss {}'.format(time_str, i, ii, speed, loss.item()))

                start = time.time()

        # PyTorch >= 1.1 expects the LR scheduler to step after optimizer.step(),
        # so step once at the end of each epoch instead of at the start
        scheduler.step()

        model.eval()
        acc = lfw_test2(model, identity_list, img_data, is_cuda=is_cuda)
        print('Accuracy: %f' % acc)
        if last_acc < acc:
            # TODO remove makedir
            os.makedirs('./ckpt', exist_ok=True)
            torch.save(model.state_dict(), './ckpt/' + parser.model_name + '_{}.pt'.format(i))
            torch.save(metric_fc.state_dict(), './ckpt/' + parser.model_name + '_metric_{}.pt'.format(i))
            # remember the best accuracy so far; without this update every
            # epoch would be checkpointed
            last_acc = acc


if __name__ == '__main__':
    main()
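
# Example invocation (assuming this file is saved as recognition/train.py;
# paths and the model name are placeholders):
#   python -m recognition.train --depth 50 --loss arcface \
#       --casia_root /data/casia --casia_list /data/casia_list.txt \
#       --lfw_root /data/lfw --lfw_pair_list /data/lfw_test_pair.txt \
#       --model_name arcface_r50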
@ -0,0 +1,4 @@
Flask
Pillow
https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl
https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp37-cp37m-linux_x86_64.whl
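# the wheel URLs above pin torch 1.1.0 and torchvision 0.3.0 built for
# CUDA 10.0 and CPython 3.7 on linux x86_64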
@ -0,0 +1,82 @@
import logging
import os
import tempfile

from flask import Flask, request, abort, jsonify
from werkzeug.utils import secure_filename

import torch
from recognition.nets import resnet50
from torchvision import transforms as T
from PIL import Image
import identification.detector as fan

is_cuda = torch.cuda.is_available()
# load face detection model
fan_model = fan.load_model('ckpt/wider6_10.pt', is_cuda=is_cuda)

# load recognition model
rec_model = resnet50()
rec_model.load_state_dict(torch.load('ckpt/recongition3_37.pt', map_location=lambda storage, location: storage))
rec_model.eval()
if is_cuda:
    rec_model = rec_model.cuda()

# preprocessing for the recognition network
normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])

imagesize = 224
transforms = T.Compose([
    T.Resize((imagesize, imagesize)),
    T.ToTensor(),
    normalize
])

app = Flask(__name__)
UPLOAD_FOLDER = tempfile.gettempdir()
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

def compute_vector(data):
    with torch.no_grad():
        data = transforms(data)
        if is_cuda:
            data = data.cuda()
        mo = rec_model(data.unsqueeze(dim=0))
    return mo.detach().cpu().numpy()
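
# compute_vector returns a (1, 512) numpy array (512 matching the feature
# width of the training heads); callers squeeze it to a flat vector below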


@app.route('/vectorize', methods=['GET', 'POST'])
def upload_file():
    if request.method == 'GET':
        return 'OK'

    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            abort(400)
        f = request.files['file']
        if f:
            filename = secure_filename(f.filename)
            filepath = os.path.join(UPLOAD_FOLDER, filename)
            f.save(filepath)

            img = Image.open(filepath)
            data = img.convert(mode="RGB")

            with torch.no_grad():
                boxes = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda).astype(int)
            # boxes are (x1, y1, x2, y2), as the img.crop call below assumes;
            # keep only faces at least half the network input size in both
            # width and height
            boxes = [b for b in boxes if b[2] - b[0] >= imagesize / 2 and b[3] - b[1] >= imagesize / 2]

            if len(boxes) == 0:
                abort(404)

            extracted = [{'box': arr.tolist(),
                          'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist()}
                         for arr in boxes]
            return jsonify(extracted)
        else:
            abort(400)


if __name__ == '__main__':
    logging.basicConfig()
    app.run()
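
# Example request once the server is running (default Flask port 5000):
#   curl -F 'file=@face.jpg' http://localhost:5000/vectorize
# responds with a JSON list of {"box": [x1, y1, x2, y2], "vector": [...]}
# entries, one per detected face, or 404 if no large-enough face is found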