
Faceserver vectorizer initial commit

Branch: master
Author: Petr Masopust, 6 years ago
Commit: 95d4f2ca7d
13 changed files, 1,969 additions:

  1. .gitignore (71 lines)
  2. vectorizer/identification/anchors.py (131 lines)
  3. vectorizer/identification/detector.py (95 lines)
  4. vectorizer/identification/losses.py (289 lines)
  5. vectorizer/identification/model_level_attention.py (385 lines)
  6. vectorizer/identification/utils.py (282 lines)
  7. vectorizer/recognition/angle.py (108 lines)
  8. vectorizer/recognition/focal_loss.py (25 lines)
  9. vectorizer/recognition/nets.py (129 lines)
  10. vectorizer/recognition/test.py (167 lines)
  11. vectorizer/recognition/train.py (201 lines)
  12. vectorizer/requirements.txt (4 lines)
  13. vectorizer/vectorizer/server.py (82 lines)

.gitignore (vendored)

@@ -0,0 +1,71 @@
# Runtime directories
ckpt/
mAP_txt/
summary/
weight/
# IntelliJ IDEA
.idea/
*.iml
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
# lib is NOT ignored
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Jupyter Notebook
.ipynb_checkpoints
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

vectorizer/identification/anchors.py

@@ -0,0 +1,131 @@
import numpy as np
import torch
import torch.nn as nn
class Anchors(nn.Module):
def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None, is_cuda=True):
super(Anchors, self).__init__()
self.is_cuda = is_cuda
# fall back to the RetinaNet defaults when an argument is not supplied
# (the original only assigned the attributes in the None branch, so passing
# any argument left the corresponding attribute unset)
self.pyramid_levels = [3, 4, 5, 6, 7] if pyramid_levels is None else pyramid_levels
self.strides = [2 ** x for x in self.pyramid_levels] if strides is None else strides
self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] if sizes is None else sizes
# self.ratios = np.array([1., 1.5, 2., 2.5, 3.])
self.ratios = np.array([0.5, 1., 2.]) if ratios is None else ratios
self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) if scales is None else scales
def forward(self, image):
image_shape = image.shape[2:]
image_shape = np.array(image_shape)
image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]
# compute anchors over all pyramid levels
all_anchors = np.zeros((0, 4)).astype(np.float32)
for idx, p in enumerate(self.pyramid_levels):
anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
all_anchors = np.append(all_anchors, shifted_anchors, axis=0)
all_anchors = np.expand_dims(all_anchors, axis=0)
all_anchors = torch.from_numpy(all_anchors.astype(np.float32))
if self.is_cuda:
all_anchors = all_anchors.cuda()
return all_anchors
def generate_anchors(base_size=16, ratios=None, scales=None):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales w.r.t. a reference window.
"""
if ratios is None:
ratios = np.array([0.5, 1, 2])
if scales is None:
scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
num_anchors = len(ratios) * len(scales)
# initialize output anchors
anchors = np.zeros((num_anchors, 4))
# scale base_size
anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T
# compute areas of anchors
areas = anchors[:, 2] * anchors[:, 3]
# correct for ratios
anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))
# transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T
return anchors
def compute_shape(image_shape, pyramid_levels):
"""Compute shapes based on pyramid levels.
:param image_shape:
:param pyramid_levels:
:return:
"""
image_shape = np.array(image_shape[:2])
image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
return image_shapes
def anchors_for_shape(
image_shape,
pyramid_levels=None,
ratios=None,
scales=None,
strides=None,
sizes=None,
):
image_shapes = compute_shape(image_shape, pyramid_levels)
# compute anchors over all pyramid levels
all_anchors = np.zeros((0, 4))
for idx, p in enumerate(pyramid_levels):
anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales)
shifted_anchors = shift(image_shapes[idx], strides[idx], anchors)
all_anchors = np.append(all_anchors, shifted_anchors, axis=0)
return all_anchors
def shift(shape, stride, anchors):
shift_x = (np.arange(0, shape[1]) + 0.5) * stride
shift_y = (np.arange(0, shape[0]) + 0.5) * stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((
shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel()
)).transpose()
# add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# reshape to (K*A, 4) shifted anchors
A = anchors.shape[0]
K = shifts.shape[0]
all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
return all_anchors

vectorizer/identification/detector.py

@@ -0,0 +1,95 @@
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
class Resizer(object):
"""Convert ndarrays in sample to Tensors."""
def __call__(self, sample, min_side=800, max_side=1400):
image, annots, scale = sample['img'], sample['annot'], sample['scale']
cols, rows = image.size  # PIL size is (width, height); the original unpacked them swapped
# scale = min_side / rows
smallest_side = min(rows, cols)
# rescale the image so the smallest side is min_side
scale = min_side / smallest_side
# check if the largest side is now greater than max_side, which can happen
# when images have a large aspect ratio
largest_side = max(rows, cols)
if largest_side * scale > max_side:
scale = max_side / largest_side
# resize the image with the computed scale
image = np.array(image.resize((int(round((cols * scale))), int(round((rows * scale)))), resample=Image.BILINEAR))
image = image / 255.0
rows, cols, cns = image.shape
pad_h = 32 - rows % 32  # rows is the image height here
pad_w = 32 - cols % 32
new_image = np.zeros((rows + pad_h, cols + pad_w, cns)).astype(np.float32)
new_image[:rows, :cols, :] = image.astype(np.float32)
annots[:, :4] *= scale
return {'img': new_image, 'annot': annots, 'scale': scale}
class Normalizer(object):
def __init__(self):
self.mean = np.array([[[0.485, 0.456, 0.406]]])
self.std = np.array([[[0.229, 0.224, 0.225]]])
def __call__(self, sample):
image, annots, scales = sample['img'], sample['annot'], sample['scale']
image = (image.astype(np.float32) - self.mean) / self.std
sample = {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale': scales}
return sample
def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True):
input_data = {'img': img_data, 'annot': np.zeros((0, 5)), 'scale': 1}
transform = transforms.Compose([Resizer(), Normalizer()])
transformed = transform(input_data)
model.eval()
with torch.no_grad():
img_data = transformed['img'].permute(2, 0, 1).float().unsqueeze(dim=0)
if is_cuda:
img_data = img_data.cuda()
scores, labels, boxes = model(img_data)
if scores is None:
return np.zeros((0, 4))  # np.array() without arguments is a TypeError
scores = scores.cpu().numpy()
scale = transformed['scale']
boxes = boxes.cpu().numpy() / scale
indices = np.where(scores > threshold)[0]
scores = scores[indices]
scores_sort = np.argsort(-scores)[:max_detections]
image_boxes = boxes[indices[scores_sort], :]
return image_boxes
def load_model(model_path, is_cuda=True):
# load possible cuda model as cpu
model = torch.load(model_path, map_location=lambda storage, location: storage)
if is_cuda:
model = model.cuda()
model.anchors.is_cuda = is_cuda
return model

vectorizer/identification/losses.py

@@ -0,0 +1,289 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
def memprint(a):
print(a.shape)
print(a.element_size() * a.nelement())
def calc_iou(a, b):
step = 20  # chunk b to bound peak memory
IoU = torch.zeros((len(a), len(b)), device=a.device)  # follow the input device instead of hardcoding .cuda()
step_count = int(len(b) / step)
if len(b) % step != 0:
step_count += 1
area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
for i in range(step_count):
iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[i * step:(i+1) * step, 2])
iw.sub_(torch.max(torch.unsqueeze(a[:, 0], 1), b[i * step:(i+1) * step, 0]))
ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[i * step:(i+1) * step, 3])
ih.sub_(torch.max(torch.unsqueeze(a[:, 1], 1), b[i * step:(i+1) * step, 1]))
iw.clamp_(min=0)
ih.clamp_(min=0)
iw.mul_(ih)
del ih
ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area[i * step:(i+1) * step] - iw
ua = torch.clamp(ua, min=1e-8)
iw.div_(ua)
del ua
IoU[:, i * step:(i+1) * step] = iw
return IoU
def calc_iou_vis(a, b):
# note: this returns intersection over the area of b (a visibility ratio), not a true IoU
area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])
iw = torch.clamp(iw, min=0)
ih = torch.clamp(ih, min=0)
intersection = iw * ih
IoU = intersection / area
return IoU
def IoG(box_a, box_b):
inter_xmin = torch.max(box_a[:, 0], box_b[:, 0])
inter_ymin = torch.max(box_a[:, 1], box_b[:, 1])
inter_xmax = torch.min(box_a[:, 2], box_b[:, 2])
inter_ymax = torch.min(box_a[:, 3], box_b[:, 3])
Iw = torch.clamp(inter_xmax - inter_xmin, min=0)
Ih = torch.clamp(inter_ymax - inter_ymin, min=0)
I = Iw * Ih
G = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
return I / G
class FocalLoss(nn.Module):
def __init__(self, is_cuda=True):
super(FocalLoss, self).__init__()
self.is_cuda = is_cuda
def forward(self, classifications, regressions, anchors, annotations):
alpha = 0.25
gamma = 2.0
batch_size = classifications.shape[0]
classification_losses = []
regression_losses = []
anchor = anchors[0, :, :]
anchor_widths = anchor[:, 2] - anchor[:, 0]
anchor_heights = anchor[:, 3] - anchor[:, 1]
anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights
for j in range(batch_size):
classification = classifications[j, :, :]
regression = regressions[j, :, :]
bbox_annotation = annotations[j, :, :]
bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]
if bbox_annotation.shape[0] == 0:
if self.is_cuda:
regression_losses.append(torch.tensor(0).float().cuda())
classification_losses.append(torch.tensor(0).float().cuda())
else:
regression_losses.append(torch.tensor(0).float())
classification_losses.append(torch.tensor(0).float())
continue
classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
IoU = calc_iou(anchor, bbox_annotation[:, :4]) # num_anchors x num_annotations
IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1
# compute the loss for classification
targets = torch.ones(classification.shape) * -1
if self.is_cuda:
targets = targets.cuda()
targets[torch.lt(IoU_max, 0.4), :] = 0
positive_ful = torch.ge(IoU_max, 0.5)
positive_indices = positive_ful
num_positive_anchors = positive_indices.sum()
assigned_annotations = bbox_annotation[IoU_argmax, :]
targets[positive_indices, :] = 0
targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
alpha_factor = torch.ones(targets.shape) * alpha
if self.is_cuda:
alpha_factor = alpha_factor.cuda()
alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
focal_weight = alpha_factor * torch.pow(focal_weight, gamma)
bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
# cls_loss = focal_weight * torch.pow(bce, gamma)
cls_loss = focal_weight * bce
cls_zeros = torch.zeros(cls_loss.shape)
if self.is_cuda:
cls_zeros = cls_zeros.cuda()
cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, cls_zeros)
classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
# compute the loss for regression
if positive_indices.sum() > 0:
assigned_annotations = assigned_annotations[positive_indices, :]
anchor_widths_pi = anchor_widths[positive_indices]
anchor_heights_pi = anchor_heights[positive_indices]
anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
anchor_ctr_y_pi = anchor_ctr_y[positive_indices]
gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights
# clip widths to 1
gt_widths = torch.clamp(gt_widths, min=1)
gt_heights = torch.clamp(gt_heights, min=1)
targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
targets_dw = torch.log(gt_widths / anchor_widths_pi)
targets_dh = torch.log(gt_heights / anchor_heights_pi)
targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
targets = targets.t()
if self.is_cuda:
targets = targets.cuda() / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
else:
targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])
regression_diff = torch.abs(targets - regression[positive_indices, :])
regression_loss = torch.where(
torch.le(regression_diff, 1.0 / 9.0),
0.5 * 9.0 * torch.pow(regression_diff, 2),
regression_diff - 0.5 / 9.0
)
regression_losses.append(regression_loss.mean())
else:
if self.is_cuda:
regression_losses.append(torch.tensor(0).float().cuda())
else:
regression_losses.append(torch.tensor(0).float())
return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses) \
.mean(dim=0, keepdim=True)
class LevelAttentionLoss(nn.Module):
def __init__(self, is_cuda=True):
super(LevelAttentionLoss, self).__init__()
self.is_cuda = is_cuda
def forward(self, img_batch_shape, attention_mask, bboxs):
h, w = img_batch_shape[2], img_batch_shape[3]
mask_losses = []
batch_size = bboxs.shape[0]
for j in range(batch_size):
bbox_annotation = bboxs[j, :, :]
bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]
if bbox_annotation.shape[0] == 0:
if self.is_cuda:
mask_losses.append(torch.tensor(0).float().cuda())
else:
mask_losses.append(torch.tensor(0).float())
continue
cond1 = torch.le(bbox_annotation[:, 0], w)
cond2 = torch.le(bbox_annotation[:, 1], h)
cond3 = torch.le(bbox_annotation[:, 2], w)
cond4 = torch.le(bbox_annotation[:, 3], h)
cond = cond1 * cond2 * cond3 * cond4
bbox_annotation = bbox_annotation[cond, :]
if bbox_annotation.shape[0] == 0:
if self.is_cuda:
mask_losses.append(torch.tensor(0).float().cuda())
else:
mask_losses.append(torch.tensor(0).float())
continue
bbox_area = (bbox_annotation[:, 2] - bbox_annotation[:, 0]) * (
bbox_annotation[:, 3] - bbox_annotation[:, 1])
mask_loss = []
for id in range(len(attention_mask)):
attention_map = attention_mask[id][j, 0, :, :]
min_area = (2 ** (id + 5)) ** 2 * 0.5
max_area = (2 ** (id + 5) * 1.58) ** 2 * 2
level_bbox_indice1 = torch.ge(bbox_area, min_area)
level_bbox_indice2 = torch.le(bbox_area, max_area)
level_bbox_indice = level_bbox_indice1 * level_bbox_indice2
level_bbox_annotation = bbox_annotation[level_bbox_indice, :].clone()
# level_bbox_annotation = bbox_annotation.clone()
attention_h, attention_w = attention_map.shape
if level_bbox_annotation.shape[0]:
level_bbox_annotation[:, 0] *= attention_w / w
level_bbox_annotation[:, 1] *= attention_h / h
level_bbox_annotation[:, 2] *= attention_w / w
level_bbox_annotation[:, 3] *= attention_h / h
mask_gt = torch.zeros(attention_map.shape)
if self.is_cuda:
mask_gt = mask_gt.cuda()
for i in range(level_bbox_annotation.shape[0]):
x1 = max(int(level_bbox_annotation[i, 0]), 0)
y1 = max(int(level_bbox_annotation[i, 1]), 0)
x2 = min(math.ceil(level_bbox_annotation[i, 2]) + 1, attention_w)
y2 = min(math.ceil(level_bbox_annotation[i, 3]) + 1, attention_h)
mask_gt[y1:y2, x1:x2] = 1
mask_gt = mask_gt[mask_gt >= 0]
mask_predict = attention_map[attention_map >= 0]
mask_loss.append(F.binary_cross_entropy(mask_predict, mask_gt))
mask_losses.append(torch.stack(mask_loss).mean())
return torch.stack(mask_losses).mean(dim=0, keepdim=True)

vectorizer/identification/model_level_attention.py

@@ -0,0 +1,385 @@
import torch.nn as nn
import torch
import math
from identification.utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes
from identification.anchors import Anchors
from identification.losses import LevelAttentionLoss, FocalLoss
from torchvision.ops.boxes import nms as tv_nms
def nms(dets, thresh):
"""Dispatch to either CPU or GPU NMS implementations. Accept dets as tensor"""
return tv_nms(dets[:, :4], dets[:, 4], thresh)
class PyramidFeatures(nn.Module):
def __init__(self, c3_size, c4_size, c5_size, feature_size=256):
super(PyramidFeatures, self).__init__()
# upsample C5 to get P5 from the FPN paper
self.p5_1 = nn.Conv2d(c5_size, feature_size, kernel_size=1, stride=1, padding=0)
self.p5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
self.p5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
# add P5 elementwise to C4
self.p4_1 = nn.Conv2d(c4_size, feature_size, kernel_size=1, stride=1, padding=0)
self.p4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
self.p4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
# add P4 elementwise to C3
self.p3_1 = nn.Conv2d(c3_size, feature_size, kernel_size=1, stride=1, padding=0)
self.p3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
# "P6 is obtained via a 3x3 stride-2 conv on C5"
self.p6 = nn.Conv2d(c5_size, feature_size, kernel_size=3, stride=2, padding=1)
# "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
self.p7_1 = nn.ReLU()
self.p7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1)
def forward(self, inputs):
c3, c4, c5 = inputs
# TODO hack for old model: checkpoints pickled with an older torchvision lack the padding_mode attribute
self.p5_1.padding_mode = 'zeros'
self.p5_2.padding_mode = 'zeros'
self.p4_1.padding_mode = 'zeros'
self.p4_2.padding_mode = 'zeros'
self.p3_1.padding_mode = 'zeros'
self.p3_2.padding_mode = 'zeros'
self.p6.padding_mode = 'zeros'
self.p7_2.padding_mode = 'zeros'
p5_x = self.p5_1(c5)
p5_upsampled_x = self.p5_upsampled(p5_x)
p5_x = self.p5_2(p5_x)
p4_x = self.p4_1(c4)
p4_x = p5_upsampled_x + p4_x
p4_upsampled_x = self.p4_upsampled(p4_x)
p4_x = self.p4_2(p4_x)
p3_x = self.p3_1(c3)
p3_x = p3_x + p4_upsampled_x
p3_x = self.p3_2(p3_x)
p6_x = self.p6(c5)
p7_x = self.p7_1(p6_x)
p7_x = self.p7_2(p7_x)
return [p3_x, p4_x, p5_x, p6_x, p7_x]
class RegressionModel(nn.Module):
def __init__(self, num_features_in, num_anchors=9, feature_size=256):
super(RegressionModel, self).__init__()
self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
self.act1 = nn.ReLU()
self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act2 = nn.ReLU()
self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act3 = nn.ReLU()
self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act4 = nn.ReLU()
self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1)
def forward(self, x):
# TODO hack for old model
self.conv1.padding_mode = 'zeros'
self.conv2.padding_mode = 'zeros'
self.conv3.padding_mode = 'zeros'
self.conv4.padding_mode = 'zeros'
self.output.padding_mode = 'zeros'
out = self.conv1(x)
out = self.act1(out)
out = self.conv2(out)
out = self.act2(out)
out = self.conv3(out)
out = self.act3(out)
out = self.conv4(out)
out = self.act4(out)
out = self.output(out)
# out is B x C x W x H, with C = 4*num_anchors
out = out.permute(0, 2, 3, 1)
return out.contiguous().view(out.shape[0], -1, 4)
class ClassificationModel(nn.Module):
def __init__(self, num_features_in, num_anchors=9, num_classes=80, feature_size=256):
super(ClassificationModel, self).__init__()
self.num_classes = num_classes
self.num_anchors = num_anchors
self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
self.act1 = nn.ReLU()
self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act2 = nn.ReLU()
self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act3 = nn.ReLU()
self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act4 = nn.ReLU()
self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1)
self.output_act = nn.Sigmoid()
def forward(self, x):
# TODO hack for old model
self.conv1.padding_mode = 'zeros'
self.conv2.padding_mode = 'zeros'
self.conv3.padding_mode = 'zeros'
self.conv4.padding_mode = 'zeros'
self.output.padding_mode = 'zeros'
out = self.conv1(x)
out = self.act1(out)
out = self.conv2(out)
out = self.act2(out)
out = self.conv3(out)
out = self.act3(out)
out = self.conv4(out)
out = self.act4(out)
out = self.output(out)
out = self.output_act(out)
# out is B x C x W x H, with C = num_classes * num_anchors
out1 = out.permute(0, 2, 3, 1)
batch_size, width, height, channels = out1.shape
out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes)
return out2.contiguous().view(x.shape[0], -1, self.num_classes)
class LevelAttentionModel(nn.Module):
def __init__(self, num_features_in, feature_size=256):
super(LevelAttentionModel, self).__init__()
self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
self.act1 = nn.ReLU()
self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act2 = nn.ReLU()
self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act3 = nn.ReLU()
self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
self.act4 = nn.ReLU()
self.conv5 = nn.Conv2d(feature_size, 1, kernel_size=3, padding=1)
self.output_act = nn.Sigmoid()
def forward(self, x):
# TODO hack for old model
self.conv1.padding_mode = 'zeros'
self.conv2.padding_mode = 'zeros'
self.conv3.padding_mode = 'zeros'
self.conv4.padding_mode = 'zeros'
self.conv5.padding_mode = 'zeros'
out = self.conv1(x)
out = self.act1(out)
out = self.conv2(out)
out = self.act2(out)
out = self.conv3(out)
out = self.act3(out)
out = self.conv4(out)
out = self.act4(out)
out = self.conv5(out)
out_attention = self.output_act(out)
return out_attention
class ResNet(nn.Module):
def __init__(self, num_classes, block, layers, is_cuda=True):
self.inplanes = 64
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
if block == BasicBlock:
fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels, self.layer3[layers[2] - 1].conv2.out_channels,
self.layer4[layers[3] - 1].conv2.out_channels]
elif block == Bottleneck:
fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels, self.layer3[layers[2] - 1].conv3.out_channels,
self.layer4[layers[3] - 1].conv3.out_channels]
else:
raise ValueError("Invalid block type")
self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
self.regressionModel = RegressionModel(256)
self.classificationModel = ClassificationModel(256, num_classes=num_classes)
self.levelattentionModel = LevelAttentionModel(256)
self.anchors = Anchors(is_cuda=is_cuda)
self.regressBoxes = BBoxTransform(is_cuda=is_cuda)
self.clipBoxes = ClipBoxes()
self.levelattentionLoss = LevelAttentionLoss(is_cuda=is_cuda)
self.focalLoss = FocalLoss(is_cuda=is_cuda)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
# init.xavier_normal(m.weight)
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
prior = 0.01
self.classificationModel.output.weight.data.fill_(0)
self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
self.regressionModel.output.weight.data.fill_(0)
self.regressionModel.output.bias.data.fill_(0)
self.levelattentionModel.conv5.weight.data.fill_(0)
self.levelattentionModel.conv5.bias.data.fill_(0)
self.freeze_bn()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = [block(self.inplanes, planes, stride, downsample)]
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def freeze_bn(self):
"""Freeze BatchNorm layers."""
for layer in self.modules():
if isinstance(layer, nn.BatchNorm2d):
layer.eval()
def forward(self, inputs):
if self.training:
img_batch, annotations = inputs
else:
img_batch = inputs
annotations = None
# TODO hack for old model
self.conv1.padding_mode = 'zeros'
x = self.conv1(img_batch)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x1 = self.layer1(x)
x2 = self.layer2(x1)
x3 = self.layer3(x2)
x4 = self.layer4(x3)
features = self.fpn([x2, x3, x4])
attention = [self.levelattentionModel(feature) for feature in features]
# i = 1
# for level in attention:
# i += 1
# level = level.squeeze(0)
# level = np.array(255 * unnormalize(level)).copy()
# level = np.transpose(level, (1, 2, 0))
# plt.imsave(os.path.join('./output', str(i) + '.jpg'), level[:,:,0])
features = [features[i] * torch.exp(attention[i]) for i in range(len(features))]
regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)
anchors = self.anchors(img_batch)
if self.training:
clc_loss, reg_loss = self.focalLoss(classification, regression, anchors, annotations)
mask_loss = self.levelattentionLoss(img_batch.shape, attention, annotations)
return clc_loss, reg_loss, mask_loss
else:
# transformed_anchors = self.regressBoxes(anchors, regression)
transformed_anchors = self.clipBoxes(anchors, img_batch)
scores = torch.max(classification, dim=2, keepdim=True)[0]
scores_over_thresh = (scores > 0.05)[0, :, 0]
if scores_over_thresh.sum() == 0:
# no boxes to NMS, just return
# return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
return [None, None, None]
classification = classification[:, scores_over_thresh, :]
transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
scores = scores[:, scores_over_thresh, :]
anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.3)
nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
def resnet18(num_classes, is_cuda=True):
return ResNet(num_classes, BasicBlock, [2, 2, 2, 2], is_cuda=is_cuda)
def resnet34(num_classes, is_cuda=True):
return ResNet(num_classes, BasicBlock, [3, 4, 6, 3], is_cuda=is_cuda)
def resnet50(num_classes, is_cuda=True):
return ResNet(num_classes, Bottleneck, [3, 4, 6, 3], is_cuda=is_cuda)
def resnet101(num_classes, is_cuda=True):
return ResNet(num_classes, Bottleneck, [3, 4, 23, 3], is_cuda=is_cuda)
def resnet152(num_classes, is_cuda=True):
return ResNet(num_classes, Bottleneck, [3, 8, 36, 3], is_cuda=is_cuda)

vectorizer/identification/utils.py

@@ -0,0 +1,282 @@
import torch
import torch.nn as nn
import numpy as np
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
# TODO hack for old model
self.conv1.padding_mode = 'zeros'
self.conv2.padding_mode = 'zeros'
self.conv3.padding_mode = 'zeros'
if self.downsample is not None:
self.downsample[0].padding_mode = 'zeros'
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class SELayer(nn.Module):
def __init__(self, channel, reduction=16):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, channel // reduction),
nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel),
nn.Sigmoid()
)
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y
class BottleneckSE(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=16):
super(BottleneckSE, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se = SELayer(planes * 4, reduction)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out = self.se(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class CBAMModule(nn.Module):
def __init__(self, channels, reduction):
super(CBAMModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.max_pool = nn.AdaptiveMaxPool2d(1)
self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
padding=0)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
padding=0)
self.sigmoid_channel = nn.Sigmoid()
self.conv_after_concat = nn.Conv2d(2, 1, kernel_size=7, stride=1, padding=3)
self.sigmoid_spatial = nn.Sigmoid()
def forward(self, x):
module_input = x
avg = self.avg_pool(x)
mx = self.max_pool(x)
avg = self.fc1(avg)
mx = self.fc1(mx)
avg = self.relu(avg)
mx = self.relu(mx)
avg = self.fc2(avg)
mx = self.fc2(mx)
x = avg + mx
x = self.sigmoid_channel(x)
x = module_input * x
module_input = x
avg = torch.mean(x, 1, True)
mx, _ = torch.max(x, 1, True)
x = torch.cat((avg, mx), 1)
x = self.conv_after_concat(x)
x = self.sigmoid_spatial(x)
x = module_input * x
return x
class BottleneckCBAM(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=16):
super(BottleneckCBAM, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se = CBAMModule(planes * 4, reduction)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out = self.se(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class BBoxTransform(nn.Module):
def __init__(self, mean=None, std=None, is_cuda=True):
super(BBoxTransform, self).__init__()
if mean is None:
self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32))
if is_cuda:
self.mean = self.mean.cuda()
else:
self.mean = mean
if std is None:
self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32))
if is_cuda:
self.std = self.std.cuda()
else:
self.std = std
def forward(self, boxes, deltas):
widths = boxes[:, :, 2] - boxes[:, :, 0]
heights = boxes[:, :, 3] - boxes[:, :, 1]
ctr_x = boxes[:, :, 0] + 0.5 * widths
ctr_y = boxes[:, :, 1] + 0.5 * heights
dx = deltas[:, :, 0] * self.std[0] + self.mean[0]
dy = deltas[:, :, 1] * self.std[1] + self.mean[1]
dw = deltas[:, :, 2] * self.std[2] + self.mean[2]
dh = deltas[:, :, 3] * self.std[3] + self.mean[3]
pred_ctr_x = ctr_x + dx * widths
pred_ctr_y = ctr_y + dy * heights
pred_w = torch.exp(dw) * widths
pred_h = torch.exp(dh) * heights
pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h
pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)
return pred_boxes
class ClipBoxes(nn.Module):
def __init__(self):
super(ClipBoxes, self).__init__()
def forward(self, boxes, img):
batch_size, num_channels, height, width = img.shape
boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0)
boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0)
boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width)
boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height)
return boxes

vectorizer/recognition/angle.py

@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
class AngleLinear(nn.Module):
def __init__(self, in_features, out_features):
super(AngleLinear, self).__init__()
self.W = Parameter(torch.FloatTensor(out_features, in_features))
nn.init.xavier_uniform_(self.W)
def forward(self, input):
x = F.normalize(input)
W = F.normalize(self.W)
return F.linear(x, W)
class AdaCos(nn.Module):
def __init__(self, num_classes, m=0.50, is_cuda=True):
super(AdaCos, self).__init__()
self.n_classes = num_classes
self.s = math.sqrt(2) * math.log(num_classes - 1)
self.base_s = self.s
self.m = m
self.criterion = nn.CrossEntropyLoss()
if is_cuda:
self.criterion = self.criterion.cuda()
def forward(self, input, label):
# changed to fixed adacos
# theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
# one_hot = torch.zeros_like(input)
# one_hot.scatter_(1, label.view(-1, 1).long(), 1)
# with torch.no_grad():
# B_avg = torch.where(one_hot < 1, torch.exp(self.s * input), torch.zeros_like(input))
# B_avg = torch.sum(B_avg) / input.size(0)
# theta_med = torch.median(theta)
# self.s = torch.log(B_avg) / torch.cos(torch.min(math.pi/4 * torch.ones_like(theta_med), theta_med))
# # TODO why converge to infinity ?
# self.s = torch.clamp(self.s, self.base_s / 2, self.base_s * 2)
# print(self.s)
output = self.s * input
return self.criterion(output, label)
class ArcFace(nn.Module):
def __init__(self, s=30.0, m=0.50, is_cuda=True):
super(ArcFace, self).__init__()
self.s = s
self.m = m
self.criterion = nn.CrossEntropyLoss()
if is_cuda:
self.criterion = self.criterion.cuda()
def forward(self, input, label):
theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
target_logits = torch.cos(theta + self.m)
one_hot = torch.zeros_like(input)
one_hot.scatter_(1, label.view(-1, 1).long(), 1)
output = input * (1 - one_hot) + target_logits * one_hot
output *= self.s
return self.criterion(output, label)
class SphereFace(nn.Module):
def __init__(self, s=30.0, m=1.35, is_cuda=True):
super(SphereFace, self).__init__()
self.s = s
self.m = m
self.criterion = nn.CrossEntropyLoss()
if is_cuda:
self.criterion = self.criterion.cuda()
def forward(self, input, label):
theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7))
target_logits = torch.cos(self.m * theta)
one_hot = torch.zeros_like(input)
one_hot.scatter_(1, label.view(-1, 1).long(), 1)
output = input * (1 - one_hot) + target_logits * one_hot
output *= self.s
return self.criterion(output, label)
class CosFace(nn.Module):
def __init__(self, s=30.0, m=0.35, is_cuda=True):
super(CosFace, self).__init__()
self.s = s
self.m = m
self.criterion = nn.CrossEntropyLoss()
if is_cuda:
self.criterion = self.criterion.cuda()
def forward(self, input, label):
target_logits = input - self.m
one_hot = torch.zeros_like(input)
one_hot.scatter_(1, label.view(-1, 1).long(), 1)
output = input * (1 - one_hot) + target_logits * one_hot
output *= self.s
return self.criterion(output, label)

vectorizer/recognition/focal_loss.py

@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
"""
Created on 18-6-7 10:11 AM
@author: ronghuaiyang
"""
import torch
import torch.nn as nn
class FocalLoss(nn.Module):
def __init__(self, gamma=0, eps=1e-7, is_cuda=True):
super(FocalLoss, self).__init__()
self.gamma = gamma
self.eps = eps
self.ce = nn.CrossEntropyLoss()
if is_cuda:
self.ce = self.ce.cuda()
def forward(self, inp, target):
# note: self.ce already averages over the batch, so p is derived from the
# batch-mean cross entropy rather than per sample
logp = self.ce(inp, target)
p = torch.exp(-logp)
loss = (1 - p) ** self.gamma * logp
return loss.mean()

vectorizer/recognition/nets.py

@@ -0,0 +1,129 @@
import torchvision.models as models
from torch import nn
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model with a 512-d embedding head.
Args:
pretrained (bool): currently ignored; weights are loaded from checkpoints
"""
model = models.resnet18(num_classes=512, **kwargs)
return model
def resnet34(pretrained=False, **kwargs):
"""Constructs a ResNet-34 model with a 512-d embedding head.
Args:
pretrained (bool): currently ignored; weights are loaded from checkpoints
"""
model = models.resnet34(num_classes=512, **kwargs)
return model
def resnet50(pretrained=False, **kwargs):
"""Constructs a ResNet-50 model with a 512-d embedding head.
Args:
pretrained (bool): currently ignored; weights are loaded from checkpoints
"""
model = models.resnet50(num_classes=512, **kwargs)
return model
def resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model with a 512-d embedding head.
Args:
pretrained (bool): currently ignored; weights are loaded from checkpoints
"""
model = models.resnet101(num_classes=512, **kwargs)
return model
def resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model with a 512-d embedding head.
Args:
pretrained (bool): currently ignored; weights are loaded from checkpoints
"""
model = models.resnet152(num_classes=512, **kwargs)
return model
def sphere20():
return sphere20a()
class sphere20a(nn.Module):
def __init__(self):
super(sphere20a, self).__init__()
#input here is B*3*224*224 (fc5 below is sized for it); the per-layer shape
#comments that follow describe the original 112*96 SphereFace design
self.conv1_1 = nn.Conv2d(3,64,3,2,1) #=>B*64*56*48
self.relu1_1 = nn.PReLU(64)
self.conv1_2 = nn.Conv2d(64,64,3,1,1)
self.relu1_2 = nn.PReLU(64)
self.conv1_3 = nn.Conv2d(64,64,3,1,1)
self.relu1_3 = nn.PReLU(64)
self.conv2_1 = nn.Conv2d(64,128,3,2,1) #=>B*128*28*24
self.relu2_1 = nn.PReLU(128)
self.conv2_2 = nn.Conv2d(128,128,3,1,1)
self.relu2_2 = nn.PReLU(128)
self.conv2_3 = nn.Conv2d(128,128,3,1,1)
self.relu2_3 = nn.PReLU(128)
self.conv2_4 = nn.Conv2d(128,128,3,1,1) #=>B*128*28*24
self.relu2_4 = nn.PReLU(128)
self.conv2_5 = nn.Conv2d(128,128,3,1,1)
self.relu2_5 = nn.PReLU(128)
self.conv3_1 = nn.Conv2d(128,256,3,2,1) #=>B*256*14*12
self.relu3_1 = nn.PReLU(256)
self.conv3_2 = nn.Conv2d(256,256,3,1,1)
self.relu3_2 = nn.PReLU(256)
self.conv3_3 = nn.Conv2d(256,256,3,1,1)
self.relu3_3 = nn.PReLU(256)
self.conv3_4 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
self.relu3_4 = nn.PReLU(256)
self.conv3_5 = nn.Conv2d(256,256,3,1,1)
self.relu3_5 = nn.PReLU(256)
self.conv3_6 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
self.relu3_6 = nn.PReLU(256)
self.conv3_7 = nn.Conv2d(256,256,3,1,1)
self.relu3_7 = nn.PReLU(256)
self.conv3_8 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
self.relu3_8 = nn.PReLU(256)
self.conv3_9 = nn.Conv2d(256,256,3,1,1)
self.relu3_9 = nn.PReLU(256)
self.conv4_1 = nn.Conv2d(256,512,3,2,1) #=>B*512*7*6
self.relu4_1 = nn.PReLU(512)
self.conv4_2 = nn.Conv2d(512,512,3,1,1)
self.relu4_2 = nn.PReLU(512)
self.conv4_3 = nn.Conv2d(512,512,3,1,1)
self.relu4_3 = nn.PReLU(512)
self.fc5 = nn.Linear(512*14*14,512)
# ORIGINAL for 112x96: self.fc5 = nn.Linear(512*7*6,512)
def forward(self, x):
x = self.relu1_1(self.conv1_1(x))
x = x + self.relu1_3(self.conv1_3(self.relu1_2(self.conv1_2(x))))
x = self.relu2_1(self.conv2_1(x))
x = x + self.relu2_3(self.conv2_3(self.relu2_2(self.conv2_2(x))))
x = x + self.relu2_5(self.conv2_5(self.relu2_4(self.conv2_4(x))))
x = self.relu3_1(self.conv3_1(x))
x = x + self.relu3_3(self.conv3_3(self.relu3_2(self.conv3_2(x))))
x = x + self.relu3_5(self.conv3_5(self.relu3_4(self.conv3_4(x))))
x = x + self.relu3_7(self.conv3_7(self.relu3_6(self.conv3_6(x))))
x = x + self.relu3_9(self.conv3_9(self.relu3_8(self.conv3_8(x))))
x = self.relu4_1(self.conv4_1(x))
x = x + self.relu4_3(self.conv4_3(self.relu4_2(self.conv4_2(x))))
x = x.view(x.size(0),-1)
x = self.fc5(x)
return x

vectorizer/recognition/test.py

@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
"""
Created on 18-5-30 4:55 PM
@author: ronghuaiyang
"""
import os
import argparse
from torch.utils.data import TensorDataset, DataLoader
from recognition.nets import resnet18, resnet34, resnet50, resnet101, resnet152, sphere20
import torch
import numpy as np
from torch.nn import DataParallel
from PIL import Image
from torchvision import transforms as T
imagesize = 224
batch_size = 20
class Dataset(torch.utils.data.Dataset):
def __init__(self, identity_list, root_path):
self.identity_list = identity_list
self.root_path = root_path
normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
self.transforms = T.Compose([
T.Resize(imagesize),
T.ToTensor(),
normalize
])
def __getitem__(self, index):
a, b, label = self.identity_list[index]
a_data = self.load_image(a)
b_data = self.load_image(b)
return a_data, b_data, label
def load_image(self, p):
img_path = os.path.join(self.root_path, p)
data = Image.open(img_path)
if data is None:
return None
data = data.convert(mode="RGB")
data = self.transforms(data)
return data
def __len__(self):
return len(self.identity_list)
def get_pair_list(pair_list):
print('Loading pair list')
with open(pair_list, 'r') as fd:
pairs = fd.readlines()
return [line.split() for line in pairs]
def load_img_data(identity_list, root_path):
dataset = Dataset(identity_list, root_path)
loader = DataLoader(dataset,
batch_size=batch_size,
shuffle=False,
# pin_memory=True,
num_workers=0)
return loader
def lfw_test2(model, identity_list, img_data, is_cuda=True):
print('Converting to features')
sims = []
labels = []
max_size = len(img_data) * batch_size
for i, sample in enumerate(img_data):
if i % 10 == 0:
print('%d of %d' % (i * batch_size, max_size))
a_data, b_data, label = sample
if is_cuda:
a_data = a_data.cuda()
b_data = b_data.cuda()
a_output = model(a_data).detach().cpu().numpy()
b_output = model(b_data).detach().cpu().numpy()
for idx in range(len(label)):  # the last batch may be smaller than batch_size
sim = cosin_metric(a_output[idx], b_output[idx])
sims.append(sim)
labels.append(label[idx] == '1')  # np.bool is deprecated; a plain bool suffices
acc, th = cal_accuracy(sims, labels)
print('lfw face verification accuracy: ', acc, 'threshold: ', th)
return acc
def cosin_metric(x1, x2):
return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
def cal_accuracy(y_score, y_true):
y_score = np.asarray(y_score)
y_true = np.asarray(y_true)
best_acc = 0
best_th = 0
for i in range(len(y_score)):
th = y_score[i]
y_test = (y_score >= th)
acc = np.mean((y_test == y_true).astype(int))
if acc > best_acc:
best_acc = acc
best_th = th
return best_acc, best_th
def main(args=None):
parser = argparse.ArgumentParser(description='Testing script for face identification.')
parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, default=50)
parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel',
default=False, action='store_true')
parser.add_argument('--model', help='Path to model')
parser.add_argument('--batch_size', help='Batch size (default 50)', type=int, default=50)
parser.add_argument('--lfw_root', help='Path to LFW dataset')
parser.add_argument('--lfw_pair_list', help='Path to LFW pair list file')
parser = parser.parse_args(args)
# the module-level batch_size drives the data loader, so honor the CLI flag
global batch_size
batch_size = parser.batch_size
is_cuda = torch.cuda.is_available()
print('CUDA available: {}'.format(is_cuda))
if parser.depth == 18:
model = resnet18()
elif parser.depth == 20:
model = sphere20()
elif parser.depth == 34:
model = resnet34()
elif parser.depth == 50:
model = resnet50()
elif parser.depth == 101:
model = resnet101()
elif parser.depth == 152:
model = resnet152()
else:
raise ValueError('Unsupported model depth, must be one of 18, 20, 34, 50, 101, 152')
if parser.parallel:
model = DataParallel(model)
# load_model(model, opt.test_model_path)
model.load_state_dict(torch.load(parser.model))
if is_cuda:
model.cuda()
identity_list = get_pair_list(parser.lfw_pair_list)
img_data = load_img_data(identity_list, parser.lfw_root)
model.eval()
lfw_test2(model, identity_list, img_data, is_cuda=is_cuda)
if __name__ == '__main__':
main()

vectorizer/recognition/train.py

@@ -0,0 +1,201 @@
import argparse
import os
import time
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torch.optim.lr_scheduler import StepLR
from torchvision import transforms as T
from recognition.angle import AngleLinear, CosFace, SphereFace, ArcFace, AdaCos
from recognition.focal_loss import FocalLoss
from recognition.nets import resnet18, resnet34, resnet50, resnet101, resnet152, sphere20
from recognition.test import lfw_test2, get_pair_list, load_img_data
class Dataset(torch.utils.data.Dataset):
def __init__(self, root, data_list_file, imagesize):
with open(os.path.join(data_list_file), 'r') as fd:
imgs = fd.readlines()
imgs = [os.path.join(root, img[:-1]) for img in imgs]
self.labels = sorted(set(img.split()[1] for img in imgs))  # sorted for a stable label -> class mapping across runs
self.imgs = np.random.permutation(imgs)
normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
self.transforms = T.Compose([
T.RandomResizedCrop(imagesize),
T.RandomHorizontalFlip(),
T.ToTensor(),
normalize
])
def __getitem__(self, index):
sample = self.imgs[index]
splits = sample.split()
img_path = splits[0]
data = Image.open(img_path)
data = data.convert(mode="RGB")
data = self.transforms(data)
cls = self.label_to_class(splits[1])
return data.float(), cls
def __len__(self):
return len(self.imgs)
def label_to_class(self, label):
for idx, v in enumerate(self.labels):
if v == label:
return idx
raise Exception("Unknown label %s" % label)
def num_labels(self):
return len(self.labels)
def main(args=None):
parser = argparse.ArgumentParser(description='Training script for face identification.')
parser.add_argument('--print_freq', help='Print every N batch (default 100)', type=int, default=100)
parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, default=50)
parser.add_argument('--lr_step', help='Learning rate step (default 10)', type=int, default=10)
parser.add_argument('--lr', help='Learning rate (default 0.1)', type=float, default=0.1)
parser.add_argument('--weight_decay', help='Weight decay (default 0.0005)', type=float, default=0.0005)
parser.add_argument('--easy_margin', help='Use easy margin (default false)', dest='easy_margin', default=False, action='store_true')
parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel',
default=False, action='store_true')
parser.add_argument('--loss', help='One of focal_loss, cross_entropy, arcface, cosface, sphereface, adacos (default cross_entropy)',
type=str, default='cross_entropy')
parser.add_argument('--optimizer', help='One of sgd, adam (default sgd)',
type=str, default='sgd')
parser.add_argument('--batch_size', help='Batch size (default 16)', type=int, default=16)
parser.add_argument('--casia_list', help='Path to CASIA dataset file list (training)')
parser.add_argument('--casia_root', help='Path to CASIA images (training)')
parser.add_argument('--lfw_root', help='Path to LFW dataset (testing)')
parser.add_argument('--lfw_pair_list', help='Path to LFW pair list file (testing)')
parser.add_argument('--model_name', help='Name of the model to save')
parser = parser.parse_args(args)
is_cuda = torch.cuda.is_available()
print('CUDA available: {}'.format(is_cuda))
imagesize = 224
if parser.depth == 18:
model = resnet18()
elif parser.depth == 20:
model = sphere20()
elif parser.depth == 34:
model = resnet34()
elif parser.depth == 50:
model = resnet50()
elif parser.depth == 101:
model = resnet101()
elif parser.depth == 152:
model = resnet152()
else:
raise ValueError('Unsupported model depth, must be one of 18, 20, 34, 50, 101, 152')
# TODO split training dataset to train/validation and stop using test dataset for acc
train_dataset = Dataset(parser.casia_root, parser.casia_list, imagesize)
trainloader = torch.utils.data.DataLoader(train_dataset,
batch_size=parser.batch_size,
shuffle=True,
# pin_memory=True,
num_workers=0)
num_classes = train_dataset.num_labels()
if parser.loss == 'focal_loss':
metric_fc = nn.Linear(512, num_classes)
criterion = FocalLoss(gamma=2, is_cuda=is_cuda)
elif parser.loss == 'cross_entropy':
metric_fc = nn.Linear(512, num_classes)
criterion = torch.nn.CrossEntropyLoss()
if is_cuda:
criterion = criterion.cuda()
elif parser.loss == 'cosface':
metric_fc = AngleLinear(512, num_classes)
criterion = CosFace(is_cuda=is_cuda)
elif parser.loss == 'arcface':
metric_fc = AngleLinear(512, num_classes)
criterion = ArcFace(is_cuda=is_cuda)
elif parser.loss == 'sphereface':
metric_fc = AngleLinear(512, num_classes)
criterion = SphereFace(is_cuda=is_cuda)
elif parser.loss == 'adacos':
metric_fc = AngleLinear(512, num_classes)
criterion = AdaCos(num_classes, is_cuda=is_cuda)
else:
raise ValueError('Unknown loss %s' % parser.loss)
if parser.optimizer == 'sgd':
optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
lr=parser.lr, weight_decay=parser.weight_decay)
elif parser.optimizer == 'adam':
optimizer = torch.optim.Adam([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
lr=parser.lr, weight_decay=parser.weight_decay)
else:
raise ValueError('Unknown optimizer %s' % parser.optimizer)
scheduler = StepLR(optimizer, step_size=parser.lr_step, gamma=0.1)
if parser.parallel:
model = nn.DataParallel(model)
metric_fc = nn.DataParallel(metric_fc)
if is_cuda:
model.cuda()
metric_fc.cuda()
print(model)
print(metric_fc)
identity_list = get_pair_list(parser.lfw_pair_list)
img_data = load_img_data(identity_list, parser.lfw_root)
print('{} train iters per epoch:'.format(len(trainloader)))
start = time.time()
last_acc = 0.0
for i in range(parser.epochs):
model.train()
for ii, data in enumerate(trainloader):
data_input, label = data
if is_cuda:
data_input = data_input.cuda()
label = label.cuda().long()
feature = model(data_input)
output = metric_fc(feature)
loss = criterion(output, label)
optimizer.zero_grad()
loss.backward()
optimizer.step()
iters = i * len(trainloader) + ii
if iters % parser.print_freq == 0:
speed = parser.print_freq / (time.time() - start)
time_str = time.asctime(time.localtime(time.time()))
print('{} train epoch {} iter {} {} iters/s loss {}'.format(time_str, i, ii, speed, loss.item()))
start = time.time()
scheduler.step()  # step the LR schedule once per epoch, after the optimizer updates
model.eval()
acc = lfw_test2(model, identity_list, img_data, is_cuda=is_cuda)
print('Accuracy: %f' % acc)
if last_acc < acc:
#TODO remove makedir
os.makedirs('./ckpt', exist_ok=True)
torch.save(model.state_dict(), './ckpt/' + parser.model_name + '_{}.pt'.format(i))
torch.save(metric_fc.state_dict(), './ckpt/' + parser.model_name + '_metric_{}.pt'.format(i))
last_acc = acc  # track the best accuracy so only improving checkpoints are saved
if __name__ == '__main__':
main()

vectorizer/requirements.txt

@@ -0,0 +1,4 @@
Flask
Pillow
https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl
https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp37-cp37m-linux_x86_64.whl

vectorizer/vectorizer/server.py

@@ -0,0 +1,82 @@
import logging
import os
import sys
import tempfile
from flask import Flask, request, abort, jsonify
from werkzeug.utils import secure_filename
import torch
from recognition.nets import resnet50
from torchvision import transforms as T
from PIL import Image
import identification.detector as fan
is_cuda = torch.cuda.is_available()
fan_model = fan.load_model('ckpt/wider6_10.pt', is_cuda=is_cuda)
# load recognition model
rec_model = resnet50()
rec_model.load_state_dict(torch.load('ckpt/recongition3_37.pt', map_location=lambda storage, location: storage))
rec_model.eval()
if is_cuda:
rec_model = rec_model.cuda()
# compute vectors
normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
imagesize = 224
transforms = T.Compose([
T.Resize((imagesize, imagesize)),
T.ToTensor(),
normalize
])
app = Flask(__name__)
UPLOAD_FOLDER = tempfile.gettempdir()
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
def compute_vector(data):
with torch.no_grad():
data = transforms(data)
if is_cuda:
data = data.cuda()
mo = rec_model(data.unsqueeze(dim=0))
return mo.detach().cpu().numpy()
@app.route('/vectorize', methods=['GET', 'POST'])
def upload_file():
if request.method == 'GET':
return 'OK'
if request.method == 'POST':
# check if the post request has the file part
if 'file' not in request.files:
abort(500)
f = request.files['file']
if f:
filename = secure_filename(f.filename)
filepath = os.path.join(UPLOAD_FOLDER, filename)
f.save(filepath)
img = Image.open(filepath)
data = img.convert(mode="RGB")
with torch.no_grad():
boxes = fan.fan_detect(fan_model, data, threshold=0.9, is_cuda=is_cuda).astype(int)
boxes = [b for b in boxes if abs(b[2] - b[0]) >= imagesize / 2 and abs(b[3] - b[1]) >= imagesize / 2]  # keep boxes at least half the crop size in width (x2-x1) and height (y2-y1); the original compared y1 against x1
if boxes is None or len(boxes) == 0:
abort(404)
extracted = [{'box': arr.tolist(), 'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist()} for arr in boxes]
return jsonify(extracted)
else:
abort(500)
if __name__ == '__main__':
logging.basicConfig()
app.run()