# -*- coding: utf-8 -*-
"""
Copyright 2019 Petr Masopust, Aprar s.r.o.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Adapted from https://github.com/rainofmine/Face_Attention_Network
"""
import math

import torch
import torch.nn as nn
from torchvision.ops.boxes import nms as tv_nms

from identification.utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes
from identification.anchors import Anchors
from identification.losses import LevelAttentionLoss, FocalLoss


def nms(dets, thresh):
    """Dispatch to the torchvision NMS implementation (CPU or GPU, depending
    on where dets lives). dets is an N x 5 tensor of [x1, y1, x2, y2, score]
    rows; returns the indices of the boxes kept after suppression."""
    return tv_nms(dets[:, :4], dets[:, 4], thresh)
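
# Usage sketch (illustrative values only): two heavily overlapping boxes,
# the lower-scoring one is suppressed at an IoU threshold of 0.3:
#
#   dets = torch.tensor([[0., 0., 10., 10., 0.9],
#                        [1., 1., 11., 11., 0.8]])
#   nms(dets, 0.3)  # -> tensor([0])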

class PyramidFeatures(nn.Module):
    def __init__(self, c3_size, c4_size, c5_size, feature_size=256):
        super(PyramidFeatures, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.p5_1 = nn.Conv2d(c5_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.p5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P5 elementwise to C4
        self.p4_1 = nn.Conv2d(c4_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.p4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P4 elementwise to C3
        self.p3_1 = nn.Conv2d(c3_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.p3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # "P6 is obtained via a 3x3 stride-2 conv on C5"
        self.p6 = nn.Conv2d(c5_size, feature_size, kernel_size=3, stride=2, padding=1)

        # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
        self.p7_1 = nn.ReLU()
        self.p7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1)

    def forward(self, inputs):
        c3, c4, c5 = inputs

        # TODO compatibility hack: checkpoints serialized before Conv2d gained
        # a padding_mode attribute fail in forward(), so set it explicitly here
        self.p5_1.padding_mode = 'zeros'
        self.p5_2.padding_mode = 'zeros'
        self.p4_1.padding_mode = 'zeros'
        self.p4_2.padding_mode = 'zeros'
        self.p3_1.padding_mode = 'zeros'
        self.p3_2.padding_mode = 'zeros'
        self.p6.padding_mode = 'zeros'
        self.p7_2.padding_mode = 'zeros'

        p5_x = self.p5_1(c5)
        p5_upsampled_x = self.p5_upsampled(p5_x)
        p5_x = self.p5_2(p5_x)

        p4_x = self.p4_1(c4)
        p4_x = p5_upsampled_x + p4_x
        p4_upsampled_x = self.p4_upsampled(p4_x)
        p4_x = self.p4_2(p4_x)

        p3_x = self.p3_1(c3)
        p3_x = p3_x + p4_upsampled_x
        p3_x = self.p3_2(p3_x)

        p6_x = self.p6(c5)

        p7_x = self.p7_1(p6_x)
        p7_x = self.p7_2(p7_x)

        return [p3_x, p4_x, p5_x, p6_x, p7_x]
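
# Shape sketch (assumed, for a 512x512 input): C3/C4/C5 arrive at strides
# 8/16/32 (64x64, 32x32, 16x16 maps), and the pyramid adds P6/P7 at strides
# 64/128, so forward() returns five maps of 64/32/16/8/4 pixels per side,
# each with `feature_size` channels.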

class RegressionModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, feature_size=256):
        super(RegressionModel, self).__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1)

    def forward(self, x):
        # TODO compatibility hack for old serialized models (see PyramidFeatures.forward)
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.output.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)
        out = self.conv2(out)
        out = self.act2(out)
        out = self.conv3(out)
        out = self.act3(out)
        out = self.conv4(out)
        out = self.act4(out)
        out = self.output(out)

        # out is B x C x W x H, with C = 4 * num_anchors
        out = out.permute(0, 2, 3, 1)
        return out.contiguous().view(out.shape[0], -1, 4)

class ClassificationModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, num_classes=80, feature_size=256):
        super(ClassificationModel, self).__init__()

        self.num_classes = num_classes
        self.num_anchors = num_anchors

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    def forward(self, x):
        # TODO compatibility hack for old serialized models (see PyramidFeatures.forward)
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.output.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)
        out = self.conv2(out)
        out = self.act2(out)
        out = self.conv3(out)
        out = self.act3(out)
        out = self.conv4(out)
        out = self.act4(out)
        out = self.output(out)
        out = self.output_act(out)

        # out is B x C x W x H, with C = num_anchors * num_classes
        out1 = out.permute(0, 2, 3, 1)
        batch_size, width, height, channels = out1.shape
        out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes)
        return out2.contiguous().view(x.shape[0], -1, self.num_classes)
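
# The two heads flatten their outputs the same way, so for a given level row i
# of the regression output (B x H*W*num_anchors x 4) and row i of the
# classification output (B x H*W*num_anchors x num_classes) describe the same
# anchor, which is what lets ResNet.forward concatenate the per-level results
# and index them against the anchor grid directly.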

class LevelAttentionModel(nn.Module):
    def __init__(self, num_features_in, feature_size=256):
        super(LevelAttentionModel, self).__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.conv5 = nn.Conv2d(feature_size, 1, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    def forward(self, x):
        # TODO compatibility hack for old serialized models (see PyramidFeatures.forward)
        self.conv1.padding_mode = 'zeros'
        self.conv2.padding_mode = 'zeros'
        self.conv3.padding_mode = 'zeros'
        self.conv4.padding_mode = 'zeros'
        self.conv5.padding_mode = 'zeros'

        out = self.conv1(x)
        out = self.act1(out)
        out = self.conv2(out)
        out = self.act2(out)
        out = self.conv3(out)
        out = self.act3(out)
        out = self.conv4(out)
        out = self.act4(out)
        out = self.conv5(out)
        out_attention = self.output_act(out)

        return out_attention
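
# The attention head emits a single-channel sigmoid mask per pyramid level.
# ResNet.forward applies it as feature * torch.exp(mask): regions with mask
# near 1 are amplified by up to e, regions near 0 are left roughly unchanged.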

class ResNet(nn.Module):
    def __init__(self, num_classes, block, layers, is_cuda=True):
        self.inplanes = 64
        super(ResNet, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels,
                         self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels,
                         self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError("Invalid block type")

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.levelattentionModel = LevelAttentionModel(256)

        self.anchors = Anchors(is_cuda=is_cuda)
        self.regressBoxes = BBoxTransform(is_cuda=is_cuda)
        self.clipBoxes = ClipBoxes()
        self.levelattentionLoss = LevelAttentionLoss(is_cuda=is_cuda)
        self.focalLoss = FocalLoss(is_cuda=is_cuda)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # init.xavier_normal(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # focal-loss style initialisation: bias the classification output so
        # every anchor starts with roughly `prior` (1%) predicted probability
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.levelattentionModel.conv5.weight.data.fill_(0)
        self.levelattentionModel.conv5.bias.data.fill_(0)

        self.freeze_bn()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def freeze_bn(self):
        """Freeze BatchNorm layers."""
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()
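
    # NOTE: nn.Module.train() switches BatchNorm layers back to training mode,
    # so training code that wants the statistics kept frozen must call
    # freeze_bn() again after every .train() call.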

    def forward(self, inputs):
        if self.training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs
            annotations = None

        # TODO compatibility hack for old serialized models (see PyramidFeatures.forward)
        self.conv1.padding_mode = 'zeros'

        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        features = self.fpn([x2, x3, x4])
        attention = [self.levelattentionModel(feature) for feature in features]

        # debug visualisation of the per-level attention masks:
        # i = 1
        # for level in attention:
        #     i += 1
        #     level = level.squeeze(0)
        #     level = np.array(255 * unnormalize(level)).copy()
        #     level = np.transpose(level, (1, 2, 0))
        #     plt.imsave(os.path.join('./output', str(i) + '.jpg'), level[:, :, 0])

        features = [features[i] * torch.exp(attention[i]) for i in range(len(features))]

        regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
        classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)

        anchors = self.anchors(img_batch)

        if self.training:
            clc_loss, reg_loss = self.focalLoss(classification, regression, anchors, annotations)
            mask_loss = self.levelattentionLoss(img_batch.shape, attention, annotations)
            return clc_loss, reg_loss, mask_loss
        else:
            # apply the predicted offsets to the anchors, then clip to the image
            transformed_anchors = self.regressBoxes(anchors, regression)
            transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)

            scores = torch.max(classification, dim=2, keepdim=True)[0]
            scores_over_thresh = (scores > 0.05)[0, :, 0]

            if scores_over_thresh.sum() == 0:
                # no boxes to NMS, just return
                # return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
                return [None, None, None]

            classification = classification[:, scores_over_thresh, :]
            transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
            scores = scores[:, scores_over_thresh, :]

            anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.3)
            nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)

            return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]

def resnet18(num_classes, is_cuda=True):
    return ResNet(num_classes, BasicBlock, [2, 2, 2, 2], is_cuda=is_cuda)


def resnet34(num_classes, is_cuda=True):
    return ResNet(num_classes, BasicBlock, [3, 4, 6, 3], is_cuda=is_cuda)


def resnet50(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 4, 6, 3], is_cuda=is_cuda)


def resnet101(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 4, 23, 3], is_cuda=is_cuda)


def resnet152(num_classes, is_cuda=True):
    return ResNet(num_classes, Bottleneck, [3, 8, 36, 3], is_cuda=is_cuda)
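

if __name__ == '__main__':
    # Minimal smoke test (a sketch, not part of the original module; assumes
    # Anchors/BBoxTransform from identification.* run on CPU when is_cuda=False).
    # With the prior-initialised heads every anchor scores ~0.01, below the
    # 0.05 threshold, so this is expected to print [None, None, None].
    model = resnet18(num_classes=1, is_cuda=False)
    model.eval()
    with torch.no_grad():
        detections = model(torch.randn(1, 3, 256, 256))
    print(detections)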