# -*- coding: utf-8 -*-
"""
Copyright 2019 Petr Masopust, Aprar s.r.o.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Adapted from https://github.com/rainofmine/Face_Attention_Network
"""
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


def calc_iou(a, b, is_cuda=False):
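    """Pairwise IoU between boxes ``a`` and ``b``, both (N, 4) tensors of
    (x1, y1, x2, y2) corners.

    ``b`` is processed in chunks of ``step`` boxes with in-place operations to
    keep peak memory low for large anchor sets. Returns a (len(a), len(b))
    matrix; rows index ``a``, columns index ``b``.
    """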
    step = 20
    IoU = torch.zeros((len(a), len(b)))
    if is_cuda:
        IoU = IoU.cuda()

    step_count = int(len(b) / step)
    if len(b) % step != 0:
        step_count += 1

    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    for i in range(step_count):
        iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[i * step:(i + 1) * step, 2])
        iw.sub_(torch.max(torch.unsqueeze(a[:, 0], 1), b[i * step:(i + 1) * step, 0]))
        ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[i * step:(i + 1) * step, 3])
        ih.sub_(torch.max(torch.unsqueeze(a[:, 1], 1), b[i * step:(i + 1) * step, 1]))
        iw.clamp_(min=0)
        ih.clamp_(min=0)
        iw.mul_(ih)
        del ih
        ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area[i * step:(i + 1) * step] - iw
        ua = torch.clamp(ua, min=1e-8)
        iw.div_(ua)
        del ua
        IoU[:, i * step:(i + 1) * step] = iw

    return IoU


def calc_iou_vis(a, b):
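    """Intersection of each box in ``a`` with each box in ``b``, normalized by
    the area of ``b`` only. Despite the name this is intersection-over-area
    (coverage of ``b``), not a symmetric IoU.
    """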
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
    ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])
    iw = torch.clamp(iw, min=0)
    ih = torch.clamp(ih, min=0)
    intersection = iw * ih
    IoU = intersection / area
    return IoU


def IoG(box_a, box_b):
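    """Intersection over Ground truth for paired boxes (row i of ``box_a``
    against row i of ``box_b``): the fraction of each ``box_a`` covered by the
    corresponding ``box_b``.
    """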
    inter_xmin = torch.max(box_a[:, 0], box_b[:, 0])
    inter_ymin = torch.max(box_a[:, 1], box_b[:, 1])
    inter_xmax = torch.min(box_a[:, 2], box_b[:, 2])
    inter_ymax = torch.min(box_a[:, 3], box_b[:, 3])
    Iw = torch.clamp(inter_xmax - inter_xmin, min=0)
    Ih = torch.clamp(inter_ymax - inter_ymin, min=0)
    I = Iw * Ih
    G = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    return I / G


class FocalLoss(nn.Module):
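    """Focal classification loss plus smooth L1 box-regression loss for an
    anchor-based detector (Lin et al., "Focal Loss for Dense Object
    Detection"). Anchors with IoU >= 0.5 against some ground-truth box are
    treated as positives, anchors below 0.4 as negatives, and the band in
    between is ignored.
    """
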
    def __init__(self, is_cuda=True):
        super(FocalLoss, self).__init__()
        self.is_cuda = is_cuda

    def forward(self, classifications, regressions, anchors, annotations):
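        """
        classifications: (batch, num_anchors, num_classes) class probabilities.
        regressions: (batch, num_anchors, 4) predicted box deltas.
        anchors: (*, num_anchors, 4) anchor corners; only anchors[0] is used,
            i.e. anchors are assumed identical across the batch.
        annotations: (batch, max_boxes, 5) ground truth as
            (x1, y1, x2, y2, class), where class == -1 marks padding rows.

        Returns the batch-averaged classification and regression losses, each
        as a 1-element tensor.
        """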
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]
        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    regression_losses.append(torch.tensor(0).float().cuda())
                    classification_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())
                    classification_losses.append(torch.tensor(0).float())
                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(anchor, bbox_annotation[:, :4], self.is_cuda)  # num_anchors x num_annotations
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # both num_anchors

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1
            if self.is_cuda:
                targets = targets.cuda()
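            # Anchor matching: IoU < 0.4 -> negative (all-zero target),
            # IoU >= 0.5 -> positive (one-hot target); anchors in between keep
            # the -1 marker and are excluded from the classification loss.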
            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)
            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
            alpha_factor = torch.ones(targets.shape)
            if self.is_cuda:
                alpha_factor = alpha_factor.cuda()
            alpha_factor *= alpha
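            # Focal modulation: scale the BCE by alpha_t * (1 - p_t) ** gamma
            # so that easy, confidently classified anchors contribute little.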
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            cls_zeros = torch.zeros(cls_loss.shape)
            if self.is_cuda:
                cls_zeros = cls_zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, cls_zeros)

            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
            # compute the loss for regression
            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths and heights to at least 1 pixel
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                if self.is_cuda:
                    targets = targets.cuda() / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
                else:
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])
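                # Smooth L1 (Huber) loss with the quadratic/linear transition
                # at |diff| = 1/9, as in common RetinaNet-style implementations.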
                regression_diff = torch.abs(targets - regression[positive_indices, :])
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                if self.is_cuda:
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
            torch.stack(regression_losses).mean(dim=0, keepdim=True)


class LevelAttentionLoss(nn.Module):
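    """Supervision for the per-level attention maps of the Face Attention
    Network: each ground-truth box is assigned to one pyramid level by its
    area, rasterized as a binary mask at that level's resolution, and the
    predicted map is trained against it with binary cross-entropy.
    """
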
    def __init__(self, is_cuda=True):
        super(LevelAttentionLoss, self).__init__()
        self.is_cuda = is_cuda

    def forward(self, img_batch_shape, attention_mask, bboxs):
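        """
        img_batch_shape: (N, C, H, W) shape of the input image batch.
        attention_mask: list of per-level attention maps, each of shape
            (batch, 1, h_level, w_level) with values in [0, 1].
        bboxs: (batch, max_boxes, 5) annotations; class == -1 marks padding.

        Returns the batch-averaged mask loss as a 1-element tensor.
        """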
        h, w = img_batch_shape[2], img_batch_shape[3]

        mask_losses = []
        batch_size = bboxs.shape[0]
        for j in range(batch_size):
            bbox_annotation = bboxs[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    mask_losses.append(torch.tensor(0).float().cuda())
                else:
                    mask_losses.append(torch.tensor(0).float())
                continue

            # keep only boxes that lie inside the image bounds
            cond1 = torch.le(bbox_annotation[:, 0], w)
            cond2 = torch.le(bbox_annotation[:, 1], h)
            cond3 = torch.le(bbox_annotation[:, 2], w)
            cond4 = torch.le(bbox_annotation[:, 3], h)
            cond = cond1 * cond2 * cond3 * cond4

            bbox_annotation = bbox_annotation[cond, :]

            if bbox_annotation.shape[0] == 0:
                if self.is_cuda:
                    mask_losses.append(torch.tensor(0).float().cuda())
                else:
                    mask_losses.append(torch.tensor(0).float())
                continue

            bbox_area = (bbox_annotation[:, 2] - bbox_annotation[:, 0]) * (
                    bbox_annotation[:, 3] - bbox_annotation[:, 1])

            mask_loss = []
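            # Each box supervises only the pyramid level whose scale range
            # matches its area: the base size doubles per level (32 px at the
            # first level) and the 1.58 factor appears to cover the largest
            # anchor scale per level.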
            for level_id in range(len(attention_mask)):
                attention_map = attention_mask[level_id][j, 0, :, :]

                min_area = (2 ** (level_id + 5)) ** 2 * 0.5
                max_area = (2 ** (level_id + 5) * 1.58) ** 2 * 2

                level_bbox_indice1 = torch.ge(bbox_area, min_area)
                level_bbox_indice2 = torch.le(bbox_area, max_area)
                level_bbox_indice = level_bbox_indice1 * level_bbox_indice2

                level_bbox_annotation = bbox_annotation[level_bbox_indice, :].clone()
                # level_bbox_annotation = bbox_annotation.clone()

                attention_h, attention_w = attention_map.shape

                if level_bbox_annotation.shape[0]:
                    # scale box coordinates to the attention map resolution
                    level_bbox_annotation[:, 0] *= attention_w / w
                    level_bbox_annotation[:, 1] *= attention_h / h
                    level_bbox_annotation[:, 2] *= attention_w / w
                    level_bbox_annotation[:, 3] *= attention_h / h

                mask_gt = torch.zeros(attention_map.shape)
                if self.is_cuda:
                    mask_gt = mask_gt.cuda()

                for i in range(level_bbox_annotation.shape[0]):
                    x1 = max(int(level_bbox_annotation[i, 0]), 0)
                    y1 = max(int(level_bbox_annotation[i, 1]), 0)
                    x2 = min(math.ceil(level_bbox_annotation[i, 2]) + 1, attention_w)
                    y2 = min(math.ceil(level_bbox_annotation[i, 3]) + 1, attention_h)
                    mask_gt[y1:y2, x1:x2] = 1

                # flatten both maps; every entry is >= 0, so nothing is dropped
                mask_gt = mask_gt[mask_gt >= 0]
                mask_predict = attention_map[attention_map >= 0]
                mask_loss.append(F.binary_cross_entropy(mask_predict, mask_gt))

            mask_losses.append(torch.stack(mask_loss).mean())

        return torch.stack(mask_losses).mean(dim=0, keepdim=True)
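

if __name__ == '__main__':
    # Minimal CPU smoke test with random tensors. This is only a sketch of the
    # shapes the losses expect, inferred from the forward() methods above; the
    # 128x128 image size, 32x32 anchors and five pyramid strides are arbitrary
    # assumptions, not values from the original training pipeline.
    torch.manual_seed(0)

    num_anchors, num_classes = 100, 2
    classifications = torch.rand(1, num_anchors, num_classes)  # probabilities
    regressions = torch.randn(1, num_anchors, 4)  # box deltas

    # Random 32x32 anchors with top-left corners inside a 128x128 image.
    top_left = torch.rand(1, num_anchors, 2) * 96
    anchors = torch.cat((top_left, top_left + 32), dim=2)

    # One ground-truth box per image: (x1, y1, x2, y2, class).
    annotations = torch.tensor([[[40.0, 40.0, 72.0, 72.0, 0.0]]])

    cls_loss, reg_loss = FocalLoss(is_cuda=False)(classifications, regressions, anchors, annotations)
    print('classification loss:', cls_loss.item(), '| regression loss:', reg_loss.item())

    # One attention map per pyramid level, here with strides 8..128.
    attention_masks = [torch.rand(1, 1, 128 // s, 128 // s) for s in (8, 16, 32, 64, 128)]
    mask_loss = LevelAttentionLoss(is_cuda=False)(torch.Size([1, 3, 128, 128]), attention_masks, annotations)
    print('attention loss:', mask_loss.item())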