| import torch |
| from torch import nn |
| import torch.nn.functional as F |
| from torch.autograd import Variable |
| from math import exp |
| from config import Config |
|
|
|
|
class ContourLoss(torch.nn.Module):
    """Contour loss made of a boundary-length term plus a region term."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target, weight=10):
        """
        Compute ``weight * length + region``.

        pred, target: tensors indexed with four axes (B, C, H, W). Per the
            original author's note, target is 1 inside the contour and 0
            outside it.
        weight: scalar multiplier applied to the length term.

        Returns a scalar tensor.
        """
        # Forward differences of the prediction along the last two axes.
        diff_rows = pred[:, :, 1:, :] - pred[:, :, :-1, :]
        diff_cols = pred[:, :, :, 1:] - pred[:, :, :, :-1]

        # Crop both difference maps to a common (H-2, W-2) grid, then square.
        sq_rows = diff_rows[:, :, 1:, :-2] ** 2
        sq_cols = diff_cols[:, :, :-2, 1:] ** 2
        grad_energy = torch.abs(sq_rows + sq_cols)

        # The small constant keeps the square root away from exactly zero.
        eps = 1e-8
        length = torch.sqrt(grad_energy + eps).mean()

        inside_value = torch.ones_like(pred)
        outside_value = torch.zeros_like(pred)

        region_in = (pred * (target - inside_value) ** 2).mean()
        region_out = ((1 - pred) * (target - outside_value) ** 2).mean()

        return weight * length + (region_in + region_out)
|
|
|
|
class IoULoss(torch.nn.Module):
    """Soft IoU loss, summed (not averaged) over the first axis of the input."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target):
        """
        Return the sum over samples of ``1 - IoU(pred[i], target[i])``.

        The intersection is the sum of the elementwise product and the union
        is ``sum(target) + sum(pred) - intersection``.

        NOTE(review): a sample where both pred and target sum to zero divides
        0 by 0 - confirm callers never feed such a sample.
        """
        total = 0.0
        for idx in range(pred.shape[0]):
            sample_pred = pred[idx, :, :, :]
            sample_target = target[idx, :, :, :]

            overlap = (sample_target * sample_pred).sum()
            union = sample_target.sum() + sample_pred.sum() - overlap

            total = total + (1 - overlap / union)
        return total
|
|
|
|
class StructureLoss(torch.nn.Module):
    """Weighted BCE-with-logits plus weighted soft IoU on raw logits."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target):
        """
        pred: raw logits (this method applies the sigmoid itself).
        target: tensor of the same shape as pred; the last two axes are
            treated as spatial.

        Returns the mean over the remaining axes of (weighted BCE + weighted IoU).
        """
        # Pixels whose value differs from their 31x31 local mean get up to 6x weight.
        local_mean = F.avg_pool2d(target, kernel_size=31, stride=1, padding=15)
        pixel_weight = 1 + 5 * torch.abs(local_mean - target)

        bce_map = F.binary_cross_entropy_with_logits(pred, target, reduction="none")
        weighted_bce = (pixel_weight * bce_map).sum(dim=(2, 3)) / pixel_weight.sum(
            dim=(2, 3)
        )

        prob = torch.sigmoid(pred)
        intersection = ((prob * target) * pixel_weight).sum(dim=(2, 3))
        combined = ((prob + target) * pixel_weight).sum(dim=(2, 3))
        # The +1 in numerator and denominator smooths the ratio.
        weighted_iou = 1 - (intersection + 1) / (combined - intersection + 1)

        return (weighted_bce + weighted_iou).mean()
|
|
|
|
class PatchIoULoss(torch.nn.Module):
    """IoU loss accumulated over non-overlapping 64x64 spatial patches."""

    def __init__(self):
        super(PatchIoULoss, self).__init__()
        self.iou_loss = IoULoss()

    def forward(self, pred, target):
        """
        Sum ``self.iou_loss`` over every 64x64 tile of the last two axes.

        pred, target: tensors indexed as (B, C, H, W). Tiles at the bottom and
            right edges are smaller when H or W is not a multiple of 64.

        Returns the accumulated loss (float 0.0 if there are no tiles).
        """
        win_y, win_x = 64, 64
        # Tile over the spatial axes (2 and 3). The previous code iterated
        # over the batch and channel sizes and stepped x by win_y, so for
        # typical inputs only the top-left patch was ever evaluated.
        height, width = target.shape[2], target.shape[3]

        iou_loss = 0.0
        for anchor_y in range(0, height, win_y):
            rows = slice(anchor_y, anchor_y + win_y)
            for anchor_x in range(0, width, win_x):
                cols = slice(anchor_x, anchor_x + win_x)
                patch_pred = pred[:, :, rows, cols]
                patch_target = target[:, :, rows, cols]
                iou_loss = iou_loss + self.iou_loss(patch_pred, patch_target)
        return iou_loss
|
|
|
|
class ThrReg_loss(torch.nn.Module):
    """Regulariser built from the squared distances of pred to 0 and to 1."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, gt=None):
        """
        Return ``mean(1 - (pred**2 + (pred - 1)**2))``.

        gt is accepted for signature compatibility and is not used.
        """
        dist_to_zero = (pred - 0) ** 2
        dist_to_one = (pred - 1) ** 2
        return (1 - (dist_to_zero + dist_to_one)).mean()
|
|
|
|
class ClsLoss(nn.Module):
    """
    Auxiliary classification loss for each refined class output.
    """

    def __init__(self):
        super().__init__()
        self.config = Config()
        # Per-criterion multipliers, read from the project configuration.
        self.lambdas_cls = self.config.lambdas_cls
        self.criterions_last = {"ce": nn.CrossEntropyLoss()}

    def forward(self, preds, gt):
        """
        Sum the weighted criteria over every non-None entry of preds.

        preds: iterable of prediction tensors; None entries are skipped.
        gt: target passed unchanged to each criterion.

        Returns the accumulated loss (float 0.0 when nothing contributed).
        """
        total = 0.0
        for level_pred in preds:
            if level_pred is not None:
                for key, loss_fn in self.criterions_last.items():
                    total += loss_fn(level_pred, gt) * self.lambdas_cls[key]
        return total
|
|
|
|
class PixLoss(nn.Module):
    """
    Pixel loss for each refined map output.

    Criteria are enabled by truthy entries in ``config.lambdas_pix_last``;
    the entry's value is also the multiplier applied to that criterion.
    """

    # Criteria that consume raw logits. StructureLoss calls
    # binary_cross_entropy_with_logits and applies its own sigmoid, so giving
    # it probabilities would apply the sigmoid twice.
    _LOGIT_CRITERIA = frozenset({"structure"})

    def __init__(self):
        super(PixLoss, self).__init__()
        self.config = Config()
        self.lambdas_pix_last = self.config.lambdas_pix_last

        # (config key, constructor) pairs, kept in the original registration
        # order so the losses are accumulated in the same sequence.
        available = (
            ("bce", nn.BCELoss),
            ("iou", IoULoss),
            ("iou_patch", PatchIoULoss),
            ("ssim", SSIMLoss),
            ("mae", nn.L1Loss),
            ("mse", nn.MSELoss),
            ("reg", ThrReg_loss),
            ("cnt", ContourLoss),
            ("structure", StructureLoss),
        )
        self.criterions_last = {}
        for key, constructor in available:
            if key in self.lambdas_pix_last and self.lambdas_pix_last[key]:
                self.criterions_last[key] = constructor()

    def forward(self, scaled_preds, gt):
        """
        Sum all enabled, weighted criteria over every prediction level.

        scaled_preds: iterable of logit maps. A map whose shape differs from
            gt is bilinearly resized to gt's spatial size first.
        gt: target tensor; ``gt.shape[2:]`` is used as the spatial size.

        Returns the accumulated loss (float 0.0 when nothing contributed).
        """
        loss = 0.0
        for pred_lvl in scaled_preds:
            if pred_lvl.shape != gt.shape:
                pred_lvl = nn.functional.interpolate(
                    pred_lvl, size=gt.shape[2:], mode="bilinear", align_corners=True
                )
            # Computed lazily, once per level, and shared by every criterion
            # that takes probabilities.
            prob_lvl = None
            for criterion_name, criterion in self.criterions_last.items():
                if criterion_name in self._LOGIT_CRITERIA:
                    criterion_input = pred_lvl
                else:
                    if prob_lvl is None:
                        prob_lvl = pred_lvl.sigmoid()
                    criterion_input = prob_lvl
                loss = loss + (
                    criterion(criterion_input, gt)
                    * self.lambdas_pix_last[criterion_name]
                )
        return loss
|
|
|
|
class SSIMLoss(torch.nn.Module):
    """SSIM-based loss: ``1 - (1 + ssim) / 2`` with a cached Gaussian window."""

    def __init__(self, window_size=11, size_average=True):
        super(SSIMLoss, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        """
        img1, img2: 4-axis tensors; axis 1 is the channel count used to size
            the window.

        Returns ``1 - (1 + ssim) / 2``, where ssim comes from ``_ssim`` with
        this instance's ``size_average`` setting.
        """
        (_, channel, _, _) = img1.size()

        cached = self.window
        # Compare device and dtype directly. The previous type-string check
        # could not tell two CUDA devices apart, so a window cached on one GPU
        # could be reused for an input living on another.
        reusable = (
            channel == self.channel
            and cached.device == img1.device
            and cached.dtype == img1.dtype
        )
        if reusable:
            window = cached
        else:
            window = create_window(self.window_size, channel).to(
                device=img1.device, dtype=img1.dtype
            )
            # Remember the rebuilt window for subsequent calls.
            self.window = window
            self.channel = channel

        ssim_value = _ssim(
            img1, img2, window, self.window_size, channel, self.size_average
        )
        return 1 - (1 + ssim_value) / 2
|
|
|
|
def gaussian(window_size, sigma):
    """Return a 1-D Gaussian of length window_size, normalised to sum to 1.

    The curve is centred on index ``window_size // 2`` and has standard
    deviation sigma.
    """
    center = window_size // 2
    denom = float(2 * sigma**2)
    weights = torch.Tensor(
        [exp(-((pos - center) ** 2) / denom) for pos in range(window_size)]
    )
    return weights / weights.sum()
|
|
|
|
def create_window(window_size, channel):
    """Build a (channel, 1, window_size, window_size) Gaussian window.

    The 2-D kernel is the outer product of the 1-D ``gaussian(window_size, 1.5)``
    with itself, repeated once per channel. The result is a contiguous float
    tensor; the deprecated ``Variable`` wrapper is no longer applied because
    plain tensors already carry autograd state.
    """
    kernel_1d = gaussian(window_size, 1.5).unsqueeze(1)
    kernel_2d = kernel_1d.mm(kernel_1d.t()).float().unsqueeze(0).unsqueeze(0)
    return kernel_2d.expand(channel, 1, window_size, window_size).contiguous()
|
|
|
|
| def _ssim(img1, img2, window, window_size, channel, size_average=True): |
| mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) |
| mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) |
|
|
| mu1_sq = mu1.pow(2) |
| mu2_sq = mu2.pow(2) |
| mu1_mu2 = mu1 * mu2 |
|
|
| sigma1_sq = ( |
| F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq |
| ) |
| sigma2_sq = ( |
| F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq |
| ) |
| sigma12 = ( |
| F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) |
| - mu1_mu2 |
| ) |
|
|
| C1 = 0.01**2 |
| C2 = 0.03**2 |
|
|
| ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ( |
| (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2) |
| ) |
|
|
| if size_average: |
| return ssim_map.mean() |
| else: |
| return ssim_map.mean(1).mean(1).mean(1) |
|
|
|
|
def SSIM(x, y):
    """Return the per-element SSIM dissimilarity map ``clamp((1 - ssim) / 2, 0, 1)``.

    Local statistics are taken with 3x3 average pooling (stride 1, padding 1),
    so the output has the same spatial size as the inputs.
    """
    C1 = 0.01**2
    C2 = 0.03**2

    # The pooling layer holds no state, so one instance serves every call.
    pool = nn.AvgPool2d(3, 1, 1)

    mu_x = pool(x)
    mu_y = pool(y)
    mu_x_mu_y = mu_x * mu_y
    mu_x_sq = mu_x.pow(2)
    mu_y_sq = mu_y.pow(2)

    sigma_x = pool(x * x) - mu_x_sq
    sigma_y = pool(y * y) - mu_y_sq
    sigma_xy = pool(x * y) - mu_x_mu_y

    numerator = (2 * mu_x_mu_y + C1) * (2 * sigma_xy + C2)
    denominator = (mu_x_sq + mu_y_sq + C1) * (sigma_x + sigma_y + C2)
    ssim_map = numerator / denominator

    return torch.clamp((1 - ssim_map) / 2, 0, 1)
|
|
|
|
def saliency_structure_consistency(x, y):
    """Return the mean of the ``SSIM(x, y)`` dissimilarity map as a scalar tensor."""
    return SSIM(x, y).mean()
|
|