print("Importing standard...")
from abc import ABC, abstractmethod
print("Importing external...")
import torch
from torch.nn.functional import binary_cross_entropy
# from matplotlib import pyplot as plt
print("Importing internal...")
from utils import preprocess_masks_features, get_row_col, symlog, calculate_iou
######### BINARY LOSSES ###############
def my_lovasz_hinge(logits, gt, downsample=False):
    # Batched Lovasz hinge (Berman et al., 2018): a convex surrogate for the
    # IoU of binary masks. logits, gt: B, HW. `downsample` (an int >= 2)
    # keeps every `downsample`-th pixel from a random offset to save memory.
    if downsample:
        offset = int(torch.randint(downsample, (1,)))  # offset in [0, downsample)
        logits, gt = logits[:, offset::downsample], gt[:, offset::downsample]
    # B, HW
    gt = gt.float()
    areas = gt.sum(dim=1, keepdim=True)  # B, 1
    # per_image = True, ignore = None
    signs = 2 * gt - 1
    errors = 1 - logits * signs  # hinge errors: positive on the wrong side of the margin
    errors_sorted, perm = torch.sort(errors, dim=1, descending=True)
    gt_sorted = torch.gather(gt, 1, perm)  # B, HW
    # lovasz grad: discrete derivative of the Jaccard loss along the sorted errors
    intersection = areas - gt_sorted.cumsum(dim=1)  # B, HW
    union = areas + (1 - gt_sorted).cumsum(dim=1)  # B, HW
    jaccard = 1 - intersection / union  # B, HW
    jaccard[:, 1:] = jaccard[:, 1:] - jaccard[:, :-1]
    loss = (torch.relu(errors_sorted) * jaccard).sum(dim=1)  # B,
    return torch.nanmean(loss)  # nanmean guards against degenerate images
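

# A hedged sanity check (added for illustration, not part of the training
# code): on a confidently correct prediction every hinge error is negative,
# relu zeroes them all out, and the Lovasz surrogate collapses to exactly zero.
def _demo_lovasz_hinge():
    logits = torch.tensor([[5.0, -5.0, 5.0, -5.0]])  # strong, correct logits
    gt = torch.tensor([[1.0, 0.0, 1.0, 0.0]])
    assert my_lovasz_hinge(logits, gt).item() == 0.0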
def focal_loss(scores, targets, alpha=0.25, gamma=2):
    # Focal loss (Lin et al., 2017) on probabilities: unlike torchvision's
    # sigmoid_focal_loss, this expects `scores` already in (0, 1).
    p = scores
    ce_loss = binary_cross_entropy(p, targets, reduction="none")
    p_t = p * targets + (1 - p) * (1 - targets)  # probability of the true class
    loss = ce_loss * ((1 - p_t) ** gamma)  # down-weight easy examples
    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss  # class-balance weighting
    return loss  # elementwise; the caller reduces
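

# Hedged illustration (added): gamma=2 re-weights per-pixel BCE by
# (1 - p_t)^2, so an easy positive (p=0.9) contributes orders of magnitude
# less than a hard one (p=0.1).
def _demo_focal_loss():
    p = torch.tensor([0.9, 0.1])  # predicted probabilities
    t = torch.tensor([1.0, 1.0])  # both pixels are positives
    easy, hard = focal_loss(p, t)
    assert easy < hard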
# binary_cross_entropy (imported above) and my_lovasz_hinge round out the binary losses
######### SUBFUNCTIONS ###############
def get_distances(features, refs, sigma, norm_p, square_distances, H, W):
    # features: B, 1, F, HW
    # refs: B, M, F, 1
    # sigma: scalar or any shape broadcastable to B, M, 1, H*W
    B, M = refs.shape[0], refs.shape[1]
    distances = torch.norm(
        features - refs, dim=2, p=norm_p, keepdim=True
    )  # B, M, 1, H*W
    distances = distances**2 if square_distances else distances
    distances = (distances / (2 * sigma**2)).reshape(B, M, H * W)  # Gaussian-style scaling
    return distances
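

# Hedged shape check (added): two images, three reference embeddings of four
# features each, against an 8x8 grid; a scalar sigma broadcasts the same way
# a B, M, 1, 1 tensor would.
def _demo_get_distances():
    feats = torch.randn(2, 1, 4, 64)  # B, 1, F, H*W
    refs = torch.randn(2, 3, 4, 1)  # B, M, F, 1
    d = get_distances(feats, refs, sigma=1, norm_p=2, square_distances=True, H=8, W=8)
    assert d.shape == (2, 3, 64)  # B, M, H*W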
def activate(features, masks, activation, use_sigma, offset_pos, ret_prediction):
    # Map raw network features to bounded embeddings; sigmoid and symlog both
    # squash large values, which keeps the distance computation stable.
    # prepare features
    assert activation in ["sigmoid", "symlog"]
    if masks is None:  # when running inference on a single feature map
        B, M = 1, 1
        F, N = sorted(features.shape)  # assumes F < H*W
        H, W = [int(N ** (0.5))] * 2  # assumes a square feature map
        features = features.reshape(1, 1, -1, H * W)
    else:
        masks, features, M, B, H, W, F = preprocess_masks_features(masks, features)
    # features: B, 1, F, H*W
    # masks: B, M, 1, H*W
    if use_sigma:
        # the last channel parameterizes a per-pixel bandwidth via softplus
        sigma = torch.nn.functional.softplus(features)[:, :, -1:]  # B, 1, 1, H*W
        features = features[:, :, :-1]
        F = features.shape[2]
    else:
        sigma = 1
    features = symlog(features) if activation == "symlog" else torch.sigmoid(features)
    if offset_pos:
        # add (row, col) coordinates to the first two channels so the
        # embeddings become position-aware
        assert F >= 2
        row, col = get_row_col(H, W, features.device)
        row = row.reshape(1, 1, 1, H, 1).expand(B, 1, 1, H, W).reshape(B, 1, 1, H * W)
        col = col.reshape(1, 1, 1, 1, W).expand(B, 1, 1, H, W).reshape(B, 1, 1, H * W)
        positional_features = torch.cat([row, col], dim=2)  # B, 1, 2, H*W
        # out-of-place update: writing into the activation output in place
        # would break autograd, since sigmoid's backward needs its output
        features = torch.cat(
            [features[:, :, :2] + positional_features, features[:, :, 2:]], dim=2
        )
    prediction = features.reshape(B, 1, -1, H, W) if ret_prediction else None
    if masks is None:
        features = features.reshape(-1, H * W)
        sigma = sigma.reshape(-1, H * W) if use_sigma else 1
        return features, sigma, H, W
    return features, masks, sigma, prediction, B, M, F, H, W
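

# Hedged sketch of the inference path (added): with masks=None, activate
# infers a square H*W grid from the flat feature map; assumes F < H*W and
# that utils.symlog is the usual sign(x) * log(1 + |x|) squashing.
def _demo_activate_inference():
    feats = torch.randn(4, 64)  # F=4 channels over an 8x8 grid
    f, sigma, H, W = activate(feats, None, "symlog", False, False, False)
    assert (H, W) == (8, 8) and f.shape == (4, 64) and sigma == 1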
class AbstractLoss(ABC):
    # Interface: a training loss plus a matching inference routine that
    # turns one query pixel into a binary mask.
    @staticmethod
    @abstractmethod
    def loss(features, masks, ret_prediction=False, **kwargs):
        pass

    @staticmethod
    @abstractmethod
    def get_mask_from_query(features, sindex, **kwargs):
        pass
class IISLoss(AbstractLoss):
    @staticmethod
    def loss(features, masks, ret_prediction=False, K=3, logger=None):
        features, masks, sigma, prediction, B, M, F, H, W = activate(
            features, masks, "symlog", False, False, ret_prediction
        )
        # sample K query pixels inside each mask via a shared random permutation
        rindices = torch.randperm(H * W, device=masks.device)
        # the following works only if every mask has at least K pixels
        sindices = torch.stack(
            [
                torch.stack([rindices[masks[b, m, 0, rindices]][:K] for m in range(M)])
                for b in range(B)
            ]
        )  # B, M, K
        feats_at_sindices = torch.gather(
            features.permute(0, 3, 1, 2).expand(B, H * W, K, F),
            dim=1,
            index=sindices.reshape(B, M, K, 1).expand(B, M, K, F),
        )  # B, M, K, F
        feats_at_sindices = feats_at_sindices.reshape(B, M, K, F, 1)  # B, M, K, F, 1
        # distance of every pixel to every sampled query embedding
        dists = get_distances(
            features, feats_at_sindices.reshape(B, M * K, F, 1), sigma, 2, True, H, W
        )
        score = torch.exp(-dists)  # B, M*K, H*W, in (0, 1]
        targets = (
            masks.expand(B, M, K, H * W).reshape(B, M * K, H * W).float()
        )  # B, M*K, H*W
        floss = focal_loss(score, targets).mean()
        # map scores to [-1, 1] so they act as logits for the Lovasz hinge
        lloss = my_lovasz_hinge(
            score.view(B * M * K, H * W) * 2 - 1,
            targets.view(B * M * K, H * W),
        )
        loss = floss + lloss
        return loss, prediction
    @staticmethod
    def get_mask_from_query(features, sindex):
        features, _, H, W = activate(features, None, "symlog", False, False, False)
        F = features.shape[0]
        query_feat = features[:, sindex]  # embedding of the clicked pixel
        dists = get_distances(
            features.reshape(1, 1, F, H * W),  # all pixel embeddings
            query_feat.reshape(1, 1, F, 1),  # single reference embedding
            1, 2, True, H, W,  # sigma=1, L2 norm, squared distances
        )
        score = torch.exp(-dists)  # 1, 1, H*W
        pred = score > 0.5  # threshold the similarity into a binary mask
        return pred
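

# Hedged usage sketch (added): querying any pixel always selects that pixel
# itself, since its distance to its own embedding is zero and exp(-0) = 1 > 0.5.
def _demo_get_mask_from_query():
    feats = torch.randn(4, 64)  # F=4 channels over an 8x8 grid
    pred = IISLoss.get_mask_from_query(feats, 0)  # query pixel index 0
    assert pred.shape == (1, 1, 64) and bool(pred.reshape(-1)[0])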
def iis_iou(features, masks, get_mask_from_query, K=20):
    # Monte-Carlo IoU: sample K query pixels per mask, predict a mask from
    # each query, and average the IoU against the ground truth.
    masks, features, M, B, H, W, F = preprocess_masks_features(masks, features)
    # features: B, 1, F, H*W
    # masks: B, M, 1, H*W
    rindices = torch.randperm(H * W, device=masks.device)
    sindices = torch.stack(
        [
            torch.stack([rindices[masks[b, m, 0, rindices]][:K] for m in range(M)])
            for b in range(B)
        ]
    )  # B, M, K
    cum_iou, n_samples = 0, 0
    for b in range(B):
        for m in range(M):
            for k in range(K):
                sindex = sindices[b, m, k]
                pred = get_mask_from_query(features[b, 0], sindex)
                iou = calculate_iou(pred, masks[b, m, 0, :])
                cum_iou += iou
                n_samples += 1
    return cum_iou / n_samples
losses_names = [
"iis",
]
def get_loss_class(loss_name):
    if loss_name == "iis":
        return IISLoss
    raise NotImplementedError(loss_name)


def get_get_mask_from_query(loss_name):
    loss_class = get_loss_class(loss_name)
    return loss_class.get_mask_from_query


def get_loss(loss_name):
    loss_class = get_loss_class(loss_name)
    return loss_class.loss
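

# Hedged usage note (added): the getters expose plain functions, so training
# and evaluation code can stay agnostic of the concrete loss class.
def _demo_loss_lookup():
    loss_fn = get_loss("iis")
    query_fn = get_get_mask_from_query("iis")
    assert loss_fn is IISLoss.loss and query_fn is IISLoss.get_mask_from_query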