| |
| |
| |
| |
| |
| |
| |
| |
| import collections |
| import torch |
| import itertools |
| from typing import List |
| import math |
| import torchvision |
# Smallest and largest prior-box side lengths for one feature map.
# generate_ssd_priors divides both by image_size, so they share its units.
SSDBoxSizes = collections.namedtuple('SSDBoxSizes', 'min max')

# Description of one SSD feature map: its grid size, the shrinkage used to
# place cell centers, the box sizes, and the extra aspect ratios to emit.
SSDSpec = collections.namedtuple(
    'SSDSpec',
    'feature_map_size shrinkage box_sizes aspect_ratios',
)
|
|
|
|
| def generate_ssd_priors(specs: List[SSDSpec], image_size, clamp=True) -> torch.Tensor: |
| """Generate SSD Prior Boxes. |
| |
| It returns the center, height and width of the priors. The values are relative to the image size |
| Args: |
| specs: SSDSpecs about the shapes of sizes of prior boxes. i.e. |
| specs = [ |
| SSDSpec(38, 8, SSDBoxSizes(30, 60), [2]), |
| SSDSpec(19, 16, SSDBoxSizes(60, 111), [2, 3]), |
| SSDSpec(10, 32, SSDBoxSizes(111, 162), [2, 3]), |
| SSDSpec(5, 64, SSDBoxSizes(162, 213), [2, 3]), |
| SSDSpec(3, 100, SSDBoxSizes(213, 264), [2]), |
| SSDSpec(1, 300, SSDBoxSizes(264, 315), [2]) |
| ] |
| image_size: image size. |
| clamp: if true, clamp the values to make fall between [0.0, 1.0] |
| Returns: |
| priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values |
| are relative to the image size. |
| """ |
| priors = [] |
| for spec in specs: |
| scale = image_size / spec.shrinkage |
| for j, i in itertools.product(range(spec.feature_map_size), repeat=2): |
| x_center = (i + 0.5) / scale |
| y_center = (j + 0.5) / scale |
|
|
| |
| size = spec.box_sizes.min |
| h = w = size / image_size |
| priors.append([ |
| x_center, |
| y_center, |
| w, |
| h |
| ]) |
|
|
| |
| size = math.sqrt(spec.box_sizes.max * spec.box_sizes.min) |
| h = w = size / image_size |
| priors.append([ |
| x_center, |
| y_center, |
| w, |
| h |
| ]) |
|
|
| |
| size = spec.box_sizes.min |
| h = w = size / image_size |
| for ratio in spec.aspect_ratios: |
| ratio = math.sqrt(ratio) |
| priors.append([ |
| x_center, |
| y_center, |
| w * ratio, |
| h / ratio |
| ]) |
| priors.append([ |
| x_center, |
| y_center, |
| w / ratio, |
| h * ratio |
| ]) |
|
|
| priors = torch.tensor(priors) |
| if clamp: |
| torch.clamp(priors, 0.0, 1.0, out=priors) |
| return priors |
|
|
|
|
def convert_locations_to_boxes(locations, priors, center_variance,
                               size_variance):
    r"""Convert SSD regression outputs into center-form boxes.

    The encoding used for the regression targets is:
        $$predicted\_center * center\_variance = \frac {real\_center - prior\_center} {prior\_size}$$
        $$exp(predicted\_size * size\_variance) = \frac {real\_size} {prior\_size}$$
    This function applies it in the inverse direction.

    Args:
        locations (batch_size, num_priors, 4): the regression output of SSD.
        priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes
            in center form.
        center_variance: a float used to change the scale of the center.
        size_variance: a float used to change the scale of the size.

    Returns:
        boxes: center-form boxes with the same leading dimensions as
            ``locations``. The two size components keep the order used by
            ``priors`` (``generate_ssd_priors`` emits
            [[center_x, center_y, w, h]]). All the values are relative to the
            image size.
    """
    # Give the priors a leading batch dimension so they broadcast against
    # batched locations.
    if priors.dim() + 1 == locations.dim():
        priors = priors.unsqueeze(0)
    prior_centers = priors[..., :2]
    prior_sizes = priors[..., 2:]
    centers = locations[..., :2] * center_variance * prior_sizes + prior_centers
    sizes = torch.exp(locations[..., 2:] * size_variance) * prior_sizes
    return torch.cat([centers, sizes], dim=locations.dim() - 1)
|
|
|
|
def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
    """Encode center-form boxes as regression targets relative to priors.

    The center offset is divided by the prior size and by ``center_variance``;
    the size is encoded as the log of the box/prior size ratio divided by
    ``size_variance``.

    Args:
        center_form_boxes: boxes whose last dimension is
            (center_x, center_y, size_0, size_1).
        center_form_priors: priors in the same form; when they have exactly
            one dimension fewer than the boxes, a leading dimension is added
            so that they broadcast.
        center_variance: divisor applied to the encoded center offsets.
        size_variance: divisor applied to the encoded log size ratios.

    Returns:
        A tensor of encoded locations, concatenated along the last dimension.
    """
    priors = center_form_priors
    if priors.dim() + 1 == center_form_boxes.dim():
        priors = priors.unsqueeze(0)

    prior_centers, prior_sizes = priors[..., :2], priors[..., 2:]
    center_offsets = (center_form_boxes[..., :2] - prior_centers) / prior_sizes / center_variance
    # The ratio is floored at 1e-8 so that the logarithm stays finite.
    size_ratios = torch.clamp(center_form_boxes[..., 2:] / prior_sizes, min=1e-8)
    size_offsets = torch.log(size_ratios) / size_variance
    return torch.cat([center_offsets, size_offsets], dim=-1)
|
|
|
|
def area_of(left_top, right_bottom) -> torch.Tensor:
    """Return the areas of rectangles described by two opposite corners.

    Args:
        left_top (N, 2): left top corner.
        right_bottom (N, 2): right bottom corner.

    Returns:
        area (N): the area of each rectangle; a rectangle whose extent is
            negative along either axis has area 0.
    """
    # Negative extents (corners in the wrong order) are clipped to zero.
    extent = (right_bottom - left_top).clamp(min=0.0)
    first_side = extent[..., 0]
    second_side = extent[..., 1]
    return first_side * second_side
|
|
|
|
def iou_of(boxes0, boxes1, eps=1e-5):
    """Compute the intersection-over-union (Jaccard index) of corner-form boxes.

    Args:
        boxes0 (N, 4): ground truth boxes.
        boxes1 (N or 1, 4): predicted boxes.
        eps: a small number added to the denominator to avoid dividing by 0.

    Returns:
        iou (N): IoU values.
    """
    # The intersection rectangle spans the larger of the two top-left corners
    # and the smaller of the two bottom-right corners.
    inter_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
    inter_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])
    intersection = area_of(inter_left_top, inter_right_bottom)

    union = (
        area_of(boxes0[..., :2], boxes0[..., 2:])
        + area_of(boxes1[..., :2], boxes1[..., 2:])
        - intersection
    )
    return intersection / (union + eps)
|
|
|
|
def assign_priors(gt_boxes, gt_labels, corner_form_priors,
                  iou_threshold):
    """Assign ground truth boxes and labels to priors.

    Every prior is matched to the target it overlaps most. In addition, each
    target claims the prior that overlaps it most, so that no target is left
    unmatched. Priors whose best IoU is below ``iou_threshold`` (and that were
    not claimed by a target) are labelled 0.

    Args:
        gt_boxes (num_targets, 4): ground truth boxes.
        gt_labels (num_targets): labels of targets.
        corner_form_priors (num_priors, 4): corner form priors.
        iou_threshold: minimum IoU for a prior to keep its target's label.

    Returns:
        boxes (num_priors, 4): the ground truth box assigned to each prior.
        labels (num_priors): the label assigned to each prior.
    """
    if gt_boxes.size(0) == 0:
        # No targets: every prior is labelled 0. The outputs are allocated on
        # the priors' device so they match the non-empty path.
        labels = torch.zeros(
            corner_form_priors.size(0),
            dtype=torch.long,
            device=corner_form_priors.device,
        )
        boxes = torch.zeros_like(corner_form_priors)
        return boxes, labels

    # (num_priors, num_targets) IoU matrix.
    ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1))

    # Best target for each prior, shape (num_priors).
    best_target_per_prior, best_target_per_prior_index = ious.max(1)
    # Best prior for each target, shape (num_targets).
    best_prior_per_target, best_prior_per_target_index = ious.max(0)

    # Kept as a sequential loop on purpose: when two targets share the same
    # best prior, the later target wins deterministically.
    for target_index, prior_index in enumerate(best_prior_per_target_index):
        best_target_per_prior_index[prior_index] = target_index

    # Mark the claimed priors with the value 2 so they are never filtered by
    # the threshold comparison below.
    best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2)

    labels = gt_labels[best_target_per_prior_index]
    labels[best_target_per_prior < iou_threshold] = 0
    boxes = gt_boxes[best_target_per_prior_index]
    return boxes, labels
|
|
|
|
def hard_negative_mining(loss, labels, neg_pos_ratio):
    """Select all positives plus the hardest negatives of each image.

    It is used to suppress the presence of a large number of negative
    predictions. It works on image level, not batch level: for every image it
    keeps all the positive predictions and only the highest-loss negative
    predictions, so that the ratio between negatives and positives is no more
    than the given ratio.

    The ``loss`` tensor passed in is not modified.

    Args:
        loss (N, num_priors): the loss for each example.
        labels (N, num_priors): the labels; values > 0 are positives.
        neg_pos_ratio: the ratio between the negative examples and positive
            examples.

    Returns:
        mask (N, num_priors): boolean mask that is True for every positive
            and for the selected negatives.
    """
    pos_mask = labels > 0
    num_pos = pos_mask.long().sum(dim=1, keepdim=True)
    num_neg = num_pos * neg_pos_ratio

    # Out-of-place fill: positives get -inf so they sort last, without
    # overwriting the caller's loss values.
    neg_loss = loss.masked_fill(pos_mask, -math.inf)
    _, indexes = neg_loss.sort(dim=1, descending=True)
    # Sorting the sort indices gives each prior's rank in descending-loss order.
    _, orders = indexes.sort(dim=1)
    neg_mask = orders < num_neg
    return pos_mask | neg_mask
|
|
|
|
def center_form_to_corner_form(locations):
    """Convert boxes from (center, size) form to (min corner, max corner) form.

    The first two components of the last dimension are read as the center and
    the remaining ones as the full size; the result is the center minus and
    plus half the size, concatenated along the last dimension.
    """
    centers = locations[..., :2]
    half_sizes = locations[..., 2:] / 2
    return torch.cat([centers - half_sizes, centers + half_sizes], dim=-1)
|
|
|
|
def corner_form_to_center_form(boxes):
    """Convert boxes from (min corner, max corner) form to (center, size) form.

    The center is the midpoint of the two corners and the size is their
    difference; both are concatenated along the last dimension.
    """
    min_corner = boxes[..., :2]
    max_corner = boxes[..., 2:]
    centers = (min_corner + max_corner) / 2
    sizes = max_corner - min_corner
    return torch.cat([centers, sizes], dim=-1)
|
|
|
|
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Greedy (hard) non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining candidate and discards the
    candidates whose IoU with it exceeds ``iou_threshold``.

    Args:
        box_scores (N, 5): boxes in corner-form and probabilities.
        iou_threshold: intersection over union threshold.
        top_k: keep top_k results. If k <= 0, keep all the results.
        candidate_size: only consider the candidates with the highest scores.

    Returns:
        The rows of ``box_scores`` that were kept, in descending score order.
    """
    probabilities = box_scores[:, -1]
    corners = box_scores[:, :-1]
    _, order = torch.sort(probabilities, descending=True)
    candidates = order[:candidate_size]

    kept = []
    while len(candidates) > 0:
        best = candidates[0]
        kept.append(best.item())
        reached_top_k = top_k > 0 and len(kept) == top_k
        if reached_top_k or len(candidates) == 1:
            break
        best_box = corners[best, :]
        candidates = candidates[1:]
        overlaps = iou_of(corners[candidates, :], best_box.unsqueeze(0))
        # Only candidates that do not overlap the kept box too much survive.
        candidates = candidates[overlaps <= iou_threshold]

    return box_scores[kept, :]
|
|
|
|
def nms(box_scores, nms_method=None, score_threshold=None, iou_threshold=None,
        sigma=0.5, top_k=-1, candidate_size=200):
    """Run non-maximum suppression on scored corner-form boxes.

    Args:
        box_scores (N, 5): boxes (first four columns) and scores (fifth column).
        nms_method: "soft" dispatches to ``soft_nms``; any other value uses
            ``torchvision.ops.nms``.
        score_threshold: forwarded to ``soft_nms``; unused otherwise.
        iou_threshold: forwarded to ``torchvision.ops.nms``; unused by the
            soft method.
        sigma: forwarded to ``soft_nms``; unused otherwise.
        top_k: keep at most top_k results. If top_k <= 0, keep all of them.
        candidate_size: accepted for compatibility; not used by this function.

    Returns:
        The kept rows of ``box_scores``. An empty input is returned unchanged
        by the non-soft path.
    """
    if nms_method == "soft":
        return soft_nms(box_scores, score_threshold, sigma, top_k)

    # Nothing to suppress; return before calling torchvision on an empty input.
    if box_scores.size(0) == 0:
        return box_scores

    corners, probabilities = box_scores[:, :4], box_scores[:, 4]
    kept = torchvision.ops.nms(corners, probabilities, iou_threshold)
    if top_k > 0:
        kept = kept[:top_k]
    return box_scores[kept]
|
|
|
|
def soft_nms(box_scores, score_threshold, sigma=0.5, top_k=-1):
    """Soft NMS implementation.

    Instead of discarding boxes that overlap a picked box, their scores are
    decayed; boxes are dropped only once their score is no longer above
    ``score_threshold``. The ``box_scores`` tensor passed in is not modified.

    References:
        https://arxiv.org/abs/1704.04503
        https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/cython_nms.pyx

    Args:
        box_scores (N, 5): boxes in corner-form and probabilities.
        score_threshold: boxes whose decayed score is not greater than this
            value are no longer considered.
        sigma: the parameter in score re-computation.
            scores[i] = scores[i] * exp(-(iou_i)^2 / sigma)
        top_k: keep top_k results. If k <= 0, keep all the results.

    Returns:
        picked_box_scores (K, 5): results of NMS. For an empty input, an
            empty tensor with the input's shape, dtype and device.
    """
    # Work on a private copy: the loop overwrites rows and rescales scores in
    # place, which must not leak into the caller's tensor.
    remaining = box_scores.detach().clone()
    picked_box_scores = []
    while remaining.size(0) > 0:
        max_score_index = torch.argmax(remaining[:, 4])
        # clone() so that the picked row is not affected by the overwrite below.
        cur_box_prob = remaining[max_score_index, :].clone()
        picked_box_scores.append(cur_box_prob)
        if (top_k > 0 and len(picked_box_scores) == top_k) or remaining.size(0) == 1:
            break
        cur_box = cur_box_prob[:-1]
        # Remove the picked row by moving the last row into its slot.
        remaining[max_score_index, :] = remaining[-1, :]
        remaining = remaining[:-1, :]
        ious = iou_of(cur_box.unsqueeze(0), remaining[:, :-1])
        remaining[:, -1] = remaining[:, -1] * torch.exp(-(ious * ious) / sigma)
        remaining = remaining[remaining[:, -1] > score_threshold, :]
    if picked_box_scores:
        return torch.stack(picked_box_scores)
    # Nothing was picked, so the input had no rows; ``remaining`` is an empty
    # copy of it with matching shape, dtype and device.
    return remaining
|
|
|
|
|
|
|
|