| | |
| | import math |
| | from typing import List, Tuple, Union |
| | import torch |
| | from fvcore.nn import giou_loss, smooth_l1_loss |
| | from torch.nn import functional as F |
| |
|
| | from detectron2.layers import cat, ciou_loss, diou_loss |
| | from detectron2.structures import Boxes |
| |
|
| | |
| | |
| | |
# Default upper bound for the dw/dh deltas. `apply_deltas` clamps dw and dh to this
# value before calling torch.exp, so with the default a box side can grow by at most
# a factor of exp(log(1000 / 16)) = 62.5 in a single transform.
_DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16)


# Names exported by `from <this module> import *`.
__all__ = ["Box2BoxTransform", "Box2BoxTransformRotated", "Box2BoxTransformLinear"]
| |
|
| |
|
@torch.jit.script
class Box2BoxTransform:
    """
    The box-to-box transform defined in R-CNN, parameterized by 4 deltas
    (dx, dy, dw, dh).

    Boxes are given by their corners (x1, y1, x2, y2). Applying a transform moves
    the box center by (dx * width, dy * height) and multiplies the box width and
    height by exp(dw) and exp(dh) respectively.
    """

    def __init__(
        self, weights: Tuple[float, float, float, float], scale_clamp: float = _DEFAULT_SCALE_CLAMP
    ):
        """
        Args:
            weights (4-element tuple): Scaling factors applied to the
                (dx, dy, dw, dh) deltas. In Fast R-CNN these were originally chosen
                so that the deltas have unit variance; they are now treated as
                hyperparameters of the system.
            scale_clamp (float): Upper bound for the predicted scaling deltas
                (dw and dh) when they are applied, i.e. they are clamped to be
                <= scale_clamp.
        """
        self.weights = weights
        self.scale_clamp = scale_clamp

    def get_deltas(self, src_boxes, target_boxes):
        """
        Compute the deltas (dx, dy, dw, dh) that map `src_boxes` onto `target_boxes`,
        so that ``target_boxes == self.apply_deltas(deltas, src_boxes)`` holds
        (unless a delta is large enough to be clamped by `apply_deltas`).

        Args:
            src_boxes (Tensor): source boxes, e.g., object proposals
            target_boxes (Tensor): target of the transformation, e.g., ground-truth
                boxes.

        Returns:
            Tensor: the four deltas stacked along dim 1.

        Raises:
            AssertionError: if an input is not a Tensor, or if any source box has a
                non-positive width (only widths are checked).
        """
        assert isinstance(src_boxes, torch.Tensor), type(src_boxes)
        assert isinstance(target_boxes, torch.Tensor), type(target_boxes)

        # Sizes of both box sets.
        src_w = src_boxes[:, 2] - src_boxes[:, 0]
        src_h = src_boxes[:, 3] - src_boxes[:, 1]
        tgt_w = target_boxes[:, 2] - target_boxes[:, 0]
        tgt_h = target_boxes[:, 3] - target_boxes[:, 1]

        # Centers of both box sets.
        src_cx = src_boxes[:, 0] + 0.5 * src_w
        src_cy = src_boxes[:, 1] + 0.5 * src_h
        tgt_cx = target_boxes[:, 0] + 0.5 * tgt_w
        tgt_cy = target_boxes[:, 1] + 0.5 * tgt_h

        wx, wy, ww, wh = self.weights
        # Center offsets are expressed in units of the source box size ...
        shift_x = wx * (tgt_cx - src_cx) / src_w
        shift_y = wy * (tgt_cy - src_cy) / src_h
        # ... and size changes as log-ratios.
        scale_w = ww * torch.log(tgt_w / src_w)
        scale_h = wh * torch.log(tgt_h / src_h)

        assert (src_w > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!"
        return torch.stack((shift_x, shift_y, scale_w, scale_h), dim=1)

    def apply_deltas(self, deltas, boxes):
        """
        Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`.

        Args:
            deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1.
                deltas[i] represents k potentially different class-specific
                box transformations for the single box boxes[i].
            boxes (Tensor): boxes to transform, of shape (N, 4)

        Returns:
            Tensor: transformed boxes, with the same shape as `deltas`.
        """
        # Decode in float32; the boxes are cast to match.
        deltas = deltas.float()
        boxes = boxes.to(deltas.dtype)

        box_w = boxes[:, 2] - boxes[:, 0]
        box_h = boxes[:, 3] - boxes[:, 1]
        box_cx = boxes[:, 0] + 0.5 * box_w
        box_cy = boxes[:, 1] + 0.5 * box_h

        # Undo the weighting applied by get_deltas; every 4th column belongs to the
        # same delta component across the k transformations.
        wx, wy, ww, wh = self.weights
        dx = deltas[:, 0::4] / wx
        dy = deltas[:, 1::4] / wy
        # The scaling deltas are capped so the argument of torch.exp stays bounded.
        dw = torch.clamp(deltas[:, 2::4] / ww, max=self.scale_clamp)
        dh = torch.clamp(deltas[:, 3::4] / wh, max=self.scale_clamp)

        # Column views (N, 1) broadcast against the (N, k) deltas.
        w_col = box_w.unsqueeze(1)
        h_col = box_h.unsqueeze(1)

        new_cx = dx * w_col + box_cx.unsqueeze(1)
        new_cy = dy * h_col + box_cy.unsqueeze(1)
        half_w = 0.5 * (torch.exp(dw) * w_col)
        half_h = 0.5 * (torch.exp(dh) * h_col)

        corners = torch.stack(
            (new_cx - half_w, new_cy - half_h, new_cx + half_w, new_cy + half_h), dim=-1
        )
        return corners.reshape(deltas.shape)
| |
|
| |
|
| | |
class Box2BoxTransformRotated:
    """
    The box-to-box transform defined in Rotated R-CNN, parameterized by 5 deltas
    (dx, dy, dw, dh, da).

    Boxes are 5-vectors read as (center x, center y, width, height, angle).
    Applying a transform scales the width and height by exp(dw) and exp(dh), moves
    the center by (dx * width, dy * height) and rotates the box by da.
    Note: angles of deltas are in radians while angles of boxes are in degrees.
    """

    def __init__(
        self,
        weights: Tuple[float, float, float, float, float],
        scale_clamp: float = _DEFAULT_SCALE_CLAMP,
    ):
        """
        Args:
            weights (5-element tuple): Scaling factors applied to the
                (dx, dy, dw, dh, da) deltas. These are treated as
                hyperparameters of the system.
            scale_clamp (float): Upper bound for the predicted scaling deltas
                (dw and dh) when they are applied, i.e. they are clamped to be
                <= scale_clamp.
        """
        self.weights = weights
        self.scale_clamp = scale_clamp

    def get_deltas(self, src_boxes, target_boxes):
        """
        Compute the deltas (dx, dy, dw, dh, da) that map `src_boxes` onto
        `target_boxes`, so that ``target_boxes == self.apply_deltas(deltas, src_boxes)``
        holds (unless a delta is large enough to be clamped by `apply_deltas`).

        Args:
            src_boxes (Tensor): Nx5 source boxes, e.g., object proposals
            target_boxes (Tensor): Nx5 target of the transformation, e.g., ground-truth
                boxes.

        Returns:
            Tensor: the five deltas stacked along dim 1.

        Raises:
            AssertionError: if an input is not a Tensor, or if any source box has a
                non-positive width (only widths are checked).
        """
        assert isinstance(src_boxes, torch.Tensor), type(src_boxes)
        assert isinstance(target_boxes, torch.Tensor), type(target_boxes)

        src_cx, src_cy, src_w, src_h, src_angle = src_boxes.unbind(dim=1)
        tgt_cx, tgt_cy, tgt_w, tgt_h, tgt_angle = target_boxes.unbind(dim=1)

        wx, wy, ww, wh, wa = self.weights
        shift_x = wx * (tgt_cx - src_cx) / src_w
        shift_y = wy * (tgt_cy - src_cy) / src_h
        scale_w = ww * torch.log(tgt_w / src_w)
        scale_h = wh * torch.log(tgt_h / src_h)

        # Wrap the angle difference into [-180, 180) degrees, then convert it to
        # radians and apply the angle weight.
        rotation = (tgt_angle - src_angle + 180.0) % 360.0 - 180.0
        rotation = rotation * (wa * math.pi / 180.0)

        assert (
            (src_w > 0).all().item()
        ), "Input boxes to Box2BoxTransformRotated are not valid!"
        return torch.stack((shift_x, shift_y, scale_w, scale_h, rotation), dim=1)

    def apply_deltas(self, deltas, boxes):
        """
        Apply transformation `deltas` (dx, dy, dw, dh, da) to `boxes`.

        Args:
            deltas (Tensor): transformation deltas of shape (N, k*5).
                deltas[i] represents box transformation for the single box boxes[i].
            boxes (Tensor): boxes to transform, of shape (N, 5)

        Returns:
            Tensor: transformed boxes, with the same shape and dtype as `deltas`.
        """
        assert deltas.shape[1] % 5 == 0 and boxes.shape[1] == 5

        # (N, 5) -> (N, 5, 1): each selected column is (N, 1) and broadcasts
        # against the (N, k) slices of `deltas`.
        columns = boxes.to(deltas.dtype).unsqueeze(2)
        ctr_x, ctr_y, widths, heights, angles = (columns[:, i] for i in range(5))

        # Undo the weighting applied by get_deltas, one delta component at a time.
        dx, dy, dw, dh, da = (
            deltas[:, i::5] / weight for i, weight in enumerate(self.weights)
        )

        # The scaling deltas are capped so the argument of torch.exp stays bounded.
        dw = torch.clamp(dw, max=self.scale_clamp)
        dh = torch.clamp(dh, max=self.scale_clamp)

        decoded = torch.zeros_like(deltas)
        decoded[:, 0::5] = dx * widths + ctr_x
        decoded[:, 1::5] = dy * heights + ctr_y
        decoded[:, 2::5] = torch.exp(dw) * widths
        decoded[:, 3::5] = torch.exp(dh) * heights

        # da is in radians while box angles are in degrees: convert, add, and wrap
        # the result into [-180, 180).
        rotated = da * 180.0 / math.pi + angles
        decoded[:, 4::5] = (rotated + 180.0) % 360.0 - 180.0

        return decoded
| |
|
| |
|
class Box2BoxTransformLinear:
    """
    The linear box-to-box transform defined in FCOS. The transformation is parameterized
    by the distance from the center of (square) src box to 4 edges of the target box.
    """

    def __init__(self, normalize_by_size=True):
        """
        Args:
            normalize_by_size: normalize deltas by the size of src (anchor) boxes.
        """
        self.normalize_by_size = normalize_by_size

    def get_deltas(self, src_boxes, target_boxes):
        """
        Get box regression transformation deltas (dx1, dy1, dx2, dy2) that can be used
        to transform the `src_boxes` into the `target_boxes`. That is, the relation
        ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true.
        The center of src must be inside target boxes.

        Args:
            src_boxes (Tensor): square source boxes, e.g., anchors
            target_boxes (Tensor): target of the transformation, e.g., ground-truth
                boxes.

        Returns:
            Tensor: distances (left, top, right, bottom) from each source center to
            the target edges, stacked along dim 1 and divided by the source box
            width/height when `normalize_by_size` is set.

        Raises:
            AssertionError: if an input is not a Tensor.
        """
        assert isinstance(src_boxes, torch.Tensor), type(src_boxes)
        assert isinstance(target_boxes, torch.Tensor), type(target_boxes)

        src_ctr_x = 0.5 * (src_boxes[:, 0] + src_boxes[:, 2])
        src_ctr_y = 0.5 * (src_boxes[:, 1] + src_boxes[:, 3])

        target_l = src_ctr_x - target_boxes[:, 0]
        target_t = src_ctr_y - target_boxes[:, 1]
        target_r = target_boxes[:, 2] - src_ctr_x
        target_b = target_boxes[:, 3] - src_ctr_y

        deltas = torch.stack((target_l, target_t, target_r, target_b), dim=1)
        if self.normalize_by_size:
            stride_w = src_boxes[:, 2] - src_boxes[:, 0]
            stride_h = src_boxes[:, 3] - src_boxes[:, 1]
            strides = torch.stack([stride_w, stride_h, stride_w, stride_h], dim=1)
            deltas = deltas / strides

        return deltas

    def apply_deltas(self, deltas, boxes):
        """
        Apply transformation `deltas` (dx1, dy1, dx2, dy2) to `boxes`.

        Args:
            deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1.
                deltas[i] represents k potentially different class-specific
                box transformations for the single box boxes[i].
            boxes (Tensor): boxes to transform, of shape (N, 4)

        Returns:
            Tensor: transformed boxes, with the same shape and dtype as `deltas`.
        """
        # Negative distances are clamped to zero, so a decoded box always contains
        # the center of its source box.
        deltas = F.relu(deltas)
        boxes = boxes.to(deltas.dtype)

        ctr_x = 0.5 * (boxes[:, 0] + boxes[:, 2])
        ctr_y = 0.5 * (boxes[:, 1] + boxes[:, 3])
        if self.normalize_by_size:
            stride_w = boxes[:, 2] - boxes[:, 0]
            stride_h = boxes[:, 3] - boxes[:, 1]
            strides = torch.stack([stride_w, stride_h, stride_w, stride_h], dim=1)
            # `strides` is (N, 4) but `deltas` is (N, k*4): tile the strides once per
            # transformation so the product is well-defined for every k >= 1.
            deltas = deltas * strides.repeat(1, deltas.shape[1] // 4)

        l = deltas[:, 0::4]
        t = deltas[:, 1::4]
        r = deltas[:, 2::4]
        b = deltas[:, 3::4]

        pred_boxes = torch.zeros_like(deltas)
        pred_boxes[:, 0::4] = ctr_x[:, None] - l
        pred_boxes[:, 1::4] = ctr_y[:, None] - t
        pred_boxes[:, 2::4] = ctr_x[:, None] + r
        pred_boxes[:, 3::4] = ctr_y[:, None] + b
        return pred_boxes
| |
|
| |
|
def _dense_box_regression_loss(
    anchors: List[Union[Boxes, torch.Tensor]],
    box2box_transform: Box2BoxTransform,
    pred_anchor_deltas: List[torch.Tensor],
    gt_boxes: List[torch.Tensor],
    fg_mask: torch.Tensor,
    box_reg_loss_type="smooth_l1",
    smooth_l1_beta=0.0,
):
    """
    Compute the box regression loss for dense, multi-level predictions, summed
    over the entries selected by ``fg_mask``.

    For "smooth_l1" the loss compares predicted deltas with the deltas that map
    the anchors onto the ground truth. For the IoU-based losses the predicted
    deltas are first decoded into boxes and compared with the ground-truth boxes.

    Args:
        anchors: #lvl anchor boxes, each is (HixWixA, 4)
        box2box_transform: transform used to encode ground-truth boxes
            (``get_deltas``) or decode predictions (``apply_deltas``)
        pred_anchor_deltas: #lvl predictions, each is (N, HixWixA, 4)
        gt_boxes: N ground truth boxes, each has shape (R, 4) (R = sum(Hi * Wi * A))
        fg_mask: the foreground boolean mask of shape (N, R) to compute loss on
        box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou",
            "diou", "ciou".
        smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to
            use L1 loss. Only used when `box_reg_loss_type` is "smooth_l1"

    Returns:
        The loss value computed with ``reduction="sum"``.

    Raises:
        ValueError: if `box_reg_loss_type` is not one of the supported names.
    """
    # Merge the per-level anchors into one tensor.
    first_level = anchors[0]
    if isinstance(first_level, Boxes):
        anchors = type(first_level).cat(anchors).tensor
    else:
        anchors = cat(anchors)

    if box_reg_loss_type == "smooth_l1":
        # Regress in delta space: encode every image's ground truth against the anchors.
        target_deltas = torch.stack(
            [box2box_transform.get_deltas(anchors, boxes_i) for boxes_i in gt_boxes]
        )
        return smooth_l1_loss(
            cat(pred_anchor_deltas, dim=1)[fg_mask],
            target_deltas[fg_mask],
            beta=smooth_l1_beta,
            reduction="sum",
        )

    # The remaining losses all work in box space and differ only in the IoU variant.
    if box_reg_loss_type == "giou":
        iou_loss_fn = giou_loss
    elif box_reg_loss_type == "diou":
        iou_loss_fn = diou_loss
    elif box_reg_loss_type == "ciou":
        iou_loss_fn = ciou_loss
    else:
        raise ValueError(f"Invalid dense box regression loss type '{box_reg_loss_type}'")

    # Decode the predictions image by image.
    decoded = [
        box2box_transform.apply_deltas(deltas_i, anchors)
        for deltas_i in cat(pred_anchor_deltas, dim=1)
    ]
    return iou_loss_fn(
        torch.stack(decoded)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum"
    )
| |
|