|
|
|
import collections
|
|
import math
|
|
from typing import List
|
|
import torch
|
|
from torch import nn
|
|
|
|
from detectron2.config import configurable
|
|
from detectron2.layers import ShapeSpec, move_device_like
|
|
from detectron2.structures import Boxes, RotatedBoxes
|
|
from detectron2.utils.registry import Registry
|
|
|
|
# Global registry mapping a name (cfg.MODEL.ANCHOR_GENERATOR.NAME) to an
# anchor-generator class; consumed by build_anchor_generator() below.
ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR")
ANCHOR_GENERATOR_REGISTRY.__doc__ = """
Registry for modules that creates object detection anchors for feature maps.

The registered object will be called with `obj(cfg, input_shape)`.
"""
|
|
|
|
|
|
class BufferList(nn.Module):
    """
    Hold an ordered collection of tensors as module buffers.

    Analogous to ``nn.ParameterList``, but for buffers: the tensors follow
    the module across ``.to()`` / ``.cuda()`` calls without being parameters.
    """

    def __init__(self, buffers):
        """
        Args:
            buffers (iterable[torch.Tensor]): tensors to register, in order.
        """
        super().__init__()
        # Register each tensor under its positional index as the buffer name.
        # persistent=False keeps the buffers out of the state_dict.
        for idx, buf in enumerate(buffers):
            self.register_buffer(str(idx), buf, persistent=False)

    def __len__(self):
        return len(self._buffers)

    def __iter__(self):
        yield from self._buffers.values()
|
|
|
|
|
|
def _create_grid_offsets(
    size: List[int], stride: int, offset: float, target_device_tensor: torch.Tensor
):
    """
    Compute the (x, y) image-space coordinates of every anchor center on a grid.

    Args:
        size (list[int]): (grid_height, grid_width) of the feature map.
        stride (int): stride of the feature map w.r.t. the input image.
        offset (float): relative offset (in units of stride) of the first
            anchor center from the image's top-left corner.
        target_device_tensor (torch.Tensor): tensor whose device the returned
            coordinates are moved to.

    Returns:
        tuple[Tensor, Tensor]: flattened x and y coordinates (float32), each of
        length grid_height * grid_width, in row-major grid order.
    """
    grid_height, grid_width = size
    shifts_x = move_device_like(
        torch.arange(offset * stride, grid_width * stride, step=stride, dtype=torch.float32),
        target_device_tensor,
    )
    shifts_y = move_device_like(
        torch.arange(offset * stride, grid_height * stride, step=stride, dtype=torch.float32),
        target_device_tensor,
    )

    # indexing="ij" matches the historical torch.meshgrid default and avoids
    # the deprecation warning emitted when `indexing` is not passed explicitly.
    shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij")
    shift_x = shift_x.reshape(-1)
    shift_y = shift_y.reshape(-1)
    return shift_x, shift_y
|
|
|
|
|
|
def _broadcast_params(params, num_features, name):
|
|
"""
|
|
If one size (or aspect ratio) is specified and there are multiple feature
|
|
maps, we "broadcast" anchors of that single size (or aspect ratio)
|
|
over all feature maps.
|
|
|
|
If params is list[float], or list[list[float]] with len(params) == 1, repeat
|
|
it num_features time.
|
|
|
|
Returns:
|
|
list[list[float]]: param for each feature
|
|
"""
|
|
assert isinstance(
|
|
params, collections.abc.Sequence
|
|
), f"{name} in anchor generator has to be a list! Got {params}."
|
|
assert len(params), f"{name} in anchor generator cannot be empty!"
|
|
if not isinstance(params[0], collections.abc.Sequence):
|
|
return [params] * num_features
|
|
if len(params) == 1:
|
|
return list(params) * num_features
|
|
assert len(params) == num_features, (
|
|
f"Got {name} of length {len(params)} in anchor generator, "
|
|
f"but the number of input features is {num_features}!"
|
|
)
|
|
return params
|
|
|
|
|
|
@ANCHOR_GENERATOR_REGISTRY.register()
class DefaultAnchorGenerator(nn.Module):
    """
    Compute anchors in the standard ways described in
    "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks".
    """

    # Anchors are axis-aligned boxes in XYXY format, hence 4 values each.
    # torch.jit.Final marks the attribute as a constant for TorchScript.
    box_dim: torch.jit.Final[int] = 4
    """
    the dimension of each anchor box.
    """

    @configurable
    def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5):
        """
        This interface is experimental.

        Args:
            sizes (list[list[float]] or list[float]):
                If ``sizes`` is list[list[float]], ``sizes[i]`` is the list of anchor sizes
                (i.e. sqrt of anchor area) to use for the i-th feature map.
                If ``sizes`` is list[float], ``sizes`` is used for all feature maps.
                Anchor sizes are given in absolute lengths in units of
                the input image; they do not dynamically scale if the input image size changes.
            aspect_ratios (list[list[float]] or list[float]): list of aspect ratios
                (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies.
            strides (list[int]): stride of each input feature.
            offset (float): Relative offset between the center of the first anchor and the top-left
                corner of the image. Value has to be in [0, 1).
                Recommend to use 0.5, which means half stride.
        """
        super().__init__()

        self.strides = strides
        self.num_features = len(self.strides)
        # Expand the specs so that each of the `num_features` feature maps has
        # its own list of sizes / aspect ratios.
        sizes = _broadcast_params(sizes, self.num_features, "sizes")
        aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios")
        self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios)

        self.offset = offset
        assert 0.0 <= self.offset < 1.0, self.offset

    @classmethod
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        # Translates a detectron2 config into the keyword arguments of __init__.
        return {
            "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES,
            "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS,
            "strides": [x.stride for x in input_shape],
            "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET,
        }

    def _calculate_anchors(self, sizes, aspect_ratios):
        # One (num_cell_anchors, 4) float tensor per feature map, held as
        # module buffers so they follow the module across device moves.
        cell_anchors = [
            self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios)
        ]
        return BufferList(cell_anchors)

    @property
    @torch.jit.unused
    def num_cell_anchors(self):
        """
        Alias of `num_anchors`.
        """
        return self.num_anchors

    @property
    @torch.jit.unused
    def num_anchors(self):
        """
        Returns:
            list[int]: Each int is the number of anchors at every pixel
                location, on that feature map.
                For example, if at every pixel we use anchors of 3 aspect
                ratios and 5 sizes, the number of anchors is 15.
                (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config)

                In standard RPN models, `num_anchors` on every feature map is the same.
        """
        return [len(cell_anchors) for cell_anchors in self.cell_anchors]

    def _grid_anchors(self, grid_sizes: List[List[int]]):
        """
        Returns:
            list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4
        """
        anchors = []
        # Collect the cell-anchor tensors via named_buffers() with an explicit
        # List[Tensor] annotation rather than iterating the BufferList —
        # presumably a TorchScript-compatibility workaround; verify before
        # simplifying.
        buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()]
        for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers):
            shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors)
            # (x, y) repeated twice so the same translation applies to both
            # corners of each XYXY box.
            shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)

            # (HW, 1, 4) + (1, A, 4) -> (HW, A, 4) -> (HW*A, 4): every cell
            # anchor translated to every grid location.
            anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4))

        return anchors

    def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)):
        """
        Generate a tensor storing canonical anchor boxes, which are all anchor
        boxes of different sizes and aspect_ratios centered at (0, 0).
        We can later build the set of anchors for a full feature map by
        shifting and tiling these tensors (see `meth:_grid_anchors`).

        Args:
            sizes (tuple[float]):
            aspect_ratios (tuple[float]]):

        Returns:
            Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes
            in XYXY format.
        """

        anchors = []
        for size in sizes:
            area = size**2.0
            for aspect_ratio in aspect_ratios:
                # Solve w, h from:
                #   area = w * h
                #   aspect_ratio = h / w
                # => w = sqrt(area / aspect_ratio), h = aspect_ratio * w
                w = math.sqrt(area / aspect_ratio)
                h = aspect_ratio * w
                # Center the box at (0, 0) and convert to corner coordinates.
                x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0
                anchors.append([x0, y0, x1, y1])
        return torch.tensor(anchors)

    def forward(self, features: List[torch.Tensor]):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[Boxes]: a list of Boxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        # The anchor layout depends only on the spatial size of each feature map.
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
        return [Boxes(x) for x in anchors_over_all_feature_maps]
|
|
|
|
|
|
@ANCHOR_GENERATOR_REGISTRY.register()
class RotatedAnchorGenerator(nn.Module):
    """
    Compute rotated anchors used by Rotated RPN (RRPN), described in
    "Arbitrary-Oriented Scene Text Detection via Rotation Proposals".
    """

    # Rotated boxes are parameterized as (x_ctr, y_ctr, w, h, angle).
    box_dim: int = 5
    """
    the dimension of each anchor box.
    """

    @configurable
    def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5):
        """
        This interface is experimental.

        Args:
            sizes (list[list[float]] or list[float]):
                If sizes is list[list[float]], sizes[i] is the list of anchor sizes
                (i.e. sqrt of anchor area) to use for the i-th feature map.
                If sizes is list[float], the sizes are used for all feature maps.
                Anchor sizes are given in absolute lengths in units of
                the input image; they do not dynamically scale if the input image size changes.
            aspect_ratios (list[list[float]] or list[float]): list of aspect ratios
                (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies.
            strides (list[int]): stride of each input feature.
            angles (list[list[float]] or list[float]): list of angles (in degrees CCW)
                to use for anchors. Same "broadcast" rule for `sizes` applies.
            offset (float): Relative offset between the center of the first anchor and the top-left
                corner of the image. Value has to be in [0, 1).
                Recommend to use 0.5, which means half stride.
        """
        super().__init__()

        self.strides = strides
        self.num_features = len(strides)
        # Expand each spec so there is exactly one entry per feature map.
        sizes = _broadcast_params(sizes, self.num_features, "sizes")
        aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios")
        angles = _broadcast_params(angles, self.num_features, "angles")
        self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles)

        self.offset = offset
        assert 0.0 <= self.offset < 1.0, self.offset

    @classmethod
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        # Translate a detectron2 config into the keyword arguments of __init__.
        anchor_cfg = cfg.MODEL.ANCHOR_GENERATOR
        return {
            "sizes": anchor_cfg.SIZES,
            "aspect_ratios": anchor_cfg.ASPECT_RATIOS,
            "strides": [x.stride for x in input_shape],
            "offset": anchor_cfg.OFFSET,
            "angles": anchor_cfg.ANGLES,
        }

    def _calculate_anchors(self, sizes, aspect_ratios, angles):
        # One (num_cell_anchors, 5) float tensor per feature map, held as
        # module buffers so they follow the module across device moves.
        cell_anchors = []
        for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles):
            cell_anchors.append(self.generate_cell_anchors(size, aspect_ratio, angle).float())
        return BufferList(cell_anchors)

    @property
    def num_cell_anchors(self):
        """
        Alias of `num_anchors`.
        """
        return self.num_anchors

    @property
    def num_anchors(self):
        """
        Returns:
            list[int]: number of anchors placed at every pixel location of each
                feature map. For example, anchors of 3 aspect ratios, 2 sizes
                and 5 angles give 30 anchors per location.
                (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS
                and ANCHOR_GENERATOR.ANGLES in config)

                In standard RRPN models, `num_anchors` on every feature map is the same.
        """
        return [len(cell_anchors) for cell_anchors in self.cell_anchors]

    def _grid_anchors(self, grid_sizes):
        """
        Returns:
            list[Tensor]: one (#locations * #cell_anchors) x 5 tensor per feature map.
        """
        anchors = []
        for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors):
            xs, ys = _create_grid_offsets(size, stride, self.offset, base_anchors)
            # Only the center coordinates move across the grid; width, height
            # and angle stay fixed, hence zeros in the last three channels.
            zeros = torch.zeros_like(xs)
            shifts = torch.stack((xs, ys, zeros, zeros, zeros), dim=1)
            # (HW, 1, 5) + (1, A, 5) -> (HW*A, 5): every cell anchor
            # translated to every grid location.
            anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5))

        return anchors

    def generate_cell_anchors(
        self,
        sizes=(32, 64, 128, 256, 512),
        aspect_ratios=(0.5, 1, 2),
        angles=(-90, -60, -30, 0, 30, 60, 90),
    ):
        """
        Generate a tensor of canonical rotated anchor boxes: every combination
        of size, aspect ratio and angle, centered at (0, 0). A full feature
        map's anchors are later obtained by shifting and tiling these
        (see `meth:_grid_anchors`).

        Args:
            sizes (tuple[float]):
            aspect_ratios (tuple[float]]):
            angles (tuple[float]]):

        Returns:
            Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5)
            storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format.
        """
        cell_anchors = []
        for size in sizes:
            area = size**2.0
            for aspect_ratio in aspect_ratios:
                # Solve w, h from:
                #   area = w * h
                #   aspect_ratio = h / w
                # => w = sqrt(area / aspect_ratio), h = aspect_ratio * w
                w = math.sqrt(area / aspect_ratio)
                h = aspect_ratio * w
                for a in angles:
                    cell_anchors.append([0, 0, w, h, a])

        return torch.tensor(cell_anchors)

    def forward(self, features):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        # The anchor layout depends only on the spatial size of each feature map.
        grid_sizes = [f.shape[-2:] for f in features]
        return [RotatedBoxes(a) for a in self._grid_anchors(grid_sizes)]
|
|
|
|
|
|
def build_anchor_generator(cfg, input_shape):
    """
    Build an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`.
    """
    name = cfg.MODEL.ANCHOR_GENERATOR.NAME
    generator_cls = ANCHOR_GENERATOR_REGISTRY.get(name)
    return generator_cls(cfg, input_shape)
|
|
|