Pinwheel's picture
HF Demo
128757a
import torch
import numpy as np
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data import transforms as T
from maskrcnn_benchmark.structures.image_list import to_image_list
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist
from maskrcnn_benchmark.layers import nms, soft_nms
def im_detect_bbox_aug(model, images, device, captions=None, positive_map_label_to_token=None):
    """
    Run test-time-augmented detection and merge the predictions per image.

    For every scale in ``cfg.TEST.SCALES`` the model is run on the images
    (and, when ``cfg.TEST.FLIP`` is set, once more on the horizontally
    flipped images). ``cfg.TEST.RANGES`` is honoured only when it has exactly
    one entry per scale; each entry is then unpacked into
    ``remove_boxes(boxlists, *entry)`` to filter that scale's predictions.
    All predictions of one image are resized to that image's size,
    concatenated, and handed to ``merge_result_from_multi_scales``.

    Args:
        model: detector, called by ``im_detect_bbox_scale``.
        images: indexable sequence of images exposing a ``size`` attribute.
        device: device the image batch is moved to before inference.
        captions: optional captions forwarded to the model.
        positive_map_label_to_token: optional mapping forwarded to the model.

    Returns:
        The value returned by ``merge_result_from_multi_scales`` (one merged
        result per input image).
    """
    # One bucket of augmented predictions per input image
    preds_per_image = [[] for _ in range(len(images))]

    def collect(predictions):
        for idx, prediction in enumerate(predictions):
            # Bring the prediction back to the resolution of its source image
            preds_per_image[idx].append(prediction.resize(images[idx].size))

    # Size ranges apply only when there is exactly one per scale
    scales = cfg.TEST.SCALES
    if len(cfg.TEST.RANGES) == len(scales):
        keep_ranges = cfg.TEST.RANGES
    else:
        keep_ranges = [None] * len(scales)

    # Compute detections at different scales, optionally also flipped
    for scale, keep_range in zip(scales, keep_ranges):
        max_size = cfg.TEST.MAX_SIZE
        flip_options = (False, True) if cfg.TEST.FLIP else (False,)
        for flipped in flip_options:
            predictions = im_detect_bbox_scale(
                model, images, scale, max_size, device,
                captions=captions,
                positive_map_label_to_token=positive_map_label_to_token,
                hflip=flipped
            )
            if keep_range is not None:
                predictions = remove_boxes(predictions, *keep_range)
            collect(predictions)

    # Merge boxlists detected by different bbox aug params
    merged = []
    for image_preds in preds_per_image:
        reference = image_preds[0]
        all_boxes = torch.cat([pred.bbox for pred in image_preds])
        all_scores = torch.cat([pred.get_field('scores') for pred in image_preds])
        all_labels = torch.cat([pred.get_field('labels') for pred in image_preds])
        combined = BoxList(all_boxes, reference.size, reference.mode)
        combined.add_field('scores', all_scores)
        combined.add_field('labels', all_labels)
        merged.append(combined)

    return merge_result_from_multi_scales(merged)
def im_detect_bbox(model, images, target_scale, target_max_size, device,
                   captions=None,
                   positive_map_label_to_token=None
                   ):
    """
    Performs bbox detection on the original image.

    The images are resized with ``T.Resize(target_scale, target_max_size)``,
    converted to tensors, normalised with the configured pixel mean/std and
    batched with ``cfg.DATALOADER.SIZE_DIVISIBILITY`` before being fed to the
    model.

    Args:
        model: callable detector.
        images: iterable of images accepted by the transform pipeline.
        target_scale: first argument of ``T.Resize``.
        target_max_size: second argument of ``T.Resize``.
        device: device the batched images are moved to.
        captions: optional captions; when given they are passed to the model
            together with ``positive_map_label_to_token`` (as ``positive_map``).
        positive_map_label_to_token: optional mapping forwarded to the model.

    Returns:
        Whatever ``model`` returns for the batch.
    """
    # Always bind a value: the original left ``input_format`` undefined (and
    # crashed with UnboundLocalError) when FORMAT was empty and TO_BGR255 was
    # off. NOTE(review): assumes T.Normalize accepts an empty format string
    # as "no channel/range conversion" -- confirm against its implementation.
    input_format = ''
    # Compare by value; ``is not ''`` tests object identity, which is not
    # guaranteed for strings and raises a SyntaxWarning on Python >= 3.8.
    if cfg.INPUT.FORMAT != '':
        input_format = cfg.INPUT.FORMAT
    elif cfg.INPUT.TO_BGR255:
        input_format = 'bgr255'

    transform = T.Compose([
        T.Resize(target_scale, target_max_size),
        T.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, format=input_format
        )
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)

    if captions is None:
        return model(images.to(device))
    return model(images.to(device),
                 captions=captions,
                 positive_map=positive_map_label_to_token
                 )
def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device,
                         captions=None,
                         positive_map_label_to_token=None
                         ):
    """
    Performs bbox detection on the horizontally flipped image.
    Function signature is the same as for im_detect_bbox.

    The pipeline matches ``im_detect_bbox`` with an extra
    ``T.RandomHorizontalFlip(1.0)`` step after resizing. Each returned
    boxlist is passed through ``transpose(0)`` so the detections refer to the
    un-flipped image again.

    Args:
        model: callable detector returning an iterable of boxlists.
        images: iterable of images accepted by the transform pipeline.
        target_scale: first argument of ``T.Resize``.
        target_max_size: second argument of ``T.Resize``.
        device: device the batched images are moved to.
        captions: optional captions; when given they are passed to the model
            together with ``positive_map_label_to_token`` (as ``positive_map``).
        positive_map_label_to_token: optional mapping forwarded to the model.

    Returns:
        list with one flipped-back boxlist per model output.
    """
    # Always bind a value: the original left ``input_format`` undefined (and
    # crashed with UnboundLocalError) when FORMAT was empty and TO_BGR255 was
    # off. NOTE(review): assumes T.Normalize accepts an empty format string
    # as "no channel/range conversion" -- confirm against its implementation.
    input_format = ''
    # Compare by value; ``is not ''`` tests object identity, which is not
    # guaranteed for strings and raises a SyntaxWarning on Python >= 3.8.
    if cfg.INPUT.FORMAT != '':
        input_format = cfg.INPUT.FORMAT
    elif cfg.INPUT.TO_BGR255:
        input_format = 'bgr255'

    transform = T.Compose([
        T.Resize(target_scale, target_max_size),
        # The argument 1.0 is presumably the flip probability, i.e. always flip
        T.RandomHorizontalFlip(1.0),
        T.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, format=input_format
        )
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)

    if captions is None:
        boxlists = model(images.to(device))
    else:
        boxlists = model(images.to(device),
                         captions=captions,
                         positive_map=positive_map_label_to_token
                         )

    # Invert the detections computed on the flipped image
    # NOTE(review): 0 presumably selects the left-right flip -- confirm
    # against BoxList.transpose.
    boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists]
    return boxlists_inv
def im_detect_bbox_scale(model, images, target_scale, target_max_size, device,
                         captions=None,
                         positive_map_label_to_token=None,
                         hflip=False):
    """
    Detects boxes at a single scale, optionally on the mirrored images.

    Dispatches to ``im_detect_bbox_hflip`` when ``hflip`` is truthy and to
    ``im_detect_bbox`` otherwise, forwarding every other argument unchanged.
    The predictions are returned as produced by that detector, i.e. still in
    the scaled image space.
    """
    detect = im_detect_bbox_hflip if hflip else im_detect_bbox
    return detect(
        model, images, target_scale, target_max_size, device,
        captions=captions,
        positive_map_label_to_token=positive_map_label_to_token
    )
def remove_boxes(boxlist_ts, min_scale, max_scale):
    """
    Keep only the boxes whose area lies strictly between two bounds.

    A box is kept when ``min_scale ** 2 < w * h < max_scale ** 2``, where
    ``w`` and ``h`` are measured on the "xyxy" representation with the ``+ 1``
    offset used throughout this file.

    Args:
        boxlist_ts: iterable of boxlists (objects exposing ``mode``,
            ``convert``, ``bbox`` and tensor-style indexing).
        min_scale: exclusive lower bound on the square root of the box area.
        max_scale: exclusive upper bound on the square root of the box area.

    Returns:
        list of filtered boxlists, converted back to their original mode.
    """
    min_area = min_scale * min_scale
    max_area = max_scale * max_scale

    new_boxlist_ts = []
    for boxlist_t in boxlist_ts:
        mode = boxlist_t.mode
        boxlist_t = boxlist_t.convert("xyxy")
        boxes = boxlist_t.bbox

        # Vectorised area test: one tensor comparison per boxlist instead of
        # a Python loop that converts a tensor to bool for every box.
        widths = boxes[:, 2] - boxes[:, 0] + 1
        heights = boxes[:, 3] - boxes[:, 1] + 1
        areas = widths * heights
        in_range = (areas > min_area) & (areas < max_area)

        # Index with a 1-D tensor of positions, like the ``result[keep]``
        # selection in merge_result_from_multi_scales.
        keep = torch.nonzero(in_range).squeeze(1)
        new_boxlist_ts.append(boxlist_t[keep].convert(mode))
    return new_boxlist_ts
def merge_result_from_multi_scales(boxlists):
    """
    Run class-wise suppression on each boxlist and cap the detections kept.

    For every boxlist the classes in ``cfg.TEST.SELECT_CLASSES`` (or
    ``1 .. cfg.TEST.NUM_CLASSES - 1`` when that setting is empty) are
    processed independently: the boxes of the class are passed through
    ``boxlist_nms`` with threshold ``cfg.TEST.TH`` and NMS type
    ``cfg.TEST.SPECIAL_NMS`` and then labelled with the class id. The
    per-class results are concatenated. When ``cfg.TEST.PRE_NMS_TOP_N`` is
    positive and smaller than the number of detections, only detections
    scoring at least the N-th highest score are kept (ties may keep more).

    Args:
        boxlists: sequence of boxlists carrying "scores" and "labels" fields.

    Returns:
        list with one filtered boxlist per input boxlist.
    """
    merged_results = []
    for image_boxlist in boxlists:
        all_scores = image_boxlist.get_field("scores")
        all_labels = image_boxlist.get_field("labels")
        all_boxes = image_boxlist.bbox

        # test on classes
        selected = cfg.TEST.SELECT_CLASSES
        class_ids = selected if len(selected) else range(1, cfg.TEST.NUM_CLASSES)

        per_class = []
        for class_id in class_ids:
            class_inds = torch.nonzero(all_labels == class_id).view(-1)
            class_boxlist = BoxList(
                all_boxes[class_inds, :].view(-1, 4), image_boxlist.size, mode="xyxy"
            )
            class_boxlist.add_field("scores", all_scores[class_inds])
            class_boxlist = boxlist_nms(
                class_boxlist, cfg.TEST.TH,
                score_field="scores", nms_type=cfg.TEST.SPECIAL_NMS
            )
            class_labels = torch.full(
                (len(class_boxlist),), class_id,
                dtype=torch.int64, device=all_scores.device
            )
            class_boxlist.add_field("labels", class_labels)
            per_class.append(class_boxlist)

        detections = cat_boxlist(per_class)
        total = len(detections)

        # Limit to max_per_image detections **over all classes**
        top_n = cfg.TEST.PRE_NMS_TOP_N
        if 0 < top_n < total:
            det_scores = detections.get_field("scores")
            # k-th smallest with k = total - top_n + 1 is the top_n-th largest
            cutoff, _ = torch.kthvalue(det_scores.cpu(), total - top_n + 1)
            survivors = torch.nonzero(det_scores >= cutoff.item()).squeeze(1)
            detections = detections[survivors]

        merged_results.append(detections)
    return merged_results
def boxlist_nms(boxlist, thresh, max_proposals=-1, score_field="scores", nms_type='nms'):
    """
    Suppress or merge overlapping boxes of a boxlist.

    Args:
        boxlist: boxlist to process; returned untouched when ``thresh <= 0``.
        thresh: overlap threshold handed to the selected routine.
        max_proposals: when positive, keep at most this many boxes. Only the
            'soft-nms' and plain NMS paths apply it.
        score_field: name of the field holding the scores. Updated scores are
            written back to this same field (the original always wrote to
            'scores', which only matched for the default field name).
        nms_type: 'vote' (``bbox_vote``), 'soft-vote' (``soft_bbox_vote``),
            'soft-nms' (``soft_nms``); anything else uses plain ``nms``.

    Returns:
        The processed boxlist, converted back to the input's mode.
    """
    if thresh <= 0:
        return boxlist

    mode = boxlist.mode
    boxlist = boxlist.convert("xyxy")
    boxes = boxlist.bbox
    score = boxlist.get_field(score_field)

    if nms_type in ('vote', 'soft-vote'):
        vote = bbox_vote if nms_type == 'vote' else soft_bbox_vote
        boxes_vote, scores_vote = vote(boxes, score, thresh)
        # The voting helpers return empty arrays when there is nothing to
        # merge; the boxlist is then left as it is.
        # NOTE(review): max_proposals is not applied on the voting paths, and
        # any extra field other than the scores is not re-aligned with the
        # voted boxes.
        if len(boxes_vote) > 0:
            boxlist.bbox = boxes_vote
            boxlist.extra_fields[score_field] = scores_vote
    elif nms_type == 'soft-nms':
        keep, new_score = soft_nms(boxes.cpu(), score.cpu(), thresh, 0.95)
        if max_proposals > 0:
            keep = keep[: max_proposals]
        boxlist = boxlist[keep]
        # NOTE(review): new_score is stored as returned by soft_nms; it is not
        # truncated by max_proposals nor moved off the CPU -- confirm that it
        # lines up with ``keep`` when max_proposals is used.
        boxlist.extra_fields[score_field] = new_score
    else:
        keep = nms(boxes, score, thresh)
        if max_proposals > 0:
            keep = keep[: max_proposals]
        boxlist = boxlist[keep]

    return boxlist.convert(mode)
def bbox_vote(boxes, scores, vote_thresh):
    """
    Merge overlapping detections by score-weighted box voting.

    Detections are sorted by descending score. Repeatedly, the first
    remaining detection is compared with all remaining ones; every detection
    whose IoU with it is at least ``vote_thresh`` is removed and the group is
    replaced by one box: the score-weighted average of the group's
    coordinates, scored with the group's maximum score. A group of one is
    kept unchanged.

    Args:
        boxes: tensor of boxes as (x1, y1, x2, y2) rows.
        scores: tensor with one score per box.
        vote_thresh: minimum IoU for a box to join a group.

    Returns:
        ``(boxes, scores)`` as float tensors on the device of the input
        ``boxes``. When there is at most one detection, a pair of empty
        NumPy arrays is returned instead; ``boxlist_nms`` treats a
        zero-length result as "leave the input unchanged".
    """
    # Remember where the input lives so the result is returned there instead
    # of unconditionally on CUDA (which fails on CPU-only hosts).
    device = boxes.device
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy().reshape(-1, 1)
    det = np.concatenate((boxes, scores), axis=1)
    if det.shape[0] <= 1:
        return np.zeros((0, 5)), np.zeros((0, 1))

    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]

    merged = []
    while det.shape[0] > 0:
        # IOU of the top-scoring remaining box against all remaining boxes
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)

        # get needed merge det and delete these det
        merge_index = np.where(o >= vote_thresh)[0]
        if merge_index.shape[0] == 0:
            # Nothing reached the threshold, not even the box itself (NaN IoU
            # or vote_thresh > 1). Consume the top box anyway, otherwise
            # ``det`` never shrinks and the loop never ends.
            merge_index = np.array([0])
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)

        if merge_index.shape[0] <= 1:
            merged.append(det_accu)
            continue

        # Score-weighted average of the coordinates, max score of the group
        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
        max_score = np.max(det_accu[:, 4])
        det_accu_sum = np.zeros((1, 5))
        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])
        det_accu_sum[:, 4] = max_score
        merged.append(det_accu_sum)

    # Stack once at the end instead of growing an array behind a bare except
    dets = np.vstack(merged)
    boxes = torch.from_numpy(dets[:, :4]).float().to(device)
    scores = torch.from_numpy(dets[:, 4]).float().to(device)
    return boxes, scores
def soft_bbox_vote(boxes, scores, vote_thresh):
    """
    Box voting that also keeps down-weighted copies of the merged boxes.

    Works like ``bbox_vote``: detections are sorted by descending score and
    every group of boxes whose IoU with the current top box is at least
    ``vote_thresh`` is replaced by its score-weighted average box, scored
    with the group's maximum score. In addition, each member of a merged
    group is kept with its score multiplied by ``1 - IoU``, provided the
    rescaled score is still at least ``cfg.MODEL.RETINANET.INFERENCE_TH``.
    The final detections are sorted by descending score.

    Args:
        boxes: tensor of boxes as (x1, y1, x2, y2) rows.
        scores: tensor with one score per box.
        vote_thresh: minimum IoU for a box to join a group.

    Returns:
        ``(boxes, scores)`` as float tensors on the device of the input
        ``boxes``. When there is at most one detection, a pair of empty
        NumPy arrays is returned instead; ``boxlist_nms`` treats a
        zero-length result as "leave the input unchanged".
    """
    # Remember where the input lives so the result is returned there instead
    # of unconditionally on CUDA (which fails on CPU-only hosts).
    device = boxes.device
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy().reshape(-1, 1)
    det = np.concatenate((boxes, scores), axis=1)
    if det.shape[0] <= 1:
        return np.zeros((0, 5)), np.zeros((0, 1))

    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]

    merged = []
    while det.shape[0] > 0:
        # IOU of the top-scoring remaining box against all remaining boxes
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)

        # get needed merge det and delete these det
        merge_index = np.where(o >= vote_thresh)[0]
        if merge_index.shape[0] == 0:
            # Nothing reached the threshold, not even the box itself (NaN IoU
            # or vote_thresh > 1). Consume the top box anyway, otherwise
            # ``det`` never shrinks and the loop never ends.
            merge_index = np.array([0])
        det_accu = det[merge_index, :]
        det_accu_iou = o[merge_index]
        det = np.delete(det, merge_index, 0)

        if merge_index.shape[0] <= 1:
            merged.append(det_accu)
            continue

        # Down-weighted copies of the group members that stay confident enough
        soft_det_accu = det_accu.copy()
        soft_det_accu[:, 4] = soft_det_accu[:, 4] * (1 - det_accu_iou)
        soft_index = np.where(soft_det_accu[:, 4] >= cfg.MODEL.RETINANET.INFERENCE_TH)[0]
        soft_det_accu = soft_det_accu[soft_index, :]

        # Score-weighted average of the coordinates, max score of the group
        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
        max_score = np.max(det_accu[:, 4])
        det_accu_sum = np.zeros((1, 5))
        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])
        det_accu_sum[:, 4] = max_score

        if soft_det_accu.shape[0] > 0:
            det_accu_sum = np.vstack((det_accu_sum, soft_det_accu))
        merged.append(det_accu_sum)

    # Stack once at the end instead of growing an array behind a bare except
    dets = np.vstack(merged)
    order = dets[:, 4].ravel().argsort()[::-1]
    dets = dets[order, :]

    # argsort()[::-1] yields a negatively-strided view; make it contiguous
    # before handing it to torch.from_numpy.
    boxes = torch.from_numpy(np.ascontiguousarray(dets[:, :4])).float().to(device)
    scores = torch.from_numpy(np.ascontiguousarray(dets[:, 4])).float().to(device)
    return boxes, scores