Spaces:
Sleeping
Sleeping
import numpy as np | |
from PIL import Image | |
def nms(boxes, overlap_threshold=0.5, mode='union'):
    """Non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose overlap with it exceeds the threshold.

    Arguments:
        boxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        overlap_threshold: a float number; a box is suppressed when its
            overlap with a picked box is strictly greater than this value.
        mode: 'union' (intersection over union) or
            'min' (intersection over the smaller of the two areas).

    Returns:
        list with indices of the selected boxes,
        in order of decreasing score.

    Raises:
        ValueError: if there are boxes to process and mode is
            neither 'union' nor 'min'.
    """
    # if there are no boxes, return the empty list
    if len(boxes) == 0:
        return []

    # fail with a clear message instead of an unbound local further down
    if mode not in ('union', 'min'):
        raise ValueError(
            "mode must be 'union' or 'min', got {!r}".format(mode)
        )

    # list of picked indices
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

    # the + 1.0 treats xmax/ymax as inclusive coordinates
    area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
    ids = np.argsort(score)  # in increasing order

    while len(ids) > 0:
        # the box with the largest remaining score sits at the end
        i = ids[-1]
        rest = ids[:-1]
        pick.append(i)

        # intersection of the picked box with every remaining box:
        # left top corner of intersection boxes
        ix1 = np.maximum(x1[i], x1[rest])
        iy1 = np.maximum(y1[i], y1[rest])

        # right bottom corner of intersection boxes
        ix2 = np.minimum(x2[i], x2[rest])
        iy2 = np.minimum(y2[i], y2[rest])

        # width and height of intersection boxes (zero when disjoint)
        w = np.maximum(0.0, ix2 - ix1 + 1.0)
        h = np.maximum(0.0, iy2 - iy1 + 1.0)

        # intersections' areas
        inter = w * h

        if mode == 'min':
            overlap = inter / np.minimum(area[i], area[rest])
        else:
            # intersection over union (IoU)
            overlap = inter / (area[i] + area[rest] - inter)

        # drop the picked box and all boxes where overlap is too big;
        # `~(a > b)` rather than `a <= b` so NaN overlaps are kept,
        # exactly as with an explicit "delete where overlap > threshold"
        ids = rest[~(overlap > overlap_threshold)]

    return pick
def convert_to_square(bboxes):
    """Convert bounding boxes to a square form.

    Each box is replaced by a square that shares its center and whose
    side equals the longer side of the original box. Columns after the
    first four (e.g. the score) are carried over unchanged, and the
    input array is not modified.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].

    Returns:
        a float numpy array of shape [n, 5],
        squared bounding boxes.
    """
    # start from a copy so the score column survives; a zero-filled
    # array would silently reset every score to 0
    square_bboxes = bboxes.copy()
    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]

    # the + 1.0 treats xmax/ymax as inclusive coordinates
    h = y2 - y1 + 1.0
    w = x2 - x1 + 1.0
    max_side = np.maximum(h, w)

    # shift the top-left corner so the center stays where it was
    square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5
    square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5
    square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
    square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
    return square_bboxes
def calibrate_box(bboxes, offsets):
    """Shift box corners by offsets expressed as fractions of box size.

    'offsets' is one of the outputs of the nets. Every corner coordinate
    is moved by its offset multiplied by the box width (x coordinates)
    or the box height (y coordinates):

        x1_new = x1 + tx1 * w        y1_new = y1 + ty1 * h
        x2_new = x2 + tx2 * w        y2_new = y2 + ty2 * h

    The update is written into `bboxes` itself, which is also returned.

    Arguments:
        bboxes: a float numpy array of shape [n, 5]; modified in place.
        offsets: a float numpy array of shape [n, 4],
            columns are (tx1, ty1, tx2, ty2).

    Returns:
        a float numpy array of shape [n, 5] (the same object as `bboxes`).
    """
    # the + 1.0 treats xmax/ymax as inclusive coordinates
    widths = bboxes[:, 2] - bboxes[:, 0] + 1.0
    heights = bboxes[:, 3] - bboxes[:, 1] + 1.0

    # per-box scale for each of the four coordinates, shape [n, 4]
    scale = np.stack([widths, heights, widths, heights], axis=1)

    # NOTE: nothing here guarantees x1 < x2 and y1 < y2 after the shift
    bboxes[:, 0:4] = bboxes[:, 0:4] + scale * offsets
    return bboxes
def get_image_boxes(bounding_boxes, img, size=24):
    """Cut out boxes from the image.

    Each box is copied into a zero-filled (black) canvas of the box's
    own size, so parts of the box that fall outside the image stay
    black; the canvas is then resized to `size` x `size` and
    preprocessed for the network.

    Arguments:
        bounding_boxes: a float numpy array of shape [n, 5].
        img: an instance of PIL.Image; images that are not in RGB mode
            are converted to RGB before cropping.
        size: an integer, size of cutouts.

    Returns:
        a float numpy array of shape [n, 3, size, size].
    """
    num_boxes = len(bounding_boxes)
    width, height = img.size

    [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(
        bounding_boxes, width, height
    )

    # the cutouts below are 3-channel, so make sure the source is too
    if img.mode != 'RGB':
        img = img.convert('RGB')

    # convert the image once; it does not change between boxes
    img_array = np.asarray(img, 'uint8')

    img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')

    for i in range(num_boxes):
        # black canvas of the (unclipped) box size
        img_box = np.zeros((h[i], w[i], 3), 'uint8')

        # paste the visible part of the box at its position in the canvas
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
            img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

        # resize
        img_box = Image.fromarray(img_box)
        img_box = img_box.resize((size, size), Image.BILINEAR)
        img_box = np.asarray(img_box, 'float32')

        # _preprocess returns [1, 3, size, size]; the leading axis
        # is dropped by broadcasting on assignment
        img_boxes[i, :, :, :] = _preprocess(img_box)

    return img_boxes
def correct_bboxes(bboxes, width, height):
    """Crop boxes that are too big and get coordinates
    with respect to cutouts.

    The input array is left untouched: clipping is done on copies of
    the coordinate columns.

    Arguments:
        bboxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        width: a float number, width of the image.
        height: a float number, height of the image.

    Returns:
        dy, dx, edy, edx: int numpy arrays of shape [n],
            coordinates of the boxes with respect to the cutouts.
        y, x, ey, ex: int numpy arrays of shape [n],
            corrected ymin, xmin, ymax, xmax.
        h, w: int numpy arrays of shape [n],
            just heights and widths of boxes.

        in the following order:
            [dy, edy, dx, edx, y, ey, x, ex, w, h].
    """
    # 'e' stands for end
    # (x, y) -> (ex, ey)
    # Copy the columns: plain slices are views, and clipping them
    # in place would overwrite the caller's boxes.
    x, y, ex, ey = [bboxes[:, i].copy() for i in range(4)]

    # sizes of the original (unclipped) boxes;
    # the + 1.0 treats xmax/ymax as inclusive coordinates
    w, h = ex - x + 1.0, ey - y + 1.0
    num_boxes = bboxes.shape[0]

    # we need to cut out a box from the image.
    # (x, y, ex, ey) are corrected coordinates of the box
    # in the image.
    # (dx, dy, edx, edy) are coordinates of the box in the cutout
    # from the image.
    dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,))
    edx, edy = w - 1.0, h - 1.0

    # if box's bottom right corner is too far right
    ind = ex > width - 1.0
    edx[ind] = w[ind] + width - 2.0 - ex[ind]
    ex[ind] = width - 1.0

    # if box's bottom right corner is too low
    ind = ey > height - 1.0
    edy[ind] = h[ind] + height - 2.0 - ey[ind]
    ey[ind] = height - 1.0

    # if box's top left corner is too far left
    ind = x < 0.0
    dx[ind] = 0.0 - x[ind]
    x[ind] = 0.0

    # if box's top left corner is too high
    ind = y < 0.0
    dy[ind] = 0.0 - y[ind]
    y[ind] = 0.0

    return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
    return [values.astype('int32') for values in return_list]
def _preprocess(img): | |
"""Preprocessing step before feeding the network. | |
Arguments: | |
img: a float numpy array of shape [h, w, c]. | |
Returns: | |
a float numpy array of shape [1, c, h, w]. | |
""" | |
img = img.transpose((2, 0, 1)) | |
img = np.expand_dims(img, 0) | |
img = (img - 127.5) * 0.0078125 | |
return img | |