import numpy as np from PIL import Image def nms(boxes, overlap_threshold=0.5, mode='union'): """Non-maximum suppression. Arguments: boxes: a float numpy array of shape [n, 5], where each row is (xmin, ymin, xmax, ymax, score). overlap_threshold: a float number. mode: 'union' or 'min'. Returns: list with indices of the selected boxes """ # if there are no boxes, return the empty list if len(boxes) == 0: return [] # list of picked indices pick = [] # grab the coordinates of the bounding boxes x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0) ids = np.argsort(score) # in increasing order while len(ids) > 0: # grab index of the largest value last = len(ids) - 1 i = ids[last] pick.append(i) # compute intersections # of the box with the largest score # with the rest of boxes # left top corner of intersection boxes ix1 = np.maximum(x1[i], x1[ids[:last]]) iy1 = np.maximum(y1[i], y1[ids[:last]]) # right bottom corner of intersection boxes ix2 = np.minimum(x2[i], x2[ids[:last]]) iy2 = np.minimum(y2[i], y2[ids[:last]]) # width and height of intersection boxes w = np.maximum(0.0, ix2 - ix1 + 1.0) h = np.maximum(0.0, iy2 - iy1 + 1.0) # intersections' areas inter = w * h if mode == 'min': overlap = inter / np.minimum(area[i], area[ids[:last]]) elif mode == 'union': # intersection over union (IoU) overlap = inter / (area[i] + area[ids[:last]] - inter) # delete all boxes where overlap is too big ids = np.delete( ids, np.concatenate([[last], np.where(overlap > overlap_threshold)[0]]) ) return pick def convert_to_square(bboxes): """Convert bounding boxes to a square form. Arguments: bboxes: a float numpy array of shape [n, 5]. Returns: a float numpy array of shape [n, 5], squared bounding boxes. """ square_bboxes = np.zeros_like(bboxes) x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] h = y2 - y1 + 1.0 w = x2 - x1 + 1.0 max_side = np.maximum(h, w) square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5 square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5 square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 return square_bboxes def calibrate_box(bboxes, offsets): """Transform bounding boxes to be more like true bounding boxes. 'offsets' is one of the outputs of the nets. Arguments: bboxes: a float numpy array of shape [n, 5]. offsets: a float numpy array of shape [n, 4]. Returns: a float numpy array of shape [n, 5]. """ x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] w = x2 - x1 + 1.0 h = y2 - y1 + 1.0 w = np.expand_dims(w, 1) h = np.expand_dims(h, 1) # this is what happening here: # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] # x1_true = x1 + tx1*w # y1_true = y1 + ty1*h # x2_true = x2 + tx2*w # y2_true = y2 + ty2*h # below is just more compact form of this # are offsets always such that # x1 < x2 and y1 < y2 ? translation = np.hstack([w, h, w, h]) * offsets bboxes[:, 0:4] = bboxes[:, 0:4] + translation return bboxes def get_image_boxes(bounding_boxes, img, size=24): """Cut out boxes from the image. Arguments: bounding_boxes: a float numpy array of shape [n, 5]. img: an instance of PIL.Image. size: an integer, size of cutouts. Returns: a float numpy array of shape [n, 3, size, size]. """ num_boxes = len(bounding_boxes) width, height = img.size [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height) img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') for i in range(num_boxes): img_box = np.zeros((h[i], w[i], 3), 'uint8') img_array = np.asarray(img, 'uint8') img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \ img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] # resize img_box = Image.fromarray(img_box) img_box = img_box.resize((size, size), Image.BILINEAR) img_box = np.asarray(img_box, 'float32') img_boxes[i, :, :, :] = _preprocess(img_box) return img_boxes def correct_bboxes(bboxes, width, height): """Crop boxes that are too big and get coordinates with respect to cutouts. Arguments: bboxes: a float numpy array of shape [n, 5], where each row is (xmin, ymin, xmax, ymax, score). width: a float number. height: a float number. Returns: dy, dx, edy, edx: a int numpy arrays of shape [n], coordinates of the boxes with respect to the cutouts. y, x, ey, ex: a int numpy arrays of shape [n], corrected ymin, xmin, ymax, xmax. h, w: a int numpy arrays of shape [n], just heights and widths of boxes. in the following order: [dy, edy, dx, edx, y, ey, x, ex, w, h]. """ x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 num_boxes = bboxes.shape[0] # 'e' stands for end # (x, y) -> (ex, ey) x, y, ex, ey = x1, y1, x2, y2 # we need to cut out a box from the image. # (x, y, ex, ey) are corrected coordinates of the box # in the image. # (dx, dy, edx, edy) are coordinates of the box in the cutout # from the image. dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,)) edx, edy = w.copy() - 1.0, h.copy() - 1.0 # if box's bottom right corner is too far right ind = np.where(ex > width - 1.0)[0] edx[ind] = w[ind] + width - 2.0 - ex[ind] ex[ind] = width - 1.0 # if box's bottom right corner is too low ind = np.where(ey > height - 1.0)[0] edy[ind] = h[ind] + height - 2.0 - ey[ind] ey[ind] = height - 1.0 # if box's top left corner is too far left ind = np.where(x < 0.0)[0] dx[ind] = 0.0 - x[ind] x[ind] = 0.0 # if box's top left corner is too high ind = np.where(y < 0.0)[0] dy[ind] = 0.0 - y[ind] y[ind] = 0.0 return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] return_list = [i.astype('int32') for i in return_list] return return_list def _preprocess(img): """Preprocessing step before feeding the network. Arguments: img: a float numpy array of shape [h, w, c]. Returns: a float numpy array of shape [1, c, h, w]. """ img = img.transpose((2, 0, 1)) img = np.expand_dims(img, 0) img = (img - 127.5) * 0.0078125 return img