import torch
from torch.autograd import Variable
import math
from PIL import Image
import numpy as np
from .box_utils import nms, _preprocess

# Use the first CUDA device when one is available and fall back to the CPU
# otherwise, so that importing and running this module does not require a GPU.
# Kept as a plain string so existing `from ... import device` users see the
# same type as before.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'


def run_first_stage(image, net, scale, threshold):
    """Run P-Net, generate bounding boxes, and do NMS.

    Arguments:
        image: an instance of PIL.Image.
        net: an instance of pytorch's nn.Module, P-Net.
            NOTE(review): it is assumed to live on the module-level
            `device`; confirm against the caller that builds the net.
        scale: a float number,
            scale width and height of the image by this number.
        threshold: a float number,
            threshold on the probability of a face when generating
            bounding boxes from predictions of the net.

    Returns:
        a float numpy array of shape [n_boxes, 9],
            bounding boxes with scores and offsets (4 + 1 + 4),
        or None when no sliding window scores above `threshold`.
    """
    # scale the image and convert it to a float array
    width, height = image.size
    sw, sh = math.ceil(width * scale), math.ceil(height * scale)
    img = image.resize((sw, sh), Image.BILINEAR)
    img = np.asarray(img, 'float32')

    img = torch.FloatTensor(_preprocess(img)).to(device)
    with torch.no_grad():
        output = net(img)
        # probs: probability of a face at each sliding window
        #        (channel 1 of the second output, first batch element)
        # offsets: transformations to true bounding boxes
        probs = output[1].detach().cpu().numpy()[0, 1, :, :]
        offsets = output[0].detach().cpu().numpy()

    boxes = _generate_bboxes(probs, offsets, scale, threshold)
    # `len` (not truthiness) is used on purpose: `boxes` is a numpy array.
    if len(boxes) == 0:
        return None

    # NMS only needs the coordinates and the score (first five columns).
    keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
    return boxes[keep]


def _generate_bboxes(probs, offsets, scale, threshold):
    """Generate bounding boxes at places where there is probably a face.

    Arguments:
        probs: a float numpy array of shape [n, m].
        offsets: a float numpy array of shape [1, 4, n, m].
        scale: a float number,
            width and height of the image were scaled by this number.
        threshold: a float number.

    Returns:
        a float numpy array of shape [n_boxes, 9]; the columns are
        x1, y1, x2, y2, score, tx1, ty1, tx2, ty2.
        When no position exceeds `threshold` the array has shape [0, 9].
    """
    # applying P-Net is equivalent, in some sense, to
    # moving 12x12 window with stride 2
    stride = 2
    cell_size = 12

    # indices of boxes where there is probably a face
    rows, cols = np.where(probs > threshold)
    if rows.size == 0:
        # Keep the documented column count even when nothing was found.
        return np.empty((0, 9))

    # transformations of bounding boxes, one row per (tx1, ty1, tx2, ty2);
    # `offsets[0]` has shape [4, n, m], so this selection has shape [4, n_boxes].
    # they are defined as:
    # w = x2 - x1 + 1
    # h = y2 - y1 + 1
    # x1_true = x1 + tx1*w
    # x2_true = x2 + tx2*w
    # y1_true = y1 + ty1*h
    # y2_true = y2 + ty2*h
    shifts = offsets[0][:, rows, cols]
    score = probs[rows, cols]

    # P-Net is applied to scaled images
    # so we need to rescale bounding boxes back.
    # NOTE(review): the extra 1.0 is presumably inherited from 1-based
    # indexing in the reference implementation -- confirm before changing.
    x1 = np.round((stride * cols + 1.0) / scale)
    y1 = np.round((stride * rows + 1.0) / scale)
    x2 = np.round((stride * cols + 1.0 + cell_size) / scale)
    y2 = np.round((stride * rows + 1.0 + cell_size) / scale)

    bounding_boxes = np.vstack([x1, y1, x2, y2, score, shifts])
    return bounding_boxes.T