File size: 4,243 Bytes
1173b78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import numpy as np
import torch
from torch.autograd import Variable
import sys
sys.path.append("./")
from align.get_nets import PNet, RNet, ONet
from align.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from align.first_stage import run_first_stage
def detect_faces(
image,
min_face_size=20.0,
thresholds=[0.6, 0.7, 0.8],
nms_thresholds=[0.7, 0.7, 0.7],
):
"""
Arguments:
image: an instance of PIL.Image.
min_face_size: a float number.
thresholds: a list of length 3.
nms_thresholds: a list of length 3.
Returns:
two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
bounding boxes and facial landmarks.
"""
# LOAD MODELS
pnet = PNet()
rnet = RNet()
onet = ONet()
onet.eval()
# BUILD AN IMAGE PYRAMID
width, height = image.size
min_length = min(height, width)
min_detection_size = 12
factor = 0.707 # sqrt(0.5)
# scales for scaling the image
scales = []
# scales the image so that
# minimum size that we can detect equals to
# minimum face size that we want to detect
m = min_detection_size / min_face_size
min_length *= m
factor_count = 0
while min_length > min_detection_size:
scales.append(m * factor**factor_count)
min_length *= factor
factor_count += 1
# STAGE 1
# it will be returned
bounding_boxes = []
# run P-Net on different scales
for s in scales:
boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
bounding_boxes.append(boxes)
# collect boxes (and offsets, and scores) from different scales
bounding_boxes = [i for i in bounding_boxes if i is not None]
bounding_boxes = np.vstack(bounding_boxes)
keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
bounding_boxes = bounding_boxes[keep]
# use offsets predicted by pnet to transform bounding boxes
bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
# shape [n_boxes, 5]
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
# STAGE 2
img_boxes = get_image_boxes(bounding_boxes, image, size=24)
img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True)
output = rnet(img_boxes)
offsets = output[0].data.numpy() # shape [n_boxes, 4]
probs = output[1].data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[1])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
offsets = offsets[keep]
keep = nms(bounding_boxes, nms_thresholds[1])
bounding_boxes = bounding_boxes[keep]
bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
# STAGE 3
img_boxes = get_image_boxes(bounding_boxes, image, size=48)
if len(img_boxes) == 0:
return [], []
img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True)
output = onet(img_boxes)
landmarks = output[0].data.numpy() # shape [n_boxes, 10]
offsets = output[1].data.numpy() # shape [n_boxes, 4]
probs = output[2].data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[2])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
offsets = offsets[keep]
landmarks = landmarks[keep]
# compute landmark points
width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
landmarks[:, 0:5] = (
np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
)
landmarks[:, 5:10] = (
np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]
)
bounding_boxes = calibrate_box(bounding_boxes, offsets)
keep = nms(bounding_boxes, nms_thresholds[2], mode="min")
bounding_boxes = bounding_boxes[keep]
landmarks = landmarks[keep]
return bounding_boxes, landmarks
|