import os
import numpy as np
import math
import cv2
import onnxruntime
import time
import queue
import threading
import copy
from .retinaface import RetinaFaceDetector
from .remedian import remedian
def resolve(name):
    """Return the path of `name` inside the directory that holds this file."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, name)
def clamp_to_im(pt, w, h):
    """Clamp point `pt` to a `w` x `h` image and return integer coordinates.

    Each coordinate is limited to ``[0, size - 1]``. The returned y value is
    the clamped y plus one, so it can reach `h`.
    """
    def _bound(value, size):
        # Same two-step clamp for either axis: floor at 0, cap at size - 1.
        if value < 0:
            value = 0
        if value >= size:
            value = size - 1
        return value

    col = _bound(pt[0], w)
    row = _bound(pt[1], h)
    return (int(col), int(row + 1))
def rotate(origin, point, a):
    """Rotate `point` around `origin` by the angle ``-a`` (radians).

    Returns the rotated point as an ``(x, y)`` tuple.
    """
    theta = -a
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    ox, oy = origin
    px, py = point
    dx, dy = px - ox, py - oy
    return ox + cos_t * dx - sin_t * dy, oy + sin_t * dx + cos_t * dy
def angle(p1, p2):
    """Return the direction from `p1` to `p2` as an angle in ``[0, 2*pi)``."""
    delta = np.array(p2) - np.array(p1)
    # Reversing puts the y component first, which is the order arctan2 expects.
    heading = np.arctan2(*delta[::-1])
    full_turn = 2 * np.pi
    return heading % full_turn
def compensate(p1, p2):
    """Rotate `p2` around `p1` by the angle between them.

    Returns ``(rotated_p2, angle)``, where the angle is the value produced by
    ``angle(p1, p2)`` and the rotation is done with ``rotate``.
    """
    tilt = angle(p1, p2)
    levelled = rotate(p1, p2, tilt)
    return levelled, tilt
def rotate_image(image, a, center):
    """Rotate `image` by `a` radians around `center`, keeping the image size."""
    height, width = image.shape[:2]
    degrees = np.rad2deg(a)
    pivot = (float(center[0]), float(center[1]))
    matrix = cv2.getRotationMatrix2D(pivot, degrees, 1.0)
    return cv2.warpAffine(image, matrix, (width, height))
def intersects(r1, r2, amount=0.3):
    """Return True when two rectangles overlap by at least `amount`.

    Both rectangles are ``(x, y, width, height)`` sequences. The overlap is
    measured as intersection area divided by union area.

    Two rectangles whose union area is not positive (for example two
    zero-sized rectangles) are reported as non-intersecting instead of
    dividing by zero.
    """
    r1_x1, r1_y1, r1_w, r1_h = r1
    r2_x1, r2_y1, r2_w, r2_h = r2

    left = max(r1_x1, r2_x1)
    right = min(r1_x1 + r1_w, r2_x1 + r2_w)
    top = max(r1_y1, r2_y1)
    bottom = min(r1_y1 + r1_h, r2_y1 + r2_h)

    inter = 0.0
    total = r1_w * r1_h + r2_w * r2_h
    if left < right and top < bottom:
        inter = (right - left) * (bottom - top)
        # The overlap was counted in both areas; remove one copy.
        total -= inter

    if total <= 0:
        return False
    return bool(inter / total >= amount)
def group_rects(rects):
    """Assign a group id to every rectangle, sharing ids between overlapping ones.

    Returns a dict keyed by ``str(rect)``. Each value is a list
    ``[group, -1, []]`` where `group` is the id assigned to that rectangle.
    A rectangle that has not been claimed by an earlier one gets its own
    index as group id; every rectangle that `intersects` it is then given
    the same id.
    """
    rect_groups = {str(rect): [-1, -1, []] for rect in rects}
    for i, rect in enumerate(rects):
        name = str(rect)
        if rect_groups[name][0] < 0:
            # Not claimed by an earlier rectangle: open a new group.
            rect_groups[name] = [i, -1, []]
        group = rect_groups[name][0]
        for j, other_rect in enumerate(rects):
            if i == j:
                continue
            if intersects(rect, other_rect):
                rect_groups[str(other_rect)] = [group, -1, []]
    return rect_groups
def logit(p, factor=16.0):
    """Return ``log(p / (1 - p)) / factor`` for a scalar probability `p`.

    Values of `p` at or above 1 are replaced by 0.9999999 and values at or
    below 0 by 0.0000001 so the logarithm stays finite.
    """
    if p >= 1.0:
        prob = 0.9999999
    elif p <= 0.0:
        prob = 0.0000001
    else:
        prob = p
    odds = prob / (1 - prob)
    return float(np.log(odds)) / float(factor)
def logit_arr(p, factor=16.0):
    """Element-wise ``log(p / (1 - p)) / factor`` with `p` clipped to (0, 1).

    `p` is clipped to ``[0.0000001, 0.9999999]`` before the logarithm.
    """
    clipped = np.clip(p, 0.0000001, 0.9999999)
    odds = clipped / (1 - clipped)
    return np.log(odds) / float(factor)
def matrix_to_quaternion(m):
    """Convert a 3x3 rotation matrix to a quaternion ``[x, y, z, w]``.

    `m` must support two-index access (``m[i, j]``). One of four
    formulas is chosen from the signs and relative sizes of the diagonal
    so that `t`, the value under the square root, is the largest available
    candidate. Exactly one branch runs per call.
    """
    if m[2, 2] < 0:
        if m[0, 0] > m[1, 1]:
            t = 1 + m[0, 0] - m[1, 1] - m[2, 2]
            q = [t, m[0, 1] + m[1, 0], m[2, 0] + m[0, 2], m[1, 2] - m[2, 1]]
        else:
            t = 1 - m[0, 0] + m[1, 1] - m[2, 2]
            q = [m[0, 1] + m[1, 0], t, m[1, 2] + m[2, 1], m[2, 0] - m[0, 2]]
    else:
        if m[0, 0] < -m[1, 1]:
            t = 1 - m[0, 0] - m[1, 1] + m[2, 2]
            q = [m[2, 0] + m[0, 2], m[1, 2] + m[2, 1], t, m[0, 1] - m[1, 0]]
        else:
            t = 1 + m[0, 0] + m[1, 1] + m[2, 2]
            q = [m[1, 2] - m[2, 1], m[2, 0] - m[0, 2], m[0, 1] - m[1, 0], t]
    # Scale so the component that holds t becomes sqrt(t) / 2.
    q = np.array(q, np.float32) * 0.5 / np.sqrt(t)
    return q
def worker_thread(session, frame, input, crop_info, queue, input_name, idx, tracker):
# Processes one face crop: decodes landmarks from a model output through
# `tracker.landmarks`, optionally computes the eye state, and posts a tuple
# (session, conf, (lms, eye_state), crop_info, idx) on `queue`.
# NOTE(review): the right-hand side of the next statement is truncated; the
# callee that receives `[]` and `{input_name: input}` is missing. Presumably
# an inference call on `session`; confirm against the upstream file.
output =[], {input_name: input})[0]
conf, lms = tracker.landmarks(output[0], crop_info)
# Only faces whose confidence exceeds the tracker threshold get an eye state.
if conf > tracker.threshold:
eye_state = tracker.get_eye_state(frame, lms, single=True)
# NOTE(review): as written this second assignment always replaces the value
# computed above; it looks like a fallback whose guarding statement was lost
# (TODO confirm).
eye_state = [(1.0, 0.0, 0.0, 0.0), (1.0, 0.0, 0.0, 0.0)]
queue.put((session, conf, (lms, eye_state), crop_info, idx))
class Feature():
    """Adaptive normaliser for one scalar facial measurement.

    A running median (``remedian``) acts as the neutral value, and the
    smallest and largest values seen so far define the range. `update`
    returns a smoothed value between -1 and 1 relative to that range.
    """

    def __init__(self, threshold=0.15, alpha=0.2, hard_factor=0.15, decay=0.001, max_feature_updates=0):
        """Create an uncalibrated feature.

        threshold: relative deviation from the median a value needs before
            it is accepted as the first minimum or maximum.
        alpha: weight of the previous filtered value in `update`.
        hard_factor: fraction of the distance between an extreme and the
            median used to place `hard_min` / `hard_max`.
        decay: blend factor that pulls `min` / `max` toward `hard_min` /
            `hard_max` on each calibrating update.
        max_feature_updates: when greater than 0, calibration stops once
            ``now - first_seen`` reaches this value; 0 never stops.
        """
        self.median = remedian()
        self.min = None
        self.max = None
        self.hard_min = None
        self.hard_max = None
        self.threshold = threshold
        self.alpha = alpha
        self.hard_factor = hard_factor
        self.decay = decay
        self.last = 0
        self.current_median = 0
        self.update_count = 0
        self.max_feature_updates = max_feature_updates
        self.first_seen = -1
        self.updating = True

    def update(self, x, now=0):
        """Feed the raw value `x` and return the exponentially smoothed result.

        `now` is a timestamp; the first one seen is remembered when a
        calibration limit (`max_feature_updates`) is active.
        """
        if self.max_feature_updates > 0 and self.first_seen == -1:
            self.first_seen = now
        new = self.update_state(x, now=now)
        filtered = self.last * self.alpha + new * (1 - self.alpha)
        self.last = filtered
        return filtered

    def update_state(self, x, now=0):
        """Update the calibration with `x` and return its unsmoothed position.

        Returns -1 or 1 when `x` sets or exceeds the current minimum or
        maximum, 0 when no range is known yet or `x` equals the median,
        and otherwise the position of `x` between the median and the
        relevant extreme.
        """
        updating = self.updating and (self.max_feature_updates == 0 or now - self.first_seen < self.max_feature_updates)
        if updating:
            # remedian accumulates samples through its `+` operator.
            self.median + x
            self.current_median = self.median.median()
        else:
            # Calibration window is over; latch it off permanently.
            self.updating = False
        median = self.current_median

        if self.min is None:
            if x < median and (median - x) / median > self.threshold:
                if updating:
                    self.min = x
                    self.hard_min = self.min + self.hard_factor * (median - self.min)
                return -1
            return 0
        if x < self.min:
            if updating:
                self.min = x
                self.hard_min = self.min + self.hard_factor * (median - self.min)
            return -1

        if self.max is None:
            if x > median and (x - median) / median > self.threshold:
                if updating:
                    self.max = x
                    self.hard_max = self.max - self.hard_factor * (self.max - median)
                return 1
            return 0
        if x > self.max:
            if updating:
                self.max = x
                self.hard_max = self.max - self.hard_factor * (self.max - median)
            return 1

        if updating:
            # Slowly relax the extremes toward their "hard" limits.
            if self.min < self.hard_min:
                self.min = self.hard_min * self.decay + self.min * (1 - self.decay)
            if self.max > self.hard_max:
                self.max = self.hard_max * self.decay + self.max * (1 - self.decay)

        if x < median:
            return - (1 - (x - self.min) / (median - self.min))
        elif x > median:
            return (x - median) / (self.max - median)

        return 0
class FeatureExtractor():
    """Derive named expression features from a set of face landmarks.

    Each feature is tracked by its own `Feature` instance so it is
    normalised against its own running range.
    """

    def __init__(self, max_feature_updates=0):
        """Create one `Feature` per output; `max_feature_updates` is passed through."""
        self.eye_l = Feature(max_feature_updates=max_feature_updates)
        self.eye_r = Feature(max_feature_updates=max_feature_updates)
        self.eyebrow_updown_l = Feature(max_feature_updates=max_feature_updates)
        self.eyebrow_updown_r = Feature(max_feature_updates=max_feature_updates)
        self.eyebrow_quirk_l = Feature(threshold=0.05, max_feature_updates=max_feature_updates)
        self.eyebrow_quirk_r = Feature(threshold=0.05, max_feature_updates=max_feature_updates)
        self.eyebrow_steepness_l = Feature(threshold=0.05, max_feature_updates=max_feature_updates)
        self.eyebrow_steepness_r = Feature(threshold=0.05, max_feature_updates=max_feature_updates)
        self.mouth_corner_updown_l = Feature(max_feature_updates=max_feature_updates)
        self.mouth_corner_updown_r = Feature(max_feature_updates=max_feature_updates)
        self.mouth_corner_inout_l = Feature(threshold=0.02, max_feature_updates=max_feature_updates)
        self.mouth_corner_inout_r = Feature(threshold=0.02, max_feature_updates=max_feature_updates)
        self.mouth_open = Feature(max_feature_updates=max_feature_updates)
        self.mouth_wide = Feature(threshold=0.02, max_feature_updates=max_feature_updates)

    def align_points(self, a, b, pts):
        """Rotate `pts` around `a` by an angle derived from the line a -> b.

        Returns ``(alpha, aligned_pts)`` with `alpha` in radians and
        `aligned_pts` an array holding the rotated points.
        """
        a = tuple(a)
        b = tuple(b)
        alpha = angle(a, b)
        alpha = np.rad2deg(alpha)
        if alpha >= 90:
            alpha = - (alpha - 180)
        if alpha <= -90:
            alpha = - (alpha + 180)
        alpha = np.deg2rad(alpha)
        aligned_pts = [np.array(rotate(a, pt, alpha)) for pt in pts]
        return alpha, np.array(aligned_pts)

    def update(self, pts, full=True):
        """Compute all features for the landmark array `pts`.

        `pts` is indexed as ``pts[landmark, axis]`` with axis 0 and 1.
        When `full` is False the eyebrow steepness/quirk and mouth corner
        in/out features are not computed and are reported as 0.
        Returns a dict mapping feature name to value.
        """
        features = {}
        now = time.perf_counter()

        # Reference distances used to make the measurements scale independent.
        norm_distance_x = np.mean([pts[0, 0] - pts[16, 0], pts[1, 0] - pts[15, 0]])
        norm_distance_y = np.mean([pts[27, 1] - pts[28, 1], pts[28, 1] - pts[29, 1], pts[29, 1] - pts[30, 1]])

        a1, f_pts = self.align_points(pts[42], pts[45], pts[[43, 44, 47, 46]])
        f = abs((np.mean([f_pts[0,1], f_pts[1,1]]) - np.mean([f_pts[2,1], f_pts[3,1]])) / norm_distance_y)
        features["eye_l"] = self.eye_l.update(f, now)

        a2, f_pts = self.align_points(pts[36], pts[39], pts[[37, 38, 41, 40]])
        f = abs((np.mean([f_pts[0,1], f_pts[1,1]]) - np.mean([f_pts[2,1], f_pts[3,1]])) / norm_distance_y)
        features["eye_r"] = self.eye_r.update(f, now)

        if full:
            a3, _ = self.align_points(pts[0], pts[16], [])
            a4, _ = self.align_points(pts[31], pts[35], [])
            norm_angle = np.mean(list(map(np.rad2deg, [a1, a2, a3, a4])))

            a, f_pts = self.align_points(pts[22], pts[26], pts[[22, 23, 24, 25, 26]])
            features["eyebrow_steepness_l"] = self.eyebrow_steepness_l.update(-np.rad2deg(a) - norm_angle, now)
            f = np.max(np.abs(np.array(f_pts[1:4]) - f_pts[0, 1])) / norm_distance_y
            features["eyebrow_quirk_l"] = self.eyebrow_quirk_l.update(f, now)

            a, f_pts = self.align_points(pts[17], pts[21], pts[[17, 18, 19, 20, 21]])
            features["eyebrow_steepness_r"] = self.eyebrow_steepness_r.update(np.rad2deg(a) - norm_angle, now)
            f = np.max(np.abs(np.array(f_pts[1:4]) - f_pts[0, 1])) / norm_distance_y
            features["eyebrow_quirk_r"] = self.eyebrow_quirk_r.update(f, now)
        else:
            features["eyebrow_steepness_l"] = 0.
            features["eyebrow_steepness_r"] = 0.
            features["eyebrow_quirk_l"] = 0.
            features["eyebrow_quirk_r"] = 0.

        f = (np.mean([pts[22, 1], pts[26, 1]]) - pts[27, 1]) / norm_distance_y
        features["eyebrow_updown_l"] = self.eyebrow_updown_l.update(f, now)

        f = (np.mean([pts[17, 1], pts[21, 1]]) - pts[27, 1]) / norm_distance_y
        features["eyebrow_updown_r"] = self.eyebrow_updown_r.update(f, now)

        upper_mouth_line = np.mean([pts[49, 1], pts[50, 1], pts[51, 1]])
        center_line = np.mean([pts[50, 0], pts[60, 0], pts[27, 0], pts[30, 0], pts[64, 0], pts[55, 0]])

        f = (upper_mouth_line - pts[62, 1]) / norm_distance_y
        features["mouth_corner_updown_l"] = self.mouth_corner_updown_l.update(f, now)
        if full:
            f = abs(center_line - pts[62, 0]) / norm_distance_x
            features["mouth_corner_inout_l"] = self.mouth_corner_inout_l.update(f, now)
        else:
            features["mouth_corner_inout_l"] = 0.

        f = (upper_mouth_line - pts[58, 1]) / norm_distance_y
        features["mouth_corner_updown_r"] = self.mouth_corner_updown_r.update(f, now)
        if full:
            f = abs(center_line - pts[58, 0]) / norm_distance_x
            features["mouth_corner_inout_r"] = self.mouth_corner_inout_r.update(f, now)
        else:
            features["mouth_corner_inout_r"] = 0.

        f = abs(np.mean(pts[[59,60,61], 1], axis=0) - np.mean(pts[[63,64,65], 1], axis=0)) / norm_distance_y
        features["mouth_open"] = self.mouth_open.update(f, now)

        f = abs(pts[58, 0] - pts[62, 0]) / norm_distance_x
        features["mouth_wide"] = self.mouth_wide.update(f, now)

        return features
class FaceInfo():
# Per-face state for a tracker: latest landmarks, pose, a private copy of the
# 3D reference model (`face_3d`) and the derived expression features.
# NOTE(review): the constructor header below is fused with a truncated
# assignment (`= id` has no target). Presumably the id is stored on the
# instance; confirm against the upstream file.
def __init__(self, id, tracker): = id
self.frame_count = -1
self.tracker = tracker
# Landmark indices used as the contour subset (see update_contour).
self.contour_pts = [0,1,8,15,16,27,28,29,30,31,32,33,34,35]
# Copy so per-face adjustments do not modify the tracker's reference model.
self.face_3d = copy.copy(self.tracker.face_3d)
# A different contour subset is used for model_type -1.
if self.tracker.model_type == -1:
self.contour_pts = [0,2,8,14,16,27,30,33]
self.alive = False
self.coord = None
# Reference extents of the tracker's model, used by normalize_pts3d.
self.base_scale_v = self.tracker.face_3d[27:30, 1] - self.tracker.face_3d[28:31, 1]
self.base_scale_h = np.abs(self.tracker.face_3d[[0, 36, 42], 0] - self.tracker.face_3d[[16, 39, 45], 0])
# Limits on how far adjust_3d may keep changing individual points.
self.limit_3d_adjustment = True
self.update_count_delta = 75.
self.update_count_max = 7500.
if self.tracker.max_feature_updates > 0:
self.features = FeatureExtractor(self.tracker.max_feature_updates)
def reset(self):
# Clear all per-frame results and restart the adjustment bookkeeping.
self.alive = False
self.conf = None
self.lms = None
self.eye_state = None
self.rotation = None
self.translation = None
self.success = None
self.quaternion = None
self.euler = None
# NOTE(review): pnp_error is set to None here and to 0 a few lines below.
self.pnp_error = None
self.pts_3d = None
self.eye_blink = None
self.bbox = None
self.pnp_error = 0
# Without a calibration limit, the feature extractor is recreated on reset.
if self.tracker.max_feature_updates < 1:
self.features = FeatureExtractor(0)
self.current_features = {}
self.contour = np.zeros((21,3))
self.update_counts = np.zeros((66,2))
self.fail_count = 0
def update(self, result, coord, frame_count):
# Store a new detection result for this face.
self.frame_count = frame_count
# NOTE(review): as written, the `None` check is followed directly by code
# that unpacks `result`; the body of this `if` (and presumably an `else`)
# appears to be missing. TODO confirm.
if result is None:
self.conf, (self.lms, self.eye_state) = result
self.coord = coord
self.alive = True
def update_contour(self):
# Refresh `contour` from the current 3D model using the contour indices.
self.contour = np.array(self.face_3d[self.contour_pts])
def normalize_pts3d(self, pts_3d):
# Normalise `pts_3d` in place: translate, rotate in the x/y plane, then scale
# both axes against the reference extents. Returns the same array.
# Calculate angle using nose
pts_3d[:, 0:2] -= pts_3d[30, 0:2]
alpha = angle(pts_3d[30, 0:2], pts_3d[27, 0:2])
alpha -= np.deg2rad(90)
# 2D rotation matrix for `alpha`.
R = np.matrix([[np.cos(alpha), -np.sin(alpha)], [np.sin(alpha), np.cos(alpha)]])
pts_3d[:, 0:2] = (pts_3d - pts_3d[30])[:, 0:2].dot(R) + pts_3d[30, 0:2]
# Vertical scale
pts_3d[:, 1] /= np.mean((pts_3d[27:30, 1] - pts_3d[28:31, 1]) / self.base_scale_v)
# Horizontal scale
pts_3d[:, 0] /= np.mean(np.abs(pts_3d[[0, 36, 42], 0] - pts_3d[[16, 39, 45], 0]) / self.base_scale_h)
return pts_3d
def adjust_3d(self):
# Randomly perturbs the per-face 3D model, keeps perturbations that project
# closer to the detected landmarks, then normalises pts_3d and updates the
# expression features.
# NOTE(review): several conditionals in this method have no visible body
# (the first `if` below, the euler checks, `eligible.shape[0] <= 0`,
# `if not changed:`), and the two cv2.projectPoints calls contain `,,`
# where an argument is missing. Control-flow statements and an argument
# appear to have been lost; confirm against the upstream file before
# relying on this method.
if self.conf < 0.4 or self.pnp_error > 300:
if self.tracker.model_type != -1 and not self.tracker.static_model:
max_runs = 1
# Every point except index 30 may be adjusted by default.
eligible = np.delete(np.arange(0, 66), [30])
changed_any = False
update_type = -1
d_o = np.ones((66,))
d_c = np.ones((66,))
for runs in range(max_runs):
# Per-coordinate scale factors in [0.99, 1.01); point 30 is never scaled.
r = 1.0 + np.random.random_sample((66,3)) * 0.02 - 0.01
r[30, :] = 1.0
if self.euler[0] > -165 and self.euler[0] < 145:
elif self.euler[1] > -10 and self.euler[1] < 20:
r[:, 2] = 1.0
update_type = 0
r[:, 0:2] = 1.0
if self.euler[2] > 120 or self.euler[2] < 60:
# Enable only one side of the points, depending on direction
elif self.euler[1] < -10:
update_type = 1
r[[0, 1, 2, 3, 4, 5, 6, 7, 17, 18, 19, 20, 21, 31, 32, 36, 37, 38, 39, 40, 41, 48, 49, 56, 57, 58, 59, 65], 2] = 1.0
eligible = [8, 9, 10, 11, 12, 13, 14, 15, 16, 22, 23, 24, 25, 26, 27, 28, 29, 33, 34, 35, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 60, 61, 62, 63, 64]
update_type = 1
r[[9, 10, 11, 12, 13, 14, 15, 16, 22, 23, 24, 25, 26, 34, 35, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 61, 62, 63], 2] = 1.0
eligible = [0, 1, 2, 3, 4, 5, 6, 7, 8, 17, 18, 19, 20, 21, 27, 28, 29, 31, 32, 33, 36, 37, 38, 39, 40, 41, 48, 49, 50, 55, 56, 57, 58, 59, 60, 64, 65]
# With the limit enabled, only points whose count for this update type is less
# than the other type's count plus update_count_delta stay eligible.
if self.limit_3d_adjustment:
eligible = np.nonzero(self.update_counts[:, update_type] < self.update_counts[:, abs(update_type - 1)] + self.update_count_delta)[0]
if eligible.shape[0] <= 0:
if runs == 0:
updated = copy.copy(self.face_3d[0:66])
o_projected = np.ones((66,2))
o_projected[eligible] = np.squeeze(np.array(cv2.projectPoints(self.face_3d[eligible], self.rotation, self.translation,, self.tracker.dist_coeffs)[0]), 1)
# Candidate model: current points scaled by the random factors.
c = updated * r
c_projected = np.zeros((66,2))
c_projected[eligible] = np.squeeze(np.array(cv2.projectPoints(c[eligible], self.rotation, self.translation,, self.tracker.dist_coeffs)[0]), 1)
changed = False
# Distances between the projections and the detected landmarks.
d_o[eligible] = np.linalg.norm(o_projected[eligible] - self.lms[eligible, 0:2], axis=1)
d_c[eligible] = np.linalg.norm(c_projected[eligible] - self.lms[eligible, 0:2], axis=1)
# Points where the candidate projects closer than the current model.
indices = np.nonzero(d_c < d_o)[0]
if indices.shape[0] > 0:
if self.limit_3d_adjustment:
indices = np.intersect1d(indices, eligible)
if indices.shape[0] > 0:
self.update_counts[indices, update_type] += 1
updated[indices] = c[indices]
o_projected[indices] = c_projected[indices]
changed = True
changed_any = changed_any or changed
if not changed:
if changed_any:
# Update weighted by point confidence
weights = np.zeros((66,3))
weights[:, :] = self.lms[0:66, 2:3]
weights[weights > 0.7] = 1.0
weights = 1.0 - weights
update_indices = np.arange(0, 66)
if self.limit_3d_adjustment:
update_indices = np.nonzero(self.update_counts[:, update_type] <= self.update_count_max)[0]
self.face_3d[update_indices] = self.face_3d[update_indices] * weights[update_indices] + updated[update_indices] * (1. - weights[update_indices])
self.pts_3d = self.normalize_pts3d(self.pts_3d)
# feature_level 2 computes the full feature set, level 1 the reduced set
# (FeatureExtractor.update is called with full=False).
if self.tracker.feature_level == 2:
self.current_features = self.features.update(self.pts_3d[:, 0:2])
self.eye_blink = []
# Each entry is 1 minus the negated feature value clamped to [0, 1].
self.eye_blink.append(1 - min(max(0, -self.current_features["eye_r"]), 1))
self.eye_blink.append(1 - min(max(0, -self.current_features["eye_l"]), 1))
elif self.tracker.feature_level == 1:
self.current_features = self.features.update(self.pts_3d[:, 0:2], False)
self.eye_blink = []
self.eye_blink.append(1 - min(max(0, -self.current_features["eye_r"]), 1))
self.eye_blink.append(1 - min(max(0, -self.current_features["eye_l"]), 1))
def get_model_base_path(model_dir):
    """Return the directory that holds the model files.

    An explicit `model_dir` is returned unchanged. When it is None, the
    ``models`` directory next to this file is used, falling back to
    ``../models`` when the former does not exist.
    """
    if model_dir is not None:
        return model_dir
    model_base_path = resolve(os.path.join("models"))
    if not os.path.exists(model_base_path):
        model_base_path = resolve(os.path.join("..", "models"))
    return model_base_path
class Tracker():
def __init__(self, width, height, model_type=3, detection_threshold=0.6, threshold=None, max_faces=1, discard_after=5, scan_every=3, bbox_growth=0.0, max_threads=4, silent=False, model_dir=None, no_gaze=False, use_retinaface=False, max_feature_updates=0, static_model=False, feature_level=2, try_hard=False):
# Builds all ONNX inference sessions, the image normalisation constants, the
# 3D reference face model and the per-face state objects.
# NOTE(review): several statements in this constructor are visibly truncated
# (the `self.models` list, the assignment fused onto the end of the face_3d
# array, and the `np.linalg.inv(` call); see the notes at each spot.
options = onnxruntime.SessionOptions()
options.inter_op_num_threads = 1
options.intra_op_num_threads = min(max_threads,4)
options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
options.log_severity_level = 3
self.model_type = model_type
# NOTE(review): the list literal below is opened but its entries and closing
# bracket are not present; it is indexed by model_type further down.
self.models = [
# Landmark model file selection by model_type.
model = "lm_modelT_opt.onnx"
if model_type >= 0:
model = self.models[self.model_type]
if model_type == -2:
model = "lm_modelV_opt.onnx"
if model_type == -3:
model = "lm_modelU_opt.onnx"
model_base_path = get_model_base_path(model_dir)
# Default landmark confidence threshold when none was given.
if threshold is None:
threshold = 0.6
if model_type < 0:
threshold = 0.87
self.retinaface = RetinaFaceDetector(model_path=os.path.join(model_base_path, "retinaface_640x640_opt.onnx"), json_path=os.path.join(model_base_path, "priorbox_640x640.json"), threads=max(max_threads,4), top_k=max_faces, res=(640, 640))
self.retinaface_scan = RetinaFaceDetector(model_path=os.path.join(model_base_path, "retinaface_640x640_opt.onnx"), json_path=os.path.join(model_base_path, "priorbox_640x640.json"), threads=2, top_k=max_faces, res=(640, 640))
self.use_retinaface = use_retinaface
# Single face instance with multiple threads
self.session = onnxruntime.InferenceSession(os.path.join(model_base_path, model), sess_options=options)
# Multiple faces with single threads
self.sessions = []
self.max_workers = max(min(max_threads, max_faces), 1)
extra_threads = max_threads % self.max_workers
for i in range(self.max_workers):
options = onnxruntime.SessionOptions()
options.inter_op_num_threads = 1
# NOTE(review): min(max(n, 4), 1) evaluates to 1 for any integer n, so the
# division result is never used; the bounds may be swapped. TODO confirm.
options.intra_op_num_threads = min(max(max_threads // self.max_workers, 4), 1)
if options.intra_op_num_threads < 1:
options.intra_op_num_threads = 1
elif i < extra_threads:
options.intra_op_num_threads += 1
options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
self.sessions.append(onnxruntime.InferenceSession(os.path.join(model_base_path, model), sess_options=options))
self.input_name = self.session.get_inputs()[0].name
# Sessions for the gaze model and the face detection model.
options = onnxruntime.SessionOptions()
options.inter_op_num_threads = 1
options.intra_op_num_threads = max(max_threads,4)
options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
options.log_severity_level = 3
self.gaze_model = onnxruntime.InferenceSession(os.path.join(model_base_path, "mnv3_gaze32_split_opt.onnx"), sess_options=options)
options.intra_op_num_threads = 1
self.gaze_model_single = onnxruntime.InferenceSession(os.path.join(model_base_path, "mnv3_gaze32_split_opt.onnx"), sess_options=options)
self.detection = onnxruntime.InferenceSession(os.path.join(model_base_path, "mnv3_detection_opt.onnx"), sess_options=options)
self.faces = []
# Image normalization constants
# After the lines below, `pixel * std + mean` equals
# (pixel / 255 - original_mean) / original_std.
self.mean = np.float32(np.array([0.485, 0.456, 0.406]))
self.std = np.float32(np.array([0.229, 0.224, 0.225]))
self.mean = self.mean / self.std
self.std = self.std * 255.0
self.mean = - self.mean
self.std = 1.0 / self.std
# Pre-tiled versions for the 32x32 and 224x224 input sizes.
self.mean_32 = np.tile(self.mean, [32, 32, 1])
self.std_32 = np.tile(self.std, [32, 32, 1])
self.mean_224 = np.tile(self.mean, [224, 224, 1])
self.std_224 = np.tile(self.std, [224, 224, 1])
# PnP solving
# 3D reference model: 66 landmark points followed by 4 eye-related points.
self.face_3d = np.array([
[ 0.4551769692672 , 0.300895790030204, -0.764429433974752],
[ 0.448998827123556, 0.166995837790733, -0.765143004071253],
[ 0.437431554952677, 0.022655479179981, -0.739267175112735],
[ 0.415033422928434, -0.088941454648772, -0.747947437846473],
[ 0.389123587370091, -0.232380029794684, -0.704788385327458],
[ 0.334630113904382, -0.361265387599081, -0.615587579236862],
[ 0.263725112132858, -0.460009725616771, -0.491479221041573],
[ 0.16241621322721 , -0.558037146073869, -0.339445180872282],
[ 0. , -0.621079019321682, -0.287294770748887],
[-0.16241621322721 , -0.558037146073869, -0.339445180872282],
[-0.263725112132858, -0.460009725616771, -0.491479221041573],
[-0.334630113904382, -0.361265387599081, -0.615587579236862],
[-0.389123587370091, -0.232380029794684, -0.704788385327458],
[-0.415033422928434, -0.088941454648772, -0.747947437846473],
[-0.437431554952677, 0.022655479179981, -0.739267175112735],
[-0.448998827123556, 0.166995837790733, -0.765143004071253],
[-0.4551769692672 , 0.300895790030204, -0.764429433974752],
[ 0.385529968662985, 0.402800553948697, -0.310031082540741],
[ 0.322196658344302, 0.464439136821772, -0.250558059367669],
[ 0.25409760441282 , 0.46420381416882 , -0.208177722146526],
[ 0.186875436782135, 0.44706071961879 , -0.145299823706503],
[ 0.120880983543622, 0.423566314072968, -0.110757158774771],
[-0.120880983543622, 0.423566314072968, -0.110757158774771],
[-0.186875436782135, 0.44706071961879 , -0.145299823706503],
[-0.25409760441282 , 0.46420381416882 , -0.208177722146526],
[-0.322196658344302, 0.464439136821772, -0.250558059367669],
[-0.385529968662985, 0.402800553948697, -0.310031082540741],
[ 0. , 0.293332603215811, -0.137582088779393],
[ 0. , 0.194828701837823, -0.069158109325951],
[ 0. , 0.103844017393155, -0.009151819844964],
[ 0. , 0. , 0. ],
[ 0.080626352317973, -0.041276068128093, -0.134161035564826],
[ 0.046439347377934, -0.057675223874769, -0.102990627164664],
[ 0. , -0.068753126205604, -0.090545348482397],
[-0.046439347377934, -0.057675223874769, -0.102990627164664],
[-0.080626352317973, -0.041276068128093, -0.134161035564826],
[ 0.315905195966084, 0.298337502555443, -0.285107407636464],
[ 0.275252345439353, 0.312721904921771, -0.244558251170671],
[ 0.176394511553111, 0.311907184376107, -0.219205360345231],
[ 0.131229723798772, 0.284447361805627, -0.234239149487417],
[ 0.184124948330084, 0.260179585304867, -0.226590776513707],
[ 0.279433549294448, 0.267363071770222, -0.248441437111633],
[-0.131229723798772, 0.284447361805627, -0.234239149487417],
[-0.176394511553111, 0.311907184376107, -0.219205360345231],
[-0.275252345439353, 0.312721904921771, -0.244558251170671],
[-0.315905195966084, 0.298337502555443, -0.285107407636464],
[-0.279433549294448, 0.267363071770222, -0.248441437111633],
[-0.184124948330084, 0.260179585304867, -0.226590776513707],
[ 0.121155252430729, -0.208988660580347, -0.160606287940521],
[ 0.041356305910044, -0.194484199722098, -0.096159882202821],
[ 0. , -0.205180167345702, -0.083299217789729],
[-0.041356305910044, -0.194484199722098, -0.096159882202821],
[-0.121155252430729, -0.208988660580347, -0.160606287940521],
[-0.132325402795928, -0.290857984604968, -0.187067868218105],
[-0.064137791831655, -0.325377847425684, -0.158924039726607],
[ 0. , -0.343742581679188, -0.113925986025684],
[ 0.064137791831655, -0.325377847425684, -0.158924039726607],
[ 0.132325402795928, -0.290857984604968, -0.187067868218105],
[ 0.181481567104525, -0.243239316141725, -0.231284988892766],
[ 0.083999507750469, -0.239717753728704, -0.155256465640701],
[ 0. , -0.256058040176369, -0.0950619498899 ],
[-0.083999507750469, -0.239717753728704, -0.155256465640701],
[-0.181481567104525, -0.243239316141725, -0.231284988892766],
[-0.074036069749345, -0.250689938345682, -0.177346470406188],
[ 0. , -0.264945854681568, -0.112349967428413],
[ 0.074036069749345, -0.250689938345682, -0.177346470406188],
# Pupils and eyeball centers
[ 0.257990002632141, 0.276080012321472, -0.219998998939991],
[-0.257990002632141, 0.276080012321472, -0.219998998939991],
[ 0.257990002632141, 0.276080012321472, -0.324570998549461],
[-0.257990002632141, 0.276080012321472, -0.324570998549461]
# NOTE(review): the next line closes the face_3d array and is fused with a
# second assignment whose target is missing. The 3x3 matrix built from width
# and height is presumably the camera matrix that `inverse_camera` refers to;
# confirm against the upstream file.
], np.float32) = np.array([[width, 0, width/2], [0, width, height/2], [0, 0, 1]], np.float32)
# NOTE(review): the argument of np.linalg.inv is missing below.
self.inverse_camera = np.linalg.inv(
self.dist_coeffs = np.zeros((4,1))
# Plain copies of the constructor arguments and frame bookkeeping counters.
self.frame_count = 0
self.width = width
self.height = height
self.threshold = threshold
self.detection_threshold = detection_threshold
self.max_faces = max_faces
self.max_threads = max_threads
self.discard = 0
self.discard_after = discard_after
self.detected = 0
self.wait_count = 0
self.scan_every = scan_every
self.bbox_growth = bbox_growth
self.silent = silent
self.try_hard = try_hard
# Landmark model input resolution and matching tiled normalisation constants;
# overridden for negative model types.
self.res = 224.
self.mean_res = self.mean_224
self.std_res = self.std_224
if model_type < 0:
self.res = 56.
self.mean_res = np.tile(self.mean, [56, 56, 1])
self.std_res = np.tile(self.std, [56, 56, 1])
if model_type < -1:
self.res = 112.
self.mean_res = np.tile(self.mean, [112, 112, 1])
self.std_res = np.tile(self.std, [112, 112, 1])
self.res_i = int(self.res)
# Output map resolution; out_res_i (out_res + 1) is the side length used when
# reshaping the model output in landmarks().
self.out_res = 27.
if model_type < 0:
self.out_res = 6.
if model_type < -1:
self.out_res = 13.
self.out_res_i = int(self.out_res) + 1
# Divisor passed to logit_arr when decoding landmark offsets.
self.logit_factor = 16.
if model_type < 0:
self.logit_factor = 8.
if model_type < -1:
self.logit_factor = 16.
self.no_gaze = no_gaze
self.debug_gaze = False
self.feature_level = feature_level
# model_type -1 is capped at feature level 1.
if model_type == -1:
self.feature_level = min(feature_level, 1)
self.max_feature_updates = max_feature_updates
self.static_model = static_model
# One FaceInfo per trackable face; FaceInfo reads several attributes set above.
self.face_info = [FaceInfo(id, self) for id in range(max_faces)]
self.fail_count = 0
def detect_faces(self, frame):
# Finds candidate face boxes in `frame` and returns them as a float32 array
# of (x, y, width, height) rows scaled to the frame size.
# Resize to 224x224, reverse the channel order and apply the normalisation.
im = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_LINEAR)[:,:,::-1] * self.std_224 + self.mean_224
# Add a batch axis and move channels first.
im = np.expand_dims(im, 0)
im = np.transpose(im, (0,3,1,2))
# NOTE(review): the right-hand side below is truncated; the callee taking
# `[]` and `{'input': im}` is missing. Presumably an inference call on the
# detection session; confirm against the upstream file.
outputs, maxpool =[], {'input': im})
outputs = np.array(outputs)
maxpool = np.array(maxpool)
# Zero every score that differs from the `maxpool` output at the same cell.
outputs[0, 0, outputs[0, 0] != maxpool[0, 0]] = 0
# Cell indices sorted by descending score.
detections = np.flip(np.argsort(outputs[0,0].flatten()))
results = []
for det in detections[0:self.max_faces]:
# The flat index is decoded on a 56-column grid.
y, x = det // 56, det % 56
c = outputs[0, 0, y, x]
r = outputs[0, 1, y, x] * 112.
# Grid cell to 224x224 pixel coordinates.
x *= 4
y *= 4
r *= 1.0
# NOTE(review): as written, the append below directly follows the
# below-threshold check; a statement skipping weak detections appears to be
# missing. TODO confirm.
if c < self.detection_threshold:
results.append((x - r, y - r, 2 * r, 2 * r * 1.0))
results = np.array(results).astype(np.float32)
if results.shape[0] > 0:
# Scale from the 224x224 detector input back to the frame size.
results[:, [0,2]] *= frame.shape[1] / 224.
results[:, [1,3]] *= frame.shape[0] / 224.
return results
def landmarks(self, tensor, crop_info):
# Decodes the landmark model output `tensor` into rows of
# (coordinate, coordinate, confidence) mapped back through `crop_info`.
# Returns (avg_conf, lms).
# NOTE(review): indentation is not preserved in this file, so it cannot be
# determined here which of the trailing statements belong to the final
# `if self.model_type == -1:` block. TODO confirm.
crop_x1, crop_y1, scale_x, scale_y, _ = crop_info
avg_conf = 0
res = self.res - 1
# Channel boundaries: [0:c0] are the maps whose argmax selects a cell and whose
# value is used as confidence; [c0:c1] and [c1:c2] are the two offset maps.
c0, c1, c2 = 66, 132, 198
if self.model_type == -1:
c0, c1, c2 = 30, 60, 90
t_main = tensor[0:c0].reshape((c0,self.out_res_i * self.out_res_i))
# Flat index of the strongest cell per landmark.
t_m = t_main.argmax(1)
indices = np.expand_dims(t_m, 1)
t_conf = np.take_along_axis(t_main, indices, 1).reshape((c0,))
# Offsets sampled at the same cell as the maximum.
t_off_x = np.take_along_axis(tensor[c0:c1].reshape((c0,self.out_res_i * self.out_res_i)), indices, 1).reshape((c0,))
t_off_y = np.take_along_axis(tensor[c1:c2].reshape((c0,self.out_res_i * self.out_res_i)), indices, 1).reshape((c0,))
t_off_x = res * logit_arr(t_off_x, self.logit_factor)
t_off_y = res * logit_arr(t_off_y, self.logit_factor)
# t_x is derived from the row of the flat index and uses crop_y1 / scale_y;
# t_y is derived from the column and uses crop_x1 / scale_x.
t_x = crop_y1 + scale_y * (res * np.floor(t_m / self.out_res_i) / self.out_res + t_off_x)
t_y = crop_x1 + scale_x * (res * np.floor(np.mod(t_m, self.out_res_i)) / self.out_res + t_off_y)
avg_conf = np.average(t_conf)
lms = np.stack([t_x, t_y, t_conf], 1)
# Rows containing NaN are replaced by zeros.
lms[np.isnan(lms).any(axis=1)] = np.array([0.,0.,0.], dtype=np.float32)
if self.model_type == -1:
# Expand the 30 model outputs to 66 rows by repeating entries.
lms = lms[[0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,6,7,7,8,8,9,10,10,11,11,12,21,21,21,22,23,23,23,23,23,13,14,14,15,16,16,17,18,18,19,20,20,24,25,25,25,26,26,27,27,27,24,24,28,28,28,26,29,29,29]]
#lms[[1,3,4,6,7,9,10,12,13,15,18,20,23,25,38,40,44,46]] += lms[[2,2,5,5,8,8,11,11,14,16,19,21,24,26,39,39,45,45]]
#lms[[3,4,6,7,9,10,12,13]] += lms[[5,5,8,8,11,11,14,14]]
#lms[[1,15,18,20,23,25,38,40,44,46]] /= 2.0
#lms[[3,4,6,7,9,10,12,13]] /= 3.0
# Mean of the three lowest confidences; it replaces the average when below 0.65.
part_avg = np.mean(np.partition(lms[:,2],3)[0:3])
if part_avg < 0.65:
avg_conf = part_avg
return (avg_conf, np.array(lms))
def estimate_depth(self, face_info):
# Solves the head pose for `face_info` with cv2.solvePnP and reconstructs 3D
# positions for the landmarks and the four eye-related points.
# Returns (success, quaternion, euler, pnp_error, pts_3d, lms).
# NOTE(review): many statements in this method are visibly truncated: both
# solvePnP calls contain `,,` where an argument is missing, several
# assignments have an empty right-hand side, and the f-string near the end
# has an empty `{}`. The notes below mark each spot; confirm against the
# upstream file.
# Landmarks extended by two rows taken from eye_state elements 1..3.
lms = np.concatenate((face_info.lms, np.array([[face_info.eye_state[0][1], face_info.eye_state[0][2], face_info.eye_state[0][3]], [face_info.eye_state[1][1], face_info.eye_state[1][2], face_info.eye_state[1][3]]], np.float32)), 0)
image_pts = np.array(lms)[face_info.contour_pts, 0:2]
success = False
# A previous rotation, when present, is used as the extrinsic guess.
# NOTE(review): the statements from here to the second solvePnP call look like
# two alternatives whose separating `else` is missing. TODO confirm.
if not face_info.rotation is None:
success, face_info.rotation, face_info.translation = cv2.solvePnP(face_info.contour, image_pts,, self.dist_coeffs, useExtrinsicGuess=True, rvec=np.transpose(face_info.rotation), tvec=np.transpose(face_info.translation), flags=cv2.SOLVEPNP_ITERATIVE)
rvec = np.array([0, 0, 0], np.float32)
tvec = np.array([0, 0, 0], np.float32)
success, face_info.rotation, face_info.translation = cv2.solvePnP(face_info.contour, image_pts,, self.dist_coeffs, useExtrinsicGuess=True, rvec=rvec, tvec=tvec, flags=cv2.SOLVEPNP_ITERATIVE)
rotation = face_info.rotation
translation = face_info.translation
pts_3d = np.zeros((70,3), np.float32)
# On failure the pose is zeroed and a large error value is returned.
if not success:
face_info.rotation = np.array([0.0, 0.0, 0.0], np.float32)
face_info.translation = np.array([0.0, 0.0, 0.0], np.float32)
return False, np.zeros(4), np.zeros(3), 99999., pts_3d, lms
face_info.rotation = np.transpose(face_info.rotation)
face_info.translation = np.transpose(face_info.translation)
rmat, _ = cv2.Rodrigues(rotation)
inverse_rotation = np.linalg.inv(rmat)
# NOTE(review): right-hand side missing on the next line and two lines below.
t_reference =
t_reference = t_reference + face_info.translation
t_reference =
t_depth = t_reference[:, 2]
# Avoid division by zero in the normalisation below.
t_depth[t_depth == 0] = 0.000001
t_depth_e = np.expand_dims(t_depth[:],1)
t_reference = t_reference[:] / t_depth_e
# Back-project the 2D landmarks using the reference depths, then undo the
# camera matrix, translation and rotation.
pts_3d[0:66] = np.stack([lms[0:66,0], lms[0:66,1], np.ones((66,))], 1) * t_depth_e[0:66]
pts_3d[0:66] = (pts_3d[0:66].dot(self.inverse_camera.transpose()) - face_info.translation).dot(inverse_rotation.transpose())
# Squared error between landmarks 0..16 and 30 and their reference positions.
pnp_error = np.power(lms[0:17,0:2] - t_reference[0:17,0:2], 2).sum()
pnp_error += np.power(lms[30,0:2] - t_reference[30,0:2], 2).sum()
if np.isnan(pnp_error):
pnp_error = 9999999.
# Fill rows 66..69 of pts_3d (i = 0..3).
for i, pt in enumerate(face_info.face_3d[66:70]):
if i == 2:
# Right eyeball
# Eyeballs have an average diameter of 12.5mm and the distance between eye corners is 30-35mm, so a conversion factor of 0.385 can be applied
eye_center = (pts_3d[36] + pts_3d[39]) / 2.0
d_corner = np.linalg.norm(pts_3d[36] - pts_3d[39])
depth = 0.385 * d_corner
pt_3d = np.array([eye_center[0], eye_center[1], eye_center[2] - depth])
pts_3d[68] = pt_3d
if i == 3:
# Left eyeball
eye_center = (pts_3d[42] + pts_3d[45]) / 2.0
d_corner = np.linalg.norm(pts_3d[42] - pts_3d[45])
depth = 0.385 * d_corner
pt_3d = np.array([eye_center[0], eye_center[1], eye_center[2] - depth])
pts_3d[69] = pt_3d
# For i = 0 and 1 the point is a blend of two landmarks' 3D positions,
# weighted by the 2D distances from row 66 / 67 of `lms` to those landmarks.
if i == 0:
d1 = np.linalg.norm(lms[66,0:2] - lms[36,0:2])
d2 = np.linalg.norm(lms[66,0:2] - lms[39,0:2])
d = d1 + d2
pt = (pts_3d[36] * d1 + pts_3d[39] * d2) / d
if i == 1:
d1 = np.linalg.norm(lms[67,0:2] - lms[42,0:2])
d2 = np.linalg.norm(lms[67,0:2] - lms[45,0:2])
d = d1 + d2
pt = (pts_3d[42] * d1 + pts_3d[45] * d2) / d
if i < 2:
# NOTE(review): the right-hand sides of `reference =` (twice) and `pt_3d =`
# (twice) below are missing.
reference =
reference = reference + face_info.translation
reference =
depth = reference[2]
pt_3d = np.array([lms[66+i][0] * depth, lms[66+i][1] * depth, depth], np.float32)
pt_3d =
pt_3d = pt_3d - face_info.translation
pt_3d =
pts_3d[66+i,:] = pt_3d[:]
# Rows containing NaN are replaced by zeros.
pts_3d[np.isnan(pts_3d).any(axis=1)] = np.array([0.,0.,0.], dtype=np.float32)
# Root of the squared error divided by twice the number of image points.
pnp_error = np.sqrt(pnp_error / (2.0 * image_pts.shape[0]))
# Count large errors; after more than five, the per-face model is reset to the
# tracker's reference model.
if pnp_error > 300:
face_info.fail_count += 1
if face_info.fail_count > 5:
# Something went wrong with adjusting the 3D model
if not self.silent:
# NOTE(review): the `{}` in the f-string below is empty, which Python rejects;
# the expression that identified the face is missing.
print(f"Detected anomaly when 3D fitting face {}. Resetting.")
face_info.face_3d = copy.copy(self.face_3d)
face_info.rotation = None
face_info.translation = np.array([0.0, 0.0, 0.0], np.float32)
face_info.update_counts = np.zeros((66,2))
face_info.fail_count = 0
euler = cv2.RQDecomp3x3(rmat)[0]
return True, matrix_to_quaternion(rmat), euler, pnp_error, pts_3d, lms
def preprocess(self, im, crop):
    """Cut `crop` = (x1, y1, x2, y2) out of `im` and turn it into model input.

    The crop is converted to float32, resized to ``res_i`` x ``res_i``,
    normalised with ``std_res`` / ``mean_res``, given a leading batch axis
    and transposed to channels-first order.
    """
    left, top, right, bottom = crop
    patch = np.float32(im[top:bottom, left:right])
    target_size = (self.res_i, self.res_i)
    scaled = cv2.resize(patch, target_size, interpolation=cv2.INTER_LINEAR)
    normalised = scaled * self.std_res + self.mean_res
    batched = np.expand_dims(normalised, 0)
    return np.transpose(batched, (0, 3, 1, 2))
def equalize(self, im):
    """Histogram-equalise channel 0 of `im` in YUV space and return BGR."""
    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV)
    first_channel = yuv[:, :, 0]
    yuv[:, :, 0] = cv2.equalizeHist(first_channel)
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)
def corners_to_eye(self, corners, w, h, flip):
    """Compute the crop rectangle around an eye given its two corner points.

    The second corner is first rotated around the first with ``compensate``.
    The box is centred between the corners, with half-extents of 1.4 and 1.2
    times half the corner distance, and clamped to a `w` x `h` image.
    Returns ``(upper_left, lower_right, center, radius, first_corner, angle)``.
    `flip` is accepted but not used here.
    """
    first, second = corners
    c1 = np.array([first[0], first[1]])
    c2 = np.array([second[0], second[1]])
    c2, a = compensate(c1, c2)
    center = (c1 + c2) / 2.0
    half_dist = np.linalg.norm(c1 - c2) / 2.0
    radius = np.array([half_dist * 1.4, half_dist * 1.2])
    upper_left = clamp_to_im(center - radius, w, h)
    lower_right = clamp_to_im(center + radius, w, h)
    return upper_left, lower_right, center, radius, c1, a
def prepare_eye(self, frame, full_frame, lms, flip):
    """Cut one eye out of a face image and shape it as a 32x32 model input.

    The image is rotated about the first eye corner so the eye is level,
    cropped to the box from ``corners_to_eye``, optionally mirrored, resized
    to 32x32 and normalised with ``self.std_32`` / ``self.mean_32``.

    Args:
        frame: Three-dimensional image array (rows, columns, channels) the
            eye is cut from.
        full_frame: Image that receives a 32x32 debug thumbnail when
            ``self.debug_gaze`` is set.
        lms: Two points; ``lms[0]`` and ``lms[1]`` are the eye corners.
        flip: When true, the crop is mirrored horizontally.

    Returns:
        ``(im, x1, y1, scale, reference, a)``: the input tensor with axes
        (batch, channel, column, row), the crop's upper-left corner, the
        per-axis crop-size / 32 scale, the rotation reference point and the
        rotation angle. Six ``None`` values are returned when the crop is
        empty.
    """
    outer_pt = tuple(lms[0])
    inner_pt = tuple(lms[1])
    h, w, _ = frame.shape
    (x1, y1), (x2, y2), center, radius, reference, a = self.corners_to_eye((outer_pt, inner_pt), w, h, flip)
    im = rotate_image(frame, a, reference)
    im = im[int(y1):int(y2), int(x1):int(x2), :]
    # An empty crop cannot be resized; signal failure to the caller instead.
    if im.size < 1:
        return None, None, None, None, None, None
    if flip:
        im = cv2.flip(im, 1)
    scale = np.array([(x2 - x1), (y2 - y1)]) / 32.
    im = cv2.resize(im, (32, 32), interpolation=cv2.INTER_LINEAR)
    if self.debug_gaze:
        # Unflipped eye goes to the first thumbnail slot, flipped eye to the
        # slot right next to it.
        if not flip:
            full_frame[0:32, 0:32] = im
        else:
            full_frame[0:32, 32:64] = im
    # Reverse the channel order, then normalise.
    im = im.astype(np.float32)[:, :, ::-1] * self.std_32 + self.mean_32
    im = np.expand_dims(im, 0)
    im = np.transpose(im, (0, 3, 2, 1))
    return im, x1, y1, scale, reference, a
def extract_face(self, frame, lms):
    """Crop the frame to the landmark bounding box, enlarged by 20%.

    Args:
        frame: Three-dimensional image array (rows, columns, channels).
        lms: Landmarks; only the first two columns are used, and their order
            is reversed before use.

    Returns:
        ``(frame, lms, offset)``: the cropped image, the (column-reversed)
        landmarks relative to the crop as integers, and the crop's upper-left
        corner as an array.
    """
    lms = np.array(lms)[:, 0:2][:, ::-1]
    x1, y1 = tuple(lms.min(0))
    x2, y2 = tuple(lms.max(0))
    radius_x = 1.2 * (x2 - x1) / 2.0
    radius_y = 1.2 * (y2 - y1) / 2.0
    radius = np.array((radius_x, radius_y))
    center = (np.array((x1, y1)) + np.array((x2, y2))) / 2.0
    # ``shape`` is (rows, columns, channels); clamp_to_im takes width first.
    rows, cols, _ = frame.shape
    x1, y1 = clamp_to_im(center - radius, cols, rows)
    x2, y2 = clamp_to_im(center + radius + 1, cols, rows)
    offset = np.array((x1, y1))
    # NOTE(review): the dtype argument was missing in this copy of the file;
    # the builtin ``int`` is assumed (the offset is already integer) and
    # avoids the removed ``np.int`` alias. Confirm against version control.
    lms = (lms[:, 0:2] - offset).astype(int)
    frame = frame[y1:y2, x1:x2]
    return frame, lms, offset
def get_eye_state(self, frame, lms, single=False):
# Estimate one state row per eye for a single face.
#
# frame:  image the face is cut from; when self.debug_gaze is set, some of its
#         pixels are overwritten with (0, 0, 255) markers.
# lms:    face landmarks; rows 36/39 and 42/45 are used as the eye corner pairs.
# single: selects which of the two model invocations below is meant to run.
# Returns an array with two rows of [open, eye_y, eye_x, conf]. When
# self.no_gaze is set or an eye crop is empty, the placeholder list
# [(1.0, 0.0, 0.0, 0.0), (1.0, 0.0, 0.0, 0.0)] is returned instead.
#
# NOTE(review): this copy of the method has lost its indentation, two lines are
# truncated and several `else:` lines appear to be missing. Restore it from
# version control before relying on the exact control flow.
if self.no_gaze:
return [(1.0, 0.0, 0.0, 0.0), (1.0, 0.0, 0.0, 0.0)]
lms = np.array(lms)
# Per-eye crop origin, crop scale, rotation reference and rotation angle,
# filled in by prepare_eye and used to map model output back to the frame.
e_x = [0,0]
e_y = [0,0]
scale = [0,0]
reference = [None, None]
angles = [0, 0]
# From here on lms are relative to the face crop; offset maps them back.
face_frame, lms, offset = self.extract_face(frame, lms)
# First eye: landmarks 36/39, not mirrored. Second eye: landmarks 42/45, mirrored.
(right_eye, e_x[0], e_y[0], scale[0], reference[0], angles[0]) = self.prepare_eye(face_frame, frame, np.array([lms[36,0:2], lms[39,0:2]]), False)
(left_eye, e_x[1], e_y[1], scale[1], reference[1], angles[1]) = self.prepare_eye(face_frame, frame, np.array([lms[42,0:2], lms[45,0:2]]), True)
if right_eye is None or left_eye is None:
return [(1.0, 0.0, 0.0, 0.0), (1.0, 0.0, 0.0, 0.0)]
# Stack both eye tensors along the batch axis.
both_eyes = np.concatenate((right_eye, left_eye))
results = None
# NOTE(review): the next two assignments are truncated -- the call expression
# before `[], {...})` is missing. Presumably one inference-session run per
# branch of `if single:` / `else:`; confirm which session objects are used.
if single:
results =[], {self.input_name: both_eyes})
results =[], {self.input_name: both_eyes})
# The open/closed flag is hard-coded to 1; the model-based readout is disabled.
open = [0, 0]
open[0] = 1#results[1][0].argmax()
open[1] = 1#results[1][1].argmax()
results = np.array(results[0])
eye_state = []
for i in range(2):
# Channel 0 is searched for its maximum; the flat index is split with // 8
# and % 8 (assumes an 8-wide map -- TODO confirm against the model output).
m = int(results[i][0].argmax())
x = m // 8
y = m % 8
conf = float(results[i][0][x,y])
# Channels 1 and 2 at the peak give sub-cell offsets via logit(), a helper
# defined elsewhere in the file.
off_x = 32.0 * logit(results[i][1][x, y], 8.0)
off_y = 32.0 * logit(results[i][2][x, y], 8.0)
# NOTE(review): both eye_x assignments below are identical, so the result does
# not depend on `i` here; an `else:` line appears to be missing.
if i == 1:
eye_x = 32.0 * float(x) / 8.0 + off_x
eye_x = 32.0 * float(x) / 8.0 + off_x
eye_y = 32.0 * float(y) / 8.0 + off_y
if self.debug_gaze:
# Draw a 2x2 marker at the estimated point; the second group of writes is
# shifted 32 columns to the right (presumably the `else:` branch for the
# second eye -- confirm).
if i == 0:
frame[int(eye_y), int(eye_x)] = (0, 0, 255)
frame[int(eye_y+1), int(eye_x)] = (0, 0, 255)
frame[int(eye_y+1), int(eye_x+1)] = (0, 0, 255)
frame[int(eye_y), int(eye_x+1)] = (0, 0, 255)
frame[int(eye_y), 32+int(eye_x)] = (0, 0, 255)
frame[int(eye_y+1), 32+int(eye_x)] = (0, 0, 255)
frame[int(eye_y+1), 32+int(eye_x+1)] = (0, 0, 255)
frame[int(eye_y), 32+int(eye_x+1)] = (0, 0, 255)
# Map from 32x32 crop coordinates back to face-crop coordinates.
# NOTE(review): as written the second eye_x assignment always overwrites the
# first; the `(32. - eye_x)` form presumably belongs to a missing `else:`
# branch for the mirrored eye -- confirm.
if i == 0:
eye_x = e_x[i] + scale[i][0] * eye_x
eye_x = e_x[i] + scale[i][0] * (32. - eye_x)
eye_y = e_y[i] + scale[i][1] * eye_y
# Undo the levelling rotation applied in prepare_eye, then undo the face crop.
eye_x, eye_y = rotate(reference[i], (eye_x, eye_y), -angles[i])
eye_x = eye_x + offset[0]
eye_y = eye_y + offset[1]
eye_state.append([open[i], eye_y, eye_x, conf])
eye_state = np.array(eye_state)
# Any row containing NaN is replaced by the placeholder state.
eye_state[np.isnan(eye_state).any(axis=1)] = np.array([1.,0.,0.,0.], dtype=np.float32)
return eye_state
def assign_face_info(self, results):
    """Assign this frame's detections to the persistent face slots.

    Each detection is positioned at the mean of the first two columns of its
    landmarks. Slot/detection pairs are matched greedily by ascending
    distance between the slot's stored ``coord`` and the detection position;
    a slot without a stored coordinate is treated as being
    ``2 * norm((width, height))`` away from every detection.

    Args:
        results: Sequence of ``(conf, (lms, eye_state), conf_adjust)``
            tuples. Each matched slot receives
            ``(conf - conf_adjust, (lms, eye_state))``.

    Returns:
        None. Every matched slot has ``update(result, coord, frame_count)``
        called once; every slot whose ``frame_count`` still differs from
        ``self.frame_count`` afterwards gets ``update(None, None, ...)``.
    """
    if self.max_faces == 1 and len(results) == 1:
        # Single-slot fast path: nothing to match, so stop here instead of
        # falling through and updating the same slot a second time.
        conf, (lms, eye_state), conf_adjust = results[0]
        self.face_info[0].update(
            (conf - conf_adjust, (lms, eye_state)),
            np.array(lms)[:, 0:2].mean(0),
            self.frame_count,
        )
        return

    adjusted_results = []
    result_coords = []
    for conf, (lms, eye_state), conf_adjust in results:
        adjusted_results.append((conf - conf_adjust, (lms, eye_state)))
        result_coords.append(np.array(lms)[:, 0:2].mean(0))
    results = adjusted_results

    max_dist = 2 * np.linalg.norm(np.array([self.width, self.height]))

    # One flat list of (distance, slot, detection) triples. Walking it in
    # sorted order visits pairs exactly as repeatedly taking the smallest
    # head of per-slot sorted lists would, without shared-list aliasing and
    # without a loop that cannot end when detections outnumber slots.
    pairs = []
    for face_idx, face_info in enumerate(self.face_info):
        for result_idx, coord in enumerate(result_coords):
            if face_info.coord is None:
                dist = max_dist
            else:
                dist = np.linalg.norm(face_info.coord - coord)
            pairs.append((dist, face_idx, result_idx))
    pairs.sort()

    used_results = set()
    used_faces = set()
    for _, face_idx, result_idx in pairs:
        if result_idx in used_results or face_idx in used_faces:
            continue
        self.face_info[face_idx].update(results[result_idx], result_coords[result_idx], self.frame_count)
        used_results.add(result_idx)
        used_faces.add(face_idx)

    # Slots that were not touched this frame are told so explicitly.
    for face_info in self.face_info:
        if face_info.frame_count != self.frame_count:
            face_info.update(None, None, self.frame_count)
def predict(self, frame, additional_faces=[]):
# Run one tracking step on `frame` and return the `results` list built below.
#
# NOTE(review): this copy of the method has no indentation, and a number of
# statements look truncated or missing: detector calls, `else:`/`continue`
# lines, thread starts, the `output =[], ...` line and the final `sorted(`
# call. The comments below describe only what is visible; restore the method
# from version control before running it.
# NOTE(review): `additional_faces=[]` is a mutable default argument and is not
# referenced anywhere in the visible body.
self.frame_count += 1
start = time.perf_counter()
im = frame
# Per-stage timings in milliseconds, reported by the final print.
duration_fd = 0.0
duration_pp = 0.0
duration_model = 0.0
duration_pnp = 0.0
new_faces = []
# Candidates with an index below this value get a 0.1 confidence bonus when
# crop_info is built further down.
bonus_cutoff = len(self.faces)
self.wait_count += 1
# Nothing tracked at the moment: run face detection.
if self.detected == 0:
start_fd = time.perf_counter()
if self.use_retinaface > 0 or self.try_hard:
retinaface_detections = self.retinaface.detect_retina(frame)
# NOTE(review): retinaface_detections is not used in the visible code, and
# the next `if` has no visible body other than the try_hard fallback.
if self.use_retinaface == 0 or self.try_hard:
if self.try_hard:
# Fallback candidate: the whole frame.
new_faces.extend([(0, 0, self.width, self.height)])
duration_fd = 1000 * (time.perf_counter() - start_fd)
self.wait_count = 0
# Fewer faces tracked than allowed: a rescan gated by wait_count/scan_every.
elif self.detected < self.max_faces:
if self.use_retinaface > 0:
if self.wait_count >= self.scan_every:
if self.use_retinaface > 0:
start_fd = time.perf_counter()
# NOTE(review): no detector call is visible between starting and stopping
# this timer.
duration_fd = 1000 * (time.perf_counter() - start_fd)
self.wait_count = 0
self.wait_count = 0
# No candidate regions: report the elapsed time and return an empty list.
if len(new_faces) < 1:
duration = (time.perf_counter() - start) * 1000
if not self.silent:
print(f"Took {duration:.2f}ms")
return []
crops = []
crop_info = []
num_crops = 0
for j, (x,y,w,h) in enumerate(new_faces):
# Grow the box by 10% of its width and 12.5% of its height on each side,
# then clamp it to the image.
crop_x1 = x - int(w * 0.1)
crop_y1 = y - int(h * 0.125)
crop_x2 = x + w + int(w * 0.1)
crop_y2 = y + h + int(h * 0.125)
crop_x1, crop_y1 = clamp_to_im((crop_x1, crop_y1), self.width, self.height)
crop_x2, crop_y2 = clamp_to_im((crop_x2, crop_y2), self.width, self.height)
# Crop size relative to self.res.
scale_x = float(crop_x2 - crop_x1) / self.res
scale_y = float(crop_y2 - crop_y1) / self.res
# NOTE(review): no body is visible for this size check (presumably it skips
# crops smaller than 4 pixels -- confirm).
if crop_x2 - crop_x1 < 4 or crop_y2 - crop_y1 < 4:
start_pp = time.perf_counter()
crop = self.preprocess(im, (crop_x1, crop_y1, crop_x2, crop_y2))
duration_pp += 1000 * (time.perf_counter() - start_pp)
# NOTE(review): `crop` is never appended to `crops` in the visible code,
# although `crops[...]` is indexed below.
crop_info.append((crop_x1, crop_y1, scale_x, scale_y, 0.0 if j >= bonus_cutoff else 0.1))
num_crops += 1
start_model = time.perf_counter()
# Maps a crop_info tuple to the result obtained for that crop.
outputs = {}
# Single-crop path: run inference inline.
if num_crops == 1:
# NOTE(review): truncated line -- the call expression before `[], {...})[0]`
# is missing (presumably an inference-session run; confirm).
output =[], {self.input_name: crops[0]})[0]
conf, lms = self.landmarks(output[0], crop_info[0])
# NOTE(review): as written the placeholder on the second line below always
# overwrites the computed eye state; an `else:` line appears to be missing.
if conf > self.threshold:
eye_state = self.get_eye_state(frame, lms)
eye_state = [(1.0, 0.0, 0.0, 0.0), (1.0, 0.0, 0.0, 0.0)]
outputs[crop_info[0]] = (conf, (lms, eye_state), 0)
# Multi-crop path: up to self.max_workers worker threads report through a queue.
started = 0
results = queue.Queue()
for i in range(min(num_crops, self.max_workers)):
# NOTE(review): the thread objects are created but no start() call is visible.
thread = threading.Thread(target=worker_thread, args=(self.sessions[started], frame, crops[started], crop_info[started], results, self.input_name, started, self))
started += 1
returned = 0
while returned < num_crops:
# Block until a worker reports; results of length other than 1 are unpacked
# as a full result, and `session = result[0]` picks the session to reuse.
result = results.get(True)
if len(result) != 1:
session, conf, lms, sample_crop_info, idx = result
outputs[sample_crop_info] = (conf, lms, idx)
session = result[0]
returned += 1
# Hand the session to the next pending crop, if any.
if started < num_crops:
thread = threading.Thread(target=worker_thread, args=(session, frame, crops[started], crop_info[started], results, self.input_name, started, self))
started += 1
actual_faces = []
good_crops = []
for crop in crop_info:
# NOTE(review): no body is visible for this check (presumably crops without
# an output are skipped), and nothing is appended to actual_faces or
# good_crops in the visible code.
if crop not in outputs:
conf, lms, i = outputs[crop]
# Bounding box of the landmarks in lms[0], stored alongside the result.
x1, y1, _ = lms[0].min(0)
x2, y2, _ = lms[0].max(0)
bb = (x1, y1, x2 - x1, y2 - y1)
outputs[crop] = (conf, lms, i, bb)
# group_rects is defined earlier in this file; entries are looked up by str(bb).
groups = group_rects(actual_faces)
best_results = {}
for crop in good_crops:
conf, lms, i, bb = outputs[crop]
# NOTE(review): no body is visible for this threshold check.
if conf < self.threshold:
group_id = groups[str(bb)][0]
if not group_id in best_results:
best_results[group_id] = [-1, [], 0]
# Keep, per group, the entry with the highest confidence plus bonus (crop[4]).
if conf > self.threshold and best_results[group_id][0] < conf + crop[4]:
best_results[group_id][0] = conf + crop[4]
best_results[group_id][1] = lms
best_results[group_id][2] = crop[4]
# Best entries first, limited to self.max_faces.
sorted_results = sorted(best_results.values(), key=lambda x: x[0], reverse=True)[:self.max_faces]
# NOTE(review): sorted_results is not used in the visible code (presumably it
# is passed to assign_face_info -- confirm).
duration_model = 1000 * (time.perf_counter() - start_model)
results = []
detected = []
start_pnp = time.perf_counter()
for face_info in self.face_info:
# For live, confident faces: run estimate_depth and derive a bounding box.
if face_info.alive and face_info.conf > self.threshold:
face_info.success, face_info.quaternion, face_info.euler, face_info.pnp_error, face_info.pts_3d, face_info.lms = self.estimate_depth(face_info)
lms = face_info.lms[:, 0:2]
# The box uses only the first 66 landmarks; note the swapped column order
# in the tuple below.
x1, y1 = tuple(lms[0:66].min(0))
x2, y2 = tuple(lms[0:66].max(0))
bbox = (y1, x1, y2 - y1, x2 - x1)
face_info.bbox = bbox
# NOTE(review): nothing is appended to `detected` or `results` in the visible
# code.
duration_pnp += 1000 * (time.perf_counter() - start_pnp)
# Detection bookkeeping. NOTE(review): an `else:` line appears to be missing
# before `self.detected = 0`.
if len(detected) > 0:
self.detected = len(detected)
self.faces = detected
self.discard = 0
self.detected = 0
self.discard += 1
if self.discard > self.discard_after:
self.faces = []
# Enlarge the stored boxes by bbox_growth on every side.
if self.bbox_growth > 0:
faces = []
for (x,y,w,h) in self.faces:
x -= w * self.bbox_growth
y -= h * self.bbox_growth
w += 2 * w * self.bbox_growth
h += 2 * h * self.bbox_growth
# NOTE(review): the grown box is never appended to `faces` in the visible
# code, so self.faces would become empty here.
self.faces = faces
# Drop any box that contains NaN.
self.faces = [x for x in self.faces if not np.isnan(np.array(x)).any()]
self.detected = len(self.faces)
duration = (time.perf_counter() - start) * 1000
if not self.silent:
print(f"Took {duration:.2f}ms (detect: {duration_fd:.2f}ms, crop: {duration_pp:.2f}ms, track: {duration_model:.2f}ms, 3D points: {duration_pnp:.2f}ms)")
# NOTE(review): truncated line -- the sort key expression and closing
# parenthesis are missing.
results = sorted(results, key=lambda x:
return results