Spaces:
Running
on
A10G
Running
on
A10G
from __future__ import print_function | |
import os | |
import torch | |
from torch.utils.model_zoo import load_url | |
from enum import Enum | |
import numpy as np | |
import cv2 | |
try: | |
import urllib.request as request_file | |
except BaseException: | |
import urllib as request_file | |
from .models import FAN, ResNetDepth | |
from .utils import * | |
class LandmarksType(Enum):
    """Enumeration of the landmark types that can be requested.

    ``_2D``     - points ``(x, y)`` detected in 2D space; they follow the
                  visible contour of the face.
    ``_2halfD`` - points obtained by projecting the 3D landmarks into 2D.
    ``_3D``     - points ``(x, y, z)`` detected in 3D space.
    """
    # NOTE(review): the previous docstring described ``_2halfD`` as a
    # projection "into 3D", which looks like a typo for 2D - confirm.
    _2D = 1
    _2halfD = 2
    _3D = 3
class NetworkSize(Enum):
    """Available network sizes; ``int(member)`` yields the member's value.

    Only ``LARGE`` is currently enabled; the smaller variants below are
    kept commented out.
    """
    # TINY = 1
    # SMALL = 2
    # MEDIUM = 3
    LARGE = 4

    def __new__(cls, value):
        # Build the member by hand and store the raw value on it.
        instance = object.__new__(cls)
        instance._value_ = value
        return instance

    def __int__(self):
        # Lets callers write ``int(NetworkSize.LARGE)``.
        return self._value_
class FaceAlignment:
    """Thin wrapper that loads a face detector by name and runs it on batches.

    The detector module is imported from ``face_detection.detection.<name>``
    and must expose a ``FaceDetector`` class accepting ``device`` and
    ``verbose`` keyword arguments.
    """

    def __init__(self, landmarks_type, network_size=NetworkSize.LARGE,
                 device='cuda', flip_input=False, face_detector='sfd', verbose=False):
        """Store the configuration and instantiate the face detector.

        Args:
            landmarks_type: stored as ``self.landmarks_type``; not used
                further in this class.
            network_size: must be convertible with ``int()``; the converted
                value is not used afterwards.
            device: device string; when it contains ``'cuda'`` the cuDNN
                benchmark mode is switched on.
            flip_input: stored as ``self.flip_input``.
            face_detector: name of the sub-module of
                ``face_detection.detection`` that provides ``FaceDetector``.
            verbose: stored and forwarded to the detector.
        """
        self.landmarks_type = landmarks_type
        self.device = device
        self.flip_input = flip_input
        self.verbose = verbose

        # Kept for its validation effect: raises if the argument cannot be
        # converted to an int.
        network_size = int(network_size)

        if 'cuda' in device:
            torch.backends.cudnn.benchmark = True
            # Other backend switches that were tried and left disabled:
            # torch.backends.cuda.matmul.allow_tf32 = False
            # torch.backends.cudnn.benchmark = True
            # torch.backends.cudnn.deterministic = False
            # torch.backends.cudnn.allow_tf32 = True
            print('cuda start')

        # Resolve the detector implementation dynamically from its name.
        detector_module_name = 'face_detection.detection.' + face_detector
        detector_module = __import__(detector_module_name,
                                     globals(), locals(), [face_detector], 0)
        self.face_detector = detector_module.FaceDetector(device=device, verbose=verbose)

    def get_detections_for_batch(self, images):
        """Return one box per image, using the first detection of each image.

        Args:
            images: array whose last axis is reversed before detection
                (presumably a BGR<->RGB channel swap - confirm with caller).

        Returns:
            A list with one entry per detector result: ``None`` when nothing
            was detected, otherwise an ``(x1, y1, x2, y2)`` tuple of ints
            with negative coordinates clipped to 0.
        """
        reversed_channels = images[..., ::-1]
        per_image = self.face_detector.detect_from_batch(reversed_channels.copy())

        boxes = []
        for faces in per_image:
            if len(faces) == 0:
                boxes.append(None)
                continue
            # Only the first detection is kept; its last element is dropped
            # (presumably the confidence score - confirm).
            first = np.clip(faces[0], 0, None)
            x1, y1, x2, y2 = map(int, first[:-1])
            boxes.append((x1, y1, x2, y2))
        return boxes
class YOLOv8_face:
    """YOLOv8-face detector executed through OpenCV's ``cv2.dnn`` module.

    The network is loaded from ``path`` and fed 640x640 inputs. Its outputs
    are decoded into boxes, a face confidence and five keypoints
    ``(x, y, score)`` per detection.
    """

    def __init__(self, path='face_detection/weights/yolov8n-face.onnx', conf_thres=0.2, iou_thres=0.5):
        """Load the network and precompute the anchor grid.

        Args:
            path: model file handed to ``cv2.dnn.readNet``.
            conf_thres: minimum confidence for a detection to be kept.
            iou_thres: IoU threshold handed to non-maximum suppression.
        """
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        self.class_names = ['face']
        self.num_classes = len(self.class_names)
        # Initialize model
        self.net = cv2.dnn.readNet(path)
        self.input_height = 640
        self.input_width = 640
        self.reg_max = 16

        # Bin indices used to turn each box-side distribution into its
        # expected value.
        self.project = np.arange(self.reg_max)
        self.strides = (8, 16, 32)
        # Ceil division on integers: -(-a // b) == ceil(a / b).
        self.feats_hw = [(-(-self.input_height // stride), -(-self.input_width // stride))
                         for stride in self.strides]
        self.anchors = self.make_anchors(self.feats_hw)

    def make_anchors(self, feats_hw, grid_cell_offset=0.5):
        """Generate anchor points for every stride.

        Args:
            feats_hw: sequence of ``(height, width)`` feature-map sizes, one
                per entry of ``self.strides`` and in the same order.
            grid_cell_offset: offset added to each grid index.

        Returns:
            Dict mapping stride to an ``(h * w, 2)`` array of ``(x, y)``
            points in row-major order (x varies fastest).
        """
        anchor_points = {}
        for (h, w), stride in zip(feats_hw, self.strides):
            x = np.arange(0, w) + grid_cell_offset  # shift x
            y = np.arange(0, h) + grid_cell_offset  # shift y
            sx, sy = np.meshgrid(x, y)
            anchor_points[stride] = np.stack((sx, sy), axis=-1).reshape(-1, 2)
        return anchor_points

    def softmax(self, x, axis=1):
        """Return the softmax of ``x`` along ``axis``.

        The maximum along ``axis`` is subtracted first so that large inputs
        do not overflow ``np.exp``; the result is mathematically unchanged.
        Use ``axis=0`` for a column vector.
        """
        shifted = x - np.max(x, axis=axis, keepdims=True)
        x_exp = np.exp(shifted)
        return x_exp / np.sum(x_exp, axis=axis, keepdims=True)

    def resize_image(self, srcimg, keep_ratio=True):
        """Resize ``srcimg`` to the network input size.

        With ``keep_ratio`` and a non-square image, the aspect ratio is kept
        and the shorter side is padded with black borders, split between
        both ends.

        Returns:
            ``(img, newh, neww, top, left)``: the resized (and padded) image,
            the size of the resized content before padding, and the top/left
            padding in pixels.
        """
        top, left = 0, 0
        newh, neww = self.input_height, self.input_width
        src_h, src_w = srcimg.shape[0], srcimg.shape[1]
        if keep_ratio and src_h != src_w:
            hw_scale = src_h / src_w
            if hw_scale > 1:
                # Taller than wide: fill the height, pad left and right.
                # max(1, ...) keeps cv2.resize valid for extreme ratios.
                newh, neww = self.input_height, max(1, int(self.input_width / hw_scale))
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((self.input_width - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, self.input_width - neww - left,
                                         cv2.BORDER_CONSTANT, value=(0, 0, 0))  # add border
            else:
                # Wider than tall: fill the width, pad top and bottom.
                newh, neww = max(1, int(self.input_height * hw_scale)), self.input_width
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((self.input_height - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, self.input_height - newh - top, 0, 0,
                                         cv2.BORDER_CONSTANT, value=(0, 0, 0))
        else:
            img = cv2.resize(srcimg, (self.input_width, self.input_height), interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def detect(self, srcimg):
        """Run the detector on one image.

        Args:
            srcimg: image passed through ``cv2.COLOR_BGR2RGB`` before
                inference, so it is expected to be in BGR order.

        Returns:
            ``(det_bboxes, det_conf, det_classid, landmarks)`` as produced by
            ``post_process``, expressed in ``srcimg`` coordinates.
        """
        input_img, newh, neww, padh, padw = self.resize_image(cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB))
        # Factors that map the resized content back to the source image.
        scale_h, scale_w = srcimg.shape[0] / newh, srcimg.shape[1] / neww
        input_img = input_img.astype(np.float32) / 255.0

        # Perform inference on the image
        blob = cv2.dnn.blobFromImage(input_img)
        self.net.setInput(blob)
        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        # A reordering of the outputs for OpenCV 4.7 (not needed on 4.5) used
        # to live here. It is unnecessary because post_process derives the
        # stride of each output from that output's own shape.

        det_bboxes, det_conf, det_classid, landmarks = self.post_process(outputs, scale_h, scale_w, padh, padw)
        return det_bboxes, det_conf, det_classid, landmarks

    def post_process(self, preds, scale_h, scale_w, padh, padw):
        """Decode raw network outputs into final detections.

        Args:
            preds: iterable of arrays indexed as ``(batch, channel, h, w)``.
                The channels are ``reg_max * 4`` box bins, the class logits
                and, last, 15 keypoint values.
            scale_h, scale_w: factors mapping network coordinates back to
                the source image.
            padh, padw: top/left padding added by ``resize_image``.

        Returns:
            ``(boxes, confidences, class_ids, landmarks)``. Boxes are
            ``(x, y, w, h)``; landmarks hold ``x, y, score`` for each of the
            five keypoints. Four empty arrays are returned when nothing is
            detected.
        """
        bboxes, scores, landmarks = [], [], []
        for pred in preds:
            stride = int(self.input_height / pred.shape[2])
            anchors = self.anchors[stride]
            pred = pred.transpose((0, 2, 3, 1))

            box = pred[..., :self.reg_max * 4]
            cls_logits = pred[..., self.reg_max * 4:-15]
            cls = (1 / (1 + np.exp(-cls_logits))).reshape((-1, 1))
            kpts = pred[..., -15:].reshape((-1, 15))  # x1,y1,score1, ..., x5,y5,score5

            # Each box side is a distribution over reg_max bins; its
            # expected value is the distance from the anchor point.
            side_dist = self.softmax(box.reshape(-1, 4, self.reg_max), axis=-1)
            bbox_pred = np.dot(side_dist, self.project).reshape((-1, 4))
            bbox = self.distance2bbox(anchors, bbox_pred,
                                      max_shape=(self.input_height, self.input_width)) * stride

            kpts[:, 0::3] = (kpts[:, 0::3] * 2.0 + (anchors[:, 0].reshape((-1, 1)) - 0.5)) * stride
            kpts[:, 1::3] = (kpts[:, 1::3] * 2.0 + (anchors[:, 1].reshape((-1, 1)) - 0.5)) * stride
            kpts[:, 2::3] = 1 / (1 + np.exp(-kpts[:, 2::3]))

            # Undo the padding, then the resize (relies on broadcasting).
            bbox -= np.array([[padw, padh, padw, padh]])
            bbox *= np.array([[scale_w, scale_h, scale_w, scale_h]])
            kpts -= np.tile(np.array([padw, padh, 0]), 5).reshape((1, 15))
            kpts *= np.tile(np.array([scale_w, scale_h, 1]), 5).reshape((1, 15))

            bboxes.append(bbox)
            scores.append(cls)
            landmarks.append(kpts)

        bboxes = np.concatenate(bboxes, axis=0)
        scores = np.concatenate(scores, axis=0)
        landmarks = np.concatenate(landmarks, axis=0)

        bboxes_wh = bboxes.copy()
        bboxes_wh[:, 2:4] = bboxes[:, 2:4] - bboxes[:, 0:2]  # xyxy -> xywh
        classIds = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)  # max class confidence

        mask = confidences > self.conf_threshold
        if not mask.any():
            # No candidate passed the threshold, so skip NMS altogether.
            print('nothing detect')
            return np.array([]), np.array([]), np.array([]), np.array([])

        bboxes_wh = bboxes_wh[mask]
        confidences = confidences[mask]
        classIds = classIds[mask]
        landmarks = landmarks[mask]

        indices = cv2.dnn.NMSBoxes(bboxes_wh.tolist(), confidences.tolist(), self.conf_threshold,
                                   self.iou_threshold)
        # NMSBoxes may return an (N, 1) array, an (N,) array or an empty
        # tuple; normalise all of them to a flat integer index array.
        indices = np.asarray(indices, dtype=int).reshape(-1)
        if len(indices) == 0:
            print('nothing detect')
            return np.array([]), np.array([]), np.array([]), np.array([])

        return bboxes_wh[indices], confidences[indices], classIds[indices], landmarks[indices]

    def distance2bbox(self, points, distance, max_shape=None):
        """Convert per-side distances from anchor points into boxes.

        Args:
            points: ``(N, 2)`` array of ``(x, y)`` anchor points.
            distance: ``(N, 4)`` array of left, top, right, bottom distances.
            max_shape: optional ``(height, width)``; when given, coordinates
                are clipped to ``[0, width]`` and ``[0, height]``.

        Returns:
            ``(N, 4)`` array of ``(x1, y1, x2, y2)`` boxes.
        """
        x1 = points[:, 0] - distance[:, 0]
        y1 = points[:, 1] - distance[:, 1]
        x2 = points[:, 0] + distance[:, 2]
        y2 = points[:, 1] + distance[:, 3]
        if max_shape is not None:
            x1 = np.clip(x1, 0, max_shape[1])
            y1 = np.clip(y1, 0, max_shape[0])
            x2 = np.clip(x2, 0, max_shape[1])
            y2 = np.clip(y2, 0, max_shape[0])
        return np.stack([x1, y1, x2, y2], axis=-1)

    def draw_detections(self, image, boxes, scores, kpts):
        """Draw boxes, confidence labels and keypoints onto ``image``.

        Args:
            image: image to draw on; it is modified in place.
            boxes: iterable of ``(x, y, w, h)`` arrays.
            scores: confidence for each box.
            kpts: for each box, 15 values laid out as ``x, y, score`` for
                five keypoints.

        Returns:
            The same ``image`` object.
        """
        for box, score, kp in zip(boxes, scores, kpts):
            x, y, w, h = box.astype(int)
            # Draw rectangle
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), thickness=3)
            cv2.putText(image, "face:" + str(round(score, 2)), (x, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), thickness=2)
            for i in range(5):
                cv2.circle(image, (int(kp[i * 3]), int(kp[i * 3 + 1])), 4, (0, 255, 0), thickness=-1)
        return image
# Absolute path of the directory that contains this module.
ROOT = os.path.dirname(os.path.abspath(__file__))