import argparse

import cv2
import numpy as np
import onnxruntime as ort

from hivision.creator.retinaface.box_utils import decode, decode_landm
from hivision.creator.retinaface.prior_box import PriorBox


def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        # Keep the highest-scoring remaining box
        i = order[0]
        keep.append(i)

        # Intersection of that box with every other remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        # Drop boxes whose IoU with the kept box exceeds the threshold
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep
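
# A quick sanity check of py_cpu_nms (illustrative sketch; the boxes and scores below
# are made up): two heavily overlapping boxes plus one distant box, so NMS keeps the
# higher-scoring of the overlapping pair and the distant box.
#
#     dets = np.array(
#         [
#             [0, 0, 10, 10, 0.9],
#             [1, 1, 11, 11, 0.8],
#             [20, 20, 30, 30, 0.7],
#         ],
#         dtype=np.float32,
#     )
#     py_cpu_nms(dets, thresh=0.2)  # -> [0, 2]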


parser = argparse.ArgumentParser(description="Retinaface")
parser.add_argument(
    "--network", default="resnet50", help="Backbone network: mobile0.25 or resnet50"
)
parser.add_argument(
    "--cpu", action="store_true", default=False, help="Use CPU inference"
)
parser.add_argument(
    "--confidence_threshold", default=0.8, type=float, help="confidence_threshold"
)
parser.add_argument("--top_k", default=5000, type=int, help="top_k")
parser.add_argument("--nms_threshold", default=0.2, type=float, help="nms_threshold")
parser.add_argument("--keep_top_k", default=750, type=int, help="keep_top_k")
parser.add_argument(
    "-s",
    "--save_image",
    action="store_true",
    default=True,
    help="show detection results",
)
parser.add_argument(
    "--vis_thres", default=0.6, type=float, help="visualization_threshold"
)
args = parser.parse_args()


def load_model_ort(model_path):
    """Create an ONNX Runtime inference session for the given model file."""
    ort_session = ort.InferenceSession(model_path)
    return ort_session


def retinaface_detect_faces(image, model_path: str, sess=None):
    """Detect faces in a BGR image with a RetinaFace (ResNet-50) ONNX model.

    Returns an (N, 15) array of detections, one row per face:
    [x1, y1, x2, y2, score, lm1_x, lm1_y, ..., lm5_x, lm5_y],
    together with the ONNX Runtime session so callers can reuse it.
    """
    cfg = {
        "name": "Resnet50",
        "min_sizes": [[16, 32], [64, 128], [256, 512]],
        "steps": [8, 16, 32],
        "variance": [0.1, 0.2],
        "clip": False,
        "loc_weight": 2.0,
        "gpu_train": True,
        "batch_size": 24,
        "ngpu": 4,
        "epoch": 100,
        "decay1": 70,
        "decay2": 90,
        "image_size": 840,
        "pretrain": True,
        "return_layers": {"layer2": 1, "layer3": 2, "layer4": 3},
        "in_channel": 256,
        "out_channel": 256,
    }

    # Load the ONNX model unless an existing session was passed in
    if sess is None:
        retinaface = load_model_ort(model_path)
    else:
        retinaface = sess

    resize = 1

    # Read and preprocess the image: RGB conversion, mean subtraction, NCHW layout
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img = np.float32(img_rgb)
    im_height, im_width, _ = img.shape
    scale = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, axis=0)

    # Run the model
    inputs = {"input": img}
    loc, conf, landms = retinaface.run(None, inputs)

    # tic = time.time()
    # Decode box and landmark regressions against the prior (anchor) boxes
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    prior_data = priors
    boxes = decode(np.squeeze(loc, axis=0), prior_data, cfg["variance"])
    boxes = boxes * scale / resize
    scores = np.squeeze(conf, axis=0)[:, 1]
    landms = decode_landm(np.squeeze(landms, axis=0), prior_data, cfg["variance"])
    scale1 = np.array(
        [
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
        ]
    )
    landms = landms * scale1 / resize

    # Ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Keep top-K before NMS
    order = scores.argsort()[::-1][: args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # Do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # Keep top-K after NMS
    dets = dets[: args.keep_top_k, :]
    landms = landms[: args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    # print("post processing time: {:.4f}s".format(time.time() - tic))

    return dets, retinaface


if __name__ == "__main__":
    import gradio as gr

    def count_faces(image, model_path):
        """Gradio wrapper: run detection and return only the number of faces found."""
        dets, _ = retinaface_detect_faces(image, model_path)
        return int(len(dets))

    # Create Gradio interface
    iface = gr.Interface(
        fn=count_faces,
        inputs=[
            gr.Image(type="numpy", label="Upload image", height=400),  # height set to 400
            gr.Textbox(value="./FaceDetector.onnx", label="Path to the ONNX model"),
        ],
        outputs=gr.Number(label="Number of detected faces"),
        title="Face Detection",
        description="Upload an image and provide the path to an ONNX model to count the detected faces.",
    )

    # Launch the Gradio app
    iface.launch()
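
# Programmatic usage (sketch; the image filename below is a placeholder):
#
#     img = cv2.imread("group_photo.jpg")  # BGR image, as expected by retinaface_detect_faces
#     dets, sess = retinaface_detect_faces(img, "./FaceDetector.onnx")
#     # Each row of dets is [x1, y1, x2, y2, score, lm1_x, lm1_y, ..., lm5_x, lm5_y]
#     print(f"{len(dets)} face(s) above the confidence threshold")
#     # Pass sess=sess on later calls to reuse the already-loaded ONNX session.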