# coding: utf-8

import os.path as osp
import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
import torch
import numpy as np
import onnxruntime
from .timer import Timer
from .rprint import rlog
from .crop import crop_image, _transform_pts


def make_abs_path(fn):
    """Return `fn` joined onto the directory that contains this source file."""
    return osp.join(osp.dirname(osp.realpath(__file__)), fn)


def to_ndarray(obj):
    """Convert `obj` to a NumPy array.

    * ``torch.Tensor`` -> detached, moved to CPU, then converted.
    * ``np.ndarray``   -> returned unchanged (same object, no copy).
    * anything else    -> passed through ``np.array``.
    """
    if isinstance(obj, torch.Tensor):
        # detach() first: Tensor.numpy() raises on tensors that require grad.
        return obj.detach().cpu().numpy()
    if isinstance(obj, np.ndarray):
        return obj
    return np.array(obj)


class LandmarkRunner(object):
    """Run an ONNX landmark model on an image and return 2D landmark points.

    Keyword Args:
        ckpt_path: path to the ONNX model, passed to
            ``onnxruntime.InferenceSession``.
        device_id: CUDA device index (default 0). Only read by the CUDA
            branch, which is currently unreachable (see ``__init__``).
        dsize: side length, in pixels, of the square model input (default 224).
    """

    def __init__(self, **kwargs):
        ckpt_path = kwargs.get('ckpt_path')
        # Deliberately pinned to CPU so the runner works with ZeroGPU.
        # The previous behaviour was: kwargs.get('onnx_provider', 'cuda')
        onnx_provider = 'cpu'
        device_id = kwargs.get('device_id', 0)
        self.dsize = kwargs.get('dsize', 224)
        self.timer = Timer()

        if onnx_provider.lower() == 'cuda':
            # Unreachable while the provider is pinned to 'cpu'; kept so the
            # CUDA path can be restored by changing the line above.
            self.session = onnxruntime.InferenceSession(
                ckpt_path,
                providers=[
                    ('CUDAExecutionProvider', {'device_id': device_id}),
                ],
            )
        else:
            opts = onnxruntime.SessionOptions()
            opts.intra_op_num_threads = 4  # default thread count is 4
            self.session = onnxruntime.InferenceSession(
                ckpt_path,
                providers=['CPUExecutionProvider'],
                sess_options=opts,
            )

    def _run(self, inp):
        """Feed `inp` to the model's ``'input'`` tensor and return all outputs."""
        out = self.session.run(None, {'input': inp})
        return out

    def run(self, img_rgb: np.ndarray, lmk=None):
        """Predict landmarks for `img_rgb`, in `img_rgb` pixel coordinates.

        Args:
            img_rgb: image laid out as HxWxC (it is transposed to CxHxW before
                inference). Values are divided by 255, so 0-255 data is
                presumably expected -- TODO confirm against callers.
            lmk: optional landmarks forwarded to ``crop_image`` to crop the
                face region first. When ``None`` the whole image is stretched
                to ``dsize x dsize`` instead.

        Returns:
            dict with a single key ``'pts'``: an (N, 2) array of landmarks
            (203 points according to the original author's note).
        """
        if lmk is not None:
            crop_dct = crop_image(img_rgb, lmk, dsize=self.dsize, scale=1.5, vy_ratio=-0.1)
            img_crop_rgb = crop_dct['img_crop']
        else:
            # Fallback: stretch the full frame to the model input size.
            img_crop_rgb = cv2.resize(img_rgb, (self.dsize, self.dsize))
            # cv2.resize scales each axis independently, so the crop->original
            # transform needs one factor per axis. A single max(h, w) factor
            # would misplace landmarks along the shorter side of non-square
            # images; for square images both factors are equal.
            height, width = img_rgb.shape[:2]
            scale_x = width / self.dsize
            scale_y = height / self.dsize
            crop_dct = {
                'M_c2o': np.array([
                    [scale_x, 0., 0.],
                    [0., scale_y, 0.],
                    [0., 0., 1.],
                ], dtype=np.float32),
            }

        # HxWx3 -> 1x3xHxW float32 in [0, 1]; channel order is left untouched.
        inp = (img_crop_rgb.astype(np.float32) / 255.).transpose(2, 0, 1)[None, ...]

        out_lst = self._run(inp)
        out_pts = out_lst[2]  # the third model output is used as the landmarks

        # Flatten to (N, 2) and scale by dsize into crop pixel coordinates.
        pts = to_ndarray(out_pts[0]).reshape(-1, 2) * self.dsize
        # Map from crop coordinates back to the original image.
        pts = _transform_pts(pts, M=crop_dct['M_c2o'])

        return {
            'pts': pts,  # 2d landmarks 203 points
        }

    def warmup(self):
        """Run one inference on an all-zero input and log the elapsed time."""
        self.timer.tic()

        # Build a dummy image for the warmup pass.
        dummy_image = np.zeros((1, 3, self.dsize, self.dsize), dtype=np.float32)

        _ = self._run(dummy_image)

        elapse = self.timer.toc()
        rlog(f'LandmarkRunner warmup time: {elapse:.3f}s')