# hinobus/libs/person_pose.py
import cv2
import numpy as np
import onnxruntime as ort
from cvut.logger import Logger
__all__ = ['PoseDetector']
class PoseDetector:
    """ONNX Runtime top-down pose estimator.

    Takes BGR images (OpenCV convention), rescales them to fit ``infer_wh``
    while preserving aspect ratio, pads to the exact network input size,
    optionally converts to RGB, normalizes with ``mean``/``std``, runs the
    ONNX model, and maps the predicted keypoints back to original-image
    coordinates.
    """

    def __init__(self,
                 onnx_file="model_ort.onnx",
                 mean=(123.675, 116.28, 103.53),
                 std=(58.395, 57.12, 57.375),
                 to_rgb=True,
                 infer_wh=(192, 256),
                 pad_value=0,
                 logger: "Logger" = None,
                 ):
        """
        Args:
            onnx_file: path to the exported ONNX pose model.
            mean: per-channel normalization mean (RGB order when
                ``to_rgb=True`` — these are the ImageNet RGB statistics).
            std: per-channel normalization std, same channel order as mean.
            to_rgb: convert BGR input to RGB before normalization.
            infer_wh: (width, height) of the network input.
            pad_value: constant used to pad the letterboxed borders.
            logger: optional cvut Logger for error reporting.
        """
        self.logger = logger
        self.model = None
        self.load_network(model=onnx_file)
        self.infer_wh = infer_wh
        self.mean = np.array(mean, dtype='float32')
        self.std = np.array(std, dtype='float32')
        self.to_rgb = to_rgb
        self.pad_value = pad_value

    def load_network(self, model):
        """Create the ONNX Runtime session, preferring CUDA when available.

        Raises:
            RuntimeError: if the session cannot be created (subclass of
                Exception, so existing ``except Exception`` callers still
                work).
        """
        cuda = ort.get_device() == 'GPU'
        try:
            providers = (['CUDAExecutionProvider', 'CPUExecutionProvider']
                         if cuda else ['CPUExecutionProvider'])
            so = ort.SessionOptions()
            so.log_severity_level = 3  # suppress info/warning noise
            self.model = ort.InferenceSession(
                model, providers=providers, sess_options=so)
            self.output_details = [o.name for o in self.model.get_outputs()]
            self.input_details = [i.name for i in self.model.get_inputs()]
            # NOTE: attribute name intentionally kept misspelled for
            # backward compatibility with external readers.
            self.is_inititated = True
        except Exception as e:
            raise RuntimeError(f"Cannot load model {model}: {e}") from e

    def _get_rescale_ratio(self, image: np.ndarray):
        """Return the scale that fits `image` inside `infer_wh` while
        preserving aspect ratio."""
        ori_h, ori_w = image.shape[:2]
        return min(self.infer_wh[0] / ori_w, self.infer_wh[1] / ori_h)

    def _padding_image(self, image: np.ndarray):
        """Pad `image` with `pad_value` to exactly `infer_wh`, centered.

        Returns:
            (padded_image, [top, bottom, left, right]) padding in pixels.
        """
        resized_h, resized_w = image.shape[:2]
        pad_h = self.infer_wh[1] - resized_h
        pad_w = self.infer_wh[0] - resized_w
        # round(x - 0.1) biases the split downward so any odd leftover
        # pixel goes to the bottom/right border.
        top = int(round(pad_h // 2 - 0.1))
        left = int(round(pad_w // 2 - 0.1))
        bottom = pad_h - top
        right = pad_w - left
        padding_list = [top, bottom, left, right]
        if any(padding_list):
            image = cv2.copyMakeBorder(image, top, bottom, left, right,
                                       cv2.BORDER_CONSTANT,
                                       value=self.pad_value)
        return image, padding_list

    def _preprocess(self, image: np.ndarray):
        """Resize, pad, and normalize a BGR image for the network.

        Returns:
            (1x3xHxW float32 tensor, [top, bottom, left, right] padding,
             scale factor applied before padding)
        """
        scale_factor = self._get_rescale_ratio(image)
        image = cv2.resize(image, None, fx=scale_factor, fy=scale_factor)
        image, padding_list = self._padding_image(image)
        # BUGFIX: channel conversion must happen BEFORE normalization.
        # mean/std are RGB statistics (mmcv.imnormalize convention); the
        # previous code normalized the still-BGR image and flipped after.
        if self.to_rgb:
            image = image[:, :, ::-1]
        image = image.astype(np.float32)
        image = (image - self.mean[None, None, :]) / self.std[None, None, :]
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        image = np.ascontiguousarray(np.expand_dims(image, 0))
        return image, padding_list, scale_factor

    def _infer(self, image: np.ndarray):
        """Run the network on one BGR image.

        Returns:
            [keypoints, scores] with keypoints mapped back to the original
            image coordinate system, or None when inference fails (the
            error is logged or printed, matching the original best-effort
            behavior).
        """
        try:
            im, padding_list, scale = self._preprocess(image)
            # Feed by the model's declared input name instead of the
            # previously hard-coded "input".
            outs = self.model.run(None, {self.input_details[0]: im})
            kpts = outs[0]
            scores = outs[1][0]
            resized_kpts = []
            for kpt in kpts[0]:
                # Undo padding offsets, then undo the rescale.
                kpt[0] = kpt[0] - padding_list[2]  # left padding
                kpt[1] = kpt[1] - padding_list[0]  # top padding
                resized_kpts.append(kpt / scale)
            return [resized_kpts, scores]
        except Exception as e:
            if self.logger:
                self.logger.error(f"{e}", detail=True)
            else:
                print(f"{e}")
            return None

    def infer(self, image, threshold):
        """Detect keypoints on a BGR image.

        Args:
            image: BGR image (anything ``np.array`` accepts).
            threshold: minimum per-keypoint confidence to keep.

        Returns:
            list of {"confidence": float, "point": keypoint} dicts; empty
            when inference fails or nothing passes the threshold.
        """
        image = np.array(image)
        detections = self._infer(image)
        results = []
        if detections:
            kpts, scores = detections
            for kpt, score in zip(kpts, scores):
                if score >= threshold:
                    results.append({
                        "confidence": float(score),
                        "point": kpt,
                    })
        return results

    @staticmethod
    def visualize(image, kpts):
        """Draw each keypoint from infer() results as a filled red circle."""
        for point in kpts:
            kpt = point['point']
            cv2.circle(image, (int(kpt[0]), int(kpt[1])), 10, (0, 0, 255), -1)
        return image