import cv2
import numpy as np
import onnxruntime as ort

from cvut.logger import Logger

__all__ = ['PoseDetector']
class PoseDetector:
    """Keypoint (pose) detector that runs an ONNX model through onnxruntime."""

    def __init__(self,
                 onnx_file="model_ort.onnx",
                 mean=[123.675, 116.28, 103.53],
                 std=[58.395, 57.12, 57.375],
                 to_rgb=True,
                 infer_wh=(192, 256),
                 pad_value=0,
                 logger: Logger = None,
                 ):
        self.logger = logger
        self.model = None
        self.load_network(model=onnx_file)
        self.infer_wh = infer_wh  # (width, height) expected by the network
        self.mean = np.array(mean, dtype='float32')
        self.std = np.array(std, dtype='float32')
        self.to_rgb = to_rgb
        self.pad_value = pad_value
    def load_network(self, model):
        """Create an onnxruntime session, preferring CUDA when available."""
        device = ort.get_device()
        cuda = device == 'GPU'
        try:
            providers = \
                ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda \
                else ['CPUExecutionProvider']
            so = ort.SessionOptions()
            so.log_severity_level = 3
            if self.logger:
                self.logger.info(f"providers: {providers}")
            self.model = ort.InferenceSession(
                model, providers=providers, sess_options=so)
            self.output_details = [i.name for i in self.model.get_outputs()]
            self.input_details = [i.name for i in self.model.get_inputs()]
            self.is_initiated = True
        except Exception as e:
            raise RuntimeError(f"Cannot load model {model}: {e}") from e
    def _get_rescale_ratio(self, image: np.ndarray):
        """Scale factor that fits the image inside infer_wh while keeping aspect ratio."""
        ori_h, ori_w = image.shape[:2]
        scale_factor = min(self.infer_wh[0] / ori_w, self.infer_wh[1] / ori_h)
        return scale_factor
    def _padding_image(self, image: np.ndarray):
        """Pad the resized image to exactly infer_wh, keeping it centered."""
        resized_h, resized_w = image.shape[:2]
        # Total padding needed along each axis
        padding_h = self.infer_wh[1] - resized_h
        padding_w = self.infer_wh[0] - resized_w
        top_padding = int(round(padding_h // 2 - 0.1))
        left_padding = int(round(padding_w // 2 - 0.1))
        bottom_padding = padding_h - top_padding
        right_padding = padding_w - left_padding
        padding_list = [top_padding, bottom_padding,
                        left_padding, right_padding]
        if any(padding_list):
            image = cv2.copyMakeBorder(image, top_padding, bottom_padding,
                                       left_padding, right_padding,
                                       cv2.BORDER_CONSTANT,
                                       value=self.pad_value)
        return image, padding_list
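
    # Illustrative numbers (not part of the pipeline): with infer_wh=(192, 256)
    # and a 640x480 (w x h) crop, _get_rescale_ratio returns
    # min(192/640, 256/480) = 0.3, the resize yields a 192x144 image, and
    # _padding_image centers it with [top, bottom, left, right] = [56, 56, 0, 0].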
    def _preprocess(self, image: np.ndarray):
        """Resize, pad, and normalize a BGR image into a (1, 3, H, W) float32 tensor."""
        scale_factor = self._get_rescale_ratio(image)
        image = cv2.resize(image, None, fx=scale_factor, fy=scale_factor)
        image, padding_list = self._padding_image(image)
        if self.to_rgb:
            # BGR -> RGB before normalization, since mean/std are given in RGB order
            image = image[:, :, ::-1]
        image = image.astype(np.float32)
        image = (image - self.mean[None, None, :]) / self.std[None, None, :]
        image = image.transpose((2, 0, 1))
        image = np.expand_dims(image, 0)
        image = np.ascontiguousarray(image)
        return image, padding_list, scale_factor
    def _infer(self, image: np.ndarray):
        """Run ONNX inference on a BGR image; returns [keypoints, scores] or None on failure."""
        try:
            # preprocess
            im, padding_list, scale = self._preprocess(image)
            # ONNX inference (single input; outputs: keypoints and keypoint scores)
            inp = {self.input_details[0]: im}
            outs = self.model.run(None, inp)
            kpts = outs[0]
            scores = outs[1][0]
            # Map keypoints from the padded/resized space back to the original image
            resized_kpts = []
            for kpt in kpts[0]:
                kpt[0] = kpt[0] - padding_list[2]  # subtract left padding
                kpt[1] = kpt[1] - padding_list[0]  # subtract top padding
                kpt = kpt / scale
                resized_kpts.append(kpt)
            return [resized_kpts, scores]
        except Exception as e:
            if self.logger:
                self.logger.error(f"{e}", detail=True)
            else:
                print(f"{e}")
            return None
    def infer(self, image, threshold):
        """Detect keypoints on a BGR image and keep those scoring at least `threshold`."""
        image = np.array(image)
        detections = self._infer(image)
        results = []
        if detections:
            kpts, scores = detections
            for kpt, score in zip(kpts, scores):
                if score >= threshold:
                    results.append({
                        "confidence": float(score),
                        "point": kpt
                    })
        return results
    @staticmethod
    def visualize(image, kpts):
        """Draw each detected keypoint as a filled red circle."""
        for point in kpts:
            kpt = point['point']
            cv2.circle(image, (int(kpt[0]), int(kpt[1])), 10, (0, 0, 255), -1)
        return image
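

# Minimal usage sketch. Assumptions: "model_ort.onnx" (the constructor default)
# sits next to this file, "person_crop.jpg" is a hypothetical single-person crop,
# and threshold=0.3 is an arbitrary example value; adjust all three for real use.
if __name__ == "__main__":
    detector = PoseDetector(onnx_file="model_ort.onnx")
    frame = cv2.imread("person_crop.jpg")
    if frame is None:
        raise SystemExit("Could not read the example image")
    keypoints = detector.infer(frame, threshold=0.3)
    print(f"{len(keypoints)} keypoints above threshold")
    vis = PoseDetector.visualize(frame.copy(), keypoints)
    cv2.imwrite("person_crop_pose.jpg", vis)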