"""YOLOv7 object detector wrapped around an ONNX Runtime inference session."""
import cv2 | |
import numpy as np | |
import onnxruntime | |
class YOLOv7:
    """YOLOv7 object detector backed by an ONNX Runtime session.

    Loads class labels from a text file (one name per line), runs the
    model on BGR images, and returns boxes / scores / class names for
    detections above a confidence threshold after NMS.
    """

    def __init__(
        self,
        model_path,
        labels_path,
        engine_path,
        official_nms=False
    ):
        """
        Args:
            model_path: path to the .onnx model file.
            labels_path: text file with one class name per line.
            engine_path: TensorRT engine-cache path; only consumed when the
                TensorRT execution provider is enabled (see initialize_model).
            official_nms: True when the exported model embeds the official
                NMS post-processing.
        """
        self.official_nms = official_nms
        # One class name per line; strip trailing whitespace/newlines.
        # (The `with` block closes the file; no explicit close needed.)
        with open(labels_path, 'r') as f:
            self.class_names = [cname.strip() for cname in f.readlines()]
        # Fixed seed so each class keeps the same color across runs.
        rng = np.random.default_rng(3)
        self.colors = rng.uniform(0, 255, size=(len(self.class_names), 3))
        # Initialize model
        self.initialize_model(model_path, engine_path)

    def __call__(self, image, confidence_threshold, nms_threshold):
        """Shorthand for detect_objects()."""
        return self.detect_objects(image, confidence_threshold, nms_threshold)

    def xywh2xyxy(self, x):
        """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2)."""
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y

    def initialize_model(self, model_path, engine_path):
        """Create the inference session and cache model input/output metadata.

        NOTE(review): GPU inference can be enabled by prepending
        ('TensorrtExecutionProvider', {'trt_engine_cache_enable': True,
        'trt_engine_cache_path': engine_path, 'trt_fp16_enable': True, ...})
        and/or ('CUDAExecutionProvider', {...}) to the providers list;
        engine_path is only used in that configuration.
        """
        self.session = onnxruntime.InferenceSession(
            model_path,
            providers=['CPUExecutionProvider']
        )
        # Get model info
        self.get_input_details()
        self.get_output_details()
        # Models exported with the official NMS expose a 'score' output.
        self.has_postprocess = 'score' in self.output_names or self.official_nms

    def detect_objects(self, image, confidence_threshold, nms_threshold):
        """Run the full pipeline on one BGR image.

        Returns (boxes_xyxy, scores, class_names); the results are also
        cached on self for draw_detections().
        """
        input_tensor = self.prepare_input(image)
        # Perform inference on the image
        outputs = self.inference(input_tensor)
        # Process output data
        self.boxes, self.scores, self.class_ids = self.process_output(
            outputs, confidence_threshold, nms_threshold)
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """Convert a BGR image to a normalized NCHW float32 tensor."""
        self.img_height, self.img_width = image.shape[:2]
        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Resize to the fixed network input resolution.
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))
        # Scale pixel values to [0, 1] and reorder HWC -> CHW.
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        return input_img[np.newaxis, :, :, :].astype(np.float32)

    def rescale_boxes(self, boxes):
        """Rescale boxes from network-input coordinates to the original image."""
        input_shape = np.array([self.input_width, self.input_height,
                                self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height,
                           self.img_width, self.img_height])
        return boxes

    def process_output(self, output, conf_threshold, nms_threshold):
        """Filter raw output by confidence, rescale, and apply NMS.

        Returns (boxes_xyxy, scores, class_names); three empty lists when
        nothing passes the confidence threshold.
        """
        boxes, scores, class_ids = output
        # Drop the batch dimension (batch size 1).
        boxes, scores, class_ids = boxes[0], scores[0], class_ids[0]
        res_boxes, res_scores, res_class_ids = [], [], []
        for box, score, class_id in zip(boxes, scores, class_ids):
            # FIX: coerce to a plain float up front. The original indexed
            # score[0] after the threshold test, which crashes on scalar
            # score outputs; np.squeeze handles both scalar and (1,) shapes
            # with identical numeric behavior.
            score = float(np.squeeze(score))
            if score > conf_threshold:
                res_boxes.append(box)
                res_scores.append(score)
                res_class_ids.append(int(class_id))
        if not res_scores:
            return [], [], []
        # Scale boxes to original image dimensions (still xywh here).
        res_boxes = self.rescale_boxes(res_boxes)
        keep = cv2.dnn.NMSBoxes(res_boxes, res_scores,
                                conf_threshold, nms_threshold)
        # FIX: older OpenCV builds return indices shaped (N, 1); flatten so
        # iteration always yields plain integer indices.
        keep = np.asarray(keep).flatten()
        # Convert surviving boxes to xyxy format.
        fin_boxes = self.xywh2xyxy(np.array([res_boxes[i] for i in keep]))
        fin_scores = [res_scores[i] for i in keep]
        # Callers receive class NAMES (draw_detections maps them back).
        fin_class_ids = [self.class_names[res_class_ids[i]] for i in keep]
        return fin_boxes, fin_scores, fin_class_ids

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        """Draw the cached detections onto a copy of `image`.

        Returns a blend of a filled-box overlay (mask) and an outlined-box
        image with "label score%" captions.
        """
        mask_img = image.copy()
        det_img = image.copy()
        img_height, img_width = image.shape[:2]
        # Font scale and thickness proportional to the smaller image side.
        size = min([img_height, img_width]) * 0.0006
        text_thickness = int(min([img_height, img_width]) * 0.001)
        # Draw bounding boxes and labels of detections
        for box, score, class_id in zip(self.boxes, self.scores, self.class_ids):
            # FIX: process_output stores class NAMES in self.class_ids, but
            # the original indexed self.colors / self.class_names with them
            # directly, which fails for string ids. Map names back to an
            # integer index; raw integer ids are still accepted.
            idx = (self.class_names.index(class_id)
                   if isinstance(class_id, str) else int(class_id))
            # Plain int tuple so every OpenCV version accepts the color.
            color = tuple(int(c) for c in self.colors[idx])
            x1, y1, x2, y2 = box.astype(int)
            # Outline on the detection image, filled box on the mask image.
            cv2.rectangle(det_img, (x1, y1), (x2, y2), color, 2)
            cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)
            label = self.class_names[idx]
            caption = f'{label} {int(score * 100)}%'
            (tw, th), _ = cv2.getTextSize(text=caption,
                                          fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                          fontScale=size,
                                          thickness=text_thickness)
            th = int(th * 1.2)
            # Filled label background above the box corner, then the caption.
            cv2.rectangle(det_img, (x1, y1), (x1 + tw, y1 - th), color, -1)
            cv2.rectangle(mask_img, (x1, y1), (x1 + tw, y1 - th), color, -1)
            cv2.putText(det_img, caption, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX,
                        size, (255, 255, 255), text_thickness, cv2.LINE_AA)
            cv2.putText(mask_img, caption, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX,
                        size, (255, 255, 255), text_thickness, cv2.LINE_AA)
        # Blend the filled mask over the outlined image.
        return cv2.addWeighted(mask_img, mask_alpha, det_img, 1 - mask_alpha, 0)

    def get_input_details(self):
        """Cache input names and the fixed model input resolution."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]
        self.input_shape = model_inputs[0].shape
        # Input shape is NCHW: (batch, channels, height, width).
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the model's output tensor names."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]

    def inference(self, input_tensor):
        """Run the session on one input tensor; return the raw output list."""
        return self.session.run(self.output_names,
                                {self.input_names[0]: input_tensor})