import cv2
import numpy as np
import onnxruntime


class YOLOv7:

    def __init__(self, model_path, labels_path, engine_path, official_nms=False):
        self.official_nms = official_nms

        # Load class names from the labels file (one class name per line)
        with open(labels_path, 'r') as f:
            self.class_names = [cname.strip() for cname in f.readlines()]

        # Create a list of colors for each class where each color is a tuple of 3 integer values
        rng = np.random.default_rng(3)
        self.colors = rng.uniform(0, 255, size=(len(self.class_names), 3))

        # Initialize model
        self.initialize_model(model_path, engine_path)

    def __call__(self, image, confidence_threshold, nms_threshold):
        return self.detect_objects(image, confidence_threshold, nms_threshold)

    def xywh2xyxy(self, x):
        # Convert bounding boxes from (center x, center y, w, h) to (x1, y1, x2, y2)
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y

    def initialize_model(self, model_path, engine_path):
        self.session = onnxruntime.InferenceSession(
            model_path,
            providers=[
                # (
                #     'TensorrtExecutionProvider',
                #     {
                #         'device_id': 0,
                #         'trt_max_workspace_size': 2147483648,
                #         'trt_fp16_enable': True,
                #         'trt_engine_cache_enable': True,
                #         'trt_engine_cache_path': '{}'.format(engine_path),
                #     }
                # ),
                # (
                #     'CUDAExecutionProvider',
                #     {
                #         'device_id': 0,
                #         'arena_extend_strategy': 'kNextPowerOfTwo',
                #         'gpu_mem_limit': 2 * 1024 * 1024 * 1024,
                #         'cudnn_conv_algo_search': 'EXHAUSTIVE',
                #         'do_copy_in_default_stream': True,
                #     }
                # ),
                'CPUExecutionProvider'
            ]
        )

        # Get model info
        self.get_input_details()
        self.get_output_details()

        self.has_postprocess = 'score' in self.output_names or self.official_nms

    def detect_objects(self, image, confidence_threshold, nms_threshold):
        input_tensor = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        # Process output data
        self.boxes, self.scores, self.class_ids = self.process_output(
            outputs, confidence_threshold, nms_threshold)

        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor

    def rescale_boxes(self, boxes):
        # Rescale boxes to original image dimensions
        input_shape = np.array([self.input_width, self.input_height,
                                self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height,
                           self.img_width, self.img_height])
        return boxes

    def process_output(self, output, conf_threshold, nms_threshold):
        boxes, scores, class_ids = output

        # Drop the batch dimension
        boxes = boxes[0]
        scores = scores[0]
        class_ids = class_ids[0]

        res_boxes = []
        res_scores = []
        res_class_ids = []
        for box, score, class_id in zip(boxes, scores, class_ids):
            if score > conf_threshold:
                res_boxes.append(box)
                # Collapse the score to a Python float (it may carry a trailing dimension)
                res_scores.append(float(np.squeeze(score)))
                res_class_ids.append(int(class_id))

        if len(res_scores) == 0:
            return [], [], []

        # Scale boxes to original image dimensions
        res_boxes = self.rescale_boxes(res_boxes)

        # cv2.dnn.NMSBoxes expects (top-left x, top-left y, width, height), while the
        # boxes here are center-based, so shift them for the NMS call only
        nms_boxes = res_boxes.copy()
        nms_boxes[:, 0] -= nms_boxes[:, 2] / 2
        nms_boxes[:, 1] -= nms_boxes[:, 3] / 2

        fin_boxes, fin_scores, fin_class_ids = [], [], []
        keep_indices = cv2.dnn.NMSBoxes(nms_boxes.tolist(), res_scores,
                                        conf_threshold, nms_threshold)
        # Flatten to handle both the old (N, 1) and the new flat index formats of OpenCV
        for idx in np.array(keep_indices).flatten():
            fin_boxes.append(res_boxes[idx])
            fin_scores.append(res_scores[idx])
            fin_class_ids.append(res_class_ids[idx])

        # Convert boxes to xyxy format
        fin_boxes = self.xywh2xyxy(np.array(fin_boxes))

        # Convert class ids to class names
        fin_class_ids = [self.class_names[i] for i in fin_class_ids]

        return fin_boxes, fin_scores, fin_class_ids

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        mask_img = image.copy()
        det_img = image.copy()

        img_height, img_width = image.shape[:2]
        size = min([img_height, img_width]) * 0.0006
        text_thickness = max(int(min([img_height, img_width]) * 0.001), 1)

        # Draw bounding boxes and labels of detections
        for box, score, label in zip(self.boxes, self.scores, self.class_ids):
            # class_ids holds class names after process_output, so recover the index
            class_idx = self.class_names.index(label)
            color = tuple(int(c) for c in self.colors[class_idx])

            x1, y1, x2, y2 = box.astype(int)

            # Draw rectangle
            cv2.rectangle(det_img, (x1, y1), (x2, y2), color, 2)

            # Draw fill rectangle in mask image
            cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)

            caption = f'{label} {int(score * 100)}%'
            (tw, th), _ = cv2.getTextSize(text=caption, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                          fontScale=size, thickness=text_thickness)
            th = int(th * 1.2)

            cv2.rectangle(det_img, (x1, y1), (x1 + tw, y1 - th), color, -1)
            cv2.rectangle(mask_img, (x1, y1), (x1 + tw, y1 - th), color, -1)

            cv2.putText(det_img, caption, (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255),
                        text_thickness, cv2.LINE_AA)
            cv2.putText(mask_img, caption, (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255),
                        text_thickness, cv2.LINE_AA)

        return cv2.addWeighted(mask_img, mask_alpha, det_img, 1 - mask_alpha, 0)

    def get_input_details(self):
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]

        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]

    def inference(self, input_tensor):
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
        return outputs
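

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original module: the model, labels and image
    # paths below are placeholders (assumptions), and the engine path only matters if the
    # TensorRT provider above is re-enabled.
    detector = YOLOv7(
        model_path='yolov7.onnx',        # assumed path to an exported YOLOv7 ONNX model
        labels_path='coco.names',        # assumed file with one class name per line
        engine_path='trt_engine_cache',  # assumed cache directory for the TensorRT provider
    )

    img = cv2.imread('sample.jpg')       # assumed sample image (BGR, as loaded by OpenCV)
    boxes, scores, class_names = detector(img, confidence_threshold=0.5, nms_threshold=0.5)

    # Overlay the detections on the input image and display the result
    result = detector.draw_detections(img)
    cv2.imshow('Detections', result)
    cv2.waitKey(0)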