"""ONNX Runtime inference sample for a YOLOv5-style detection model.

Loads an ONNX model, runs it on a single image, decodes the three raw
detection-head outputs, applies non-max suppression and writes an annotated
copy of the image to disk.
"""

import argparse
import pathlib
import sys

import cv2
import numpy as np
import onnxruntime
import torch

# Make the sibling ``utils`` module importable regardless of the working
# directory the script is launched from.
CURRENT_DIR = pathlib.Path(__file__).parent
sys.path.append(str(CURRENT_DIR))

from utils import (
    letterbox,
    non_max_suppression,
    scale_coords,
    Annotator,
    Colors,
)


def pre_process(img):
    """Turn an image array into the float32 batch tensor fed to the model.

    Args:
        img: Image array as returned by ``cv2.imread`` (the original comments
            treat it as HWC with BGR channel order).

    Returns:
        A contiguous ``float32`` numpy array with a leading batch axis of
        size 1, channels-first, scaled to ``[0, 1]``.
    """
    # letterbox returns a tuple; only its first element (the image) is used.
    img = letterbox(img, [640, 640], stride=32, auto=False)[0]
    # HWC to CHW, then reverse the channel axis (BGR to RGB).
    img = img.transpose((2, 0, 1))[::-1]
    img = np.ascontiguousarray(img)
    img = img.astype("float32")
    img = img / 255.0
    img = img[np.newaxis, :]
    return img


def post_process(x):
    """Decode the three raw detection-head outputs into box predictions.

    Relies on the module-level globals ``grid`` and ``anchor_grid``, which are
    only defined when this file is run as a script.

    Args:
        x: Sequence of three arrays/tensors, each of shape
            ``(bs, 255, ny, nx)`` (255 is viewed as 3 anchors x 85 values).

    Returns:
        A tuple ``(pred, raw)`` where ``pred`` is a tensor of shape
        ``(bs, N, 85)`` holding all decoded predictions concatenated over the
        three heads, and ``raw`` is the list of per-head tensors reshaped to
        ``(bs, 3, ny, nx, 85)``.
    """
    x = list(x)
    z = []  # inference output
    stride = [8, 16, 32]
    for i, head_stride in enumerate(stride):
        bs, _, ny, nx = x[i].shape
        # x(bs,255,ny,nx) to x(bs,3,ny,nx,85).
        # as_tensor accepts both ndarrays and tensors; torch.tensor() on an
        # existing tensor copies it and emits a UserWarning.
        x[i] = (
            torch.as_tensor(x[i])
            .view(bs, 3, 85, ny, nx)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )
        y = x[i].sigmoid()
        # Box centre: offset within the cell plus the cell position, scaled by
        # the head's stride.
        xy = (y[..., 0:2] * 2.0 - 0.5 + grid[i]) * head_stride
        # Box size: scaled by the per-head anchor dimensions.
        wh = (y[..., 2:4] * 2) ** 2 * anchor_grid[i]
        y = torch.cat((xy, wh, y[..., 4:]), -1)
        z.append(y.view(bs, -1, 85))

    return (torch.cat(z, 1), x)


def make_parser():
    """Build the command-line argument parser for this sample."""
    parser = argparse.ArgumentParser("onnxruntime inference sample")
    parser.add_argument(
        "-m",
        "--onnx_model",
        type=str,
        default="./yolov5s.onnx",
        help="input your onnx model.",
    )
    parser.add_argument(
        "-i",
        "--image_path",
        type=str,
        default='./demo.jpg',
        help="path to your input image.",
    )
    parser.add_argument(
        "-o",
        "--output_path",
        type=str,
        default='./demo_infer.jpg',
        # The value is passed to cv2.imwrite, so it is a file, not a directory.
        help="path to your output image.",
    )
    parser.add_argument(
        '--ipu',
        action='store_true',
        help='flag for ryzen ai',
    )
    parser.add_argument(
        '--provider_config',
        default='',
        type=str,
        help='provider config for ryzen ai',
    )
    return parser


# Class labels, indexed by the integer class id produced by the model.
names = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
    'teddy bear', 'hair drier', 'toothbrush',
]


if __name__ == '__main__':
    args = make_parser().parse_args()

    onnx_path = args.onnx_model
    if args.ipu:
        providers = ["VitisAIExecutionProvider"]
        provider_options = [{"config_file": args.provider_config}]
        onnx_weight = onnxruntime.InferenceSession(
            onnx_path, providers=providers, provider_options=provider_options
        )
    else:
        onnx_weight = onnxruntime.InferenceSession(onnx_path)

    # grid / anchor_grid are read as globals by post_process().
    # NOTE(security): allow_pickle=True can execute arbitrary code while
    # loading; only use .npy files from a trusted source.
    grid = np.load("./grid.npy", allow_pickle=True)
    anchor_grid = np.load("./anchor_grid.npy", allow_pickle=True)

    path = args.image_path
    new_path = args.output_path
    conf_thres, iou_thres, classes, agnostic_nms, max_det = 0.25, 0.45, None, False, 1000

    img0 = cv2.imread(path)
    if img0 is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"could not read input image: {path}")
    img = pre_process(img0)

    # The model is fed NHWC input and its outputs are converted back to NCHW
    # before decoding.
    onnx_input = {onnx_weight.get_inputs()[0].name: img.transpose(0, 2, 3, 1)}
    onnx_output = onnx_weight.run(None, onnx_input)
    onnx_output = [torch.tensor(item).permute(0, 3, 1, 2) for item in onnx_output]
    onnx_output = post_process(onnx_output)
    pred = non_max_suppression(
        onnx_output[0], conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det
    )

    colors = Colors()
    det = pred[0]  # detections for the single image in the batch
    im0 = img0.copy()
    annotator = Annotator(im0, line_width=2, example=str(names))
    # len() rather than truthiness: det is indexed like a 2-D tensor, whose
    # bool() is ambiguous when it holds more than one element.
    if len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        # Write results
        for *xyxy, conf, cls in reversed(det):
            c = int(cls)  # integer class
            label = f"{names[c]} {conf:.2f}"
            annotator.box_label(xyxy, label, color=colors(c, True))

    # Stream results
    im0 = annotator.result()
    if not cv2.imwrite(new_path, im0):
        # cv2.imwrite reports failure through its boolean return value.
        raise OSError(f"could not write output image: {new_path}")