glenn-jocher committed
Commit 2e53844
1 Parent(s): 39ef6c7

ONNX inference update (#4073)

Files changed (1)
  1. detect.py +32 -22
detect.py CHANGED
@@ -64,18 +64,23 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
     half &= device.type != 'cpu'  # half precision only supported on CUDA
 
     # Load model
-    model = attempt_load(weights, map_location=device)  # load FP32 model
-    stride = int(model.stride.max())  # model stride
+    w = weights[0] if isinstance(weights, list) else weights
+    classify, pt, onnx = False, w.endswith('.pt'), w.endswith('.onnx')  # inference type
+    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+    if pt:
+        model = attempt_load(weights, map_location=device)  # load FP32 model
+        stride = int(model.stride.max())  # model stride
+        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+        if half:
+            model.half()  # to FP16
+        if classify:  # second-stage classifier
+            modelc = load_classifier(name='resnet50', n=2)  # initialize
+            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
+    elif onnx:
+        check_requirements(('onnx', 'onnxruntime'))
+        import onnxruntime
+        session = onnxruntime.InferenceSession(w, None)
     imgsz = check_img_size(imgsz, s=stride)  # check image size
-    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-    if half:
-        model.half()  # to FP16
-
-    # Second-stage classifier
-    classify = False
-    if classify:
-        modelc = load_classifier(name='resnet50', n=2)  # initialize
-        modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
 
     # Dataloader
     if webcam:
@@ -89,31 +94,36 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
     vid_path, vid_writer = [None] * bs, [None] * bs
 
     # Run inference
-    if device.type != 'cpu':
+    if pt and device.type != 'cpu':
         model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
     t0 = time.time()
     for path, img, im0s, vid_cap in dataset:
-        img = torch.from_numpy(img).to(device)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
+        if pt:
+            img = torch.from_numpy(img).to(device)
+            img = img.half() if half else img.float()  # uint8 to fp16/32
+        elif onnx:
+            img = img.astype('float32')
         img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        if img.ndimension() == 3:
-            img = img.unsqueeze(0)
+        if len(img.shape) == 3:
+            img = img[None]  # expand for batch dim
 
         # Inference
         t1 = time_sync()
-        pred = model(img,
-                     augment=augment,
-                     visualize=increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False)[0]
+        if pt:
+            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+            pred = model(img, augment=augment, visualize=visualize)[0]
+        elif onnx:
+            pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
 
-        # Apply NMS
+        # NMS
         pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
         t2 = time_sync()
 
-        # Apply Classifier
+        # Second-stage classifier (optional)
         if classify:
             pred = apply_classifier(pred, modelc, img, im0s)
 
-        # Process detections
+        # Process predictions
         for i, det in enumerate(pred):  # detections per image
             if webcam:  # batch_size >= 1
                 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
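
For reference, the onnx branch added above can be exercised on its own with onnxruntime. A minimal sketch, assuming a yolov5s.onnx export with a 640x640 input (the file name, input shape, and dummy data are illustrative assumptions, not part of this commit):

import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession('yolov5s.onnx', None)  # load exported ONNX model (assumed to exist)
img = np.zeros((1, 3, 640, 640), dtype=np.float32)  # dummy BCHW float32 image, values scaled to 0.0 - 1.0
pred = session.run([session.get_outputs()[0].name],  # first output: raw predictions
                   {session.get_inputs()[0].name: img})[0]
print(pred.shape)  # raw predictions; detect.py wraps this in torch.tensor before non_max_suppression

With this change, detect.py accepts either a .pt or a .onnx path for --weights and selects the inference backend from the file suffix.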