pesi
/

Luigi committed on
Commit
1f0f5d8
1 Parent(s): 9133777

Add option --yolo_nas_pose, used to read YOLO NAS Pose model instead of RTMO

Browse files
Files changed (2) hide show
  1. rtmo_demo.py +3 -2
  2. rtmo_gpu.py +43 -22
rtmo_demo.py CHANGED
@@ -13,6 +13,7 @@ if __name__ == "__main__":
13
  parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
14
  parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
15
  parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
 
16
 
17
  # Parse the command-line arguments
18
  args = parser.parse_args()
@@ -20,10 +21,10 @@ if __name__ == "__main__":
20
  onnx_model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
21
 
22
  # Only Tiny Model has (416,416) as input model
23
- model_input_size = (416,416) if 'rtmo-t' in onnx_model.lower() else (640,640)
24
 
25
  body = RTMO_GPU(onnx_model=onnx_model,
26
- model_input_size=model_input_size)
27
 
28
  for mp4_path in Path(args.path).glob('*'):
29
 
 
13
  parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
14
  parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
15
  parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
16
+ parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
17
 
18
  # Parse the command-line arguments
19
  args = parser.parse_args()
 
21
  onnx_model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
22
 
23
  # Only Tiny Model has (416,416) as input model
24
+ model_input_size = (416,416) if 'rtmo-t' in onnx_model.lower() and not args.yolo_nas_pose else (640,640)
25
 
26
  body = RTMO_GPU(onnx_model=onnx_model,
27
+ model_input_size=model_input_size, is_yolo_nas_pose=args.yolo_nas_pose)
28
 
29
  for mp4_path in Path(args.path).glob('*'):
30
 
rtmo_gpu.py CHANGED
@@ -291,22 +291,36 @@ class RTMO_GPU(object):
291
  - final_boxes (np.ndarray): Final bounding boxes.
292
  - final_scores (np.ndarray): Final scores.
293
  """
294
- det_outputs, pose_outputs = outputs
295
-
296
- # onnx contains nms module
297
- pack_dets = (det_outputs[0, :, :4], det_outputs[0, :, 4])
298
- final_boxes, final_scores = pack_dets
299
- final_boxes /= ratio
300
- isscore = final_scores > 0.3
301
- isbbox = [i for i in isscore]
302
- # final_boxes = final_boxes[isbbox]
303
-
304
- # decode pose outputs
305
- keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
306
- keypoints = keypoints / ratio
307
-
308
- keypoints = keypoints[isbbox]
309
- scores = scores[isbbox]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
  return keypoints, scores
312
 
@@ -321,16 +335,21 @@ class RTMO_GPU(object):
321
  """
322
  # build input to (1, 3, H, W)
323
  img = img.transpose(2, 0, 1)
324
- img = np.ascontiguousarray(img, dtype=np.float32)
325
  input = img[None, :, :, :]
326
 
327
  # Create an IO Binding object
328
  io_binding = self.session.io_binding()
329
 
330
- # Bind the model inputs and outputs to the IO Binding object
331
- io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
332
- io_binding.bind_output(name='dets')
333
- io_binding.bind_output(name='keypoints')
 
 
 
 
 
334
 
335
  # Run inference with IO Binding
336
  self.session.run_with_iobinding(io_binding)
@@ -355,7 +374,8 @@ class RTMO_GPU(object):
355
  model_input_size: tuple = (640, 640),
356
  mean: tuple = None,
357
  std: tuple = None,
358
- device: str = 'cuda'):
 
359
 
360
  if not os.path.exists(onnx_model):
361
  # If the file does not exist, raise FileNotFoundError
@@ -381,6 +401,7 @@ class RTMO_GPU(object):
381
  self.mean = mean
382
  self.std = std
383
  self.device = device
 
384
 
385
  class RTMO_GPU_Batch(RTMO_GPU):
386
  def preprocess_batch(self, imgs: List[np.ndarray]) -> Tuple[np.ndarray, List[float]]:
 
291
  - final_boxes (np.ndarray): Final bounding boxes.
292
  - final_scores (np.ndarray): Final scores.
293
  """
294
+
295
+ if not self.is_yolo_nas_pose:
296
+ # RTMO
297
+ det_outputs, pose_outputs = outputs
298
+
299
+ # onnx contains nms module
300
+ pack_dets = (det_outputs[0, :, :4], det_outputs[0, :, 4])
301
+ final_boxes, final_scores = pack_dets
302
+ final_boxes /= ratio
303
+ isscore = final_scores > 0.3
304
+ isbbox = [i for i in isscore]
305
+ # final_boxes = final_boxes[isbbox]
306
+
307
+ # decode pose outputs
308
+ keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
309
+ keypoints = keypoints / ratio
310
+
311
+ keypoints = keypoints[isbbox]
312
+ scores = scores[isbbox]
313
+ else:
314
+ # NAS Pose
315
+ flat_predictions = outputs[0]
316
+ if flat_predictions.shape[0] > 0: # at least one person found
317
+ mask = flat_predictions[:, 0] == 0
318
+ pred_bboxes = flat_predictions[mask, 1:5]
319
+ pred_joints = flat_predictions[mask, 6:].reshape((len(pred_bboxes), -1, 3))
320
+ keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
321
+ keypoints = keypoints / ratio
322
+ else: # no detection
323
+ keypoints, scores = np.zeros((0, 17, 2)), np.zeros((0, 17))
324
 
325
  return keypoints, scores
326
 
 
335
  """
336
  # build input to (1, 3, H, W)
337
  img = img.transpose(2, 0, 1)
338
+ img = np.ascontiguousarray(img, dtype=np.float32 if not self.is_yolo_nas_pose else np.uint8)
339
  input = img[None, :, :, :]
340
 
341
  # Create an IO Binding object
342
  io_binding = self.session.io_binding()
343
 
344
+ if not self.is_yolo_nas_pose:
345
+ # RTMO
346
+ io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
347
+ io_binding.bind_output(name='dets')
348
+ io_binding.bind_output(name='keypoints')
349
+ else:
350
+ # NAS Pose, flat format
351
+ io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
352
+ io_binding.bind_output(name='graph2_flat_predictions')
353
 
354
  # Run inference with IO Binding
355
  self.session.run_with_iobinding(io_binding)
 
374
  model_input_size: tuple = (640, 640),
375
  mean: tuple = None,
376
  std: tuple = None,
377
+ device: str = 'cuda',
378
+ is_yolo_nas_pose = False):
379
 
380
  if not os.path.exists(onnx_model):
381
  # If the file does not exist, raise FileNotFoundError
 
401
  self.mean = mean
402
  self.std = std
403
  self.device = device
404
+ self.is_yolo_nas_pose = is_yolo_nas_pose
405
 
406
  class RTMO_GPU_Batch(RTMO_GPU):
407
  def preprocess_batch(self, imgs: List[np.ndarray]) -> Tuple[np.ndarray, List[float]]: