imyhxy glenn-jocher committed on
Commit
7a39803
1 Parent(s): f17c86b

Export, detect and validation with TensorRT engine file (#5699)

Browse files

* Export and detect with TensorRT engine file

* Resolve `isort`

* Make validation work with TensorRT engine

* feat: update export docstring

* feat: change suffix from *.trt to *.engine

* feat: get rid of pycuda

* feat: make compatible with val.py

* feat: support detect with fp16 engine

* Add Lite to Edge TPU string

* Remove *.trt comment

* Revert to standard success logger.info string

* Fix Deprecation Warning

```
export.py:310: DeprecationWarning: Use build_serialized_network instead.
with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
```

* Revert deprecation warning fix



@imyhxy

It seems we can't apply the deprecation warning fix because export then fails, so I'm reverting my previous change here.

* Update export.py

* Update export.py

* Update common.py

* export onnx to file before building TensorRT engine file

* feat: trigger ONNX export failure early

* feat: load ONNX model from file

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>

Files changed (4) hide show
  1. detect.py +2 -2
  2. export.py +54 -1
  3. models/common.py +28 -4
  4. val.py +6 -4
detect.py CHANGED
@@ -77,11 +77,11 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
77
  # Load model
78
  device = select_device(device)
79
  model = DetectMultiBackend(weights, device=device, dnn=dnn)
80
- stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
81
  imgsz = check_img_size(imgsz, s=stride) # check image size
82
 
83
  # Half
84
- half &= pt and device.type != 'cpu' # half precision only supported by PyTorch on CUDA
85
  if pt:
86
  model.model.half() if half else model.model.float()
87
 
 
77
  # Load model
78
  device = select_device(device)
79
  model = DetectMultiBackend(weights, device=device, dnn=dnn)
80
+ stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
81
  imgsz = check_img_size(imgsz, s=stride) # check image size
82
 
83
  # Half
84
+ half &= (pt or engine) and device.type != 'cpu' # half precision only supported by PyTorch on CUDA
85
  if pt:
86
  model.model.half() if half else model.model.float()
87
 
export.py CHANGED
@@ -12,6 +12,7 @@ TensorFlow SavedModel | yolov5s_saved_model/ | 'saved_model'
12
  TensorFlow GraphDef | yolov5s.pb | 'pb'
13
  TensorFlow Lite | yolov5s.tflite | 'tflite'
14
  TensorFlow.js | yolov5s_web_model/ | 'tfjs'
 
15
 
16
  Usage:
17
  $ python path/to/export.py --weights yolov5s.pt --include torchscript onnx coreml saved_model pb tflite tfjs
@@ -24,6 +25,7 @@ Inference:
24
  yolov5s_saved_model
25
  yolov5s.pb
26
  yolov5s.tflite
 
27
 
28
  TensorFlow.js:
29
  $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
@@ -263,6 +265,51 @@ def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
263
  LOGGER.info(f'\n{prefix} export failure: {e}')
264
 
265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  @torch.no_grad()
267
  def run(data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path'
268
  weights=ROOT / 'yolov5s.pt', # weights path
@@ -278,6 +325,8 @@ def run(data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path'
278
  dynamic=False, # ONNX/TF: dynamic axes
279
  simplify=False, # ONNX: simplify model
280
  opset=12, # ONNX: opset version
 
 
281
  topk_per_class=100, # TF.js NMS: topk per class to keep
282
  topk_all=100, # TF.js NMS: topk for all classes to keep
283
  iou_thres=0.45, # TF.js NMS: IoU threshold
@@ -322,6 +371,8 @@ def run(data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path'
322
  export_torchscript(model, im, file, optimize)
323
  if 'onnx' in include:
324
  export_onnx(model, im, file, opset, train, dynamic, simplify)
 
 
325
  if 'coreml' in include:
326
  export_coreml(model, im, file)
327
 
@@ -360,13 +411,15 @@ def parse_opt():
360
  parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes')
361
  parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
362
  parser.add_argument('--opset', type=int, default=13, help='ONNX: opset version')
 
 
363
  parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
364
  parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
365
  parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
366
  parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
367
  parser.add_argument('--include', nargs='+',
368
  default=['torchscript', 'onnx'],
369
- help='available formats are (torchscript, onnx, coreml, saved_model, pb, tflite, tfjs)')
370
  opt = parser.parse_args()
371
  print_args(FILE.stem, opt)
372
  return opt
 
12
  TensorFlow GraphDef | yolov5s.pb | 'pb'
13
  TensorFlow Lite | yolov5s.tflite | 'tflite'
14
  TensorFlow.js | yolov5s_web_model/ | 'tfjs'
15
+ TensorRT | yolov5s.engine | 'engine'
16
 
17
  Usage:
18
  $ python path/to/export.py --weights yolov5s.pt --include torchscript onnx coreml saved_model pb tflite tfjs
 
25
  yolov5s_saved_model
26
  yolov5s.pb
27
  yolov5s.tflite
28
+ yolov5s.engine
29
 
30
  TensorFlow.js:
31
  $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
 
265
  LOGGER.info(f'\n{prefix} export failure: {e}')
266
 
267
 
268
+ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
269
+ try:
270
+ check_requirements(('tensorrt',))
271
+ import tensorrt as trt
272
+
273
+ opset = (12, 13)[trt.__version__[0] == '8'] # test on TensorRT 7.x and 8.x
274
+ export_onnx(model, im, file, opset, train, False, simplify)
275
+ onnx = file.with_suffix('.onnx')
276
+ assert onnx.exists(), f'failed to export ONNX file: {onnx}'
277
+
278
+ LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
279
+ f = str(file).replace('.pt', '.engine') # TensorRT engine file
280
+ logger = trt.Logger(trt.Logger.INFO)
281
+ if verbose:
282
+ logger.min_severity = trt.Logger.Severity.VERBOSE
283
+
284
+ builder = trt.Builder(logger)
285
+ config = builder.create_builder_config()
286
+ config.max_workspace_size = workspace * 1 << 30
287
+
288
+ flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
289
+ network = builder.create_network(flag)
290
+ parser = trt.OnnxParser(network, logger)
291
+ if not parser.parse_from_file(str(onnx)):
292
+ raise RuntimeError(f'failed to load ONNX file: {onnx}')
293
+
294
+ inputs = [network.get_input(i) for i in range(network.num_inputs)]
295
+ outputs = [network.get_output(i) for i in range(network.num_outputs)]
296
+ LOGGER.info(f'{prefix} Network Description:')
297
+ for inp in inputs:
298
+ LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
299
+ for out in outputs:
300
+ LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
301
+
302
+ half &= builder.platform_has_fast_fp16
303
+ LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
304
+ if half:
305
+ config.set_flag(trt.BuilderFlag.FP16)
306
+ with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
307
+ t.write(engine.serialize())
308
+ LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
309
+
310
+ except Exception as e:
311
+ LOGGER.info(f'\n{prefix} export failure: {e}')
312
+
313
  @torch.no_grad()
314
  def run(data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path'
315
  weights=ROOT / 'yolov5s.pt', # weights path
 
325
  dynamic=False, # ONNX/TF: dynamic axes
326
  simplify=False, # ONNX: simplify model
327
  opset=12, # ONNX: opset version
328
+ verbose=False, # TensorRT: verbose log
329
+ workspace=4, # TensorRT: workspace size (GB)
330
  topk_per_class=100, # TF.js NMS: topk per class to keep
331
  topk_all=100, # TF.js NMS: topk for all classes to keep
332
  iou_thres=0.45, # TF.js NMS: IoU threshold
 
371
  export_torchscript(model, im, file, optimize)
372
  if 'onnx' in include:
373
  export_onnx(model, im, file, opset, train, dynamic, simplify)
374
+ if 'engine' in include:
375
+ export_engine(model, im, file, train, half, simplify, workspace, verbose)
376
  if 'coreml' in include:
377
  export_coreml(model, im, file)
378
 
 
411
  parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes')
412
  parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
413
  parser.add_argument('--opset', type=int, default=13, help='ONNX: opset version')
414
+ parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
415
+ parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
416
  parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
417
  parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
418
  parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
419
  parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
420
  parser.add_argument('--include', nargs='+',
421
  default=['torchscript', 'onnx'],
422
+ help='available formats are (torchscript, onnx, engine, coreml, saved_model, pb, tflite, tfjs)')
423
  opt = parser.parse_args()
424
  print_args(FILE.stem, opt)
425
  return opt
models/common.py CHANGED
@@ -7,6 +7,7 @@ import json
7
  import math
8
  import platform
9
  import warnings
 
10
  from copy import copy
11
  from pathlib import Path
12
 
@@ -285,11 +286,12 @@ class DetectMultiBackend(nn.Module):
285
  # TensorFlow Lite: *.tflite
286
  # ONNX Runtime: *.onnx
287
  # OpenCV DNN: *.onnx with dnn=True
 
288
  super().__init__()
289
  w = str(weights[0] if isinstance(weights, list) else weights)
290
- suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
291
  check_suffix(w, suffixes) # check weights have acceptable suffix
292
- pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes) # backend booleans
293
  jit = pt and 'torchscript' in w.lower()
294
  stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
295
 
@@ -317,6 +319,23 @@ class DetectMultiBackend(nn.Module):
317
  check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
318
  import onnxruntime
319
  session = onnxruntime.InferenceSession(w, None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  else: # TensorFlow model (TFLite, pb, saved_model)
321
  import tensorflow as tf
322
  if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
@@ -334,7 +353,7 @@ class DetectMultiBackend(nn.Module):
334
  model = tf.keras.models.load_model(w)
335
  elif tflite: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
336
  if 'edgetpu' in w.lower():
337
- LOGGER.info(f'Loading {w} for TensorFlow Edge TPU inference...')
338
  import tflite_runtime.interpreter as tfli
339
  delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime
340
  'Darwin': 'libedgetpu.1.dylib',
@@ -369,6 +388,11 @@ class DetectMultiBackend(nn.Module):
369
  y = self.net.forward()
370
  else: # ONNX Runtime
371
  y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
 
 
 
 
 
372
  else: # TensorFlow model (TFLite, pb, saved_model)
373
  im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
374
  if self.pb:
@@ -391,7 +415,7 @@ class DetectMultiBackend(nn.Module):
391
  y[..., 1] *= h # y
392
  y[..., 2] *= w # w
393
  y[..., 3] *= h # h
394
- y = torch.tensor(y)
395
  return (y, []) if val else y
396
 
397
 
 
7
  import math
8
  import platform
9
  import warnings
10
+ from collections import namedtuple
11
  from copy import copy
12
  from pathlib import Path
13
 
 
286
  # TensorFlow Lite: *.tflite
287
  # ONNX Runtime: *.onnx
288
  # OpenCV DNN: *.onnx with dnn=True
289
+ # TensorRT: *.engine
290
  super().__init__()
291
  w = str(weights[0] if isinstance(weights, list) else weights)
292
+ suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel']
293
  check_suffix(w, suffixes) # check weights have acceptable suffix
294
+ pt, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes) # backend booleans
295
  jit = pt and 'torchscript' in w.lower()
296
  stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
297
 
 
319
  check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
320
  import onnxruntime
321
  session = onnxruntime.InferenceSession(w, None)
322
+ elif engine: # TensorRT
323
+ LOGGER.info(f'Loading {w} for TensorRT inference...')
324
+ import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
325
+ Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
326
+ logger = trt.Logger(trt.Logger.INFO)
327
+ with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
328
+ model = runtime.deserialize_cuda_engine(f.read())
329
+ bindings = dict()
330
+ for index in range(model.num_bindings):
331
+ name = model.get_binding_name(index)
332
+ dtype = trt.nptype(model.get_binding_dtype(index))
333
+ shape = tuple(model.get_binding_shape(index))
334
+ data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
335
+ bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
336
+ binding_addrs = {n: d.ptr for n, d in bindings.items()}
337
+ context = model.create_execution_context()
338
+ batch_size = bindings['images'].shape[0]
339
  else: # TensorFlow model (TFLite, pb, saved_model)
340
  import tensorflow as tf
341
  if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
 
353
  model = tf.keras.models.load_model(w)
354
  elif tflite: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
355
  if 'edgetpu' in w.lower():
356
+ LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
357
  import tflite_runtime.interpreter as tfli
358
  delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime
359
  'Darwin': 'libedgetpu.1.dylib',
 
388
  y = self.net.forward()
389
  else: # ONNX Runtime
390
  y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
391
+ elif self.engine: # TensorRT
392
+ assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
393
+ self.binding_addrs['images'] = int(im.data_ptr())
394
+ self.context.execute_v2(list(self.binding_addrs.values()))
395
+ y = self.bindings['output'].data
396
  else: # TensorFlow model (TFLite, pb, saved_model)
397
  im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
398
  if self.pb:
 
415
  y[..., 1] *= h # y
416
  y[..., 2] *= w # w
417
  y[..., 3] *= h # h
418
+ y = torch.tensor(y) if isinstance(y, np.ndarray) else y
419
  return (y, []) if val else y
420
 
421
 
val.py CHANGED
@@ -111,7 +111,7 @@ def run(data,
111
  # Initialize/load model and set device
112
  training = model is not None
113
  if training: # called by train.py
114
- device, pt = next(model.parameters()).device, True # get model device, PyTorch model
115
 
116
  half &= device.type != 'cpu' # half precision only supported on CUDA
117
  model.half() if half else model.float()
@@ -124,11 +124,13 @@ def run(data,
124
 
125
  # Load model
126
  model = DetectMultiBackend(weights, device=device, dnn=dnn)
127
- stride, pt = model.stride, model.pt
128
  imgsz = check_img_size(imgsz, s=stride) # check image size
129
- half &= pt and device.type != 'cpu' # half precision only supported by PyTorch on CUDA
130
  if pt:
131
  model.model.half() if half else model.model.float()
 
 
132
  else:
133
  half = False
134
  batch_size = 1 # export.py models default to batch-size 1
@@ -165,7 +167,7 @@ def run(data,
165
  pbar = tqdm(dataloader, desc=s, ncols=NCOLS, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
166
  for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
167
  t1 = time_sync()
168
- if pt:
169
  im = im.to(device, non_blocking=True)
170
  targets = targets.to(device)
171
  im = im.half() if half else im.float() # uint8 to fp16/32
 
111
  # Initialize/load model and set device
112
  training = model is not None
113
  if training: # called by train.py
114
+ device, pt, engine = next(model.parameters()).device, True, False # get model device, PyTorch model
115
 
116
  half &= device.type != 'cpu' # half precision only supported on CUDA
117
  model.half() if half else model.float()
 
124
 
125
  # Load model
126
  model = DetectMultiBackend(weights, device=device, dnn=dnn)
127
+ stride, pt, engine = model.stride, model.pt, model.engine
128
  imgsz = check_img_size(imgsz, s=stride) # check image size
129
+ half &= (pt or engine) and device.type != 'cpu' # half precision only supported by PyTorch on CUDA
130
  if pt:
131
  model.model.half() if half else model.model.float()
132
+ elif engine:
133
+ batch_size = model.batch_size
134
  else:
135
  half = False
136
  batch_size = 1 # export.py models default to batch-size 1
 
167
  pbar = tqdm(dataloader, desc=s, ncols=NCOLS, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
168
  for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
169
  t1 = time_sync()
170
+ if pt or engine:
171
  im = im.to(device, non_blocking=True)
172
  targets = targets.to(device)
173
  im = im.half() if half else im.float() # uint8 to fp16/32