Export, detect and validate with TensorRT engine file (#5699)
* Export and detect with TensorRT engine file
* Resolve `isort`
* Make validation work with TensorRT engine
* feat: update export docstring
* feat: change suffix from *.trt to *.engine
* feat: get rid of pycuda
* feat: make compatible with val.py
* feat: support detect with fp16 engine
* Add Lite to Edge TPU string
* Remove *.trt comment
* Revert to standard success logger.info string
* Fix Deprecation Warning
```
export.py:310: DeprecationWarning: Use build_serialized_network instead.
with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
```
* Revert deprecation warning fix
@imyhxy it seems we can't apply the deprecation warning fix because export then fails, so I'm reverting my previous change here (see the sketch after this list).
* Update export.py
* Update export.py
* Update common.py
* export onnx to file before building TensorRT engine file
* feat: trigger ONNX export failure early
* feat: load ONNX model from file
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
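
For reference, the TensorRT 8 replacement that the warning suggests would look roughly like the sketch below. This is not what ships in this PR; with `build_serialized_network()` the export failed in our tests, so `build_engine()` stays for now. `builder`, `network`, `config` and `f` are the same objects as in `export_engine()` below:

```python
# Sketch only: the non-deprecated TensorRT 8 serialization path that was tried and reverted.
serialized = builder.build_serialized_network(network, config)  # returns an IHostMemory buffer
with open(f, 'wb') as t:
    t.write(serialized)  # IHostMemory supports the buffer protocol, so it can be written directly
```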
detect.py:
```diff
@@ -77,11 +77,11 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     # Load model
     device = select_device(device)
     model = DetectMultiBackend(weights, device=device, dnn=dnn)
-    stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
+    stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
     # Half
-    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
+    half &= (pt or engine) and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
     if pt:
         model.model.half() if half else model.model.float()
```
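
With the backend booleans extended, engine inference in detect.py is just a weights-path change, e.g.:

```
$ python export.py --weights yolov5s.pt --include engine --device 0
$ python detect.py --weights yolov5s.engine --device 0
```

Note that engines are built for a specific GPU and TensorRT version, so the two commands above assume the same machine.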
export.py:
```diff
@@ -12,6 +12,7 @@ TensorFlow SavedModel | yolov5s_saved_model/ | 'saved_model'
 TensorFlow GraphDef | yolov5s.pb | 'pb'
 TensorFlow Lite | yolov5s.tflite | 'tflite'
 TensorFlow.js | yolov5s_web_model/ | 'tfjs'
+TensorRT | yolov5s.engine | 'engine'
 
 Usage:
     $ python path/to/export.py --weights yolov5s.pt --include torchscript onnx coreml saved_model pb tflite tfjs
@@ -24,6 +25,7 @@ Inference:
     yolov5s_saved_model
     yolov5s.pb
     yolov5s.tflite
+    yolov5s.engine
 
 TensorFlow.js:
     $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
@@ -263,6 +265,51 @@ def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
         LOGGER.info(f'\n{prefix} export failure: {e}')
 
 
+def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
+    try:
+        check_requirements(('tensorrt',))
+        import tensorrt as trt
+
+        opset = (12, 13)[trt.__version__[0] == '8']  # test on TensorRT 7.x and 8.x
+        export_onnx(model, im, file, opset, train, False, simplify)
+        onnx = file.with_suffix('.onnx')
+        assert onnx.exists(), f'failed to export ONNX file: {onnx}'
+
+        LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
+        f = str(file).replace('.pt', '.engine')  # TensorRT engine file
+        logger = trt.Logger(trt.Logger.INFO)
+        if verbose:
+            logger.min_severity = trt.Logger.Severity.VERBOSE
+
+        builder = trt.Builder(logger)
+        config = builder.create_builder_config()
+        config.max_workspace_size = workspace * 1 << 30
+
+        flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
+        network = builder.create_network(flag)
+        parser = trt.OnnxParser(network, logger)
+        if not parser.parse_from_file(str(onnx)):
+            raise RuntimeError(f'failed to load ONNX file: {onnx}')
+
+        inputs = [network.get_input(i) for i in range(network.num_inputs)]
+        outputs = [network.get_output(i) for i in range(network.num_outputs)]
+        LOGGER.info(f'{prefix} Network Description:')
+        for inp in inputs:
+            LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
+        for out in outputs:
+            LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
+
+        half &= builder.platform_has_fast_fp16
+        LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
+        if half:
+            config.set_flag(trt.BuilderFlag.FP16)
+        with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
+            t.write(engine.serialize())
+        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
+
+    except Exception as e:
+        LOGGER.info(f'\n{prefix} export failure: {e}')
+
 @torch.no_grad()
 def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
         weights=ROOT / 'yolov5s.pt',  # weights path
@@ -278,6 +325,8 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
         dynamic=False,  # ONNX/TF: dynamic axes
         simplify=False,  # ONNX: simplify model
         opset=12,  # ONNX: opset version
+        verbose=False,  # TensorRT: verbose log
+        workspace=4,  # TensorRT: workspace size (GB)
         topk_per_class=100,  # TF.js NMS: topk per class to keep
         topk_all=100,  # TF.js NMS: topk for all classes to keep
         iou_thres=0.45,  # TF.js NMS: IoU threshold
@@ -322,6 +371,8 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
         export_torchscript(model, im, file, optimize)
     if 'onnx' in include:
         export_onnx(model, im, file, opset, train, dynamic, simplify)
+    if 'engine' in include:
+        export_engine(model, im, file, train, half, simplify, workspace, verbose)
     if 'coreml' in include:
         export_coreml(model, im, file)
 
@@ -360,13 +411,15 @@ def parse_opt():
     parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes')
     parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
     parser.add_argument('--opset', type=int, default=13, help='ONNX: opset version')
+    parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
+    parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
     parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
     parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
     parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
     parser.add_argument('--include', nargs='+',
                         default=['torchscript', 'onnx'],
-                        help='available formats are (torchscript, onnx, coreml, saved_model, pb, tflite, tfjs)')
+                        help='available formats are (torchscript, onnx, engine, coreml, saved_model, pb, tflite, tfjs)')
     opt = parser.parse_args()
     print_args(FILE.stem, opt)
     return opt
```
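
To sanity-check an exported file independently of YOLOv5, the engine can be deserialized and its bindings listed. A minimal sketch, assuming `tensorrt` is installed and `yolov5s.engine` was built on this machine:

```python
import tensorrt as trt

logger = trt.Logger(trt.Logger.INFO)
with open('yolov5s.engine', 'rb') as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())  # None here usually means a TensorRT version mismatch
for i in range(engine.num_bindings):
    kind = 'input' if engine.binding_is_input(i) else 'output'
    print(kind, engine.get_binding_name(i), tuple(engine.get_binding_shape(i)))
```

For a 640x640 yolov5s export this should list one `images` input and an `output` binding, matching the names DetectMultiBackend relies on below.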
common.py:
```diff
@@ -7,6 +7,7 @@ import json
 import math
 import platform
 import warnings
+from collections import namedtuple
 from copy import copy
 from pathlib import Path
 
@@ -285,11 +286,12 @@ class DetectMultiBackend(nn.Module):
         #   TensorFlow Lite: *.tflite
         #   ONNX Runtime: *.onnx
         #   OpenCV DNN: *.onnx with dnn=True
+        #   TensorRT: *.engine
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
-        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
-        pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
+        pt, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
         jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
 
@@ -317,6 +319,23 @@ class DetectMultiBackend(nn.Module):
             check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
             import onnxruntime
             session = onnxruntime.InferenceSession(w, None)
+        elif engine:  # TensorRT
+            LOGGER.info(f'Loading {w} for TensorRT inference...')
+            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
+            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
+            logger = trt.Logger(trt.Logger.INFO)
+            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
+                model = runtime.deserialize_cuda_engine(f.read())
+            bindings = dict()
+            for index in range(model.num_bindings):
+                name = model.get_binding_name(index)
+                dtype = trt.nptype(model.get_binding_dtype(index))
+                shape = tuple(model.get_binding_shape(index))
+                data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
+                bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+            binding_addrs = {n: d.ptr for n, d in bindings.items()}
+            context = model.create_execution_context()
+            batch_size = bindings['images'].shape[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
             import tensorflow as tf
             if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
@@ -334,7 +353,7 @@ class DetectMultiBackend(nn.Module):
                 model = tf.keras.models.load_model(w)
             elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
                 if 'edgetpu' in w.lower():
-                    LOGGER.info(f'Loading {w} for TensorFlow Edge TPU inference...')
+                    LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                     import tflite_runtime.interpreter as tfli
                     delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                                 'Darwin': 'libedgetpu.1.dylib',
@@ -369,6 +388,11 @@ class DetectMultiBackend(nn.Module):
                 y = self.net.forward()
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+        elif self.engine:  # TensorRT
+            assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
+            self.binding_addrs['images'] = int(im.data_ptr())
+            self.context.execute_v2(list(self.binding_addrs.values()))
+            y = self.bindings['output'].data
         else:  # TensorFlow model (TFLite, pb, saved_model)
             im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
             if self.pb:
@@ -391,7 +415,7 @@ class DetectMultiBackend(nn.Module):
             y[..., 1] *= h  # y
             y[..., 2] *= w  # w
             y[..., 3] *= h  # h
-        y = torch.tensor(y)
+        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
         return (y, []) if val else y
 
 
```
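
End to end, the engine path is used like any other DetectMultiBackend backend. A minimal sketch, assuming a CUDA device and an engine built for 1x3x640x640 input (the shape assert above fires otherwise):

```python
import torch
from models.common import DetectMultiBackend

device = torch.device('cuda:0')  # TensorRT inference requires CUDA
model = DetectMultiBackend('yolov5s.engine', device=device)
im = torch.zeros(1, 3, 640, 640, device=device)  # must match the engine's fixed input binding
# for an engine built with --half, feed a matching fp16 tensor instead: im = im.half()
y = model(im)  # raw predictions, e.g. shape (1, 25200, 85) for a 640x640, 80-class model
```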
val.py:
```diff
@@ -111,7 +111,7 @@ def run(data,
     # Initialize/load model and set device
     training = model is not None
     if training:  # called by train.py
-        device, pt = next(model.parameters()).device, True  # get model device, PyTorch model
+        device, pt, engine = next(model.parameters()).device, True, False  # get model device, PyTorch model
 
         half &= device.type != 'cpu'  # half precision only supported on CUDA
         model.half() if half else model.float()
@@ -124,11 +124,13 @@ def run(data,
 
         # Load model
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
-        stride, pt = model.stride, model.pt
+        stride, pt, engine = model.stride, model.pt, model.engine
         imgsz = check_img_size(imgsz, s=stride)  # check image size
-        half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
+        half &= (pt or engine) and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
         if pt:
             model.model.half() if half else model.model.float()
+        elif engine:
+            batch_size = model.batch_size
         else:
             half = False
             batch_size = 1  # export.py models default to batch-size 1
@@ -165,7 +167,7 @@ def run(data,
     pbar = tqdm(dataloader, desc=s, ncols=NCOLS, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
     for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
        t1 = time_sync()
-        if pt:
+        if pt or engine:
            im = im.to(device, non_blocking=True)
            targets = targets.to(device)
            im = im.half() if half else im.float()  # uint8 to fp16/32
```