DavidB glenn-jocher committed on
Commit
596de6d
1 Parent(s): 7e98b48

Default FP16 TensorRT export (#6798)

Browse files

* Assert engine precision #6777

* Default to FP32 inputs for TensorRT engines

* Default to FP16 TensorRT exports #6777

* Remove wrong line #6777

* Automatically adjust detect.py input precision #6777

* Automatically adjust val.py input precision #6777

* Add missing colon

* Cleanup

* Cleanup

* Remove default trt_fp16_input definition

* Experiment

* Reorder detect.py if statement to after half checks

* Update common.py

* Update export.py

* Cleanup

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>

Files changed (4) hide show
  1. detect.py +4 -0
  2. export.py +2 -3
  3. models/common.py +3 -0
  4. val.py +4 -0
detect.py CHANGED
@@ -97,6 +97,10 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
97
  half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
98
  if pt or jit:
99
  model.model.half() if half else model.model.float()
 
 
 
 
100
 
101
  # Dataloader
102
  if webcam:
 
97
  half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
98
  if pt or jit:
99
  model.model.half() if half else model.model.float()
100
+ elif engine and model.trt_fp16_input != half:
101
+ LOGGER.info('model ' + (
102
+ 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
103
+ half = model.trt_fp16_input
104
 
105
  # Dataloader
106
  if webcam:
export.py CHANGED
@@ -233,9 +233,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
233
  for out in outputs:
234
  LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
235
 
236
- half &= builder.platform_has_fast_fp16
237
- LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
238
- if half:
239
  config.set_flag(trt.BuilderFlag.FP16)
240
  with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
241
  t.write(engine.serialize())
 
233
  for out in outputs:
234
  LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
235
 
236
+ LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}')
237
+ if builder.platform_has_fast_fp16:
 
238
  config.set_flag(trt.BuilderFlag.FP16)
239
  with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
240
  t.write(engine.serialize())
models/common.py CHANGED
@@ -338,6 +338,7 @@ class DetectMultiBackend(nn.Module):
338
  import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
339
  check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
340
  Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
 
341
  logger = trt.Logger(trt.Logger.INFO)
342
  with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
343
  model = runtime.deserialize_cuda_engine(f.read())
@@ -348,6 +349,8 @@ class DetectMultiBackend(nn.Module):
348
  shape = tuple(model.get_binding_shape(index))
349
  data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
350
  bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
 
 
351
  binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
352
  context = model.create_execution_context()
353
  batch_size = bindings['images'].shape[0]
 
338
  import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
339
  check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
340
  Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
341
+ trt_fp16_input = False
342
  logger = trt.Logger(trt.Logger.INFO)
343
  with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
344
  model = runtime.deserialize_cuda_engine(f.read())
 
349
  shape = tuple(model.get_binding_shape(index))
350
  data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
351
  bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
352
+ if model.binding_is_input(index) and dtype == np.float16:
353
+ trt_fp16_input = True
354
  binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
355
  context = model.create_execution_context()
356
  batch_size = bindings['images'].shape[0]
val.py CHANGED
@@ -144,6 +144,10 @@ def run(data,
144
  model.model.half() if half else model.model.float()
145
  elif engine:
146
  batch_size = model.batch_size
 
 
 
 
147
  else:
148
  half = False
149
  batch_size = 1 # export.py models default to batch-size 1
 
144
  model.model.half() if half else model.model.float()
145
  elif engine:
146
  batch_size = model.batch_size
147
+ if model.trt_fp16_input != half:
148
+ LOGGER.info('model ' + (
149
+ 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
150
+ half = model.trt_fp16_input
151
  else:
152
  half = False
153
  batch_size = 1 # export.py models default to batch-size 1