glenn-jocher committed on
Commit
b94b59e
1 Parent(s): 84efa62

DetectMultiBackend() `--half` handling (#6945)

Browse files

* DetectMultiBackend() `--half` handling

* CI fixes

* rename .half to .fp16 to avoid conflict

* warmup fix

* val update

* engine update

* engine update

Files changed (3) hide show
  1. detect.py +4 -13
  2. models/common.py +8 -5
  3. val.py +9 -16
detect.py CHANGED
@@ -89,19 +89,10 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
89
 
90
  # Load model
91
  device = select_device(device)
92
- model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
93
- stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
94
  imgsz = check_img_size(imgsz, s=stride) # check image size
95
 
96
- # Half
97
- half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
98
- if pt or jit:
99
- model.model.half() if half else model.model.float()
100
- elif engine and model.trt_fp16_input != half:
101
- LOGGER.info('model ' + (
102
- 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
103
- half = model.trt_fp16_input
104
-
105
  # Dataloader
106
  if webcam:
107
  view_img = check_imshow()
@@ -114,12 +105,12 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
114
  vid_path, vid_writer = [None] * bs, [None] * bs
115
 
116
  # Run inference
117
- model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half) # warmup
118
  dt, seen = [0.0, 0.0, 0.0], 0
119
  for path, im, im0s, vid_cap, s in dataset:
120
  t1 = time_sync()
121
  im = torch.from_numpy(im).to(device)
122
- im = im.half() if half else im.float() # uint8 to fp16/32
123
  im /= 255 # 0 - 255 to 0.0 - 1.0
124
  if len(im.shape) == 3:
125
  im = im[None] # expand for batch dim
 
89
 
90
  # Load model
91
  device = select_device(device)
92
+ model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
93
+ stride, names, pt = model.stride, model.names, model.pt
94
  imgsz = check_img_size(imgsz, s=stride) # check image size
95
 
 
 
 
 
 
 
 
 
 
96
  # Dataloader
97
  if webcam:
98
  view_img = check_imshow()
 
105
  vid_path, vid_writer = [None] * bs, [None] * bs
106
 
107
  # Run inference
108
+ model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
109
  dt, seen = [0.0, 0.0, 0.0], 0
110
  for path, im, im0s, vid_cap, s in dataset:
111
  t1 = time_sync()
112
  im = torch.from_numpy(im).to(device)
113
+ im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
114
  im /= 255 # 0 - 255 to 0.0 - 1.0
115
  if len(im.shape) == 3:
116
  im = im[None] # expand for batch dim
models/common.py CHANGED
@@ -277,7 +277,7 @@ class Concat(nn.Module):
277
 
278
  class DetectMultiBackend(nn.Module):
279
  # YOLOv5 MultiBackend class for python inference on various backends
280
- def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
281
  # Usage:
282
  # PyTorch: weights = *.pt
283
  # TorchScript: *.torchscript
@@ -297,6 +297,7 @@ class DetectMultiBackend(nn.Module):
297
  pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
298
  stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
299
  w = attempt_download(w) # download if not local
 
300
  if data: # data.yaml path (optional)
301
  with open(data, errors='ignore') as f:
302
  names = yaml.safe_load(f)['names'] # class names
@@ -305,11 +306,13 @@ class DetectMultiBackend(nn.Module):
305
  model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
306
  stride = max(int(model.stride.max()), 32) # model stride
307
  names = model.module.names if hasattr(model, 'module') else model.names # get class names
 
308
  self.model = model # explicitly assign for to(), cpu(), cuda(), half()
309
  elif jit: # TorchScript
310
  LOGGER.info(f'Loading {w} for TorchScript inference...')
311
  extra_files = {'config.txt': ''} # model metadata
312
  model = torch.jit.load(w, _extra_files=extra_files)
 
313
  if extra_files['config.txt']:
314
  d = json.loads(extra_files['config.txt']) # extra_files dict
315
  stride, names = int(d['stride']), d['names']
@@ -338,11 +341,11 @@ class DetectMultiBackend(nn.Module):
338
  import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
339
  check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
340
  Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
341
- trt_fp16_input = False
342
  logger = trt.Logger(trt.Logger.INFO)
343
  with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
344
  model = runtime.deserialize_cuda_engine(f.read())
345
  bindings = OrderedDict()
 
346
  for index in range(model.num_bindings):
347
  name = model.get_binding_name(index)
348
  dtype = trt.nptype(model.get_binding_dtype(index))
@@ -350,7 +353,7 @@ class DetectMultiBackend(nn.Module):
350
  data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
351
  bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
352
  if model.binding_is_input(index) and dtype == np.float16:
353
- trt_fp16_input = True
354
  binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
355
  context = model.create_execution_context()
356
  batch_size = bindings['images'].shape[0]
@@ -458,11 +461,11 @@ class DetectMultiBackend(nn.Module):
458
  y = torch.tensor(y) if isinstance(y, np.ndarray) else y
459
  return (y, []) if val else y
460
 
461
- def warmup(self, imgsz=(1, 3, 640, 640), half=False):
462
  # Warmup model by running inference once
463
  if self.pt or self.jit or self.onnx or self.engine: # warmup types
464
  if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
465
- im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float) # input image
466
  self.forward(im) # warmup
467
 
468
  @staticmethod
 
277
 
278
  class DetectMultiBackend(nn.Module):
279
  # YOLOv5 MultiBackend class for python inference on various backends
280
+ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False):
281
  # Usage:
282
  # PyTorch: weights = *.pt
283
  # TorchScript: *.torchscript
 
297
  pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
298
  stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
299
  w = attempt_download(w) # download if not local
300
+ fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16
301
  if data: # data.yaml path (optional)
302
  with open(data, errors='ignore') as f:
303
  names = yaml.safe_load(f)['names'] # class names
 
306
  model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
307
  stride = max(int(model.stride.max()), 32) # model stride
308
  names = model.module.names if hasattr(model, 'module') else model.names # get class names
309
+ model.half() if fp16 else model.float()
310
  self.model = model # explicitly assign for to(), cpu(), cuda(), half()
311
  elif jit: # TorchScript
312
  LOGGER.info(f'Loading {w} for TorchScript inference...')
313
  extra_files = {'config.txt': ''} # model metadata
314
  model = torch.jit.load(w, _extra_files=extra_files)
315
+ model.half() if fp16 else model.float()
316
  if extra_files['config.txt']:
317
  d = json.loads(extra_files['config.txt']) # extra_files dict
318
  stride, names = int(d['stride']), d['names']
 
341
  import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
342
  check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
343
  Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
 
344
  logger = trt.Logger(trt.Logger.INFO)
345
  with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
346
  model = runtime.deserialize_cuda_engine(f.read())
347
  bindings = OrderedDict()
348
+ fp16 = False # default updated below
349
  for index in range(model.num_bindings):
350
  name = model.get_binding_name(index)
351
  dtype = trt.nptype(model.get_binding_dtype(index))
 
353
  data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
354
  bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
355
  if model.binding_is_input(index) and dtype == np.float16:
356
+ fp16 = True
357
  binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
358
  context = model.create_execution_context()
359
  batch_size = bindings['images'].shape[0]
 
461
  y = torch.tensor(y) if isinstance(y, np.ndarray) else y
462
  return (y, []) if val else y
463
 
464
+ def warmup(self, imgsz=(1, 3, 640, 640)):
465
  # Warmup model by running inference once
466
  if self.pt or self.jit or self.onnx or self.engine: # warmup types
467
  if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
468
+ im = torch.zeros(*imgsz).to(self.device).type(torch.half if self.fp16 else torch.float) # input image
469
  self.forward(im) # warmup
470
 
471
  @staticmethod
val.py CHANGED
@@ -125,7 +125,6 @@ def run(data,
125
  training = model is not None
126
  if training: # called by train.py
127
  device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
128
-
129
  half &= device.type != 'cpu' # half precision only supported on CUDA
130
  model.half() if half else model.float()
131
  else: # called directly
@@ -136,23 +135,17 @@ def run(data,
136
  (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
137
 
138
  # Load model
139
- model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
140
- stride, pt, jit, onnx, engine = model.stride, model.pt, model.jit, model.onnx, model.engine
141
  imgsz = check_img_size(imgsz, s=stride) # check image size
142
- half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
143
- if pt or jit:
144
- model.model.half() if half else model.model.float()
145
- elif engine:
146
  batch_size = model.batch_size
147
- if model.trt_fp16_input != half:
148
- LOGGER.info('model ' + (
149
- 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
150
- half = model.trt_fp16_input
151
  else:
152
- half = False
153
- batch_size = 1 # export.py models default to batch-size 1
154
- device = torch.device('cpu')
155
- LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
156
 
157
  # Data
158
  data = check_dataset(data) # check
@@ -166,7 +159,7 @@ def run(data,
166
 
167
  # Dataloader
168
  if not training:
169
- model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz), half=half) # warmup
170
  pad = 0.0 if task in ('speed', 'benchmark') else 0.5
171
  rect = False if task == 'benchmark' else pt # square inference for benchmarks
172
  task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images
 
125
  training = model is not None
126
  if training: # called by train.py
127
  device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
 
128
  half &= device.type != 'cpu' # half precision only supported on CUDA
129
  model.half() if half else model.float()
130
  else: # called directly
 
135
  (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
136
 
137
  # Load model
138
+ model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
139
+ stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
140
  imgsz = check_img_size(imgsz, s=stride) # check image size
141
+ half = model.fp16 # FP16 supported on limited backends with CUDA
142
+ if engine:
 
 
143
  batch_size = model.batch_size
 
 
 
 
144
  else:
145
+ device = model.device
146
+ if not pt or jit:
147
+ batch_size = 1 # export.py models default to batch-size 1
148
+ LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')
149
 
150
  # Data
151
  data = check_dataset(data) # check
 
159
 
160
  # Dataloader
161
  if not training:
162
+ model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup
163
  pad = 0.0 if task in ('speed', 'benchmark') else 0.5
164
  rect = False if task == 'benchmark' else pt # square inference for benchmarks
165
  task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images