glenn-jocher
committed on
Commit
•
b94b59e
1
Parent(s):
84efa62
DetectMultiBackend() `--half` handling (#6945)
Browse files
* DetectMultiBackend() `--half` handling
* CI fixes
* rename .half to .fp16 to avoid conflict
* warmup fix
* val update
* engine update
* engine update
- detect.py +4 -13
- models/common.py +8 -5
- val.py +9 -16
detect.py
CHANGED
@@ -89,19 +89,10 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
|
|
89 |
|
90 |
# Load model
|
91 |
device = select_device(device)
|
92 |
-
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
|
93 |
-
stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
|
94 |
imgsz = check_img_size(imgsz, s=stride) # check image size
|
95 |
|
96 |
-
# Half
|
97 |
-
half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
|
98 |
-
if pt or jit:
|
99 |
-
model.model.half() if half else model.model.float()
|
100 |
-
elif engine and model.trt_fp16_input != half:
|
101 |
-
LOGGER.info('model ' + (
|
102 |
-
'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
|
103 |
-
half = model.trt_fp16_input
|
104 |
-
|
105 |
# Dataloader
|
106 |
if webcam:
|
107 |
view_img = check_imshow()
|
@@ -114,12 +105,12 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
|
|
114 |
vid_path, vid_writer = [None] * bs, [None] * bs
|
115 |
|
116 |
# Run inference
|
117 |
-
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half) # warmup
|
118 |
dt, seen = [0.0, 0.0, 0.0], 0
|
119 |
for path, im, im0s, vid_cap, s in dataset:
|
120 |
t1 = time_sync()
|
121 |
im = torch.from_numpy(im).to(device)
|
122 |
-
im = im.half() if half else im.float() # uint8 to fp16/32
|
123 |
im /= 255 # 0 - 255 to 0.0 - 1.0
|
124 |
if len(im.shape) == 3:
|
125 |
im = im[None] # expand for batch dim
|
|
|
89 |
|
90 |
# Load model
|
91 |
device = select_device(device)
|
92 |
+
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
|
93 |
+
stride, names, pt = model.stride, model.names, model.pt
|
94 |
imgsz = check_img_size(imgsz, s=stride) # check image size
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
# Dataloader
|
97 |
if webcam:
|
98 |
view_img = check_imshow()
|
|
|
105 |
vid_path, vid_writer = [None] * bs, [None] * bs
|
106 |
|
107 |
# Run inference
|
108 |
+
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
|
109 |
dt, seen = [0.0, 0.0, 0.0], 0
|
110 |
for path, im, im0s, vid_cap, s in dataset:
|
111 |
t1 = time_sync()
|
112 |
im = torch.from_numpy(im).to(device)
|
113 |
+
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
|
114 |
im /= 255 # 0 - 255 to 0.0 - 1.0
|
115 |
if len(im.shape) == 3:
|
116 |
im = im[None] # expand for batch dim
|
models/common.py
CHANGED
@@ -277,7 +277,7 @@ class Concat(nn.Module):
|
|
277 |
|
278 |
class DetectMultiBackend(nn.Module):
|
279 |
# YOLOv5 MultiBackend class for python inference on various backends
|
280 |
-
def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
|
281 |
# Usage:
|
282 |
# PyTorch: weights = *.pt
|
283 |
# TorchScript: *.torchscript
|
@@ -297,6 +297,7 @@ class DetectMultiBackend(nn.Module):
|
|
297 |
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
|
298 |
stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
|
299 |
w = attempt_download(w) # download if not local
|
|
|
300 |
if data: # data.yaml path (optional)
|
301 |
with open(data, errors='ignore') as f:
|
302 |
names = yaml.safe_load(f)['names'] # class names
|
@@ -305,11 +306,13 @@ class DetectMultiBackend(nn.Module):
|
|
305 |
model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
|
306 |
stride = max(int(model.stride.max()), 32) # model stride
|
307 |
names = model.module.names if hasattr(model, 'module') else model.names # get class names
|
|
|
308 |
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
|
309 |
elif jit: # TorchScript
|
310 |
LOGGER.info(f'Loading {w} for TorchScript inference...')
|
311 |
extra_files = {'config.txt': ''} # model metadata
|
312 |
model = torch.jit.load(w, _extra_files=extra_files)
|
|
|
313 |
if extra_files['config.txt']:
|
314 |
d = json.loads(extra_files['config.txt']) # extra_files dict
|
315 |
stride, names = int(d['stride']), d['names']
|
@@ -338,11 +341,11 @@ class DetectMultiBackend(nn.Module):
|
|
338 |
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
|
339 |
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
|
340 |
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
|
341 |
-
trt_fp16_input = False
|
342 |
logger = trt.Logger(trt.Logger.INFO)
|
343 |
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
|
344 |
model = runtime.deserialize_cuda_engine(f.read())
|
345 |
bindings = OrderedDict()
|
|
|
346 |
for index in range(model.num_bindings):
|
347 |
name = model.get_binding_name(index)
|
348 |
dtype = trt.nptype(model.get_binding_dtype(index))
|
@@ -350,7 +353,7 @@ class DetectMultiBackend(nn.Module):
|
|
350 |
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
|
351 |
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
|
352 |
if model.binding_is_input(index) and dtype == np.float16:
|
353 |
-
trt_fp16_input = True
|
354 |
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
|
355 |
context = model.create_execution_context()
|
356 |
batch_size = bindings['images'].shape[0]
|
@@ -458,11 +461,11 @@ class DetectMultiBackend(nn.Module):
|
|
458 |
y = torch.tensor(y) if isinstance(y, np.ndarray) else y
|
459 |
return (y, []) if val else y
|
460 |
|
461 |
-
def warmup(self, imgsz=(1, 3, 640, 640), half=False):
|
462 |
# Warmup model by running inference once
|
463 |
if self.pt or self.jit or self.onnx or self.engine: # warmup types
|
464 |
if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
|
465 |
-
im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float) # input image
|
466 |
self.forward(im) # warmup
|
467 |
|
468 |
@staticmethod
|
|
|
277 |
|
278 |
class DetectMultiBackend(nn.Module):
|
279 |
# YOLOv5 MultiBackend class for python inference on various backends
|
280 |
+
def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False):
|
281 |
# Usage:
|
282 |
# PyTorch: weights = *.pt
|
283 |
# TorchScript: *.torchscript
|
|
|
297 |
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
|
298 |
stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
|
299 |
w = attempt_download(w) # download if not local
|
300 |
+
fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16
|
301 |
if data: # data.yaml path (optional)
|
302 |
with open(data, errors='ignore') as f:
|
303 |
names = yaml.safe_load(f)['names'] # class names
|
|
|
306 |
model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
|
307 |
stride = max(int(model.stride.max()), 32) # model stride
|
308 |
names = model.module.names if hasattr(model, 'module') else model.names # get class names
|
309 |
+
model.half() if fp16 else model.float()
|
310 |
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
|
311 |
elif jit: # TorchScript
|
312 |
LOGGER.info(f'Loading {w} for TorchScript inference...')
|
313 |
extra_files = {'config.txt': ''} # model metadata
|
314 |
model = torch.jit.load(w, _extra_files=extra_files)
|
315 |
+
model.half() if fp16 else model.float()
|
316 |
if extra_files['config.txt']:
|
317 |
d = json.loads(extra_files['config.txt']) # extra_files dict
|
318 |
stride, names = int(d['stride']), d['names']
|
|
|
341 |
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
|
342 |
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
|
343 |
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
|
|
|
344 |
logger = trt.Logger(trt.Logger.INFO)
|
345 |
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
|
346 |
model = runtime.deserialize_cuda_engine(f.read())
|
347 |
bindings = OrderedDict()
|
348 |
+
fp16 = False # default updated below
|
349 |
for index in range(model.num_bindings):
|
350 |
name = model.get_binding_name(index)
|
351 |
dtype = trt.nptype(model.get_binding_dtype(index))
|
|
|
353 |
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
|
354 |
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
|
355 |
if model.binding_is_input(index) and dtype == np.float16:
|
356 |
+
fp16 = True
|
357 |
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
|
358 |
context = model.create_execution_context()
|
359 |
batch_size = bindings['images'].shape[0]
|
|
|
461 |
y = torch.tensor(y) if isinstance(y, np.ndarray) else y
|
462 |
return (y, []) if val else y
|
463 |
|
464 |
+
def warmup(self, imgsz=(1, 3, 640, 640)):
|
465 |
# Warmup model by running inference once
|
466 |
if self.pt or self.jit or self.onnx or self.engine: # warmup types
|
467 |
if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
|
468 |
+
im = torch.zeros(*imgsz).to(self.device).type(torch.half if self.fp16 else torch.float) # input image
|
469 |
self.forward(im) # warmup
|
470 |
|
471 |
@staticmethod
|
val.py
CHANGED
@@ -125,7 +125,6 @@ def run(data,
|
|
125 |
training = model is not None
|
126 |
if training: # called by train.py
|
127 |
device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
|
128 |
-
|
129 |
half &= device.type != 'cpu' # half precision only supported on CUDA
|
130 |
model.half() if half else model.float()
|
131 |
else: # called directly
|
@@ -136,23 +135,17 @@ def run(data,
|
|
136 |
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
|
137 |
|
138 |
# Load model
|
139 |
-
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
|
140 |
-
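stride, pt, jit, onnx, engine = model.stride, model.pt, model.jit, model.onnx, model.engine
|
141 |
imgsz = check_img_size(imgsz, s=stride) # check image size
|
142 |
-
half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
|
143 |
-
if pt or jit: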
|
144 |
-
model.model.half() if half else model.model.float()
|
145 |
-
elif engine:
|
146 |
batch_size = model.batch_size
|
147 |
-
if model.trt_fp16_input != half:
|
148 |
-
LOGGER.info('model ' + (
|
149 |
-
'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
|
150 |
-
half = model.trt_fp16_input
|
151 |
else:
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
|
157 |
# Data
|
158 |
data = check_dataset(data) # check
|
@@ -166,7 +159,7 @@ def run(data,
|
|
166 |
|
167 |
# Dataloader
|
168 |
if not training:
|
169 |
-
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz), half=half) # warmup
|
170 |
pad = 0.0 if task in ('speed', 'benchmark') else 0.5
|
171 |
rect = False if task == 'benchmark' else pt # square inference for benchmarks
|
172 |
task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images
|
|
|
125 |
training = model is not None
|
126 |
if training: # called by train.py
|
127 |
device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
|
|
|
128 |
half &= device.type != 'cpu' # half precision only supported on CUDA
|
129 |
model.half() if half else model.float()
|
130 |
else: # called directly
|
|
|
135 |
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
|
136 |
|
137 |
# Load model
|
138 |
+
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
|
139 |
+
stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
|
140 |
imgsz = check_img_size(imgsz, s=stride) # check image size
|
141 |
+
half = model.fp16 # FP16 supported on limited backends with CUDA
|
142 |
+
if engine:
|
|
|
|
|
143 |
batch_size = model.batch_size
|
|
|
|
|
|
|
|
|
144 |
else:
|
145 |
+
device = model.device
|
146 |
+
if not pt or jit:
|
147 |
+
batch_size = 1 # export.py models default to batch-size 1
|
148 |
+
LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')
|
149 |
|
150 |
# Data
|
151 |
data = check_dataset(data) # check
|
|
|
159 |
|
160 |
# Dataloader
|
161 |
if not training:
|
162 |
+
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup
|
163 |
pad = 0.0 if task in ('speed', 'benchmark') else 0.5
|
164 |
rect = False if task == 'benchmark' else pt # square inference for benchmarks
|
165 |
task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images
|