glenn-jocher committed on
Commit
260b172
1 Parent(s): 629d370

FP16 inference update

Browse files
Files changed (4) hide show
  1. detect.py +4 -10
  2. requirements.txt +3 -3
  3. test.py +17 -9
  4. utils/utils.py +4 -1
detect.py CHANGED
@@ -14,6 +14,7 @@ def detect(save_img=False):
14
  if os.path.exists(out):
15
  shutil.rmtree(out) # delete output folder
16
  os.makedirs(out) # make new output folder
 
17
 
18
  # Load model
19
  google_utils.attempt_download(weights)
@@ -21,6 +22,8 @@ def detect(save_img=False):
21
  # torch.save(torch.load(weights, map_location=device), weights) # update model if SourceChangeWarning
22
  # model.fuse()
23
  model.to(device).eval()
 
 
24
 
25
  # Second-stage classifier
26
  classify = False
@@ -29,11 +32,6 @@ def detect(save_img=False):
29
  modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
30
  modelc.to(device).eval()
31
 
32
- # Half precision
33
- half = half and device.type != 'cpu' # half precision only supported on CUDA
34
- if half:
35
- model.half()
36
-
37
  # Set Dataloader
38
  vid_path, vid_writer = None, None
39
  if webcam:
@@ -51,7 +49,7 @@ def detect(save_img=False):
51
  # Run inference
52
  t0 = time.time()
53
  img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
54
- _ = model(img.half() if half else img.float()) if device.type != 'cpu' else None # run once
55
  for path, img, im0s, vid_cap in dataset:
56
  img = torch.from_numpy(img).to(device)
57
  img = img.half() if half else img.float() # uint8 to fp16/32
@@ -63,10 +61,6 @@ def detect(save_img=False):
63
  t1 = torch_utils.time_synchronized()
64
  pred = model(img, augment=opt.augment)[0]
65
 
66
- # to float
67
- if half:
68
- pred = pred.float()
69
-
70
  # Apply NMS
71
  pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
72
  fast=True, classes=opt.classes, agnostic=opt.agnostic_nms)
 
14
  if os.path.exists(out):
15
  shutil.rmtree(out) # delete output folder
16
  os.makedirs(out) # make new output folder
17
+ half &= device.type != 'cpu' # half precision only supported on CUDA
18
 
19
  # Load model
20
  google_utils.attempt_download(weights)
 
22
  # torch.save(torch.load(weights, map_location=device), weights) # update model if SourceChangeWarning
23
  # model.fuse()
24
  model.to(device).eval()
25
+ if half:
26
+ model.half() # to FP16
27
 
28
  # Second-stage classifier
29
  classify = False
 
32
  modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
33
  modelc.to(device).eval()
34
 
 
 
 
 
 
35
  # Set Dataloader
36
  vid_path, vid_writer = None, None
37
  if webcam:
 
49
  # Run inference
50
  t0 = time.time()
51
  img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
52
+ _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
53
  for path, img, im0s, vid_cap in dataset:
54
  img = torch.from_numpy(img).to(device)
55
  img = img.half() if half else img.float() # uint8 to fp16/32
 
61
  t1 = torch_utils.time_synchronized()
62
  pred = model(img, augment=opt.augment)[0]
63
 
 
 
 
 
64
  # Apply NMS
65
  pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
66
  fast=True, classes=opt.classes, agnostic=opt.agnostic_nms)
requirements.txt CHANGED
@@ -1,12 +1,12 @@
1
  # pip install -U -r requirements.txt
2
  Cython
3
- numpy
4
  opencv-python
5
- torch >= 1.5
6
  matplotlib
7
  pillow
8
  tensorboard
9
- pyyaml >= 5.3
10
  torchvision
11
  scipy
12
  tqdm
 
1
  # pip install -U -r requirements.txt
2
  Cython
3
+ numpy==1.17
4
  opencv-python
5
+ torch>=1.5
6
  matplotlib
7
  pillow
8
  tensorboard
9
+ PyYAML>=5.3
10
  torchvision
11
  scipy
12
  tqdm
test.py CHANGED
@@ -20,10 +20,12 @@ def test(data,
20
  model=None,
21
  dataloader=None,
22
  fast=False,
23
- verbose=False): # 0 fast, 1 accurate
 
24
  # Initialize/load model and set device
25
  if model is None:
26
  device = torch_utils.select_device(opt.device, batch_size=batch_size)
 
27
 
28
  # Remove previous
29
  for f in glob.glob('test_batch*.jpg'):
@@ -35,6 +37,8 @@ def test(data,
35
  torch_utils.model_info(model)
36
  # model.fuse()
37
  model.to(device)
 
 
38
 
39
  if device.type != 'cpu' and torch.cuda.device_count() > 1:
40
  model = nn.DataParallel(model)
@@ -72,24 +76,27 @@ def test(data,
72
 
73
  seen = 0
74
  model.eval()
75
- _ = model(torch.zeros((1, 3, imgsz, imgsz), device=device)) if device.type != 'cpu' else None # run once
 
76
  names = model.names if hasattr(model, 'names') else model.module.names
77
  coco91class = coco80_to_coco91_class()
78
  s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
79
  p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
80
  loss = torch.zeros(3, device=device)
81
  jdict, stats, ap, ap_class = [], [], [], []
82
- for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
83
- imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0
 
 
84
  targets = targets.to(device)
85
- nb, _, height, width = imgs.shape # batch size, channels, height, width
86
  whwh = torch.Tensor([width, height, width, height]).to(device)
87
 
88
  # Disable gradients
89
  with torch.no_grad():
90
  # Run model
91
  t = torch_utils.time_synchronized()
92
- inf_out, train_out = model(imgs, augment=augment) # inference and training outputs
93
  t0 += torch_utils.time_synchronized() - t
94
 
95
  # Compute loss
@@ -125,7 +132,7 @@ def test(data,
125
  # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
126
  image_id = int(Path(paths[si]).stem.split('_')[-1])
127
  box = pred[:, :4].clone() # xyxy
128
- scale_coords(imgs[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
129
  box = xyxy2xywh(box) # xywh
130
  box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
131
  for p, b in zip(pred.tolist(), box.tolist()):
@@ -168,9 +175,9 @@ def test(data,
168
  # Plot images
169
  if batch_i < 1:
170
  f = 'test_batch%g_gt.jpg' % batch_i # filename
171
- plot_images(imgs, targets, paths, f, names) # ground truth
172
  f = 'test_batch%g_pred.jpg' % batch_i
173
- plot_images(imgs, output_to_target(output, width, height), paths, f, names) # predictions
174
 
175
  # Compute statistics
176
  stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
@@ -241,6 +248,7 @@ if __name__ == '__main__':
241
  parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
242
  parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
243
  parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
 
244
  parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
245
  parser.add_argument('--augment', action='store_true', help='augmented inference')
246
  parser.add_argument('--verbose', action='store_true', help='report mAP by class')
 
20
  model=None,
21
  dataloader=None,
22
  fast=False,
23
+ verbose=False,
24
+ half=False): # FP16
25
  # Initialize/load model and set device
26
  if model is None:
27
  device = torch_utils.select_device(opt.device, batch_size=batch_size)
28
+ half &= device.type != 'cpu' # half precision only supported on CUDA
29
 
30
  # Remove previous
31
  for f in glob.glob('test_batch*.jpg'):
 
37
  torch_utils.model_info(model)
38
  # model.fuse()
39
  model.to(device)
40
+ if half:
41
+ model.half() # to FP16
42
 
43
  if device.type != 'cpu' and torch.cuda.device_count() > 1:
44
  model = nn.DataParallel(model)
 
76
 
77
  seen = 0
78
  model.eval()
79
+ img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
80
+ _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
81
  names = model.names if hasattr(model, 'names') else model.module.names
82
  coco91class = coco80_to_coco91_class()
83
  s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
84
  p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
85
  loss = torch.zeros(3, device=device)
86
  jdict, stats, ap, ap_class = [], [], [], []
87
+ for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
88
+ img = img.to(device)
89
+ img = img.half() if half else img.float() # uint8 to fp16/32
90
+ img /= 255.0 # 0 - 255 to 0.0 - 1.0
91
  targets = targets.to(device)
92
+ nb, _, height, width = img.shape # batch size, channels, height, width
93
  whwh = torch.Tensor([width, height, width, height]).to(device)
94
 
95
  # Disable gradients
96
  with torch.no_grad():
97
  # Run model
98
  t = torch_utils.time_synchronized()
99
+ inf_out, train_out = model(img, augment=augment) # inference and training outputs
100
  t0 += torch_utils.time_synchronized() - t
101
 
102
  # Compute loss
 
132
  # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
133
  image_id = int(Path(paths[si]).stem.split('_')[-1])
134
  box = pred[:, :4].clone() # xyxy
135
+ scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
136
  box = xyxy2xywh(box) # xywh
137
  box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
138
  for p, b in zip(pred.tolist(), box.tolist()):
 
175
  # Plot images
176
  if batch_i < 1:
177
  f = 'test_batch%g_gt.jpg' % batch_i # filename
178
+ plot_images(img, targets, paths, f, names) # ground truth
179
  f = 'test_batch%g_pred.jpg' % batch_i
180
+ plot_images(img, output_to_target(output, width, height), paths, f, names) # predictions
181
 
182
  # Compute statistics
183
  stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
 
248
  parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
249
  parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
250
  parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
251
+ parser.add_argument('--half', action='store_true', help='half precision FP16 inference')
252
  parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
253
  parser.add_argument('--augment', action='store_true', help='augmented inference')
254
  parser.add_argument('--verbose', action='store_true', help='report mAP by class')
utils/utils.py CHANGED
@@ -504,6 +504,9 @@ def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, fast=False, c
504
  Returns detections with shape:
505
  nx6 (x1, y1, x2, y2, conf, cls)
506
  """
 
 
 
507
  nc = prediction[0].shape[1] - 5 # number of classes
508
  xc = prediction[..., 4] > conf_thres # candidates
509
 
@@ -902,7 +905,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max
902
  return None
903
 
904
  if isinstance(images, torch.Tensor):
905
- images = images.cpu().numpy()
906
 
907
  if isinstance(targets, torch.Tensor):
908
  targets = targets.cpu().numpy()
 
504
  Returns detections with shape:
505
  nx6 (x1, y1, x2, y2, conf, cls)
506
  """
507
+ if prediction.dtype is torch.float16:
508
+ prediction = prediction.float() # to FP32
509
+
510
  nc = prediction[0].shape[1] - 5 # number of classes
511
  xc = prediction[..., 4] > conf_thres # candidates
512
 
 
905
  return None
906
 
907
  if isinstance(images, torch.Tensor):
908
+ images = images.cpu().float().numpy()
909
 
910
  if isinstance(targets, torch.Tensor):
911
  targets = targets.cpu().numpy()