glenn-jocher
committed on
Commit
·
260b172
1
Parent(s):
629d370
FP16 inference update
Browse files
- detect.py +4 -10
- requirements.txt +3 -3
- test.py +17 -9
- utils/utils.py +4 -1
detect.py
CHANGED
@@ -14,6 +14,7 @@ def detect(save_img=False):
|
|
14 |
if os.path.exists(out):
|
15 |
shutil.rmtree(out) # delete output folder
|
16 |
os.makedirs(out) # make new output folder
|
|
|
17 |
|
18 |
# Load model
|
19 |
google_utils.attempt_download(weights)
|
@@ -21,6 +22,8 @@ def detect(save_img=False):
|
|
21 |
# torch.save(torch.load(weights, map_location=device), weights) # update model if SourceChangeWarning
|
22 |
# model.fuse()
|
23 |
model.to(device).eval()
|
|
|
|
|
24 |
|
25 |
# Second-stage classifier
|
26 |
classify = False
|
@@ -29,11 +32,6 @@ def detect(save_img=False):
|
|
29 |
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
|
30 |
modelc.to(device).eval()
|
31 |
|
32 |
-
# Half precision
|
33 |
-
half = half and device.type != 'cpu' # half precision only supported on CUDA
|
34 |
-
if half:
|
35 |
-
model.half()
|
36 |
-
|
37 |
# Set Dataloader
|
38 |
vid_path, vid_writer = None, None
|
39 |
if webcam:
|
@@ -51,7 +49,7 @@ def detect(save_img=False):
|
|
51 |
# Run inference
|
52 |
t0 = time.time()
|
53 |
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
|
54 |
-
_ = model(img.half() if half else img.float()) if device.type != 'cpu' else None  # run once
|
55 |
for path, img, im0s, vid_cap in dataset:
|
56 |
img = torch.from_numpy(img).to(device)
|
57 |
img = img.half() if half else img.float() # uint8 to fp16/32
|
@@ -63,10 +61,6 @@ def detect(save_img=False):
|
|
63 |
t1 = torch_utils.time_synchronized()
|
64 |
pred = model(img, augment=opt.augment)[0]
|
65 |
|
66 |
-
# to float
|
67 |
-
if half:
|
68 |
-
pred = pred.float()
|
69 |
-
|
70 |
# Apply NMS
|
71 |
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
|
72 |
fast=True, classes=opt.classes, agnostic=opt.agnostic_nms)
|
|
|
14 |
if os.path.exists(out):
|
15 |
shutil.rmtree(out) # delete output folder
|
16 |
os.makedirs(out) # make new output folder
|
17 |
+
half &= device.type != 'cpu' # half precision only supported on CUDA
|
18 |
|
19 |
# Load model
|
20 |
google_utils.attempt_download(weights)
|
|
|
22 |
# torch.save(torch.load(weights, map_location=device), weights) # update model if SourceChangeWarning
|
23 |
# model.fuse()
|
24 |
model.to(device).eval()
|
25 |
+
if half:
|
26 |
+
model.half() # to FP16
|
27 |
|
28 |
# Second-stage classifier
|
29 |
classify = False
|
|
|
32 |
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
|
33 |
modelc.to(device).eval()
|
34 |
|
|
|
|
|
|
|
|
|
|
|
35 |
# Set Dataloader
|
36 |
vid_path, vid_writer = None, None
|
37 |
if webcam:
|
|
|
49 |
# Run inference
|
50 |
t0 = time.time()
|
51 |
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
|
52 |
+
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
|
53 |
for path, img, im0s, vid_cap in dataset:
|
54 |
img = torch.from_numpy(img).to(device)
|
55 |
img = img.half() if half else img.float() # uint8 to fp16/32
|
|
|
61 |
t1 = torch_utils.time_synchronized()
|
62 |
pred = model(img, augment=opt.augment)[0]
|
63 |
|
|
|
|
|
|
|
|
|
64 |
# Apply NMS
|
65 |
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
|
66 |
fast=True, classes=opt.classes, agnostic=opt.agnostic_nms)
|
requirements.txt
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
# pip install -U -r requirements.txt
|
2 |
Cython
|
3 |
-
numpy
|
4 |
opencv-python
|
5 |
-
torch
|
6 |
matplotlib
|
7 |
pillow
|
8 |
tensorboard
|
9 |
-
|
10 |
torchvision
|
11 |
scipy
|
12 |
tqdm
|
|
|
1 |
# pip install -U -r requirements.txt
|
2 |
Cython
|
3 |
+
numpy==1.17
|
4 |
opencv-python
|
5 |
+
torch>=1.5
|
6 |
matplotlib
|
7 |
pillow
|
8 |
tensorboard
|
9 |
+
PyYAML>=5.3
|
10 |
torchvision
|
11 |
scipy
|
12 |
tqdm
|
test.py
CHANGED
@@ -20,10 +20,12 @@ def test(data,
|
|
20 |
model=None,
|
21 |
dataloader=None,
|
22 |
fast=False,
|
23 |
-
verbose=False):
|
|
|
24 |
# Initialize/load model and set device
|
25 |
if model is None:
|
26 |
device = torch_utils.select_device(opt.device, batch_size=batch_size)
|
|
|
27 |
|
28 |
# Remove previous
|
29 |
for f in glob.glob('test_batch*.jpg'):
|
@@ -35,6 +37,8 @@ def test(data,
|
|
35 |
torch_utils.model_info(model)
|
36 |
# model.fuse()
|
37 |
model.to(device)
|
|
|
|
|
38 |
|
39 |
if device.type != 'cpu' and torch.cuda.device_count() > 1:
|
40 |
model = nn.DataParallel(model)
|
@@ -72,24 +76,27 @@ def test(data,
|
|
72 |
|
73 |
seen = 0
|
74 |
model.eval()
|
75 |
-
|
|
|
76 |
names = model.names if hasattr(model, 'names') else model.module.names
|
77 |
coco91class = coco80_to_coco91_class()
|
78 |
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
|
79 |
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
|
80 |
loss = torch.zeros(3, device=device)
|
81 |
jdict, stats, ap, ap_class = [], [], [], []
|
82 |
-
for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
|
83 |
-
|
|
|
|
|
84 |
targets = targets.to(device)
|
85 |
-
nb, _, height, width = imgs.shape  # batch size, channels, height, width
|
86 |
whwh = torch.Tensor([width, height, width, height]).to(device)
|
87 |
|
88 |
# Disable gradients
|
89 |
with torch.no_grad():
|
90 |
# Run model
|
91 |
t = torch_utils.time_synchronized()
|
92 |
-
inf_out, train_out = model(imgs, augment=augment)  # inference and training outputs
|
93 |
t0 += torch_utils.time_synchronized() - t
|
94 |
|
95 |
# Compute loss
|
@@ -125,7 +132,7 @@ def test(data,
|
|
125 |
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
|
126 |
image_id = int(Path(paths[si]).stem.split('_')[-1])
|
127 |
box = pred[:, :4].clone() # xyxy
|
128 |
-
scale_coords(imgs[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
|
129 |
box = xyxy2xywh(box) # xywh
|
130 |
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
|
131 |
for p, b in zip(pred.tolist(), box.tolist()):
|
@@ -168,9 +175,9 @@ def test(data,
|
|
168 |
# Plot images
|
169 |
if batch_i < 1:
|
170 |
f = 'test_batch%g_gt.jpg' % batch_i # filename
|
171 |
-
plot_images(imgs, targets, paths, f, names)  # ground truth
|
172 |
f = 'test_batch%g_pred.jpg' % batch_i
|
173 |
-
plot_images(imgs, output_to_target(output, width, height), paths, f, names)  # predictions
|
174 |
|
175 |
# Compute statistics
|
176 |
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
|
@@ -241,6 +248,7 @@ if __name__ == '__main__':
|
|
241 |
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
|
242 |
parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
|
243 |
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
|
|
|
244 |
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
|
245 |
parser.add_argument('--augment', action='store_true', help='augmented inference')
|
246 |
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
|
|
|
20 |
model=None,
|
21 |
dataloader=None,
|
22 |
fast=False,
|
23 |
+
verbose=False,
|
24 |
+
half=False): # FP16
|
25 |
# Initialize/load model and set device
|
26 |
if model is None:
|
27 |
device = torch_utils.select_device(opt.device, batch_size=batch_size)
|
28 |
+
half &= device.type != 'cpu' # half precision only supported on CUDA
|
29 |
|
30 |
# Remove previous
|
31 |
for f in glob.glob('test_batch*.jpg'):
|
|
|
37 |
torch_utils.model_info(model)
|
38 |
# model.fuse()
|
39 |
model.to(device)
|
40 |
+
if half:
|
41 |
+
model.half() # to FP16
|
42 |
|
43 |
if device.type != 'cpu' and torch.cuda.device_count() > 1:
|
44 |
model = nn.DataParallel(model)
|
|
|
76 |
|
77 |
seen = 0
|
78 |
model.eval()
|
79 |
+
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
|
80 |
+
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
|
81 |
names = model.names if hasattr(model, 'names') else model.module.names
|
82 |
coco91class = coco80_to_coco91_class()
|
83 |
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
|
84 |
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
|
85 |
loss = torch.zeros(3, device=device)
|
86 |
jdict, stats, ap, ap_class = [], [], [], []
|
87 |
+
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
|
88 |
+
img = img.to(device)
|
89 |
+
img = img.half() if half else img.float() # uint8 to fp16/32
|
90 |
+
img /= 255.0 # 0 - 255 to 0.0 - 1.0
|
91 |
targets = targets.to(device)
|
92 |
+
nb, _, height, width = img.shape # batch size, channels, height, width
|
93 |
whwh = torch.Tensor([width, height, width, height]).to(device)
|
94 |
|
95 |
# Disable gradients
|
96 |
with torch.no_grad():
|
97 |
# Run model
|
98 |
t = torch_utils.time_synchronized()
|
99 |
+
inf_out, train_out = model(img, augment=augment) # inference and training outputs
|
100 |
t0 += torch_utils.time_synchronized() - t
|
101 |
|
102 |
# Compute loss
|
|
|
132 |
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
|
133 |
image_id = int(Path(paths[si]).stem.split('_')[-1])
|
134 |
box = pred[:, :4].clone() # xyxy
|
135 |
+
scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
|
136 |
box = xyxy2xywh(box) # xywh
|
137 |
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
|
138 |
for p, b in zip(pred.tolist(), box.tolist()):
|
|
|
175 |
# Plot images
|
176 |
if batch_i < 1:
|
177 |
f = 'test_batch%g_gt.jpg' % batch_i # filename
|
178 |
+
plot_images(img, targets, paths, f, names) # ground truth
|
179 |
f = 'test_batch%g_pred.jpg' % batch_i
|
180 |
+
plot_images(img, output_to_target(output, width, height), paths, f, names) # predictions
|
181 |
|
182 |
# Compute statistics
|
183 |
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
|
|
|
248 |
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
|
249 |
parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
|
250 |
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
|
251 |
+
parser.add_argument('--half', action='store_true', help='half precision FP16 inference')
|
252 |
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
|
253 |
parser.add_argument('--augment', action='store_true', help='augmented inference')
|
254 |
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
|
utils/utils.py
CHANGED
@@ -504,6 +504,9 @@ def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, fast=False, c
|
|
504 |
Returns detections with shape:
|
505 |
nx6 (x1, y1, x2, y2, conf, cls)
|
506 |
"""
|
|
|
|
|
|
|
507 |
nc = prediction[0].shape[1] - 5 # number of classes
|
508 |
xc = prediction[..., 4] > conf_thres # candidates
|
509 |
|
@@ -902,7 +905,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max
|
|
902 |
return None
|
903 |
|
904 |
if isinstance(images, torch.Tensor):
|
905 |
-
images = images.cpu().numpy()
|
906 |
|
907 |
if isinstance(targets, torch.Tensor):
|
908 |
targets = targets.cpu().numpy()
|
|
|
504 |
Returns detections with shape:
|
505 |
nx6 (x1, y1, x2, y2, conf, cls)
|
506 |
"""
|
507 |
+
if prediction.dtype is torch.float16:
|
508 |
+
prediction = prediction.float() # to FP32
|
509 |
+
|
510 |
nc = prediction[0].shape[1] - 5 # number of classes
|
511 |
xc = prediction[..., 4] > conf_thres # candidates
|
512 |
|
|
|
905 |
return None
|
906 |
|
907 |
if isinstance(images, torch.Tensor):
|
908 |
+
images = images.cpu().float().numpy()
|
909 |
|
910 |
if isinstance(targets, torch.Tensor):
|
911 |
targets = targets.cpu().numpy()
|