Commit 2d99063
Parent(s): 621caea

Feature `python train.py --cache disk` (#4049)

* Add cache-on-disk and cache-directory to cache images on disk
* Fix load_image with cache_on_disk
* Add no_cache flag for load_image
* Revert the parts ('logging' and a newline) that do not need to be modified
* Add the assertion for shapes of cached images
* Add a suffix string for cached images
* Fix boundary-error of letterbox for load_mosaic
* Add prefix as cache-key of cache-on-disk
* Update cache-function on disk
* Add psutil in requirements.txt
* Update train.py
* Cleanup1
* Cleanup2
* Skip existing npy
* Include re-space
* Export return character fix
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
- export.py +2 -2
- train.py +4 -4
- utils/datasets.py +29 -16
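
From the PR title and the new `--cache` argument (see the train.py diff below), the flag supports three invocation modes; a short usage sketch:

    python train.py                 # no image caching
    python train.py --cache         # cache images in RAM ('ram' is the const default)
    python train.py --cache disk    # cache images as .npy files on disk
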
export.py
CHANGED
@@ -156,8 +156,8 @@ def run(weights='./yolov5s.pt',  # weights path

     # Finish
     print(f'\nExport complete ({time.time() - t:.2f}s)'
-          f"\nResults saved to {colorstr('bold', file.parent.resolve())}\n"
-          f'Visualize with https://netron.app')
+          f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
+          f'\nVisualize with https://netron.app')


 def parse_opt():
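The "return character fix" relies on Python's implicit concatenation of adjacent string literals: moving the newline escape from the tail of one fragment to the head of the next produces the same printed text while keeping every fragment in the leading-`\n` style. A minimal standalone sketch of the pattern (placeholder values, not the real export timing):

    t = 0.0  # placeholder start time for illustration
    print(f'\nExport complete ({1.00 - t:.2f}s)'
          f'\nResults saved to runs/export'          # adjacent f-strings are concatenated
          f'\nVisualize with https://netron.app')    # leading \n instead of trailing \n
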
train.py
CHANGED
@@ -201,7 +201,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary

     # Trainloader
     train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
-                                              hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=RANK,
+                                              hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=RANK,
                                               workers=workers, image_weights=opt.image_weights, quad=opt.quad,
                                               prefix=colorstr('train: '))
     mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
@@ -211,7 +211,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     # Process 0
     if RANK in [-1, 0]:
         val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls,
-                                       hyp=hyp, cache=opt.cache_images and not noval, rect=True, rank=-1,
+                                       hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1,
                                        workers=workers, pad=0.5,
                                        prefix=colorstr('val: '))[0]

@@ -389,7 +389,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     # end epoch ----------------------------------------------------------------------------------------------------
     # end training -----------------------------------------------------------------------------------------------------
     if RANK in [-1, 0]:
-        LOGGER.info(f'{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.\n')
+        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
         if not evolve:
             if is_coco:  # COCO dataset
                 for m in [last, best] if best.exists() else [last]:  # speed, mAP tests
@@ -430,7 +430,7 @@ def parse_opt(known=False):
     parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
     parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
     parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
-    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
+    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
     parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
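The new `--cache` definition uses argparse's `nargs='?'` together with `const`, which is what yields the three distinct values shown in the usage sketch above. A minimal standalone demonstration of just that behavior (not the full parse_opt):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cache', type=str, nargs='?', const='ram',
                        help='--cache images in "ram" (default) or "disk"')

    print(parser.parse_args([]).cache)                   # None: flag absent, no caching
    print(parser.parse_args(['--cache']).cache)          # 'ram': bare flag falls back to const
    print(parser.parse_args(['--cache', 'disk']).cache)  # 'disk': explicit value
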
utils/datasets.py
CHANGED
@@ -455,16 +455,25 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
             self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride

         # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
-        self.imgs = [None] * n
+        self.imgs, self.img_npy = [None] * n, [None] * n
         if cache_images:
+            if cache_images == 'disk':
+                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
+                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
+                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
             gb = 0  # Gigabytes of cached images
             self.img_hw0, self.img_hw = [None] * n, [None] * n
             results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
             pbar = tqdm(enumerate(results), total=n)
             for i, x in pbar:
-                self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # img, hw_original, hw_resized = load_image(self, i)
-                gb += self.imgs[i].nbytes
-                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'
+                if cache_images == 'disk':
+                    if not self.img_npy[i].exists():
+                        np.save(self.img_npy[i].as_posix(), x[0])
+                    gb += self.img_npy[i].stat().st_size
+                else:
+                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
+                    gb += self.imgs[i].nbytes
+                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
             pbar.close()

     def cache_labels(self, path=Path('./labels.cache'), prefix=''):
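The disk-cache paths in the hunk above mirror the dataset layout: a sibling directory with an `_npy` suffix holds one .npy file per image, keyed by filename. A standalone sketch of that path arithmetic (the example image path is hypothetical):

    from pathlib import Path

    img_files = ['coco128/images/train2017/000000000009.jpg']  # hypothetical listing
    im_cache_dir = Path(Path(img_files[0]).parent.as_posix() + '_npy')
    img_npy = [im_cache_dir / Path(f).with_suffix('.npy').name for f in img_files]
    print(im_cache_dir)  # coco128/images/train2017_npy
    print(img_npy[0])    # coco128/images/train2017_npy/000000000009.npy
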
@@ -618,21 +627,25 @@ class LoadImagesAndLabels(Dataset):  # for training/testing


 # Ancillary functions --------------------------------------------------------------------------------------------------
-def load_image(self, index):
-    # loads 1 image from dataset, returns img, original hw, resized hw
-    img = self.imgs[index]
-    if img is None:  # not cached
-        path = self.img_files[index]
-        img = cv2.imread(path)  # BGR
-        assert img is not None, 'Image Not Found ' + path
-        h0, w0 = img.shape[:2]  # orig hw
+def load_image(self, i):
+    # loads 1 image from dataset index 'i', returns im, original hw, resized hw
+    im = self.imgs[i]
+    if im is None:  # not cached in ram
+        npy = self.img_npy[i]
+        if npy and npy.exists():  # load npy
+            im = np.load(npy)
+        else:  # read image
+            path = self.img_files[i]
+            im = cv2.imread(path)  # BGR
+            assert im is not None, 'Image Not Found ' + path
+        h0, w0 = im.shape[:2]  # orig hw
         r = self.img_size / max(h0, w0)  # ratio
         if r != 1:  # if sizes are not equal
-            img = cv2.resize(img, (int(w0 * r), int(h0 * r)),
-                             interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
-        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
+            im = cv2.resize(im, (int(w0 * r), int(h0 * r)),
+                            interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
+        return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
     else:
-        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized
+        return self.imgs[i], self.img_hw0[i], self.img_hw[i]  # im, hw_original, hw_resized


 def load_mosaic(self, index):
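Read back together, the two hunks implement a resize-once scheme: the first caching pass saves each resized image as .npy, and later epochs (or later runs, since existing .npy files are skipped) load the raw array instead of re-decoding the JPEG. A self-contained sketch of the same round trip under assumed names (`load_cached` and `img_size` are illustrative, not the LoadImagesAndLabels class itself):

    from pathlib import Path
    import cv2
    import numpy as np

    def load_cached(path, img_size=640, augment=False):
        # Hypothetical helper mirroring load_image: prefer the pre-resized .npy copy
        npy = Path(Path(path).parent.as_posix() + '_npy') / Path(path).with_suffix('.npy').name
        if npy.exists():
            return np.load(npy)  # already BGR and already resized
        im = cv2.imread(str(path))  # BGR
        assert im is not None, f'Image Not Found {path}'
        h0, w0 = im.shape[:2]  # original hw
        r = img_size / max(h0, w0)  # resize so the long side equals img_size
        if r != 1:
            im = cv2.resize(im, (int(w0 * r), int(h0 * r)),
                            interpolation=cv2.INTER_AREA if r < 1 and not augment else cv2.INTER_LINEAR)
        npy.parent.mkdir(parents=True, exist_ok=True)
        np.save(npy.as_posix(), im)  # cache for the next read
        return im

The trade-off is disk space for decode time: .npy stores the uncompressed uint8 array, so the cache directory is typically much larger than the source JPEGs, but np.load skips image decoding entirely.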