glenn-jocher
committed on
Commit
•
f527704
1
Parent(s):
2296f15
Cache v0.3: improved corrupt image/label reporting (#3676)
Browse files
* Cache v0.3: improved corrupt image/label reporting
Fix for https://github.com/ultralytics/yolov5/issues/3656#issuecomment-863660899
* cleanup
- utils/datasets.py +15 -9
utils/datasets.py
CHANGED
@@ -390,7 +390,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
|
390 |
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
|
391 |
if cache_path.is_file():
|
392 |
cache, exists = torch.load(cache_path), True # load
|
393 |
-
if cache['hash'] != get_hash(self.label_files + self.img_files):
|
394 |
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
|
395 |
else:
|
396 |
cache, exists = self.cache_labels(cache_path, prefix), False # cache
|
@@ -400,11 +400,12 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
|
400 |
if exists:
|
401 |
d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
|
402 |
tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results
|
|
|
|
|
403 |
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'
|
404 |
|
405 |
# Read cache
|
406 |
-
cache.pop('hash') # remove
|
407 |
-
cache.pop('version') # remove version
|
408 |
labels, shapes, self.segments = zip(*cache.values())
|
409 |
self.labels = list(labels)
|
410 |
self.shapes = np.array(shapes, dtype=np.float64)
|
@@ -461,26 +462,31 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
|
461 |
def cache_labels(self, path=Path('./labels.cache'), prefix=''):
|
462 |
# Cache dataset labels, check images and read shapes
|
463 |
x = {} # dict
|
464 |
-
nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, corrupt
|
465 |
desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
|
466 |
with Pool(num_threads) as pool:
|
467 |
pbar = tqdm(pool.imap_unordered(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
|
468 |
desc=desc, total=len(self.img_files))
|
469 |
-
for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f in pbar:
|
470 |
nm += nm_f
|
471 |
nf += nf_f
|
472 |
ne += ne_f
|
473 |
nc += nc_f
|
474 |
if im_file:
|
475 |
x[im_file] = [l, shape, segments]
|
|
|
|
|
476 |
pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
|
477 |
|
478 |
pbar.close()
|
|
|
|
|
479 |
if nf == 0:
|
480 |
logging.info(f'{prefix}WARNING: No labels found in {path}. See {help_url}')
|
481 |
x['hash'] = get_hash(self.label_files + self.img_files)
|
482 |
x['results'] = nf, nm, ne, nc, len(self.img_files)
|
483 |
-
x['
|
|
|
484 |
try:
|
485 |
torch.save(x, path) # save cache for next time
|
486 |
logging.info(f'{prefix}New cache created: {path}')
|
@@ -1084,11 +1090,11 @@ def verify_image_label(args):
|
|
1084 |
else:
|
1085 |
nm = 1 # label missing
|
1086 |
l = np.zeros((0, 5), dtype=np.float32)
|
1087 |
-
return im_file, l, shape, segments, nm, nf, ne, nc
|
1088 |
except Exception as e:
|
1089 |
nc = 1
|
1090 |
-
|
1091 |
-
return [None, None, None, None, nm, nf, ne, nc]
|
1092 |
|
1093 |
|
1094 |
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
|
|
|
390 |
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
|
391 |
if cache_path.is_file():
|
392 |
cache, exists = torch.load(cache_path), True # load
|
393 |
+
if cache['hash'] != get_hash(self.label_files + self.img_files) or cache['version'] != 0.3:
|
394 |
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
|
395 |
else:
|
396 |
cache, exists = self.cache_labels(cache_path, prefix), False # cache
|
|
|
400 |
if exists:
|
401 |
d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
|
402 |
tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results
|
403 |
+
if cache['msgs']:
|
404 |
+
logging.info('\n'.join(cache['msgs'])) # display warnings
|
405 |
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'
|
406 |
|
407 |
# Read cache
|
408 |
+
[cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items
|
|
|
409 |
labels, shapes, self.segments = zip(*cache.values())
|
410 |
self.labels = list(labels)
|
411 |
self.shapes = np.array(shapes, dtype=np.float64)
|
|
|
462 |
def cache_labels(self, path=Path('./labels.cache'), prefix=''):
|
463 |
# Cache dataset labels, check images and read shapes
|
464 |
x = {} # dict
|
465 |
+
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
|
466 |
desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
|
467 |
with Pool(num_threads) as pool:
|
468 |
pbar = tqdm(pool.imap_unordered(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
|
469 |
desc=desc, total=len(self.img_files))
|
470 |
+
for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
|
471 |
nm += nm_f
|
472 |
nf += nf_f
|
473 |
ne += ne_f
|
474 |
nc += nc_f
|
475 |
if im_file:
|
476 |
x[im_file] = [l, shape, segments]
|
477 |
+
if msg:
|
478 |
+
msgs.append(msg)
|
479 |
pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
|
480 |
|
481 |
pbar.close()
|
482 |
+
if msgs:
|
483 |
+
logging.info('\n'.join(msgs))
|
484 |
if nf == 0:
|
485 |
logging.info(f'{prefix}WARNING: No labels found in {path}. See {help_url}')
|
486 |
x['hash'] = get_hash(self.label_files + self.img_files)
|
487 |
x['results'] = nf, nm, ne, nc, len(self.img_files)
|
488 |
+
x['msgs'] = msgs # warnings
|
489 |
+
x['version'] = 0.3 # cache version
|
490 |
try:
|
491 |
torch.save(x, path) # save cache for next time
|
492 |
logging.info(f'{prefix}New cache created: {path}')
|
|
|
1090 |
else:
|
1091 |
nm = 1 # label missing
|
1092 |
l = np.zeros((0, 5), dtype=np.float32)
|
1093 |
+
return im_file, l, shape, segments, nm, nf, ne, nc, ''
|
1094 |
except Exception as e:
|
1095 |
nc = 1
|
1096 |
+
msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}'
|
1097 |
+
return [None, None, None, None, nm, nf, ne, nc, msg]
|
1098 |
|
1099 |
|
1100 |
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
|