jdfr glenn-jocher committed on
Commit
db3bbdd
1 Parent(s): a18b0c3

autosplit: take image files with uppercase extensions into account (#5269)

Browse files

* take image files with uppercase extensions into account in autosplit

* case fix

* Refactor implementation

Removes the additional variable (capitalized variable names are reserved for global variables) and uses the same approach already implemented in datasets.py at line 409.

* Remove redundant `**/` prefix from rglob patterns (rglob is already recursive)

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>

Files changed (1) hide show
  1. utils/datasets.py +3 -3
utils/datasets.py CHANGED
@@ -396,7 +396,7 @@ class LoadImagesAndLabels(Dataset):
396
  p = Path(p) # os-agnostic
397
  if p.is_dir(): # dir
398
  f += glob.glob(str(p / '**' / '*.*'), recursive=True)
399
- # f = list(p.rglob('**/*.*')) # pathlib
400
  elif p.is_file(): # file
401
  with open(p, 'r') as t:
402
  t = t.read().strip().splitlines()
@@ -406,7 +406,7 @@ class LoadImagesAndLabels(Dataset):
406
  else:
407
  raise Exception(f'{prefix}{p} does not exist')
408
  self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS])
409
- # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib
410
  assert self.img_files, f'{prefix}No images found'
411
  except Exception as e:
412
  raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}')
@@ -866,7 +866,7 @@ def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annota
866
  annotated_only: Only use images with an annotated txt file
867
  """
868
  path = Path(path) # images dir
869
- files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in IMG_FORMATS], []) # image files only
870
  n = len(files) # number of files
871
  random.seed(0) # for reproducibility
872
  indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split
 
396
  p = Path(p) # os-agnostic
397
  if p.is_dir(): # dir
398
  f += glob.glob(str(p / '**' / '*.*'), recursive=True)
399
+ # f = list(p.rglob('*.*')) # pathlib
400
  elif p.is_file(): # file
401
  with open(p, 'r') as t:
402
  t = t.read().strip().splitlines()
 
406
  else:
407
  raise Exception(f'{prefix}{p} does not exist')
408
  self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS])
409
+ # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib
410
  assert self.img_files, f'{prefix}No images found'
411
  except Exception as e:
412
  raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}')
 
866
  annotated_only: Only use images with an annotated txt file
867
  """
868
  path = Path(path) # images dir
869
+ files = sorted([x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS]) # image files only
870
  n = len(files) # number of files
871
  random.seed(0) # for reproducibility
872
  indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split