glenn-jocher committed on
Commit
548a98a
1 Parent(s): 9c91aea

Recursive directories dataset capability (#1408)

Browse files

* Recursive directories dataset capability

* x.split('.')[-1]

* f += glob.glob(f"{p}{os.sep}**.*", recursive=True)

* f += glob.glob(str(p / '**.*'), recursive=True)

* remove . from image and vid formats

* .txt to txt

* str(p / '**' / '*.*')

Files changed (2) hide show
  1. utils/datasets.py +12 -13
  2. utils/general.py +3 -3
utils/datasets.py CHANGED
@@ -23,8 +23,8 @@ from utils.torch_utils import torch_distributed_zero_first
23
 
24
  # Parameters
25
  help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
26
- img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
27
- vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
28
 
29
  # Get orientation exif tag
30
  for orientation in ExifTags.TAGS.keys():
@@ -125,8 +125,8 @@ class LoadImages: # for inference
125
  else:
126
  raise Exception('ERROR: %s does not exist' % p)
127
 
128
- images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
129
- videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
130
  ni, nv = len(images), len(videos)
131
 
132
  self.img_size = img_size
@@ -337,24 +337,23 @@ class LoadImagesAndLabels(Dataset): # for training/testing
337
  def img2label_paths(img_paths):
338
  # Define label paths as a function of image paths
339
  sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
340
- return [x.replace(sa, sb, 1).replace(os.path.splitext(x)[-1], '.txt') for x in img_paths]
341
 
342
  try:
343
  f = [] # image files
344
  for p in path if isinstance(path, list) else [path]:
345
- p = str(Path(p)) # os-agnostic
346
- parent = str(Path(p).parent) + os.sep
347
- if os.path.isfile(p): # file
 
348
  with open(p, 'r') as t:
349
  t = t.read().splitlines()
 
350
  f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
351
- elif os.path.isdir(p): # folder
352
- f += glob.iglob(p + os.sep + '*.*')
353
  else:
354
  raise Exception('%s does not exist' % p)
355
- self.img_files = sorted(
356
- [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats])
357
- assert len(self.img_files) > 0, 'No images found'
358
  except Exception as e:
359
  raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
360
 
 
23
 
24
  # Parameters
25
  help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
26
+ img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes
27
+ vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
28
 
29
  # Get orientation exif tag
30
  for orientation in ExifTags.TAGS.keys():
 
125
  else:
126
  raise Exception('ERROR: %s does not exist' % p)
127
 
128
+ images = [x for x in files if x.split('.')[-1].lower() in img_formats]
129
+ videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
130
  ni, nv = len(images), len(videos)
131
 
132
  self.img_size = img_size
 
337
  def img2label_paths(img_paths):
338
  # Define label paths as a function of image paths
339
  sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
340
+ return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths]
341
 
342
  try:
343
  f = [] # image files
344
  for p in path if isinstance(path, list) else [path]:
345
+ p = Path(p) # os-agnostic
346
+ if p.is_dir(): # dir
347
+ f += glob.glob(str(p / '**' / '*.*'), recursive=True)
348
+ elif p.is_file(): # file
349
  with open(p, 'r') as t:
350
  t = t.read().splitlines()
351
+ parent = str(p.parent) + os.sep
352
  f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
 
 
353
  else:
354
  raise Exception('%s does not exist' % p)
355
+ self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
356
+ assert self.img_files, 'No images found'
 
357
  except Exception as e:
358
  raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
359
 
utils/general.py CHANGED
@@ -79,9 +79,9 @@ def check_dataset(dict):
79
  # Download dataset if not found locally
80
  val, s = dict.get('val'), dict.get('download')
81
  if val and len(val):
82
- val = [os.path.abspath(x) for x in (val if isinstance(val, list) else [val])] # val path
83
- if not all(os.path.exists(x) for x in val):
84
- print('\nWARNING: Dataset not found, nonexistent paths: %s' % [*val])
85
  if s and len(s): # download script
86
  print('Downloading %s ...' % s)
87
  if s.startswith('http') and s.endswith('.zip'): # URL
 
79
  # Download dataset if not found locally
80
  val, s = dict.get('val'), dict.get('download')
81
  if val and len(val):
82
+ val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
83
+ if not all(x.exists() for x in val):
84
+ print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
85
  if s and len(s): # download script
86
  print('Downloading %s ...' % s)
87
  if s.startswith('http') and s.endswith('.zip'): # URL