glenn-jocher
committed on
Commit
•
548a98a
1
Parent(s):
9c91aea
Recursive directories dataset capability (#1408)
Browse files
* Recursive directories dataset capability
* x.split('.')[-1]
* f += glob.glob(f"{p}{os.sep}**.*", recursive=True)
* f += glob.glob(str(p / '**.*'), recursive=True)
* remove . from image and vid formats
* .txt to txt
* str(p / '**' / '*.*')
- utils/datasets.py +12 -13
- utils/general.py +3 -3
utils/datasets.py
CHANGED
@@ -23,8 +23,8 @@ from utils.torch_utils import torch_distributed_zero_first
|
|
23 |
|
24 |
# Parameters
|
25 |
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
|
26 |
-
img_formats = ['
|
27 |
-
vid_formats = ['
|
28 |
|
29 |
# Get orientation exif tag
|
30 |
for orientation in ExifTags.TAGS.keys():
|
@@ -125,8 +125,8 @@ class LoadImages: # for inference
|
|
125 |
else:
|
126 |
raise Exception('ERROR: %s does not exist' % p)
|
127 |
|
128 |
-
images = [x for x in files if
|
129 |
-
videos = [x for x in files if
|
130 |
ni, nv = len(images), len(videos)
|
131 |
|
132 |
self.img_size = img_size
|
@@ -337,24 +337,23 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
|
337 |
def img2label_paths(img_paths):
|
338 |
# Define label paths as a function of image paths
|
339 |
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
|
340 |
-
return [x.replace(sa, sb, 1).replace(
|
341 |
|
342 |
try:
|
343 |
f = [] # image files
|
344 |
for p in path if isinstance(path, list) else [path]:
|
345 |
-
p =
|
346 |
-
|
347 |
-
|
|
|
348 |
with open(p, 'r') as t:
|
349 |
t = t.read().splitlines()
|
|
|
350 |
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
|
351 |
-
elif os.path.isdir(p): # folder
|
352 |
-
f += glob.iglob(p + os.sep + '*.*')
|
353 |
else:
|
354 |
raise Exception('%s does not exist' % p)
|
355 |
-
self.img_files = sorted(
|
356 |
-
|
357 |
-
assert len(self.img_files) > 0, 'No images found'
|
358 |
except Exception as e:
|
359 |
raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
|
360 |
|
|
|
23 |
|
24 |
# Parameters
|
25 |
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
|
26 |
+
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes
|
27 |
+
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
|
28 |
|
29 |
# Get orientation exif tag
|
30 |
for orientation in ExifTags.TAGS.keys():
|
|
|
125 |
else:
|
126 |
raise Exception('ERROR: %s does not exist' % p)
|
127 |
|
128 |
+
images = [x for x in files if x.split('.')[-1].lower() in img_formats]
|
129 |
+
videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
|
130 |
ni, nv = len(images), len(videos)
|
131 |
|
132 |
self.img_size = img_size
|
|
|
337 |
def img2label_paths(img_paths):
|
338 |
# Define label paths as a function of image paths
|
339 |
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
|
340 |
+
return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths]
|
341 |
|
342 |
try:
|
343 |
f = [] # image files
|
344 |
for p in path if isinstance(path, list) else [path]:
|
345 |
+
p = Path(p) # os-agnostic
|
346 |
+
if p.is_dir(): # dir
|
347 |
+
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
|
348 |
+
elif p.is_file(): # file
|
349 |
with open(p, 'r') as t:
|
350 |
t = t.read().splitlines()
|
351 |
+
parent = str(p.parent) + os.sep
|
352 |
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
|
|
|
|
|
353 |
else:
|
354 |
raise Exception('%s does not exist' % p)
|
355 |
+
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
|
356 |
+
assert self.img_files, 'No images found'
|
|
|
357 |
except Exception as e:
|
358 |
raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
|
359 |
|
utils/general.py
CHANGED
@@ -79,9 +79,9 @@ def check_dataset(dict):
|
|
79 |
# Download dataset if not found locally
|
80 |
val, s = dict.get('val'), dict.get('download')
|
81 |
if val and len(val):
|
82 |
-
val = [
|
83 |
-
if not all(
|
84 |
-
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [
|
85 |
if s and len(s): # download script
|
86 |
print('Downloading %s ...' % s)
|
87 |
if s.startswith('http') and s.endswith('.zip'): # URL
|
|
|
79 |
# Download dataset if not found locally
|
80 |
val, s = dict.get('val'), dict.get('download')
|
81 |
if val and len(val):
|
82 |
+
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
83 |
+
if not all(x.exists() for x in val):
|
84 |
+
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
|
85 |
if s and len(s): # download script
|
86 |
print('Downloading %s ...' % s)
|
87 |
if s.startswith('http') and s.endswith('.zip'): # URL
|