glenn-jocher
committed on
Multi-threaded VisDrone and VOC downloads (#7108)
Browse files
* Multi-threaded VOC download
* Update VOC.yaml
* Update
* Update general.py
* Update general.py
- data/GlobalWheat2020.yaml +1 -0
- data/Objects365.yaml +1 -0
- data/SKU-110K.yaml +1 -0
- data/VOC.yaml +1 -1
- data/VisDrone.yaml +1 -1
- data/coco.yaml +1 -0
- utils/general.py +7 -4
data/GlobalWheat2020.yaml
CHANGED
@@ -34,6 +34,7 @@ names: ['wheat_head'] # class names
|
|
34 |
download: |
|
35 |
from utils.general import download, Path
|
36 |
|
|
|
37 |
# Download
|
38 |
dir = Path(yaml['path']) # dataset root dir
|
39 |
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
|
|
|
34 |
download: |
|
35 |
from utils.general import download, Path
|
36 |
|
37 |
+
|
38 |
# Download
|
39 |
dir = Path(yaml['path']) # dataset root dir
|
40 |
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
|
data/Objects365.yaml
CHANGED
@@ -65,6 +65,7 @@ download: |
|
|
65 |
|
66 |
from utils.general import Path, download, np, xyxy2xywhn
|
67 |
|
|
|
68 |
# Make Directories
|
69 |
dir = Path(yaml['path']) # dataset root dir
|
70 |
for p in 'images', 'labels':
|
|
|
65 |
|
66 |
from utils.general import Path, download, np, xyxy2xywhn
|
67 |
|
68 |
+
|
69 |
# Make Directories
|
70 |
dir = Path(yaml['path']) # dataset root dir
|
71 |
for p in 'images', 'labels':
|
data/SKU-110K.yaml
CHANGED
@@ -24,6 +24,7 @@ download: |
|
|
24 |
from tqdm import tqdm
|
25 |
from utils.general import np, pd, Path, download, xyxy2xywh
|
26 |
|
|
|
27 |
# Download
|
28 |
dir = Path(yaml['path']) # dataset root dir
|
29 |
parent = Path(dir.parent) # download dir
|
|
|
24 |
from tqdm import tqdm
|
25 |
from utils.general import np, pd, Path, download, xyxy2xywh
|
26 |
|
27 |
+
|
28 |
# Download
|
29 |
dir = Path(yaml['path']) # dataset root dir
|
30 |
parent = Path(dir.parent) # download dir
|
data/VOC.yaml
CHANGED
@@ -62,7 +62,7 @@ download: |
|
|
62 |
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
|
63 |
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
|
64 |
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
|
65 |
-
download(urls, dir=dir / 'images', delete=False)
|
66 |
|
67 |
# Convert
|
68 |
path = dir / f'images/VOCdevkit'
|
|
|
62 |
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
|
63 |
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
|
64 |
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
|
65 |
+
download(urls, dir=dir / 'images', delete=False, threads=3)
|
66 |
|
67 |
# Convert
|
68 |
path = dir / f'images/VOCdevkit'
|
data/VisDrone.yaml
CHANGED
@@ -54,7 +54,7 @@ download: |
|
|
54 |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
|
55 |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
|
56 |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
|
57 |
-
download(urls, dir=dir)
|
58 |
|
59 |
# Convert
|
60 |
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
|
|
|
54 |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
|
55 |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
|
56 |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
|
57 |
+
download(urls, dir=dir, threads=4)
|
58 |
|
59 |
# Convert
|
60 |
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
|
data/coco.yaml
CHANGED
@@ -30,6 +30,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
|
|
30 |
download: |
|
31 |
from utils.general import download, Path
|
32 |
|
|
|
33 |
# Download labels
|
34 |
segments = False # segment or box labels
|
35 |
dir = Path(yaml['path']) # dataset root dir
|
|
|
30 |
download: |
|
31 |
from utils.general import download, Path
|
32 |
|
33 |
+
|
34 |
# Download labels
|
35 |
segments = False # segment or box labels
|
36 |
dir = Path(yaml['path']) # dataset root dir
|
utils/general.py
CHANGED
@@ -449,8 +449,9 @@ def check_dataset(data, autodownload=True):
|
|
449 |
if val:
|
450 |
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
451 |
if not all(x.exists() for x in val):
|
452 |
-
LOGGER.info('\nDataset not found
|
453 |
if s and autodownload: # download script
|
|
|
454 |
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
|
455 |
if s.startswith('http') and s.endswith('.zip'): # URL
|
456 |
f = Path(s).name # filename
|
@@ -465,9 +466,11 @@ def check_dataset(data, autodownload=True):
|
|
465 |
r = os.system(s)
|
466 |
else: # python script
|
467 |
r = exec(s, {'yaml': data}) # return None
|
468 |
-
|
|
|
|
|
469 |
else:
|
470 |
-
raise Exception('Dataset not found
|
471 |
|
472 |
return data # dictionary
|
473 |
|
@@ -491,7 +494,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
|
|
491 |
if curl:
|
492 |
os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail
|
493 |
else:
|
494 |
-
torch.hub.download_url_to_file(url, f, progress=
|
495 |
if unzip and f.suffix in ('.zip', '.gz'):
|
496 |
LOGGER.info(f'Unzipping {f}...')
|
497 |
if f.suffix == '.zip':
|
|
|
449 |
if val:
|
450 |
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
451 |
if not all(x.exists() for x in val):
|
452 |
+
LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
|
453 |
if s and autodownload: # download script
|
454 |
+
t = time.time()
|
455 |
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
|
456 |
if s.startswith('http') and s.endswith('.zip'): # URL
|
457 |
f = Path(s).name # filename
|
|
|
466 |
r = os.system(s)
|
467 |
else: # python script
|
468 |
r = exec(s, {'yaml': data}) # return None
|
469 |
+
dt = f'({round(time.time() - t, 1)}s)'
|
470 |
+
s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
|
471 |
+
LOGGER.info(emojis(f"Dataset download {s}"))
|
472 |
else:
|
473 |
+
raise Exception(emojis('Dataset not found ❌'))
|
474 |
|
475 |
return data # dictionary
|
476 |
|
|
|
494 |
if curl:
|
495 |
os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail
|
496 |
else:
|
497 |
+
torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download
|
498 |
if unzip and f.suffix in ('.zip', '.gz'):
|
499 |
LOGGER.info(f'Unzipping {f}...')
|
500 |
if f.suffix == '.zip':
|