glenn-jocher committed on
Commit
f79d747
1 Parent(s): 417a2f4

Add optional dataset.yaml `path` attribute (#3753)

Browse files

* Add optional dataset.yaml `path` attribute

@KalenMike

* pass locals to python scripts

* handle lists

* update coco128.yaml

* Capitalize first letter

* add test key

* finalize GlobalWheat2020.yaml

* finalize objects365.yaml

* finalize SKU-110K.yaml

* finalize SKU-110K.yaml

* finalize VisDrone.yaml

* NoneType fix

* update download comment

* voc to VOC

* update

* update VOC.yaml

* update VOC.yaml

* remove dashes

* delete get_voc.sh

* force coco and coco128 to ../datasets

* Capitalize Argoverse_HD.yaml

* Capitalize Objects365.yaml

* update Argoverse_HD.yaml

* coco segments fix

* VOC single-thread

* update Argoverse_HD.yaml

* update data_dict in test handling

* create root

data/Argoverse_HD.yaml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
2
+ # Train command: python train.py --data Argoverse_HD.yaml
3
+ # Default dataset location is next to YOLOv5:
4
+ # /parent
5
+ # /datasets/Argoverse
6
+ # /yolov5
7
+
8
+
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/Argoverse # dataset root dir
11
+ train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
12
+ val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
13
+ test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
14
+
15
+ # Classes
16
+ nc: 8 # number of classes
17
+ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ] # class names
18
+
19
+
20
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
21
+ download: |
22
+ import json
23
+
24
+ from tqdm import tqdm
25
+ from utils.general import download, Path
26
+
27
+
28
+ def argoverse2yolo(set):
29
+ labels = {}
30
+ a = json.load(open(set, "rb"))
31
+ for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
32
+ img_id = annot['image_id']
33
+ img_name = a['images'][img_id]['name']
34
+ img_label_name = img_name[:-3] + "txt"
35
+
36
+ cls = annot['category_id'] # instance class id
37
+ x_center, y_center, width, height = annot['bbox']
38
+ x_center = (x_center + width / 2) / 1920.0 # offset and scale
39
+ y_center = (y_center + height / 2) / 1200.0 # offset and scale
40
+ width /= 1920.0 # scale
41
+ height /= 1200.0 # scale
42
+
43
+ img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
44
+ if not img_dir.exists():
45
+ img_dir.mkdir(parents=True, exist_ok=True)
46
+
47
+ k = str(img_dir / img_label_name)
48
+ if k not in labels:
49
+ labels[k] = []
50
+ labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
51
+
52
+ for k in labels:
53
+ with open(k, "w") as f:
54
+ f.writelines(labels[k])
55
+
56
+
57
+ # Download
58
+ dir = Path('../datasets/Argoverse') # dataset root dir
59
+ urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
60
+ download(urls, dir=dir, delete=False)
61
+
62
+ # Convert
63
+ annotations_dir = 'Argoverse-HD/annotations/'
64
+ (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
65
+ for d in "train.json", "val.json":
66
 + argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
data/GlobalWheat2020.yaml CHANGED
@@ -1,43 +1,40 @@
1
  # Global Wheat 2020 dataset http://www.global-wheat.com/
2
  # Train command: python train.py --data GlobalWheat2020.yaml
3
  # Default dataset location is next to YOLOv5:
4
- # /parent_folder
5
  # /datasets/GlobalWheat2020
6
  # /yolov5
7
 
8
 
9
- # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
10
- train: # 3422 images
11
- - ../datasets/GlobalWheat2020/images/arvalis_1
12
- - ../datasets/GlobalWheat2020/images/arvalis_2
13
- - ../datasets/GlobalWheat2020/images/arvalis_3
14
- - ../datasets/GlobalWheat2020/images/ethz_1
15
- - ../datasets/GlobalWheat2020/images/rres_1
16
- - ../datasets/GlobalWheat2020/images/inrae_1
17
- - ../datasets/GlobalWheat2020/images/usask_1
18
-
19
- val: # 748 images (WARNING: train set contains ethz_1)
20
- - ../datasets/GlobalWheat2020/images/ethz_1
21
-
22
- test: # 1276 images
23
- - ../datasets/GlobalWheat2020/images/utokyo_1
24
- - ../datasets/GlobalWheat2020/images/utokyo_2
25
- - ../datasets/GlobalWheat2020/images/nau_1
26
- - ../datasets/GlobalWheat2020/images/uq_1
27
-
28
- # number of classes
29
- nc: 1
30
-
31
- # class names
32
- names: [ 'wheat_head' ]
33
-
34
-
35
- # download command/URL (optional) --------------------------------------------------------------------------------------
36
  download: |
37
  from utils.general import download, Path
38
 
39
  # Download
40
- dir = Path('../datasets/GlobalWheat2020') # dataset directory
41
  urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
42
  'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
43
  download(urls, dir=dir)
 
1
  # Global Wheat 2020 dataset http://www.global-wheat.com/
2
  # Train command: python train.py --data GlobalWheat2020.yaml
3
  # Default dataset location is next to YOLOv5:
4
+ # /parent
5
  # /datasets/GlobalWheat2020
6
  # /yolov5
7
 
8
 
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/GlobalWheat2020 # dataset root dir
11
+ train: # train images (relative to 'path') 3422 images
12
+ - images/arvalis_1
13
+ - images/arvalis_2
14
+ - images/arvalis_3
15
+ - images/ethz_1
16
+ - images/rres_1
17
+ - images/inrae_1
18
+ - images/usask_1
19
+ val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
20
+ - images/ethz_1
21
+ test: # test images (optional) 1276 images
22
+ - images/utokyo_1
23
+ - images/utokyo_2
24
+ - images/nau_1
25
+ - images/uq_1
26
+
27
+ # Classes
28
+ nc: 1 # number of classes
29
+ names: [ 'wheat_head' ] # class names
30
+
31
+
32
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
 
 
 
33
  download: |
34
  from utils.general import download, Path
35
 
36
  # Download
37
+ dir = Path(yaml['path']) # dataset root dir
38
  urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
39
  'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
40
  download(urls, dir=dir)
data/{objects365.yaml → Objects365.yaml} RENAMED
@@ -1,18 +1,19 @@
1
  # Objects365 dataset https://www.objects365.org/
2
- # Train command: python train.py --data objects365.yaml
3
  # Default dataset location is next to YOLOv5:
4
- # /parent_folder
5
- # /datasets/objects365
6
  # /yolov5
7
 
8
- # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
9
- train: ../datasets/objects365/images/train # 1742289 images
10
- val: ../datasets/objects365/images/val # 5570 images
11
 
12
- # number of classes
13
- nc: 365
 
 
 
14
 
15
- # class names
 
16
  names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
17
  'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
18
  'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
@@ -56,7 +57,7 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl
56
  'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]
57
 
58
 
59
- # download command/URL (optional) --------------------------------------------------------------------------------------
60
  download: |
61
  from pycocotools.coco import COCO
62
  from tqdm import tqdm
@@ -64,7 +65,7 @@ download: |
64
  from utils.general import download, Path
65
 
66
  # Make Directories
67
- dir = Path('../datasets/objects365') # dataset directory
68
  for p in 'images', 'labels':
69
  (dir / p).mkdir(parents=True, exist_ok=True)
70
  for q in 'train', 'val':
 
1
  # Objects365 dataset https://www.objects365.org/
2
+ # Train command: python train.py --data Objects365.yaml
3
  # Default dataset location is next to YOLOv5:
4
+ # /parent
5
+ # /datasets/Objects365
6
  # /yolov5
7
 
 
 
 
8
 
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/Objects365 # dataset root dir
11
+ train: images/train # train images (relative to 'path') 1742289 images
12
+ val: images/val # val images (relative to 'path') 5570 images
13
+ test: # test images (optional)
14
 
15
+ # Classes
16
+ nc: 365 # number of classes
17
  names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
18
  'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
19
  'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
 
57
  'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]
58
 
59
 
60
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
61
  download: |
62
  from pycocotools.coco import COCO
63
  from tqdm import tqdm
 
65
  from utils.general import download, Path
66
 
67
  # Make Directories
68
+ dir = Path(yaml['path']) # dataset root dir
69
  for p in 'images', 'labels':
70
  (dir / p).mkdir(parents=True, exist_ok=True)
71
  for q in 'train', 'val':
data/SKU-110K.yaml CHANGED
@@ -1,39 +1,38 @@
1
  # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
2
  # Train command: python train.py --data SKU-110K.yaml
3
  # Default dataset location is next to YOLOv5:
4
- # /parent_folder
5
  # /datasets/SKU-110K
6
  # /yolov5
7
 
8
 
9
- # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
10
- train: ../datasets/SKU-110K/train.txt # 8219 images
11
- val: ../datasets/SKU-110K/val.txt # 588 images
12
- test: ../datasets/SKU-110K/test.txt # 2936 images
 
13
 
14
- # number of classes
15
- nc: 1
 
16
 
17
- # class names
18
- names: [ 'object' ]
19
 
20
-
21
- # download command/URL (optional) --------------------------------------------------------------------------------------
22
  download: |
23
  import shutil
24
  from tqdm import tqdm
25
  from utils.general import np, pd, Path, download, xyxy2xywh
26
 
27
  # Download
28
- datasets = Path('../datasets') # download directory
 
29
  urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
30
- download(urls, dir=datasets, delete=False)
31
 
32
  # Rename directories
33
- dir = (datasets / 'SKU-110K')
34
  if dir.exists():
35
  shutil.rmtree(dir)
36
- (datasets / 'SKU110K_fixed').rename(dir) # rename dir
37
  (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
38
 
39
  # Convert labels
 
1
  # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
2
  # Train command: python train.py --data SKU-110K.yaml
3
  # Default dataset location is next to YOLOv5:
4
+ # /parent
5
  # /datasets/SKU-110K
6
  # /yolov5
7
 
8
 
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/SKU-110K # dataset root dir
11
+ train: train.txt # train images (relative to 'path') 8219 images
12
+ val: val.txt # val images (relative to 'path') 588 images
13
+ test: test.txt # test images (optional) 2936 images
14
 
15
+ # Classes
16
+ nc: 1 # number of classes
17
+ names: [ 'object' ] # class names
18
 
 
 
19
 
20
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
 
21
  download: |
22
  import shutil
23
  from tqdm import tqdm
24
  from utils.general import np, pd, Path, download, xyxy2xywh
25
 
26
  # Download
27
+ dir = Path(yaml['path']) # dataset root dir
28
+ parent = Path(dir.parent) # download dir
29
  urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
30
+ download(urls, dir=parent, delete=False)
31
 
32
  # Rename directories
 
33
  if dir.exists():
34
  shutil.rmtree(dir)
35
+ (parent / 'SKU110K_fixed').rename(dir) # rename dir
36
  (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
37
 
38
  # Convert labels
data/VOC.yaml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
2
+ # Train command: python train.py --data VOC.yaml
3
+ # Default dataset location is next to YOLOv5:
4
+ # /parent
5
+ # /datasets/VOC
6
+ # /yolov5
7
+
8
+
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/VOC
11
+ train: # train images (relative to 'path') 16551 images
12
+ - images/train2012
13
+ - images/train2007
14
+ - images/val2012
15
+ - images/val2007
16
+ val: # val images (relative to 'path') 4952 images
17
+ - images/test2007
18
+ test: # test images (optional)
19
+ - images/test2007
20
+
21
+ # Classes
22
+ nc: 20 # number of classes
23
+ names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
24
+ 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] # class names
25
+
26
+
27
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
28
+ download: |
29
+ import xml.etree.ElementTree as ET
30
+
31
+ from tqdm import tqdm
32
+ from utils.general import download, Path
33
+
34
+
35
+ def convert_label(path, lb_path, year, image_id):
36
+ def convert_box(size, box):
37
+ dw, dh = 1. / size[0], 1. / size[1]
38
+ x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
39
+ return x * dw, y * dh, w * dw, h * dh
40
+
41
+ in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
42
+ out_file = open(lb_path, 'w')
43
+ tree = ET.parse(in_file)
44
+ root = tree.getroot()
45
+ size = root.find('size')
46
+ w = int(size.find('width').text)
47
+ h = int(size.find('height').text)
48
+
49
+ for obj in root.iter('object'):
50
+ cls = obj.find('name').text
51
+ if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
52
+ xmlbox = obj.find('bndbox')
53
+ bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
54
+ cls_id = yaml['names'].index(cls) # class id
55
+ out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
56
+
57
+
58
+ # Download
59
+ dir = Path(yaml['path']) # dataset root dir
60
+ url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
61
+ urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
62
+ url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
63
+ url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
64
+ download(urls, dir=dir / 'images', delete=False)
65
+
66
+ # Convert
67
+ path = dir / f'images/VOCdevkit'
68
+ for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
69
+ imgs_path = dir / 'images' / f'{image_set}{year}'
70
+ lbs_path = dir / 'labels' / f'{image_set}{year}'
71
+ imgs_path.mkdir(exist_ok=True, parents=True)
72
+ lbs_path.mkdir(exist_ok=True, parents=True)
73
+
74
+ image_ids = open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read().strip().split()
75
+ for id in tqdm(image_ids, desc=f'{image_set}{year}'):
76
+ f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
77
+ lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
78
+ f.rename(imgs_path / f.name) # move image
79
+ convert_label(path, lb_path, year, id) # convert labels to YOLO format
data/VisDrone.yaml CHANGED
@@ -1,24 +1,23 @@
1
  # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
2
  # Train command: python train.py --data VisDrone.yaml
3
  # Default dataset location is next to YOLOv5:
4
- # /parent_folder
5
- # /VisDrone
6
  # /yolov5
7
 
8
 
9
- # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
10
- train: ../VisDrone/VisDrone2019-DET-train/images # 6471 images
11
- val: ../VisDrone/VisDrone2019-DET-val/images # 548 images
12
- test: ../VisDrone/VisDrone2019-DET-test-dev/images # 1610 images
 
13
 
14
- # number of classes
15
- nc: 10
16
-
17
- # class names
18
  names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
19
 
20
 
21
- # download command/URL (optional) --------------------------------------------------------------------------------------
22
  download: |
23
  from utils.general import download, os, Path
24
 
@@ -49,7 +48,7 @@ download: |
49
 
50
 
51
  # Download
52
- dir = Path('../VisDrone') # dataset directory
53
  urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
54
  'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
55
  'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
 
1
  # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
2
  # Train command: python train.py --data VisDrone.yaml
3
  # Default dataset location is next to YOLOv5:
4
+ # /parent
5
+ # /datasets/VisDrone
6
  # /yolov5
7
 
8
 
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/VisDrone # dataset root dir
11
+ train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
12
+ val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
13
+ test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
14
 
15
+ # Classes
16
+ nc: 10 # number of classes
 
 
17
  names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
18
 
19
 
20
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
21
  download: |
22
  from utils.general import download, os, Path
23
 
 
48
 
49
 
50
  # Download
51
+ dir = Path(yaml['path']) # dataset root dir
52
  urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
53
  'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
54
  'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
data/argoverse_hd.yaml DELETED
@@ -1,21 +0,0 @@
1
- # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
2
- # Train command: python train.py --data argoverse_hd.yaml
3
- # Default dataset location is next to YOLOv5:
4
- # /parent_folder
5
- # /argoverse
6
- # /yolov5
7
-
8
-
9
- # download command/URL (optional)
10
- download: bash data/scripts/get_argoverse_hd.sh
11
-
12
- # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
13
- train: ../argoverse/Argoverse-1.1/images/train/ # 39384 images
14
- val: ../argoverse/Argoverse-1.1/images/val/ # 15062 iamges
15
- test: ../argoverse/Argoverse-1.1/images/test/ # Submit to: https://eval.ai/web/challenges/challenge-page/800/overview
16
-
17
- # number of classes
18
- nc: 8
19
-
20
- # class names
21
- names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/coco.yaml CHANGED
@@ -1,23 +1,19 @@
1
  # COCO 2017 dataset http://cocodataset.org
2
  # Train command: python train.py --data coco.yaml
3
  # Default dataset location is next to YOLOv5:
4
- # /parent_folder
5
- # /coco
6
  # /yolov5
7
 
8
 
9
- # download command/URL (optional)
10
- download: bash data/scripts/get_coco.sh
 
 
 
11
 
12
- # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
13
- train: ../coco/train2017.txt # 118287 images
14
- val: ../coco/val2017.txt # 5000 images
15
- test: ../coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
16
-
17
- # number of classes
18
- nc: 80
19
-
20
- # class names
21
  names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
22
  'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
23
  'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
@@ -26,10 +22,22 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
26
  'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
27
  'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
28
  'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
29
- 'hair drier', 'toothbrush' ]
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- # Print classes
32
- # with open('data/coco.yaml') as f:
33
- # d = yaml.safe_load(f) # dict
34
- # for i, x in enumerate(d['names']):
35
- # print(i, x)
 
1
  # COCO 2017 dataset http://cocodataset.org
2
  # Train command: python train.py --data coco.yaml
3
  # Default dataset location is next to YOLOv5:
4
+ # /parent
5
+ # /datasets/coco
6
  # /yolov5
7
 
8
 
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/coco # dataset root dir
11
+ train: train2017.txt # train images (relative to 'path') 118287 images
12
 + val: val2017.txt  # val images (relative to 'path') 5000 images
13
+ test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
14
 
15
+ # Classes
16
+ nc: 80 # number of classes
 
 
 
 
 
 
 
17
  names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
18
  'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
19
  'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
 
22
  'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
23
  'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
24
  'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
25
+ 'hair drier', 'toothbrush' ] # class names
26
+
27
+
28
+ # Download script/URL (optional)
29
+ download: |
30
+ from utils.general import download, Path
31
+
32
+ # Download labels
33
+ segments = False # segment or box labels
34
+ dir = Path(yaml['path']) # dataset root dir
35
+ url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
36
+ urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
37
+ download(urls, dir=dir.parent)
38
 
39
+ # Download data
40
+ urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
41
+ 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
42
+ 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
43
+ download(urls, dir=dir / 'images', threads=3)
data/coco128.yaml CHANGED
@@ -1,22 +1,19 @@
1
  # COCO 2017 dataset http://cocodataset.org - first 128 training images
2
  # Train command: python train.py --data coco128.yaml
3
  # Default dataset location is next to YOLOv5:
4
- # /parent_folder
5
- # /coco128
6
  # /yolov5
7
 
8
 
9
- # download command/URL (optional)
10
- download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
 
 
 
11
 
12
- # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
13
- train: ../coco128/images/train2017/ # 128 images
14
- val: ../coco128/images/train2017/ # 128 images
15
-
16
- # number of classes
17
- nc: 80
18
-
19
- # class names
20
  names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
21
  'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
22
  'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
@@ -25,4 +22,8 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
25
  'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
26
  'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
27
  'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
28
- 'hair drier', 'toothbrush' ]
 
 
 
 
 
1
  # COCO 2017 dataset http://cocodataset.org - first 128 training images
2
  # Train command: python train.py --data coco128.yaml
3
  # Default dataset location is next to YOLOv5:
4
+ # /parent
5
+ # /datasets/coco128
6
  # /yolov5
7
 
8
 
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/coco128 # dataset root dir
11
+ train: images/train2017 # train images (relative to 'path') 128 images
12
+ val: images/train2017 # val images (relative to 'path') 128 images
13
+ test: # test images (optional)
14
 
15
+ # Classes
16
+ nc: 80 # number of classes
 
 
 
 
 
 
17
  names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
18
  'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
19
  'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
 
22
  'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
23
  'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
24
  'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
25
+ 'hair drier', 'toothbrush' ] # class names
26
+
27
+
28
+ # Download script/URL (optional)
29
+ download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
data/hyps/hyp.finetune.yaml CHANGED
@@ -1,5 +1,5 @@
1
  # Hyperparameters for VOC finetuning
2
- # python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
3
  # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
4
 
5
 
 
1
  # Hyperparameters for VOC finetuning
2
+ # python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50
3
  # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
4
 
5
 
data/scripts/get_argoverse_hd.sh DELETED
@@ -1,61 +0,0 @@
1
- #!/bin/bash
2
- # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
3
- # Download command: bash data/scripts/get_argoverse_hd.sh
4
- # Train command: python train.py --data argoverse_hd.yaml
5
- # Default dataset location is next to YOLOv5:
6
- # /parent_folder
7
- # /argoverse
8
- # /yolov5
9
-
10
- # Download/unzip images
11
- d='../argoverse/' # unzip directory
12
- mkdir $d
13
- url=https://argoverse-hd.s3.us-east-2.amazonaws.com/
14
- f=Argoverse-HD-Full.zip
15
- curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &# download, unzip, remove in background
16
- wait # finish background tasks
17
-
18
- cd ../argoverse/Argoverse-1.1/
19
- ln -s tracking images
20
-
21
- cd ../Argoverse-HD/annotations/
22
-
23
- python3 - "$@" <<END
24
- import json
25
- from pathlib import Path
26
-
27
- annotation_files = ["train.json", "val.json"]
28
- print("Converting annotations to YOLOv5 format...")
29
-
30
- for val in annotation_files:
31
- a = json.load(open(val, "rb"))
32
-
33
- label_dict = {}
34
- for annot in a['annotations']:
35
- img_id = annot['image_id']
36
- img_name = a['images'][img_id]['name']
37
- img_label_name = img_name[:-3] + "txt"
38
-
39
- cls = annot['category_id'] # instance class id
40
- x_center, y_center, width, height = annot['bbox']
41
- x_center = (x_center + width / 2) / 1920. # offset and scale
42
- y_center = (y_center + height / 2) / 1200. # offset and scale
43
- width /= 1920. # scale
44
- height /= 1200. # scale
45
-
46
- img_dir = "./labels/" + a['seq_dirs'][a['images'][annot['image_id']]['sid']]
47
-
48
- Path(img_dir).mkdir(parents=True, exist_ok=True)
49
- if img_dir + "/" + img_label_name not in label_dict:
50
- label_dict[img_dir + "/" + img_label_name] = []
51
-
52
- label_dict[img_dir + "/" + img_label_name].append(f"{cls} {x_center} {y_center} {width} {height}\n")
53
-
54
- for filename in label_dict:
55
- with open(filename, "w") as file:
56
- for string in label_dict[filename]:
57
- file.write(string)
58
-
59
- END
60
-
61
- mv ./labels ../../Argoverse-1.1/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/scripts/get_voc.sh DELETED
@@ -1,116 +0,0 @@
1
- #!/bin/bash
2
- # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
3
- # Download command: bash data/scripts/get_voc.sh
4
- # Train command: python train.py --data voc.yaml
5
- # Default dataset location is next to YOLOv5:
6
- # /parent_folder
7
- # /VOC
8
- # /yolov5
9
-
10
- start=$(date +%s)
11
- mkdir -p ../tmp
12
- cd ../tmp/
13
-
14
- # Download/unzip images and labels
15
- d='.' # unzip directory
16
- url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
17
- f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
18
- f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
19
- f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
20
- for f in $f3 $f2 $f1; do
21
- echo 'Downloading' $url$f '...'
22
- curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
23
- done
24
- wait # finish background tasks
25
-
26
- end=$(date +%s)
27
- runtime=$((end - start))
28
- echo "Completed in" $runtime "seconds"
29
-
30
- echo "Splitting dataset..."
31
- python3 - "$@" <<END
32
- import os
33
- import xml.etree.ElementTree as ET
34
- from os import getcwd
35
-
36
- sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
37
-
38
- classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
39
- "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
40
-
41
-
42
- def convert_box(size, box):
43
- dw = 1. / (size[0])
44
- dh = 1. / (size[1])
45
- x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
46
- return x * dw, y * dh, w * dw, h * dh
47
-
48
-
49
- def convert_annotation(year, image_id):
50
- in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
51
- out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
52
- tree = ET.parse(in_file)
53
- root = tree.getroot()
54
- size = root.find('size')
55
- w = int(size.find('width').text)
56
- h = int(size.find('height').text)
57
-
58
- for obj in root.iter('object'):
59
- difficult = obj.find('difficult').text
60
- cls = obj.find('name').text
61
- if cls not in classes or int(difficult) == 1:
62
- continue
63
- cls_id = classes.index(cls)
64
- xmlbox = obj.find('bndbox')
65
- b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
66
- float(xmlbox.find('ymax').text))
67
- bb = convert_box((w, h), b)
68
- out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
69
-
70
-
71
- cwd = getcwd()
72
- for year, image_set in sets:
73
- if not os.path.exists('VOCdevkit/VOC%s/labels/' % year):
74
- os.makedirs('VOCdevkit/VOC%s/labels/' % year)
75
- image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
76
- list_file = open('%s_%s.txt' % (year, image_set), 'w')
77
- for image_id in image_ids:
78
- list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (cwd, year, image_id))
79
- convert_annotation(year, image_id)
80
- list_file.close()
81
- END
82
-
83
- cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
84
- cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
85
-
86
- mkdir ../VOC ../VOC/images ../VOC/images/train ../VOC/images/val
87
- mkdir ../VOC/labels ../VOC/labels/train ../VOC/labels/val
88
-
89
- python3 - "$@" <<END
90
- import os
91
-
92
- print(os.path.exists('../tmp/train.txt'))
93
- with open('../tmp/train.txt', 'r') as f:
94
- for line in f.readlines():
95
- line = "/".join(line.split('/')[-5:]).strip()
96
- if os.path.exists("../" + line):
97
- os.system("cp ../" + line + " ../VOC/images/train")
98
-
99
- line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
100
- if os.path.exists("../" + line):
101
- os.system("cp ../" + line + " ../VOC/labels/train")
102
-
103
- print(os.path.exists('../tmp/2007_test.txt'))
104
- with open('../tmp/2007_test.txt', 'r') as f:
105
- for line in f.readlines():
106
- line = "/".join(line.split('/')[-5:]).strip()
107
- if os.path.exists("../" + line):
108
- os.system("cp ../" + line + " ../VOC/images/val")
109
-
110
- line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
111
- if os.path.exists("../" + line):
112
- os.system("cp ../" + line + " ../VOC/labels/val")
113
- END
114
-
115
- rm -rf ../tmp # remove temporary directory
116
- echo "VOC download done."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/voc.yaml DELETED
@@ -1,21 +0,0 @@
1
- # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
2
- # Train command: python train.py --data voc.yaml
3
- # Default dataset location is next to YOLOv5:
4
- # /parent_folder
5
- # /VOC
6
- # /yolov5
7
-
8
-
9
- # download command/URL (optional)
10
- download: bash data/scripts/get_voc.sh
11
-
12
- # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
13
- train: ../VOC/images/train/ # 16551 images
14
- val: ../VOC/images/val/ # 4952 images
15
-
16
- # number of classes
17
- nc: 20
18
-
19
- # class names
20
- names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
21
- 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test.py CHANGED
@@ -76,6 +76,11 @@ def run(data,
76
  # if device.type != 'cpu' and torch.cuda.device_count() > 1:
77
  # model = nn.DataParallel(model)
78
 
 
 
 
 
 
79
  # Half
80
  half &= device.type != 'cpu' # half precision only supported on CUDA
81
  if half:
@@ -83,10 +88,6 @@ def run(data,
83
 
84
  # Configure
85
  model.eval()
86
- if isinstance(data, str):
87
- with open(data) as f:
88
- data = yaml.safe_load(f)
89
- check_dataset(data) # check
90
  is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt') # COCO dataset
91
  nc = 1 if single_cls else int(data['nc']) # number of classes
92
  iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
 
76
  # if device.type != 'cpu' and torch.cuda.device_count() > 1:
77
  # model = nn.DataParallel(model)
78
 
79
+ # Data
80
+ with open(data) as f:
81
+ data = yaml.safe_load(f)
82
+ check_dataset(data) # check
83
+
84
  # Half
85
  half &= device.type != 'cpu' # half precision only supported on CUDA
86
  if half:
 
88
 
89
  # Configure
90
  model.eval()
 
 
 
 
91
  is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt') # COCO dataset
92
  nc = 1 if single_cls else int(data['nc']) # number of classes
93
  iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
train.py CHANGED
@@ -453,7 +453,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
453
  if not evolve:
454
  if is_coco: # COCO dataset
455
  for m in [last, best] if best.exists() else [last]: # speed, mAP tests
456
- results, _, _ = test.run(data,
457
  batch_size=batch_size // WORLD_SIZE * 2,
458
  imgsz=imgsz_test,
459
  conf_thres=0.001,
 
453
  if not evolve:
454
  if is_coco: # COCO dataset
455
  for m in [last, best] if best.exists() else [last]: # speed, mAP tests
456
+ results, _, _ = test.run(data_dict,
457
  batch_size=batch_size // WORLD_SIZE * 2,
458
  imgsz=imgsz_test,
459
  conf_thres=0.001,
tutorial.ipynb CHANGED
@@ -1255,7 +1255,7 @@
1255
  "source": [
1256
  "# VOC\n",
1257
  "for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']): # zip(batch_size, model)\n",
1258
- " !python train.py --batch {b} --weights {m}.pt --data voc.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
1259
  ],
1260
  "execution_count": null,
1261
  "outputs": []
 
1255
  "source": [
1256
  "# VOC\n",
1257
  "for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']): # zip(batch_size, model)\n",
1258
+ " !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
1259
  ],
1260
  "execution_count": null,
1261
  "outputs": []
utils/general.py CHANGED
@@ -222,9 +222,14 @@ def check_file(file):
222
 
223
  def check_dataset(data, autodownload=True):
224
  # Download dataset if not found locally
225
- val, s = data.get('val'), data.get('download')
 
 
 
 
 
 
226
  if val:
227
- root = Path(val).parts[0] + os.sep # unzip directory i.e. '../'
228
  val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
229
  if not all(x.exists() for x in val):
230
  print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
@@ -233,12 +238,14 @@ def check_dataset(data, autodownload=True):
233
  f = Path(s).name # filename
234
  print(f'Downloading {s} ...')
235
  torch.hub.download_url_to_file(s, f)
 
 
236
  r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip
237
  elif s.startswith('bash '): # bash script
238
  print(f'Running {s} ...')
239
  r = os.system(s)
240
  else: # python script
241
- r = exec(s) # return None
242
  print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result
243
  else:
244
  raise Exception('Dataset not found.')
@@ -258,7 +265,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
258
  if unzip and f.suffix in ('.zip', '.gz'):
259
  print(f'Unzipping {f}...')
260
  if f.suffix == '.zip':
261
- s = f'unzip -qo {f} -d {dir} && rm {f}' # unzip -quiet -overwrite
262
  elif f.suffix == '.gz':
263
  s = f'tar xfz {f} --directory {f.parent}' # unzip
264
  if delete: # delete zip file after unzip
 
222
 
223
  def check_dataset(data, autodownload=True):
224
  # Download dataset if not found locally
225
+ path = Path(data.get('path', '')) # optional 'path' field
226
+ if path:
227
+ for k in 'train', 'val', 'test':
228
+ if data.get(k): # prepend path
229
+ data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
230
+
231
+ train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
232
  if val:
 
233
  val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
234
  if not all(x.exists() for x in val):
235
  print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
 
238
  f = Path(s).name # filename
239
  print(f'Downloading {s} ...')
240
  torch.hub.download_url_to_file(s, f)
241
+ root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
242
+ Path(root).mkdir(parents=True, exist_ok=True) # create root
243
  r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip
244
  elif s.startswith('bash '): # bash script
245
  print(f'Running {s} ...')
246
  r = os.system(s)
247
  else: # python script
248
+ r = exec(s, {'yaml': data}) # return None
249
  print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result
250
  else:
251
  raise Exception('Dataset not found.')
 
265
  if unzip and f.suffix in ('.zip', '.gz'):
266
  print(f'Unzipping {f}...')
267
  if f.suffix == '.zip':
268
+ s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite
269
  elif f.suffix == '.gz':
270
  s = f'tar xfz {f} --directory {f.parent}' # unzip
271
  if delete: # delete zip file after unzip