glenn-jocher committed on
Commit
78fd077
1 Parent(s): d48a34d

VisDrone2019-DET Dataset Auto-Download (#2882)

Browse files

* VisDrone Dataset Auto-Download

* add visdrone.yaml

* cleanup

* add VisDrone2019-DET-test-dev

* cleanup VOC

data/argoverse_hd.yaml CHANGED
@@ -1,6 +1,6 @@
1
  # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
2
  # Train command: python train.py --data argoverse_hd.yaml
3
- # Default dataset location is next to /yolov5:
4
  # /parent_folder
5
  # /argoverse
6
  # /yolov5
 
1
  # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
2
  # Train command: python train.py --data argoverse_hd.yaml
3
+ # Default dataset location is next to YOLOv5:
4
  # /parent_folder
5
  # /argoverse
6
  # /yolov5
data/coco.yaml CHANGED
@@ -1,6 +1,6 @@
1
  # COCO 2017 dataset http://cocodataset.org
2
  # Train command: python train.py --data coco.yaml
3
- # Default dataset location is next to /yolov5:
4
  # /parent_folder
5
  # /coco
6
  # /yolov5
 
1
  # COCO 2017 dataset http://cocodataset.org
2
  # Train command: python train.py --data coco.yaml
3
+ # Default dataset location is next to YOLOv5:
4
  # /parent_folder
5
  # /coco
6
  # /yolov5
data/coco128.yaml CHANGED
@@ -1,6 +1,6 @@
1
  # COCO 2017 dataset http://cocodataset.org - first 128 training images
2
  # Train command: python train.py --data coco128.yaml
3
- # Default dataset location is next to /yolov5:
4
  # /parent_folder
5
  # /coco128
6
  # /yolov5
 
1
  # COCO 2017 dataset http://cocodataset.org - first 128 training images
2
  # Train command: python train.py --data coco128.yaml
3
+ # Default dataset location is next to YOLOv5:
4
  # /parent_folder
5
  # /coco128
6
  # /yolov5
data/scripts/get_argoverse_hd.sh CHANGED
@@ -2,7 +2,7 @@
2
  # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
3
  # Download command: bash data/scripts/get_argoverse_hd.sh
4
  # Train command: python train.py --data argoverse_hd.yaml
5
- # Default dataset location is next to /yolov5:
6
  # /parent_folder
7
  # /argoverse
8
  # /yolov5
 
2
  # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
3
  # Download command: bash data/scripts/get_argoverse_hd.sh
4
  # Train command: python train.py --data argoverse_hd.yaml
5
+ # Default dataset location is next to YOLOv5:
6
  # /parent_folder
7
  # /argoverse
8
  # /yolov5
data/scripts/get_coco.sh CHANGED
@@ -2,7 +2,7 @@
2
  # COCO 2017 dataset http://cocodataset.org
3
  # Download command: bash data/scripts/get_coco.sh
4
  # Train command: python train.py --data coco.yaml
5
- # Default dataset location is next to /yolov5:
6
  # /parent_folder
7
  # /coco
8
  # /yolov5
 
2
  # COCO 2017 dataset http://cocodataset.org
3
  # Download command: bash data/scripts/get_coco.sh
4
  # Train command: python train.py --data coco.yaml
5
+ # Default dataset location is next to YOLOv5:
6
  # /parent_folder
7
  # /coco
8
  # /yolov5
data/scripts/get_voc.sh CHANGED
@@ -2,7 +2,7 @@
2
  # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
3
  # Download command: bash data/scripts/get_voc.sh
4
  # Train command: python train.py --data voc.yaml
5
- # Default dataset location is next to /yolov5:
6
  # /parent_folder
7
  # /VOC
8
  # /yolov5
@@ -29,34 +29,27 @@ echo "Completed in" $runtime "seconds"
29
 
30
  echo "Splitting dataset..."
31
  python3 - "$@" <<END
32
- import xml.etree.ElementTree as ET
33
- import pickle
34
  import os
35
- from os import listdir, getcwd
36
- from os.path import join
 
 
37
 
38
- sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
 
39
 
40
- classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
41
 
 
 
 
 
 
42
 
43
- def convert(size, box):
44
- dw = 1./(size[0])
45
- dh = 1./(size[1])
46
- x = (box[0] + box[1])/2.0 - 1
47
- y = (box[2] + box[3])/2.0 - 1
48
- w = box[1] - box[0]
49
- h = box[3] - box[2]
50
- x = x*dw
51
- w = w*dw
52
- y = y*dh
53
- h = h*dh
54
- return (x,y,w,h)
55
 
56
  def convert_annotation(year, image_id):
57
- in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
58
- out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
59
- tree=ET.parse(in_file)
60
  root = tree.getroot()
61
  size = root.find('size')
62
  w = int(size.find('width').text)
@@ -65,74 +58,58 @@ def convert_annotation(year, image_id):
65
  for obj in root.iter('object'):
66
  difficult = obj.find('difficult').text
67
  cls = obj.find('name').text
68
- if cls not in classes or int(difficult)==1:
69
  continue
70
  cls_id = classes.index(cls)
71
  xmlbox = obj.find('bndbox')
72
- b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
73
- bb = convert((w,h), b)
 
74
  out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
75
 
76
- wd = getcwd()
77
 
 
78
  for year, image_set in sets:
79
- if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
80
- os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
81
- image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
82
- list_file = open('%s_%s.txt'%(year, image_set), 'w')
83
  for image_id in image_ids:
84
- list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
85
  convert_annotation(year, image_id)
86
  list_file.close()
87
-
88
  END
89
 
90
  cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
91
  cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
92
 
93
- python3 - "$@" <<END
 
94
 
95
- import shutil
96
  import os
97
- os.system('mkdir ../VOC/')
98
- os.system('mkdir ../VOC/images')
99
- os.system('mkdir ../VOC/images/train')
100
- os.system('mkdir ../VOC/images/val')
101
-
102
- os.system('mkdir ../VOC/labels')
103
- os.system('mkdir ../VOC/labels/train')
104
- os.system('mkdir ../VOC/labels/val')
105
 
106
- import os
107
  print(os.path.exists('../tmp/train.txt'))
108
- f = open('../tmp/train.txt', 'r')
109
- lines = f.readlines()
110
-
111
- for line in lines:
112
- line = "/".join(line.split('/')[-5:]).strip()
113
- if (os.path.exists("../" + line)):
114
- os.system("cp ../"+ line + " ../VOC/images/train")
115
-
116
- line = line.replace('JPEGImages', 'labels')
117
- line = line.replace('jpg', 'txt')
118
- if (os.path.exists("../" + line)):
119
- os.system("cp ../"+ line + " ../VOC/labels/train")
120
 
 
 
 
121
 
122
  print(os.path.exists('../tmp/2007_test.txt'))
123
- f = open('../tmp/2007_test.txt', 'r')
124
- lines = f.readlines()
125
-
126
- for line in lines:
127
- line = "/".join(line.split('/')[-5:]).strip()
128
- if (os.path.exists("../" + line)):
129
- os.system("cp ../"+ line + " ../VOC/images/val")
130
-
131
- line = line.replace('JPEGImages', 'labels')
132
- line = line.replace('jpg', 'txt')
133
- if (os.path.exists("../" + line)):
134
- os.system("cp ../"+ line + " ../VOC/labels/val")
135
-
136
  END
137
 
138
  rm -rf ../tmp # remove temporary directory
 
2
  # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
3
  # Download command: bash data/scripts/get_voc.sh
4
  # Train command: python train.py --data voc.yaml
5
+ # Default dataset location is next to YOLOv5:
6
  # /parent_folder
7
  # /VOC
8
  # /yolov5
 
29
 
30
  echo "Splitting dataset..."
31
  python3 - "$@" <<END
 
 
32
  import os
33
+ import xml.etree.ElementTree as ET
34
+ from os import getcwd
35
+
36
+ sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
37
 
38
+ classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
39
+ "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
40
 
 
41
 
42
def convert_box(size, box):
    # Convert a pixel box given as (xmin, xmax, ymin, ymax) into a normalized
    # (x_center, y_center, width, height) tuple, where size is (image_width, image_height).
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    # The -1 offset on the centers is carried over unchanged from the original conversion.
    cx = (xmin + xmax) / 2.0 - 1
    cy = (ymin + ymax) / 2.0 - 1
    return cx * inv_w, cy * inv_h, (xmax - xmin) * inv_w, (ymax - ymin) * inv_h
47
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def convert_annotation(year, image_id):
50
+ in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
51
+ out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
52
+ tree = ET.parse(in_file)
53
  root = tree.getroot()
54
  size = root.find('size')
55
  w = int(size.find('width').text)
 
58
  for obj in root.iter('object'):
59
  difficult = obj.find('difficult').text
60
  cls = obj.find('name').text
61
+ if cls not in classes or int(difficult) == 1:
62
  continue
63
  cls_id = classes.index(cls)
64
  xmlbox = obj.find('bndbox')
65
+ b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
66
+ float(xmlbox.find('ymax').text))
67
+ bb = convert_box((w, h), b)
68
  out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
69
 
 
70
 
71
+ cwd = getcwd()
72
  for year, image_set in sets:
73
+ if not os.path.exists('VOCdevkit/VOC%s/labels/' % year):
74
+ os.makedirs('VOCdevkit/VOC%s/labels/' % year)
75
+ image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
76
+ list_file = open('%s_%s.txt' % (year, image_set), 'w')
77
  for image_id in image_ids:
78
+ list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (cwd, year, image_id))
79
  convert_annotation(year, image_id)
80
  list_file.close()
 
81
  END
82
 
83
  cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
84
  cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
85
 
86
+ mkdir ../VOC ../VOC/images ../VOC/images/train ../VOC/images/val
87
+ mkdir ../VOC/labels ../VOC/labels/train ../VOC/labels/val
88
 
89
+ python3 - "$@" <<END
90
import os
import shutil


def copy_split(list_file, split):
    # Copy every image named in list_file, plus its label file when present,
    # into ../VOC/images/<split> and ../VOC/labels/<split>.
    print(os.path.exists(list_file))
    with open(list_file, 'r') as f:
        for line in f:
            # Keep only the last 5 path components so the entry resolves relative to ../
            # (presumably tmp/VOCdevkit/VOC<year>/JPEGImages/<id>.jpg - confirm against the list writer).
            image = "/".join(line.split('/')[-5:]).strip()
            if os.path.isfile("../" + image):  # isfile: a blank line must not match the ../ directory
                shutil.copy("../" + image, "../VOC/images/" + split)

            # Label path: labels/ directory instead of JPEGImages/, and only the extension swapped to .txt
            label = os.path.splitext(image.replace('JPEGImages', 'labels'))[0] + '.txt'
            if os.path.isfile("../" + label):
                shutil.copy("../" + label, "../VOC/labels/" + split)


copy_split('../tmp/train.txt', 'train')
copy_split('../tmp/2007_test.txt', 'val')
 
 
 
 
113
  END
114
 
115
  rm -rf ../tmp # remove temporary directory
data/visdrone.yaml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
2
+ # Train command: python train.py --data visdrone.yaml
3
+ # Default dataset location is next to YOLOv5:
4
+ # /parent_folder
5
+ # /VisDrone
6
+ # /yolov5
7
+
8
+
9
+ # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
10
+ train: ../VisDrone/VisDrone2019-DET-train/images # 6471 images
11
+ val: ../VisDrone/VisDrone2019-DET-val/images # 548 images
12
+ test: ../VisDrone/VisDrone2019-DET-test-dev/images # 1610 images
13
+
14
+ # number of classes
15
+ nc: 10
16
+
17
+ # class names
18
+ names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
19
+
20
+
21
+ # download command/URL (optional) --------------------------------------------------------------------------------------
22
+ download: |
23
+ import os
24
+ from pathlib import Path
25
+
26
+ from utils.general import download
27
+
28
+
29
def visdrone2yolo(dir):
    """Convert the VisDrone annotation files under ``dir`` to YOLO label files.

    For every ``dir/annotations/*.txt`` file, the matching ``dir/images/<name>.jpg`` is opened
    to obtain the image size, and a same-named file is written to ``dir/labels`` with one
    ``class x_center y_center width height`` line (normalized, 6 decimals) per kept row.

    Args:
        dir: pathlib.Path of one dataset split containing ``annotations/`` and ``images/``.
    """
    from PIL import Image
    from tqdm import tqdm

    def convert_box(size, box):
        # Convert VisDrone box (left, top, width, height) to YOLO xywh box normalized by image size
        dw = 1. / size[0]
        dh = 1. / size[1]
        return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

    labels_dir = dir / 'labels'
    labels_dir.mkdir(parents=True, exist_ok=True)  # make labels directory
    pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
    for f in pbar:
        # Close the image handle as soon as the size is known instead of leaking it
        with Image.open((dir / 'images' / f.name).with_suffix('.jpg')) as im:
            img_size = im.size
        lines = []
        with open(f, 'r') as file:  # read annotation.txt
            for row in [x.split(',') for x in file.read().strip().splitlines()]:
                # Skip rows whose field 4 is '0' (VisDrone 'ignored regions').
                # NOTE(review): field 4 appears to be the score column, not the class column - confirm.
                if row[4] == '0':
                    continue
                cls = int(row[5]) - 1  # shift class ids down by one so they start at 0
                box = convert_box(img_size, tuple(map(int, row[:4])))
                lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
        # Build the label path from the labels directory rather than by string-replacing
        # '/annotations/', which would also rewrite any parent directory with that name
        with open(labels_dir / f.name, 'w') as fl:
            fl.writelines(lines)  # write label.txt
53
+
54
+
55
# Download
dir = Path('../VisDrone')  # dataset directory
# Every URL needs its own trailing comma: adjacent string literals are silently concatenated,
# which previously merged the test-dev and test-challenge URLs into one invalid URL.
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir)

# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
    visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
data/voc.yaml CHANGED
@@ -1,6 +1,6 @@
1
  # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
2
  # Train command: python train.py --data voc.yaml
3
- # Default dataset location is next to /yolov5:
4
  # /parent_folder
5
  # /VOC
6
  # /yolov5
 
1
  # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
2
  # Train command: python train.py --data voc.yaml
3
+ # Default dataset location is next to YOLOv5:
4
  # /parent_folder
5
  # /VOC
6
  # /yolov5
utils/general.py CHANGED
@@ -9,6 +9,8 @@ import random
9
  import re
10
  import subprocess
11
  import time
 
 
12
  from pathlib import Path
13
 
14
  import cv2
@@ -161,18 +163,40 @@ def check_dataset(dict):
161
  if not all(x.exists() for x in val):
162
  print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
163
  if s and len(s): # download script
164
- print('Downloading %s ...' % s)
165
  if s.startswith('http') and s.endswith('.zip'): # URL
166
  f = Path(s).name # filename
 
167
  torch.hub.download_url_to_file(s, f)
168
- r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip
169
- else: # bash script
 
170
  r = os.system(s)
171
- print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value
 
 
172
  else:
173
  raise Exception('Dataset not found.')
174
 
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  def make_divisible(x, divisor):
177
  # Returns x evenly divisible by divisor
178
  return math.ceil(x / divisor) * divisor
 
9
  import re
10
  import subprocess
11
  import time
12
+ from itertools import repeat
13
+ from multiprocessing.pool import ThreadPool
14
  from pathlib import Path
15
 
16
  import cv2
 
163
  if not all(x.exists() for x in val):
164
  print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
165
  if s and len(s): # download script
 
166
  if s.startswith('http') and s.endswith('.zip'): # URL
167
  f = Path(s).name # filename
168
+ print(f'Downloading {s} ...')
169
  torch.hub.download_url_to_file(s, f)
170
+ r = os.system(f'unzip -q {f} -d ../ && rm {f}') # unzip
171
+ elif s.startswith('bash '): # bash script
172
+ print(f'Running {s} ...')
173
  r = os.system(s)
174
+ else: # python script
175
+ r = exec(s) # return None
176
+ print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result
177
  else:
178
  raise Exception('Dataset not found.')
179
 
180
 
181
def download(url, dir='.', multi_thread=False):
    """Download one or more files into ``dir`` and unzip any ``.zip`` archives there.

    Args:
        url: A single URL (str or Path) or an iterable of URLs.
        dir: Destination directory (str or Path); created, with parents, if missing.
        multi_thread: If True, download with a pool of 8 threads; otherwise one file at a time.

    The call returns only after every download has finished. In multi-threaded mode an
    exception raised by a worker is re-raised here instead of being dropped.
    """

    def download_one(url, dir):
        # Download 1 file
        f = dir / Path(url).name  # filename
        print(f'Downloading {url} to {f}...')
        torch.hub.download_url_to_file(url, f, progress=True)  # download
        if f.suffix == '.zip':
            os.system(f'unzip -qo {f} -d {dir} && rm {f}')  # unzip -quiet -overwrite

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory
    # Wrap a single URL in a list; tuple(url) would split a string into characters
    urls = [url] if isinstance(url, (str, Path)) else list(url)
    if multi_thread:
        # starmap blocks until all downloads are done (imap is lazy and was never consumed)
        with ThreadPool(8) as pool:  # 8 threads
            pool.starmap(download_one, zip(urls, repeat(dir)))
    else:
        for u in urls:
            download_one(u, dir)
198
+
199
+
200
  def make_divisible(x, divisor):
201
  # Returns x evenly divisible by divisor
202
  return math.ceil(x / divisor) * divisor