glenn-jocher
committed on
Commit
•
41523e2
1
Parent(s):
3d8ed0a
Dataset autodownload feature addition (#685)
Browse files* initial commit
* move download scripts into data/scripts
* new check_dataset() function in general.py
* move check_dataset() out of with context
* Update general.py
* DDP update
* Update general.py
- data/coco.yaml +3 -1
- data/coco128.yaml +3 -1
- data/get_coco2017.sh +0 -30
- data/scripts/get_coco.sh +21 -0
- data/{get_voc.sh → scripts/get_voc.sh} +39 -42
- data/voc.yaml +3 -1
- test.py +2 -1
- train.py +5 -3
- utils/general.py +19 -0
data/coco.yaml
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
# COCO 2017 dataset http://cocodataset.org
|
2 |
-
# Download command: bash yolov5/data/get_coco2017.sh
|
3 |
# Train command: python train.py --data coco.yaml
|
4 |
# Default dataset location is next to /yolov5:
|
5 |
# /parent_folder
|
@@ -7,6 +6,9 @@
|
|
7 |
# /yolov5
|
8 |
|
9 |
|
|
|
|
|
|
|
10 |
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
11 |
train: ../coco/train2017.txt # 118287 images
|
12 |
val: ../coco/val2017.txt # 5000 images
|
|
|
1 |
# COCO 2017 dataset http://cocodataset.org
|
|
|
2 |
# Train command: python train.py --data coco.yaml
|
3 |
# Default dataset location is next to /yolov5:
|
4 |
# /parent_folder
|
|
|
6 |
# /yolov5
|
7 |
|
8 |
|
9 |
+
# download command/URL (optional)
|
10 |
+
download: bash data/scripts/get_coco.sh
|
11 |
+
|
12 |
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
13 |
train: ../coco/train2017.txt # 118287 images
|
14 |
val: ../coco/val2017.txt # 5000 images
|
data/coco128.yaml
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
# COCO 2017 dataset http://cocodataset.org - first 128 training images
|
2 |
-
# Download command: python -c "from yolov5.utils.google_utils import *; gdrive_download('1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', 'coco128.zip')"
|
3 |
# Train command: python train.py --data coco128.yaml
|
4 |
# Default dataset location is next to /yolov5:
|
5 |
# /parent_folder
|
@@ -7,6 +6,9 @@
|
|
7 |
# /yolov5
|
8 |
|
9 |
|
|
|
|
|
|
|
10 |
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
11 |
train: ../coco128/images/train2017/ # 128 images
|
12 |
val: ../coco128/images/train2017/ # 128 images
|
|
|
1 |
# COCO 2017 dataset http://cocodataset.org - first 128 training images
|
|
|
2 |
# Train command: python train.py --data coco128.yaml
|
3 |
# Default dataset location is next to /yolov5:
|
4 |
# /parent_folder
|
|
|
6 |
# /yolov5
|
7 |
|
8 |
|
9 |
+
# download command/URL (optional)
|
10 |
+
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
|
11 |
+
|
12 |
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
13 |
train: ../coco128/images/train2017/ # 128 images
|
14 |
val: ../coco128/images/train2017/ # 128 images
|
data/get_coco2017.sh
DELETED
@@ -1,30 +0,0 @@
|
|
1 |
-
#!/bin/bash
|
2 |
-
# COCO 2017 dataset http://cocodataset.org
|
3 |
-
# Download command: bash yolov5/data/get_coco2017.sh
|
4 |
-
# Train command: python train.py --data coco.yaml
|
5 |
-
# Default dataset location is next to /yolov5:
|
6 |
-
# /parent_folder
|
7 |
-
# /coco
|
8 |
-
# /yolov5
|
9 |
-
|
10 |
-
|
11 |
-
# Download labels from Google Drive, accepting presented query
|
12 |
-
filename="coco2017labels.zip"
|
13 |
-
fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L"
|
14 |
-
curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
|
15 |
-
curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
|
16 |
-
rm ./cookie
|
17 |
-
|
18 |
-
# Unzip labels
|
19 |
-
unzip -q ${filename} # for coco.zip
|
20 |
-
# tar -xzf ${filename} # for coco.tar.gz
|
21 |
-
rm ${filename}
|
22 |
-
|
23 |
-
# Download and unzip images
|
24 |
-
cd coco/images
|
25 |
-
f="train2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 19G, 118k images
|
26 |
-
f="val2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 1G, 5k images
|
27 |
-
# f="test2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 7G, 41k images
|
28 |
-
|
29 |
-
# cd out
|
30 |
-
cd ../..
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/scripts/get_coco.sh
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
# COCO 2017 dataset http://cocodataset.org
|
3 |
+
# Download command: bash data/scripts/get_coco.sh
|
4 |
+
# Train command: python train.py --data coco.yaml
|
5 |
+
# Default dataset location is next to /yolov5:
|
6 |
+
# /parent_folder
|
7 |
+
# /coco
|
8 |
+
# /yolov5
|
9 |
+
|
10 |
+
# Download/unzip labels
|
11 |
+
echo 'Downloading COCO 2017 labels ...'
|
12 |
+
d='../' # unzip directory
|
13 |
+
f='coco2017labels.zip' && curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f
|
14 |
+
unzip -q $f -d $d && rm $f
|
15 |
+
|
16 |
+
# Download/unzip images
|
17 |
+
echo 'Downloading COCO 2017 images ...'
|
18 |
+
d='../coco/images' # unzip directory
|
19 |
+
f='train2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 19G, 118k images
|
20 |
+
f='val2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 1G, 5k images
|
21 |
+
# f='test2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 7G, 41k images
|
data/{get_voc.sh → scripts/get_voc.sh}
RENAMED
@@ -1,33 +1,32 @@
|
|
|
|
1 |
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
|
2 |
-
# Download command: bash
|
3 |
# Train command: python train.py --data voc.yaml
|
4 |
# Default dataset location is next to /yolov5:
|
5 |
# /parent_folder
|
6 |
# /VOC
|
7 |
# /yolov5
|
8 |
|
9 |
-
|
10 |
-
start=`date +%s`
|
11 |
|
12 |
# handle optional download dir
|
13 |
-
if [ -z "$1" ]
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
cd $1
|
27 |
fi
|
28 |
|
29 |
echo "Downloading VOC2007 trainval ..."
|
30 |
-
# Download
|
31 |
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
|
32 |
echo "Downloading VOC2007 test data ..."
|
33 |
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
|
@@ -42,44 +41,42 @@ echo "removing tars ..."
|
|
42 |
rm VOCtrainval_06-Nov-2007.tar
|
43 |
rm VOCtest_06-Nov-2007.tar
|
44 |
|
45 |
-
end
|
46 |
-
runtime=$((end-start))
|
47 |
|
48 |
echo "Completed in" $runtime "seconds"
|
49 |
|
50 |
-
start
|
51 |
|
52 |
# handle optional download dir
|
53 |
-
if [ -z "$1" ]
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
cd $1
|
67 |
fi
|
68 |
|
69 |
echo "Downloading VOC2012 trainval ..."
|
70 |
-
# Download
|
71 |
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
|
72 |
echo "Done downloading."
|
73 |
|
74 |
-
|
75 |
# Extract data
|
76 |
echo "Extracting trainval ..."
|
77 |
tar -xf VOCtrainval_11-May-2012.tar
|
78 |
echo "removing tar ..."
|
79 |
rm VOCtrainval_11-May-2012.tar
|
80 |
|
81 |
-
end
|
82 |
-
runtime=$((end-start))
|
83 |
|
84 |
echo "Completed in" $runtime "seconds"
|
85 |
|
@@ -144,8 +141,8 @@ for year, image_set in sets:
|
|
144 |
|
145 |
END
|
146 |
|
147 |
-
cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >
|
148 |
-
cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >
|
149 |
|
150 |
python3 - "$@" <<END
|
151 |
|
@@ -211,5 +208,5 @@ for line in lines:
|
|
211 |
|
212 |
END
|
213 |
|
214 |
-
rm -rf ../tmp
|
215 |
echo "VOC download done."
|
|
|
1 |
+
#!/bin/bash
|
2 |
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
|
3 |
+
# Download command: bash data/scripts/get_voc.sh
|
4 |
# Train command: python train.py --data voc.yaml
|
5 |
# Default dataset location is next to /yolov5:
|
6 |
# /parent_folder
|
7 |
# /VOC
|
8 |
# /yolov5
|
9 |
|
10 |
+
start=$(date +%s)
|
|
|
11 |
|
12 |
# handle optional download dir
|
13 |
+
if [ -z "$1" ]; then
  # no directory given: work in a scratch directory next to the current one
  echo "navigating to ../tmp/ ..."
  mkdir -p ../tmp
  cd ../tmp/
else
  # check if is valid directory ("$1" is quoted so paths with spaces work)
  if [ ! -d "$1" ]; then
    echo "$1" "is not a valid directory"
    exit 1 # non-zero status so callers can detect that the download did not run
  fi
  echo "navigating to" "$1" "..."
  cd "$1"
fi
|
27 |
|
28 |
echo "Downloading VOC2007 trainval ..."
|
29 |
+
# Download data
|
30 |
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
|
31 |
echo "Downloading VOC2007 test data ..."
|
32 |
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
|
|
|
41 |
rm VOCtrainval_06-Nov-2007.tar
|
42 |
rm VOCtest_06-Nov-2007.tar
|
43 |
|
44 |
+
end=$(date +%s)
|
45 |
+
runtime=$((end - start))
|
46 |
|
47 |
echo "Completed in" $runtime "seconds"
|
48 |
|
49 |
+
start=$(date +%s)
|
50 |
|
51 |
# handle optional download dir
|
52 |
+
if [ -z "$1" ]; then
  # no directory given: work in a scratch directory next to the current one
  echo "navigating to ../tmp/ ..."
  mkdir -p ../tmp
  cd ../tmp/
else
  # check if is valid directory ("$1" is quoted so paths with spaces work)
  if [ ! -d "$1" ]; then
    echo "$1" "is not a valid directory"
    exit 1 # non-zero status so callers can detect that the download did not run
  fi
  echo "navigating to" "$1" "..."
  cd "$1"
fi
|
66 |
|
67 |
echo "Downloading VOC2012 trainval ..."
|
68 |
+
# Download data
|
69 |
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
|
70 |
echo "Done downloading."
|
71 |
|
|
|
72 |
# Extract data
|
73 |
echo "Extracting trainval ..."
|
74 |
tar -xf VOCtrainval_11-May-2012.tar
|
75 |
echo "removing tar ..."
|
76 |
rm VOCtrainval_11-May-2012.tar
|
77 |
|
78 |
+
end=$(date +%s)
|
79 |
+
runtime=$((end - start))
|
80 |
|
81 |
echo "Completed in" $runtime "seconds"
|
82 |
|
|
|
141 |
|
142 |
END
|
143 |
|
144 |
+
cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
|
145 |
+
cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
|
146 |
|
147 |
python3 - "$@" <<END
|
148 |
|
|
|
208 |
|
209 |
END
|
210 |
|
211 |
+
rm -rf ../tmp # remove temporary directory
|
212 |
echo "VOC download done."
|
data/voc.yaml
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
|
2 |
-
# Download command: bash ./data/get_voc.sh
|
3 |
# Train command: python train.py --data voc.yaml
|
4 |
# Default dataset location is next to /yolov5:
|
5 |
# /parent_folder
|
@@ -7,6 +6,9 @@
|
|
7 |
# /yolov5
|
8 |
|
9 |
|
|
|
|
|
|
|
10 |
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
11 |
train: ../VOC/images/train/ # 16551 images
|
12 |
val: ../VOC/images/val/ # 4952 images
|
|
|
1 |
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
|
|
|
2 |
# Train command: python train.py --data voc.yaml
|
3 |
# Default dataset location is next to /yolov5:
|
4 |
# /parent_folder
|
|
|
6 |
# /yolov5
|
7 |
|
8 |
|
9 |
+
# download command/URL (optional)
|
10 |
+
download: bash data/scripts/get_voc.sh
|
11 |
+
|
12 |
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
13 |
train: ../VOC/images/train/ # 16551 images
|
14 |
val: ../VOC/images/val/ # 4952 images
|
test.py
CHANGED
@@ -13,7 +13,7 @@ from tqdm import tqdm
|
|
13 |
from models.experimental import attempt_load
|
14 |
from utils.datasets import create_dataloader
|
15 |
from utils.general import (
|
16 |
-
coco80_to_coco91_class, check_file, check_img_size, compute_loss, non_max_suppression,
|
17 |
scale_coords, xyxy2xywh, clip_coords, plot_images, xywh2xyxy, box_iou, output_to_target, ap_per_class)
|
18 |
from utils.torch_utils import select_device, time_synchronized
|
19 |
|
@@ -68,6 +68,7 @@ def test(data,
|
|
68 |
model.eval()
|
69 |
with open(data) as f:
|
70 |
data = yaml.load(f, Loader=yaml.FullLoader) # model dict
|
|
|
71 |
nc = 1 if single_cls else int(data['nc']) # number of classes
|
72 |
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
|
73 |
niou = iouv.numel()
|
|
|
13 |
from models.experimental import attempt_load
|
14 |
from utils.datasets import create_dataloader
|
15 |
from utils.general import (
|
16 |
+
coco80_to_coco91_class, check_dataset, check_file, check_img_size, compute_loss, non_max_suppression,
|
17 |
scale_coords, xyxy2xywh, clip_coords, plot_images, xywh2xyxy, box_iou, output_to_target, ap_per_class)
|
18 |
from utils.torch_utils import select_device, time_synchronized
|
19 |
|
|
|
68 |
model.eval()
|
69 |
with open(data) as f:
|
70 |
data = yaml.load(f, Loader=yaml.FullLoader) # model dict
|
71 |
+
check_dataset(data) # check
|
72 |
nc = 1 if single_cls else int(data['nc']) # number of classes
|
73 |
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
|
74 |
niou = iouv.numel()
|
train.py
CHANGED
@@ -21,9 +21,9 @@ import test # import test.py to get mAP after each epoch
|
|
21 |
from models.yolo import Model
|
22 |
from utils.datasets import create_dataloader
|
23 |
from utils.general import (
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
from utils.google_utils import attempt_download
|
28 |
from utils.torch_utils import init_seeds, ModelEMA, select_device, intersect_dicts
|
29 |
|
@@ -51,6 +51,8 @@ def train(hyp, opt, device, tb_writer=None):
|
|
51 |
init_seeds(2 + rank)
|
52 |
with open(opt.data) as f:
|
53 |
data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
|
|
|
|
|
54 |
train_path = data_dict['train']
|
55 |
test_path = data_dict['val']
|
56 |
nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names']) # number classes, names
|
|
|
21 |
from models.yolo import Model
|
22 |
from utils.datasets import create_dataloader
|
23 |
from utils.general import (
|
24 |
+
torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors, labels_to_image_weights,
|
25 |
+
compute_loss, plot_images, fitness, strip_optimizer, plot_results, get_latest_run, check_dataset, check_file,
|
26 |
+
check_git_status, check_img_size, increment_dir, print_mutation, plot_evolution)
|
27 |
from utils.google_utils import attempt_download
|
28 |
from utils.torch_utils import init_seeds, ModelEMA, select_device, intersect_dicts
|
29 |
|
|
|
51 |
init_seeds(2 + rank)
|
52 |
with open(opt.data) as f:
|
53 |
data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
|
54 |
+
with torch_distributed_zero_first(rank):
|
55 |
+
check_dataset(data_dict) # check
|
56 |
train_path = data_dict['train']
|
57 |
test_path = data_dict['val']
|
58 |
nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names']) # number classes, names
|
utils/general.py
CHANGED
@@ -128,6 +128,25 @@ def check_file(file):
|
|
128 |
return files[0] # return first file if multiple found
|
129 |
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
def make_divisible(x, divisor):
|
132 |
# Returns x evenly divisible by divisor
|
133 |
return math.ceil(x / divisor) * divisor
|
|
|
128 |
return files[0] # return first file if multiple found
|
129 |
|
130 |
|
131 |
+
def check_dataset(dict):
    # Download dataset if not found
    #
    # Arguments:
    #   dict: parsed dataset yaml. Must contain 'train' and 'val', each either a
    #         single path or a list of paths. May contain 'download', which is
    #         either a http(s) URL ending in '.zip' or a shell command to run.
    #         (The parameter name shadows the builtin; it is kept unchanged so
    #         existing callers keep working.)
    # Returns:
    #   None. When all paths exist nothing is done.
    # Raises:
    #   Exception: a path is missing and no 'download' entry is available.
    #
    # NOTE: the 'download' entry is passed to the shell as-is via os.system(),
    # so only dataset yaml files from trusted sources should be used.

    def abs_paths(entry):
        # 'train'/'val' may each be one path or a list of paths
        entries = entry if isinstance(entry, (list, tuple)) else [entry]
        return [os.path.abspath(x) for x in entries]

    paths = abs_paths(dict['train']) + abs_paths(dict['val'])  # data paths
    if all(os.path.exists(x) for x in paths):
        return  # dataset found, nothing to do

    print('\nWARNING: Dataset not found, nonexistant paths: %s' % paths)
    s = dict.get('download')
    if not s:  # key missing or left empty in the yaml
        raise Exception('Dataset autodownload unavailable.')

    print('Attempting autodownload from: %s' % s)
    if s.startswith('http') and s.endswith('.zip'):  # URL
        f = Path(s).name  # filename, saved in the current working directory
        torch.hub.download_url_to_file(s, f)
        r = os.system('unzip -q %s -d ../ && rm %s' % (f, f))  # extract one level up, then delete the archive
    else:  # bash script
        r = os.system(s)
    print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure'))  # analyze return value
|
148 |
+
|
149 |
+
|
150 |
def make_divisible(x, divisor):
|
151 |
# Returns x evenly divisble by divisor
|
152 |
return math.ceil(x / divisor) * divisor
|