glenn-jocher committed on
Commit
4346b13
1 Parent(s): 00917a6

Dataset download bash script updates (#1132)

Browse files
Files changed (2) hide show
  1. data/scripts/get_coco.sh +10 -7
  2. data/scripts/get_voc.sh +12 -68
data/scripts/get_coco.sh CHANGED
@@ -8,14 +8,17 @@
8
  # /yolov5
9
 
10
  # Download/unzip labels
11
- echo 'Downloading COCO 2017 labels ...'
12
  d='../' # unzip directory
13
- f='coco2017labels.zip' && curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f
14
- unzip -q $f -d $d && rm $f
 
15
 
16
  # Download/unzip images
17
- echo 'Downloading COCO 2017 images ...'
18
  d='../coco/images' # unzip directory
19
- f='train2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 19G, 118k images
20
- f='val2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 1G, 5k images
21
- # f='test2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 7G, 41k images
 
 
 
 
 
8
  # /yolov5
9
 
10
  # Download/unzip labels
 
11
  d='../' # unzip directory
12
+ url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
13
+ f='coco2017labels.zip' # 68 MB
14
+ echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
15
 
16
  # Download/unzip images
 
17
  d='../coco/images' # unzip directory
18
+ url=http://images.cocodataset.org/zips/
19
+ f1='train2017.zip' # 19G, 118k images
20
+ f2='val2017.zip' # 1G, 5k images
21
+ f3='test2017.zip' # 7G, 41k images (optional)
22
+ for f in $f1 $f2; do
23
+ echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
24
+ done
data/scripts/get_voc.sh CHANGED
@@ -8,79 +8,23 @@
8
  # /yolov5
9
 
10
  start=$(date +%s)
11
-
12
- # handle optional download dir
13
- if [ -z "$1" ]; then
14
- # navigate to ~/tmp
15
- echo "navigating to ../tmp/ ..."
16
- mkdir -p ../tmp
17
- cd ../tmp/
18
- else
19
- # check if is valid directory
20
- if [ ! -d $1 ]; then
21
- echo $1 "is not a valid directory"
22
- exit 0
23
- fi
24
- echo "navigating to" $1 "..."
25
- cd $1
26
- fi
27
-
28
- echo "Downloading VOC2007 trainval ..."
29
- # Download data
30
- curl -LO http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
31
- echo "Downloading VOC2007 test data ..."
32
- curl -LO http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
33
- echo "Done downloading."
34
-
35
- # Extract data
36
- echo "Extracting trainval ..."
37
- tar -xf VOCtrainval_06-Nov-2007.tar
38
- echo "Extracting test ..."
39
- tar -xf VOCtest_06-Nov-2007.tar
40
- echo "removing tars ..."
41
- rm VOCtrainval_06-Nov-2007.tar
42
- rm VOCtest_06-Nov-2007.tar
43
-
44
- end=$(date +%s)
45
- runtime=$((end - start))
46
-
47
- echo "Completed in" $runtime "seconds"
48
-
49
- start=$(date +%s)
50
-
51
- # handle optional download dir
52
- if [ -z "$1" ]; then
53
- # navigate to ~/tmp
54
- echo "navigating to ../tmp/ ..."
55
- mkdir -p ../tmp
56
- cd ../tmp/
57
- else
58
- # check if is valid directory
59
- if [ ! -d $1 ]; then
60
- echo $1 "is not a valid directory"
61
- exit 0
62
- fi
63
- echo "navigating to" $1 "..."
64
- cd $1
65
- fi
66
-
67
- echo "Downloading VOC2012 trainval ..."
68
- # Download data
69
- curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
70
- echo "Done downloading."
71
-
72
- # Extract data
73
- echo "Extracting trainval ..."
74
- tar -xf VOCtrainval_11-May-2012.tar
75
- echo "removing tar ..."
76
- rm VOCtrainval_11-May-2012.tar
77
 
78
  end=$(date +%s)
79
  runtime=$((end - start))
80
-
81
  echo "Completed in" $runtime "seconds"
82
 
83
- cd ../tmp
84
  echo "Spliting dataset..."
85
  python3 - "$@" <<END
86
  import xml.etree.ElementTree as ET
 
8
  # /yolov5
9
 
10
  start=$(date +%s)
11
+ mkdir -p ../tmp
12
+ cd ../tmp/
13
+
14
+ # Download/unzip images and labels
15
+ d='.' # unzip directory
16
+ url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
17
+ f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
18
+ f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
19
+ f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
20
+ for f in $f1 $f2 $f3; do
21
+ echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
22
+ done
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  end=$(date +%s)
25
  runtime=$((end - start))
 
26
  echo "Completed in" $runtime "seconds"
27
 
 
28
  echo "Spliting dataset..."
29
  python3 - "$@" <<END
30
  import xml.etree.ElementTree as ET