ChristopherSTAN committed on
Commit
8c3b829
1 Parent(s): 4503d80

Add files via upload

Browse files
Files changed (1) hide show
  1. data/get_voc.sh +206 -0
data/get_voc.sh ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# --- Stage: download and unpack PASCAL VOC2007 (trainval + test) ---
# Usage: an optional first argument names an existing directory to download
# into; without it the archives go to ../data (created on demand).
start=$(date +%s)

# handle optional download dir
if [ -z "$1" ]; then
    # navigate to ../data
    echo "navigating to ../data/ ..."
    mkdir -p ../data
    cd ../data/ || exit 1
else
    # check if is valid directory; quoted so paths with spaces work
    if [ ! -d "$1" ]; then
        echo "$1" "is not a valid directory"
        # A rejected argument is a failure, so report it to the caller.
        exit 1
    fi
    echo "navigating to" "$1" "..."
    cd "$1" || exit 1
fi

echo "Downloading VOC2007 trainval ..."
# Download the data.
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
echo "Downloading VOC2007 test data ..."
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
echo "Done downloading."

# Extract data
echo "Extracting trainval ..."
tar -xvf VOCtrainval_06-Nov-2007.tar
echo "Extracting test ..."
tar -xvf VOCtest_06-Nov-2007.tar
echo "removing tars ..."
rm VOCtrainval_06-Nov-2007.tar
rm VOCtest_06-Nov-2007.tar

end=$(date +%s)
runtime=$((end-start))

echo "Completed in" $runtime "seconds"
41
+
42
# --- Stage: download and unpack PASCAL VOC2012 (trainval) ---
# Usage: an optional first argument names an existing directory to download
# into; without it the archive goes to ../data (created on demand).
start=$(date +%s)

# handle optional download dir
# NOTE(review): the VOC2007 stage earlier in this script already changes
# directory the same way, so a *relative* "$1" is resolved a second time from
# inside the first target. Pass an absolute path until that is reworked.
if [ -z "$1" ]; then
    # navigate to ../data
    echo "navigating to ../data/ ..."
    mkdir -p ../data
    cd ../data/ || exit 1
else
    # check if is valid directory; quoted so paths with spaces work
    if [ ! -d "$1" ]; then
        echo "$1" "is not a valid directory"
        # A rejected argument is a failure, so report it to the caller.
        exit 1
    fi
    echo "navigating to" "$1" "..."
    cd "$1" || exit 1
fi

echo "Downloading VOC2012 trainval ..."
# Download the data.
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
echo "Done downloading."


# Extract data
echo "Extracting trainval ..."
tar -xvf VOCtrainval_11-May-2012.tar
echo "removing tar ..."
rm VOCtrainval_11-May-2012.tar

end=$(date +%s)
runtime=$((end-start))

echo "Completed in" $runtime "seconds"
77
+
78
# --- Stage: convert VOC XML annotations to per-image label files ---
# Runs inside ../data and, for every (year, image set) pair, writes
#   VOCdevkit/VOC<year>/labels/<image_id>.txt   one line per kept object
#   <year>_<image_set>.txt                      absolute image paths
cd ../data || exit 1
echo "Splitting dataset..."
# The delimiter is quoted so the shell hands the Python source to the
# interpreter verbatim instead of expanding it first.
python3 - "$@" <<'END'
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

# (year, image set) pairs to process; each names an ImageSets/Main/<set>.txt.
sets = [
    ('2012', 'train'),
    ('2012', 'val'),
    ('2007', 'train'),
    ('2007', 'val'),
    ('2007', 'test'),
]

# Class names; the position in this list is the class index written to labels.
classes = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]


def convert(size, box):
    """Convert a pixel box to a normalised (x_center, y_center, w, h) tuple.

    size: (image_width, image_height) in pixels.
    box:  (xmin, xmax, ymin, ymax) in pixels -- both x values come first.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    # NOTE(review): the "- 1" presumably compensates for 1-based pixel
    # coordinates in the VOC annotations -- confirm before changing it.
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)


def convert_annotation(year, image_id):
    """Write the label file for one image from its XML annotation.

    Reads  VOCdevkit/VOC<year>/Annotations/<image_id>.xml and writes
    VOCdevkit/VOC<year>/labels/<image_id>.txt with one line per kept object:
    "<class index> <x> <y> <w> <h>". Objects whose name is not in the
    classes list, or that are flagged difficult, are skipped.
    """
    xml_path = 'VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)
    txt_path = 'VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id)

    # ET.parse opens and closes the file itself when given a path.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            # A missing or empty <difficult> tag counts as "not difficult"
            # instead of crashing the whole conversion.
            difficult = int(obj.findtext('difficult') or 0)
            cls = obj.find('name').text
            if cls not in classes or difficult == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


# The image lists hold absolute paths rooted at the current directory.
wd = getcwd()

for year, image_set in sets:
    os.makedirs('VOCdevkit/VOC%s/labels/' % year, exist_ok=True)

    ids_path = 'VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)
    with open(ids_path) as ids_file:
        image_ids = ids_file.read().strip().split()

    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
            convert_annotation(year, image_id)

END
138
+
139
# --- Stage: assemble ../VOC/{images,labels}/{train,val} and clean up ---
# train = VOC2007 train+val and VOC2012 train+val; val = VOC2007 test.
cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt
cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt

# Quoted delimiter: the Python source is passed through unexpanded.
# "|| exit 1": never reach the cleanup below unless the copy succeeded.
python3 - "$@" <<'END' || exit 1
import os
import shutil
import sys

OUT_ROOT = '../VOC'


def label_for(image_path):
    """Return the labels/<id>.txt path matching a JPEGImages/<id>.jpg path."""
    voc_dir = os.path.dirname(os.path.dirname(image_path))
    stem = os.path.splitext(os.path.basename(image_path))[0]
    return os.path.join(voc_dir, 'labels', stem + '.txt')


def copy_split(list_file, split):
    """Copy the images listed in list_file, and their labels, into a split.

    list_file: text file with one image path per line.
    split:     sub-directory name under images/ and labels/ ('train', 'val').

    Returns (images_copied, labels_copied). Listed files that do not exist
    are skipped; images and labels are checked independently.
    """
    image_dir = os.path.join(OUT_ROOT, 'images', split)
    label_dir = os.path.join(OUT_ROOT, 'labels', split)
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    n_images = n_labels = 0
    with open(list_file) as f:
        for line in f:
            # Use each listed path exactly as written, so the copy does not
            # depend on how deep the working directory sits.
            image = line.strip()
            if not image:
                continue
            if os.path.exists(image):
                shutil.copy(image, image_dir)
                n_images += 1
            label = label_for(image)
            if os.path.exists(label):
                shutil.copy(label, label_dir)
                n_labels += 1
    return n_images, n_labels


for list_file, split in (('train.txt', 'train'), ('2007_test.txt', 'val')):
    n_images, n_labels = copy_split(list_file, split)
    print('%s: copied %d images and %d labels' % (split, n_images, n_labels))
    if n_images == 0:
        # Exit non-zero so the caller keeps the downloaded data.
        sys.exit('no images copied for split %r; keeping downloaded data' % split)

END

# Only reached when the copy above succeeded.
rm -rf ../data