myasin
committed on
Commit
·
3cc12b1
1
Parent(s):
2e14bbc
Fixed issues with the coco-split utility
Browse files- Fixed the split failing when the info or license information is absent from the annotations file
- utils/cocosplit.py +16 -12
utils/cocosplit.py
CHANGED
@@ -6,19 +6,18 @@ import funcy
|
|
6 |
from sklearn.model_selection import train_test_split
|
7 |
|
8 |
parser = argparse.ArgumentParser(description='Splits COCO annotations file into training and test sets.')
|
9 |
-
parser.add_argument('
|
10 |
help='Path to COCO annotations file.')
|
11 |
-
parser.add_argument('train', type=str, help='Where to store COCO training annotations')
|
12 |
-
parser.add_argument('test', type=str, help='Where to store COCO test annotations')
|
13 |
-
parser.add_argument('
|
14 |
help="A percentage of a split; a number in (0, 1)")
|
15 |
parser.add_argument('--having-annotations', dest='having_annotations', action='store_true',
|
16 |
help='Ignore all images without annotations. Keep only these with at least one annotation')
|
17 |
|
18 |
-
def save_coco(file,
|
19 |
with open(file, 'wt', encoding='UTF-8') as coco:
|
20 |
-
json.dump(
|
21 |
-
'annotations': annotations, 'categories': categories}, coco, indent=2, sort_keys=True)
|
22 |
|
23 |
def filter_annotations(annotations, images):
|
24 |
image_ids = funcy.lmap(lambda i: int(i['id']), images)
|
@@ -33,11 +32,9 @@ def main(annotation_path,
|
|
33 |
|
34 |
with open(annotation_path, 'rt', encoding='UTF-8') as annotations:
|
35 |
coco = json.load(annotations)
|
36 |
-
|
37 |
-
licenses = coco['licenses']
|
38 |
images = coco['images']
|
39 |
annotations = coco['annotations']
|
40 |
-
categories = coco['categories']
|
41 |
|
42 |
number_of_images = len(images)
|
43 |
|
@@ -48,8 +45,15 @@ def main(annotation_path,
|
|
48 |
|
49 |
x, y = train_test_split(images, train_size=split_ratio, random_state=random_state)
|
50 |
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
print("Saved {} entries in {} and {} in {}".format(len(x), train_save_path, len(y), test_save_path))
|
55 |
|
|
|
6 |
from sklearn.model_selection import train_test_split
|
7 |
|
8 |
parser = argparse.ArgumentParser(description='Splits COCO annotations file into training and test sets.')
|
9 |
+
parser.add_argument('--annotation_path', metavar='coco_annotations', type=str,
|
10 |
help='Path to COCO annotations file.')
|
11 |
+
parser.add_argument('--train', type=str, help='Where to store COCO training annotations')
|
12 |
+
parser.add_argument('--test', type=str, help='Where to store COCO test annotations')
|
13 |
+
parser.add_argument('--s', dest='split_ratio', type=float, required=True,
|
14 |
help="A percentage of a split; a number in (0, 1)")
|
15 |
parser.add_argument('--having-annotations', dest='having_annotations', action='store_true',
|
16 |
help='Ignore all images without annotations. Keep only these with at least one annotation')
|
17 |
|
18 |
+
def save_coco(file, tagged_data):
|
19 |
with open(file, 'wt', encoding='UTF-8') as coco:
|
20 |
+
json.dump(tagged_data, coco, indent=2, sort_keys=True)
|
|
|
21 |
|
22 |
def filter_annotations(annotations, images):
|
23 |
image_ids = funcy.lmap(lambda i: int(i['id']), images)
|
|
|
32 |
|
33 |
with open(annotation_path, 'rt', encoding='UTF-8') as annotations:
|
34 |
coco = json.load(annotations)
|
35 |
+
|
|
|
36 |
images = coco['images']
|
37 |
annotations = coco['annotations']
|
|
|
38 |
|
39 |
number_of_images = len(images)
|
40 |
|
|
|
45 |
|
46 |
x, y = train_test_split(images, train_size=split_ratio, random_state=random_state)
|
47 |
|
48 |
+
# Train Data
|
49 |
+
coco.update({'images': x,
|
50 |
+
'annotations': filter_annotations(annotations, x)})
|
51 |
+
save_coco(train_save_path, coco)
|
52 |
+
|
53 |
+
# Test Data
|
54 |
+
coco.update({'images': y,
|
55 |
+
'annotations': filter_annotations(annotations, y)})
|
56 |
+
save_coco(test_save_path, coco)
|
57 |
|
58 |
print("Saved {} entries in {} and {} in {}".format(len(x), train_save_path, len(y), test_save_path))
|
59 |
|