# File size: 8,440 Bytes
# 6e14436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Copyright (c) Facebook, Inc. and its affiliates.
import argparse
import json
from collections import defaultdict

# Synset assigned to each COCO category id, taken from the official LVIS
# mapping:
# https://github.com/lvis-dataset/lvis-api/blob/master/data/coco_to_synset.json
# Each entry is a dict with the keys "synset" and "coco_cat_id".
# Category id 58 uses "sausage.n.01"; the alternative "frank.n.02" entry is
# intentionally left out.
COCO_SYNSET_CATEGORIES = [
    {"synset": synset, "coco_cat_id": coco_cat_id}
    for synset, coco_cat_id in (
        ("person.n.01", 1),
        ("bicycle.n.01", 2),
        ("car.n.01", 3),
        ("motorcycle.n.01", 4),
        ("airplane.n.01", 5),
        ("bus.n.01", 6),
        ("train.n.01", 7),
        ("truck.n.01", 8),
        ("boat.n.01", 9),
        ("traffic_light.n.01", 10),
        ("fireplug.n.01", 11),
        ("stop_sign.n.01", 13),
        ("parking_meter.n.01", 14),
        ("bench.n.01", 15),
        ("bird.n.01", 16),
        ("cat.n.01", 17),
        ("dog.n.01", 18),
        ("horse.n.01", 19),
        ("sheep.n.01", 20),
        ("beef.n.01", 21),
        ("elephant.n.01", 22),
        ("bear.n.01", 23),
        ("zebra.n.01", 24),
        ("giraffe.n.01", 25),
        ("backpack.n.01", 27),
        ("umbrella.n.01", 28),
        ("bag.n.04", 31),
        ("necktie.n.01", 32),
        ("bag.n.06", 33),
        ("frisbee.n.01", 34),
        ("ski.n.01", 35),
        ("snowboard.n.01", 36),
        ("ball.n.06", 37),
        ("kite.n.03", 38),
        ("baseball_bat.n.01", 39),
        ("baseball_glove.n.01", 40),
        ("skateboard.n.01", 41),
        ("surfboard.n.01", 42),
        ("tennis_racket.n.01", 43),
        ("bottle.n.01", 44),
        ("wineglass.n.01", 46),
        ("cup.n.01", 47),
        ("fork.n.01", 48),
        ("knife.n.01", 49),
        ("spoon.n.01", 50),
        ("bowl.n.03", 51),
        ("banana.n.02", 52),
        ("apple.n.01", 53),
        ("sandwich.n.01", 54),
        ("orange.n.01", 55),
        ("broccoli.n.01", 56),
        ("carrot.n.01", 57),
        ("sausage.n.01", 58),
        ("pizza.n.01", 59),
        ("doughnut.n.02", 60),
        ("cake.n.03", 61),
        ("chair.n.01", 62),
        ("sofa.n.01", 63),
        ("pot.n.04", 64),
        ("bed.n.01", 65),
        ("dining_table.n.01", 67),
        ("toilet.n.02", 70),
        ("television_receiver.n.01", 72),
        ("laptop.n.01", 73),
        ("mouse.n.04", 74),
        ("remote_control.n.01", 75),
        ("computer_keyboard.n.01", 76),
        ("cellular_telephone.n.01", 77),
        ("microwave.n.02", 78),
        ("oven.n.01", 79),
        ("toaster.n.02", 80),
        ("sink.n.01", 81),
        ("electric_refrigerator.n.01", 82),
        ("book.n.01", 84),
        ("clock.n.01", 85),
        ("vase.n.01", 86),
        ("scissors.n.01", 87),
        ("teddy.n.01", 88),
        ("hand_blower.n.01", 89),
        ("toothbrush.n.01", 90),
    )
]

def map_name(x):
    """Normalize a category/synonym name for matching against captions.

    Underscores become spaces, everything from the first '(' onwards is
    dropped, and the result is lower-cased with surrounding whitespace
    removed.
    """
    # partition() returns the whole string as the head when no '(' is present.
    head, _, _ = x.replace('_', ' ').partition('(')
    return head.lower().strip()

if __name__ == '__main__':
    # Tags every image of a caption dataset with the categories whose
    # synonyms occur in its caption(s), then writes the result as JSON.
    #
    # Flags:
    #   --cc_ann           input annotation JSON (needs 'images' and
    #                      'categories'; also 'annotations' when
    #                      --convert_caption is given)
    #   --out_path         output JSON path
    #   --keep_images      keep images that matched no category; the output
    #                      file name then gets a '_full.json' suffix
    #   --allcaps          match against all captions of an image instead of
    #                      only the first one
    #   --cat_path         optional JSON file whose 'categories' replace the
    #                      ones from --cc_ann
    #   --convert_caption  build each image's 'captions' list from the
    #                      'annotations' entries
    parser = argparse.ArgumentParser()
    parser.add_argument('--cc_ann', default='datasets/cc3m/train_image_info.json')
    parser.add_argument('--out_path', default='datasets/cc3m/train_image_info_tags.json')
    parser.add_argument('--keep_images', action='store_true')
    parser.add_argument('--allcaps', action='store_true')
    parser.add_argument('--cat_path', default='')
    parser.add_argument('--convert_caption', action='store_true')
    args = parser.parse_args()

    # Context managers make sure the input file handles are closed.
    with open(args.cc_ann, 'r') as f:
        cc_data = json.load(f)

    if args.convert_caption:
        # Group the caption annotations by image and attach them to the images.
        num_caps = 0
        caps = defaultdict(list)
        for x in cc_data['annotations']:
            caps[x['image_id']].append(x['caption'])
        for x in cc_data['images']:
            x['captions'] = caps[x['id']]
            num_caps += len(x['captions'])
        print('# captions', num_caps)

    if args.cat_path != '':
        print('Loading', args.cat_path)
        with open(args.cat_path) as f:
            cats = json.load(f)['categories']
        if 'synonyms' not in cats[0]:
            # The replacement categories carry no synonyms: look them up via
            # COCO id -> synset -> synonyms of the categories in --cc_ann.
            cocoid2synset = {x['coco_cat_id']: x['synset']
                             for x in COCO_SYNSET_CATEGORIES}
            synset2synonyms = {x['synset']: x['synonyms']
                               for x in cc_data['categories']}
            for x in cats:
                x['synonyms'] = synset2synonyms[cocoid2synset[x['id']]]
                x['frequency'] = 'f'
        cc_data['categories'] = cats

    id2cat = {x['id']: x for x in cc_data['categories']}
    class_count = {x['id']: 0 for x in cc_data['categories']}
    # Every synonym is normalized and padded with one space on each side so
    # that a plain substring test only hits names surrounded by spaces.
    class_data = {x['id']: [' ' + map_name(xx) + ' ' for xx in x['synonyms']]
                  for x in cc_data['categories']}
    # Up to num_examples matching captions are collected per category.
    num_examples = 5
    examples = {x['id']: [] for x in cc_data['categories']}

    print('class_data', class_data)

    images = []
    for i, x in enumerate(cc_data['images']):
        if i % 10000 == 0:
            print(i, len(cc_data['images']))
        captions = x['captions']
        if args.allcaps:
            caption = (' '.join(captions)).lower()
        else:
            # An image without captions (possible after --convert_caption)
            # is treated as having an empty caption instead of raising
            # IndexError.
            caption = captions[0].lower() if captions else ''
        x['pos_category_ids'] = []
        for cat_id, cat_names in class_data.items():
            # Besides the padded substring test, also accept a name at the
            # very start (no leading space) or very end (no trailing space)
            # of the caption.
            found = any(
                c in caption
                or caption.startswith(c[1:])
                or caption.endswith(c[:-1])
                for c in cat_names
            )
            if found:
                x['pos_category_ids'].append(cat_id)
                class_count[cat_id] += 1
                if len(examples[cat_id]) < num_examples:
                    examples[cat_id].append(caption)
        if len(x['pos_category_ids']) > 0 or args.keep_images:
            images.append(x)

    # Per-category match counts, followed by the categories never matched.
    zero_class = []
    for cat_id, count in class_count.items():
        print(id2cat[cat_id]['name'], count, end=', ')
        if count == 0:
            zero_class.append(id2cat[cat_id])
    print('==')
    print('zero class', zero_class)

    for freq in ['r', 'c', 'f']:
        print('#Images', freq, sum(v for k, v in class_count.items()
                                   if id2cat[k]['frequency'] == freq))

    out_data = {'images': images, 'categories': cc_data['categories'],
                'annotations': []}
    for k, v in out_data.items():
        print(k, len(v))
    if args.keep_images and not args.out_path.endswith('_full.json'):
        # Only strip the extension when it really is '.json'; otherwise the
        # suffix is appended to the path unchanged.
        if args.out_path.endswith('.json'):
            stem = args.out_path[:-5]
        else:
            stem = args.out_path
        args.out_path = stem + '_full.json'
    print('Writing to', args.out_path)
    try:
        with open(args.out_path, 'w') as f:
            json.dump(out_data, f)
    except (OSError, TypeError, ValueError) as err:
        # A failed write must not look like success: report it and re-raise
        # so the script exits with an error.
        print('Failed to write', args.out_path, '-', err)
        raise