# --------------------------------------------------------
# SiamMask
# Licensed under The MIT License
# Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import json
import os
import numpy as np
import cv2
from os.path import join
def parse_args():
parser = argparse.ArgumentParser(description='Convert dataset')
parser.add_argument('--outdir', default='./', type=str,
help="output dir for json files")
parser.add_argument('--datadir', default='./', type=str,
help="data dir for annotations to be converted")
return parser.parse_args()
def xyxy_to_xywh(xyxy):
"""Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format."""
if isinstance(xyxy, (list, tuple)):
# Single box given as a list of coordinates
assert len(xyxy) == 4
x1, y1 = xyxy[0], xyxy[1]
w = xyxy[2] - x1 + 1
h = xyxy[3] - y1 + 1
return (x1, y1, w, h)
elif isinstance(xyxy, np.ndarray):
# Multiple boxes given as a 2D ndarray
return np.hstack((xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1))
else:
raise TypeError('Argument xyxy must be a list, tuple, or numpy array.')
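# Illustrative example (not part of the original script): xyxy_to_xywh([10, 20, 30, 50])
# returns (10, 20, 21, 31); width and height use the inclusive-pixel convention
# (x2 - x1 + 1, y2 - y1 + 1).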
def polys_to_boxes(polys):
"""Convert a list of polygons into an array of tight bounding boxes."""
boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)
for i in range(len(polys)):
poly = polys[i]
x0 = min(min(p[::2]) for p in poly)
x1 = max(max(p[::2]) for p in poly)
y0 = min(min(p[1::2]) for p in poly)
y1 = max(max(p[1::2]) for p in poly)
boxes_from_polys[i, :] = [x0, y0, x1, y1]
return boxes_from_polys
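# Illustrative example (hypothetical input): polys_to_boxes([[[1, 2, 5, 2, 5, 7]]])
# yields array([[1., 2., 5., 7.]], dtype=float32), the tight [x1, y1, x2, y2] box
# around the polygon's vertices; the outer list holds one polygon group per instance.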
class Instance(object):
    """A single labeled object instance in an annotation mask: its label id and pixel count."""
    instID = 0
    pixelCount = 0
def __init__(self, imgNp, instID):
        if instID == 0:
            # label 0 is background, not an object instance
            return
self.instID = int(instID)
self.pixelCount = int(self.getInstancePixels(imgNp, instID))
def getInstancePixels(self, imgNp, instLabel):
return (imgNp == instLabel).sum()
def toDict(self):
buildDict = {}
buildDict["instID"] = self.instID
buildDict["pixelCount"] = self.pixelCount
return buildDict
def __str__(self):
return "("+str(self.instID)+")"
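# Illustrative example (hypothetical mask): for a 2x2 label image
# np.array([[0, 3], [3, 3]]), Instance(img, 3).toDict() gives
# {'instID': 3, 'pixelCount': 3}.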
def convert_ytb_vos(data_dir, out_dir):
    """Convert YouTube-VOS annotation masks under data_dir into per-video JSON instance files in out_dir."""
    sets = ['train']
    ann_dirs = ['train/Annotations/']
    json_name = 'instances_%s.json'
num_obj = 0
num_ann = 0
for data_set, ann_dir in zip(sets, ann_dirs):
print('Starting %s' % data_set)
ann_dict = {}
ann_dir = os.path.join(data_dir, ann_dir)
        with open(os.path.join(ann_dir, '../meta.json')) as f:
            json_ann = json.load(f)
for vid, video in enumerate(json_ann['videos']):
v = json_ann['videos'][video]
frames = []
for obj in v['objects']:
o = v['objects'][obj]
frames.extend(o['frames'])
frames = sorted(set(frames))
annotations = []
instanceIds = []
for frame in frames:
file_name = join(video, frame)
fullname = os.path.join(ann_dir, file_name+'.png')
                # read the annotation mask as a single-channel label image
                img = cv2.imread(fullname, 0)
h, w = img.shape[:2]
objects = dict()
for instanceId in np.unique(img):
if instanceId == 0:
continue
instanceObj = Instance(img, instanceId)
instanceObj_dict = instanceObj.toDict()
mask = (img == instanceId).astype(np.uint8)
                    # cv2.findContours returns (contours, hierarchy) in OpenCV 4.x and
                    # (image, contours, hierarchy) in 3.x; [-2] picks the contours in both.
                    contour = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
polygons = [c.reshape(-1).tolist() for c in contour]
instanceObj_dict['contours'] = [p for p in polygons if len(p) > 4]
                    # keep the instance only if it has valid contours and covers more than 1000 pixels
                    if len(instanceObj_dict['contours']) and instanceObj_dict['pixelCount'] > 1000:
objects[instanceId] = instanceObj_dict
# else:
# cv2.imshow("disappear?", mask)
# cv2.waitKey(0)
for objId in objects:
if len(objects[objId]) == 0:
continue
obj = objects[objId]
len_p = [len(p) for p in obj['contours']]
                    if min(len_p) <= 4:
                        print('Warning: invalid contours.')
                        continue  # skip instances whose contours are degenerate
ann = dict()
ann['h'] = h
ann['w'] = w
ann['file_name'] = file_name
ann['id'] = int(objId)
# ann['segmentation'] = obj['contours']
# ann['iscrowd'] = 0
ann['area'] = obj['pixelCount']
ann['bbox'] = xyxy_to_xywh(polys_to_boxes([obj['contours']])).tolist()[0]
annotations.append(ann)
instanceIds.append(objId)
num_ann += 1
instanceIds = sorted(set(instanceIds))
num_obj += len(instanceIds)
video_ann = {str(iId): [] for iId in instanceIds}
for ann in annotations:
video_ann[str(ann['id'])].append(ann)
ann_dict[video] = video_ann
if vid % 50 == 0 and vid != 0:
                print("processed %d videos" % (vid+1))
print("Num Videos: %d" % len(ann_dict))
print("Num Objects: %d" % num_obj)
print("Num Annotations: %d" % num_ann)
        # split videos into train/val: the first 3000 videos for training, the rest for validation
        items = list(ann_dict.items())
        train_dict = dict(items[:3000])
        val_dict = dict(items[3000:])
with open(os.path.join(out_dir, json_name % 'train'), 'w') as outfile:
json.dump(train_dict, outfile)
with open(os.path.join(out_dir, json_name % 'val'), 'w') as outfile:
json.dump(val_dict, outfile)
if __name__ == '__main__':
args = parse_args()
convert_ytb_vos(args.datadir, args.outdir)
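# Example invocation (paths are placeholders; the layout below follows the code above,
# i.e. masks at <datadir>/train/Annotations/<video>/<frame>.png and <datadir>/train/meta.json):
#   python <this script> --datadir /path/to/ytb_vos --outdir ./data/ytb_vos
# This writes instances_train.json (first 3000 videos) and instances_val.json (the rest)
# into the directory given by --outdir.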