# Spaces: oguzakif / video-object-remover (Sleeping)
# File size: 6,324 Bytes
# d4b77ac

# --------------------------------------------------------
# SiamMask
# Licensed under The MIT License
# Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import h5py
import json
import os
import scipy.misc
import sys
import numpy as np
import cv2
from os.path import join


def parse_args():
    """Parse the command-line options of the conversion script.

    Returns:
        argparse.Namespace with two string attributes, ``outdir`` and
        ``datadir``; both default to ``'./'``.
    """
    cli = argparse.ArgumentParser(description='Convert dataset')
    # (flag, help text) pairs; every option is a string defaulting to './'.
    path_options = (
        ('--outdir', "output dir for json files"),
        ('--datadir', "data dir for annotations to be converted"),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, default='./', type=str, help=help_text)
    return cli.parse_args()


def xyxy_to_xywh(xyxy):
    """Convert boxes from [x1 y1 x2 y2] corners to [x1 y1 w h] format.

    Width and height are computed inclusively (``x2 - x1 + 1``).

    Args:
        xyxy: either one box as a 4-element list/tuple, or several boxes as
            an ndarray indexed as ``[:, 0:4]``.

    Returns:
        A ``(x1, y1, w, h)`` tuple for a list/tuple input, or an ndarray
        with the converted boxes stacked row-wise for an ndarray input.

    Raises:
        TypeError: if ``xyxy`` is not a list, tuple, or ndarray.
    """
    if isinstance(xyxy, np.ndarray):
        # Batch of boxes: keep the top-left corner, turn the bottom-right
        # corner into inclusive extents.
        top_left = xyxy[:, 0:2]
        extents = xyxy[:, 2:4] - top_left + 1
        return np.hstack((top_left, extents))

    if not isinstance(xyxy, (list, tuple)):
        raise TypeError('Argument xyxy must be a list, tuple, or numpy array.')

    # Single box given as a flat sequence of four coordinates.
    assert len(xyxy) == 4
    left, top, right, bottom = xyxy[0], xyxy[1], xyxy[2], xyxy[3]
    return (left, top, right - left + 1, bottom - top + 1)


def polys_to_boxes(polys):
    """Compute the tight [x0 y0 x1 y1] bounding box of each polygon.

    Args:
        polys: sequence of polygons; each polygon is a sequence of parts,
            and each part is a flat ``[x, y, x, y, ...]`` coordinate list.

    Returns:
        float32 ndarray of shape ``(len(polys), 4)``, one box per polygon.
    """
    boxes = np.zeros((len(polys), 4), dtype=np.float32)
    for row, parts in enumerate(polys):
        # Even offsets hold x coordinates, odd offsets hold y coordinates.
        xs_per_part = [part[::2] for part in parts]
        ys_per_part = [part[1::2] for part in parts]
        left = min(map(min, xs_per_part))
        right = max(map(max, xs_per_part))
        top = min(map(min, ys_per_part))
        bottom = max(map(max, ys_per_part))
        boxes[row, :] = [left, top, right, bottom]
    return boxes


class Instance(object):
    """Summary of one labelled instance in an annotation image.

    Stores the instance label and the number of pixels carrying that label.
    Constructing with label 0 leaves both fields at their class-level
    defaults of 0.
    """

    # Class-level defaults, used when the constructor is given label 0.
    instID = 0
    pixelCount = 0

    def __init__(self, imgNp, instID):
        """Record ``instID`` and count its pixels in ``imgNp``.

        Args:
            imgNp: label image supporting element-wise ``==`` and ``.sum()``.
            instID: label value to summarise; 0 leaves the defaults in place.
        """
        if instID != 0:
            self.instID = int(instID)
            self.pixelCount = int(self.getInstancePixels(imgNp, instID))

    def getInstancePixels(self, imgNp, instLabel):
        """Return how many elements of ``imgNp`` equal ``instLabel``."""
        matches = imgNp == instLabel
        return matches.sum()

    def toDict(self):
        """Return the instance as a dict with 'instID' and 'pixelCount'."""
        return {"instID": self.instID, "pixelCount": self.pixelCount}

    def __str__(self):
        return "({0})".format(str(self.instID))


def _frame_annotations(ann_dir, file_name, min_pixels):
    """Build the per-object annotation dicts for one annotation frame.

    Args:
        ann_dir: directory containing the per-video annotation folders.
        file_name: '<video>/<frame>' path of the frame, without extension.
        min_pixels: an object is kept only if its mask covers strictly more
            than this many pixels.

    Returns:
        List of dicts with keys 'h', 'w', 'file_name', 'id', 'area' and
        'bbox' ([x, y, w, h]), one per object kept in this frame.

    Raises:
        IOError: if the annotation image cannot be read.
    """
    fullname = os.path.join(ann_dir, file_name + '.png')
    img = cv2.imread(fullname, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of an AttributeError further down.
        raise IOError('Cannot read annotation image: %s' % fullname)
    h, w = img.shape[:2]

    frame_anns = []
    for instance_id in np.unique(img):
        if instance_id == 0:
            continue  # pixel value 0 is never treated as an object
        area = Instance(img, instance_id).pixelCount
        if area <= min_pixels:
            continue  # too small; skip before the costlier contour tracing

        mask = (img == instance_id).astype(np.uint8)
        # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
        # (contours, hierarchy) on 2.x/4.x; the contours are always the
        # second-to-last element.
        contours = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
        # Flatten each contour to [x, y, x, y, ...] and drop those with two
        # points or fewer.
        polygons = [c.reshape(-1).tolist() for c in contours]
        polygons = [p for p in polygons if len(p) > 4]
        if not polygons:
            continue

        ann = dict()
        ann['h'] = h
        ann['w'] = w
        ann['file_name'] = file_name
        ann['id'] = int(instance_id)
        ann['area'] = area
        # One box around all contour parts of the object, as [x, y, w, h].
        ann['bbox'] = xyxy_to_xywh(polys_to_boxes([polygons])).tolist()[0]
        frame_anns.append(ann)
    return frame_anns


def convert_ytb_vos(data_dir, out_dir, num_train=3000, min_pixels=1000):
    """Convert mask annotations under ``data_dir`` into per-video JSON files.

    Reads ``train/meta.json`` and the PNG masks below ``train/Annotations/``,
    extracts a bounding box for every sufficiently large object in every
    listed frame, and writes ``instances_train.json`` and
    ``instances_val.json`` into ``out_dir``. Each file maps
    video -> object id (as string) -> list of annotation dicts.

    Args:
        data_dir: dataset root containing the ``train`` folder.
        out_dir: existing directory that receives the two JSON files.
        num_train: number of leading videos (in ``ann_dict`` iteration
            order) written to the train file; the rest go to the val file.
        min_pixels: objects covering this many pixels or fewer in a frame
            are ignored for that frame.

    Raises:
        IOError: if an annotation image listed in meta.json cannot be read.
    """
    sets = ['train']
    ann_dirs = ['train/Annotations/']
    json_name = 'instances_%s.json'
    num_obj = 0
    num_ann = 0
    for data_set, ann_dir in zip(sets, ann_dirs):
        print('Starting %s' % data_set)
        ann_dict = {}
        ann_dir = os.path.join(data_dir, ann_dir)
        # Context manager so the metadata file handle is closed promptly.
        with open(os.path.join(ann_dir, '../meta.json')) as meta_file:
            json_ann = json.load(meta_file)

        for vid, video in enumerate(json_ann['videos']):
            v = json_ann['videos'][video]
            # Union of the frames of all objects, each frame visited once.
            frames = sorted({frame
                             for o in v['objects'].values()
                             for frame in o['frames']})

            annotations = []
            for frame in frames:
                file_name = os.path.join(video, frame)
                annotations.extend(
                    _frame_annotations(ann_dir, file_name, min_pixels))
            num_ann += len(annotations)

            # Group the annotations by object id, keys in ascending id order.
            instance_ids = sorted({ann['id'] for ann in annotations})
            num_obj += len(instance_ids)
            video_ann = {str(i_id): [] for i_id in instance_ids}
            for ann in annotations:
                video_ann[str(ann['id'])].append(ann)

            ann_dict[video] = video_ann
            if vid % 50 == 0 and vid != 0:
                print("process: %d video" % (vid+1))

        print("Num Videos: %d" % len(ann_dict))
        print("Num Objects: %d" % num_obj)
        print("Num Annotations: %d" % num_ann)

        # NOTE: the split follows dict iteration order, which is only
        # guaranteed to be insertion order on Python 3.7+.
        items = list(ann_dict.items())
        train_dict = dict(items[:num_train])
        val_dict = dict(items[num_train:])
        with open(os.path.join(out_dir, json_name % 'train'), 'w') as outfile:
            json.dump(train_dict, outfile)

        with open(os.path.join(out_dir, json_name % 'val'), 'w') as outfile:
            json.dump(val_dict, outfile)


if __name__ == '__main__':
    # Script entry point: read --datadir/--outdir and run the conversion.
    cli_args = parse_args()
    convert_ytb_vos(data_dir=cli_args.datadir, out_dir=cli_args.outdir)