hasibzunair's picture
added files
4a285f6
raw history blame
No virus
3.78 kB
import re
import datetime
import numpy as np
from itertools import groupby
from skimage import measure
from PIL import Image
from pycocotools import mask
convert = lambda text: int(text) if text.isdigit() else text.lower()
natrual_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
def resize_binary_mask(array, new_size):
image = Image.fromarray(array.astype(np.uint8) * 255)
image = image.resize(new_size)
return np.asarray(image).astype(np.bool_)
def close_contour(contour):
if not np.array_equal(contour[0], contour[-1]):
contour = np.vstack((contour, contour[0]))
return contour
def binary_mask_to_rle(binary_mask):
rle = {'counts': [], 'size': list(binary_mask.shape)}
counts = rle.get('counts')
for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))):
if i == 0 and value == 1:
counts.append(0)
counts.append(len(list(elements)))
return rle
def binary_mask_to_polygon(binary_mask, tolerance=0):
"""Converts a binary mask to COCO polygon representation
Args:
binary_mask: a 2D binary numpy array where '1's represent the object
tolerance: Maximum distance from original points of polygon to approximated
polygonal chain. If tolerance is 0, the original coordinate array is returned.
"""
polygons = []
# pad mask to close contours of shapes which start and end at an edge
padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)
contours = measure.find_contours(padded_binary_mask, 0.5)
contours = np.subtract(contours, 1)
for contour in contours:
contour = close_contour(contour)
contour = measure.approximate_polygon(contour, tolerance)
if len(contour) < 3:
continue
contour = np.flip(contour, axis=1)
segmentation = contour.ravel().tolist()
# after padding and subtracting 1 we may get -0.5 points in our segmentation
segmentation = [0 if i < 0 else i for i in segmentation]
polygons.append(segmentation)
return polygons
def create_image_info(image_id, file_name, image_size,
date_captured=datetime.datetime.utcnow().isoformat(' '),
license_id=1, coco_url="", flickr_url=""):
image_info = {
"id": image_id,
"file_name": file_name,
"width": image_size[0],
"height": image_size[1],
"date_captured": date_captured,
"license": license_id,
"coco_url": coco_url,
"flickr_url": flickr_url
}
return image_info
def create_annotation_info(annotation_id, image_id, category_info, binary_mask,
image_size=None, tolerance=2, bounding_box=None):
if image_size is not None:
binary_mask = resize_binary_mask(binary_mask, image_size)
binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
area = mask.area(binary_mask_encoded)
if area < 1:
return None
if bounding_box is None:
bounding_box = mask.toBbox(binary_mask_encoded)
if category_info["is_crowd"]:
is_crowd = 1
segmentation = binary_mask_to_rle(binary_mask)
else:
is_crowd = 0
segmentation = binary_mask_to_polygon(binary_mask, tolerance)
if not segmentation:
return None
annotation_info = {
"id": annotation_id,
"image_id": image_id,
"category_id": category_info["id"],
"iscrowd": is_crowd,
"area": area.tolist(),
"bbox": bounding_box.tolist(),
"segmentation": segmentation,
"width": binary_mask.shape[1],
"height": binary_mask.shape[0],
}
return annotation_info