import datetime
import re
from itertools import groupby

import numpy as np
from PIL import Image
from pycocotools import mask
from skimage import measure


def convert(text):
    """Return ``int(text)`` for an all-digit string, else ``text`` lower-cased."""
    return int(text) if text.isdigit() else text.lower()


def natrual_key(key):
    """Build a sort key that orders embedded numbers numerically.

    The string is split on runs of ASCII digits and every piece is passed
    through :func:`convert`, e.g. ``"img10"`` -> ``["img", 10, ""]``.

    NOTE: the name is misspelled ("natrual"); it is kept unchanged so that
    existing callers keep working.
    """
    return [convert(c) for c in re.split('([0-9]+)', key)]


def resize_binary_mask(array, new_size):
    """Resize a binary mask and return it as a boolean array.

    Args:
        array: binary numpy array; it is cast to uint8 and scaled to 0/255
            before being handed to PIL.
        new_size: target size, passed unchanged to ``PIL.Image.resize``
            (PIL expects ``(width, height)``).

    Returns:
        Boolean numpy array of the resized mask; every non-zero pixel of the
        resized image becomes ``True``.
    """
    image = Image.fromarray(array.astype(np.uint8) * 255)
    image = image.resize(new_size)
    return np.asarray(image).astype(np.bool_)


def close_contour(contour):
    """Return ``contour`` with its first point appended if it is not closed.

    A contour whose first and last points already match is returned as is.
    """
    if not np.array_equal(contour[0], contour[-1]):
        contour = np.vstack((contour, contour[0]))
    return contour


def binary_mask_to_rle(binary_mask):
    """Encode a binary mask as an uncompressed run-length dictionary.

    Args:
        binary_mask: binary numpy array (values 0/1 or False/True).

    Returns:
        ``{'counts': [...], 'size': [...]}`` where ``size`` is
        ``list(binary_mask.shape)`` and ``counts`` holds the lengths of the
        alternating runs found while scanning the mask in column-major
        (Fortran) order. The first count always refers to a run of zeros, so
        a leading ``0`` is emitted when the mask starts with a one.
    """
    counts = []
    rle = {'counts': counts, 'size': list(binary_mask.shape)}
    runs = groupby(binary_mask.ravel(order='F'))
    for i, (value, elements) in enumerate(runs):
        if i == 0 and value == 1:
            # Mask starts with foreground: record an empty run of zeros first.
            counts.append(0)
        counts.append(sum(1 for _ in elements))
    return rle


def binary_mask_to_polygon(binary_mask, tolerance=0):
    """Converts a binary mask to COCO polygon representation

    Args:
        binary_mask: a 2D binary numpy array where '1's represent the object
        tolerance: Maximum distance from original points of polygon to
            approximated polygonal chain. If tolerance is 0, the original
            coordinate array is returned.

    Returns:
        A list of polygons, each a flat list ``[x0, y0, x1, y1, ...]``.
        Contours that have fewer than three points after approximation are
        skipped.
    """
    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(
        binary_mask, pad_width=1, mode='constant', constant_values=0)
    contours = measure.find_contours(padded_binary_mask, 0.5)
    for contour in contours:
        # Undo the one-pixel padding offset. This is done per contour because
        # contours generally have different lengths, so the list of contours
        # cannot be turned into a single rectangular array.
        contour = contour - 1
        contour = close_contour(contour)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        # find_contours yields (row, col); swap columns to get (x, y)
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # after padding and subtracting 1 we may get -0.5 points in our
        # segmentation
        segmentation = [0 if i < 0 else i for i in segmentation]
        polygons.append(segmentation)

    return polygons


def create_image_info(image_id, file_name, image_size,
                      date_captured=None,
                      license_id=1, coco_url="", flickr_url=""):
    """Build the dictionary describing one image.

    Args:
        image_id: value stored under ``"id"``.
        file_name: value stored under ``"file_name"``.
        image_size: indexable pair; element 0 is stored as ``"width"`` and
            element 1 as ``"height"``.
        date_captured: capture timestamp. When omitted (``None``) the current
            UTC time is used, formatted as a naive ISO 8601 string with a
            space separator. It is resolved on every call rather than once
            at import time.
        license_id: value stored under ``"license"``.
        coco_url: value stored under ``"coco_url"``.
        flickr_url: value stored under ``"flickr_url"``.

    Returns:
        The image-info dictionary.
    """
    if date_captured is None:
        # Same string format as the former utcnow().isoformat(' ') default.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        date_captured = now_utc.replace(tzinfo=None).isoformat(' ')

    image_info = {
        "id": image_id,
        "file_name": file_name,
        "width": image_size[0],
        "height": image_size[1],
        "date_captured": date_captured,
        "license": license_id,
        "coco_url": coco_url,
        "flickr_url": flickr_url
    }

    return image_info


def create_annotation_info(annotation_id, image_id, category_info,
                           binary_mask, image_size=None, tolerance=2,
                           bounding_box=None):
    """Build the annotation dictionary for one object mask.

    Args:
        annotation_id: value stored under ``"id"``.
        image_id: value stored under ``"image_id"``.
        category_info: mapping with the keys ``"id"`` and ``"is_crowd"``.
        binary_mask: 2D binary numpy array of the object.
        image_size: if given, the mask is first resized with
            :func:`resize_binary_mask` to this size.
        tolerance: polygon approximation tolerance, forwarded to
            :func:`binary_mask_to_polygon` for non-crowd annotations.
        bounding_box: optional precomputed box (numpy array, list or tuple).
            When omitted it is derived from the mask with
            ``pycocotools.mask.toBbox``.

    Returns:
        The annotation dictionary, or ``None`` when the mask area is below
        one pixel or when no polygon could be extracted for a non-crowd
        annotation.
    """
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    # pycocotools expects a Fortran-ordered uint8 array
    binary_mask_encoded = mask.encode(
        np.asfortranarray(binary_mask.astype(np.uint8)))

    area = mask.area(binary_mask_encoded)
    if area < 1:
        return None

    if bounding_box is None:
        bounding_box = mask.toBbox(binary_mask_encoded)

    if category_info["is_crowd"]:
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        is_crowd = 0
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None

    annotation_info = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": is_crowd,
        "area": area.tolist(),
        # asarray lets callers pass a plain list/tuple as well as an ndarray
        "bbox": np.asarray(bounding_box).tolist(),
        "segmentation": segmentation,
        "width": binary_mask.shape[1],
        "height": binary_mask.shape[0],
    }

    return annotation_info