# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import base64
import zlib

import numpy as np
import pandas as pd
from pycocotools import mask as coco_mask

from object_detection.core import standard_fields


def _to_normalized_box(mask_np):
  """Computes the normalized axis-parallel enclosing box of a binary mask.

  Args:
    mask_np: 2D np.ndarray of shape [height, width]; every non-zero entry is
      treated as part of the mask.

  Returns:
    a np.ndarray with 4 float values [YMin, XMin, YMax, XMax]: the tightest
    axis-parallel box around the non-zero entries, with y values divided by
    the mask height and x values divided by the mask width. The max edges are
    exclusive (last non-zero index + 1). If the mask has no non-zero entries,
    all four values are 0.0.
  """
  rows, cols = np.nonzero(mask_np)
  if rows.size == 0:
    return np.array([0.0, 0.0, 0.0, 0.0])

  height, width = mask_np.shape[0], mask_np.shape[1]
  # ndarray.min()/max() reduce in native code; the builtin min()/max() would
  # iterate over the index arrays element by element in Python.
  ymin = float(rows.min()) / height
  ymax = float(rows.max() + 1) / height
  xmin = float(cols.min()) / width
  xmax = float(cols.max() + 1) / width
  return np.array([ymin, xmin, ymax, xmax])


def _decode_raw_data_into_masks_and_boxes(segments, image_widths,
                                          image_heights):
  """Decodes binary segmentation masks into np.arrays and boxes.

  Args:
    segments: pandas Series object containing either None entries, or strings
      with base64, zlib compressed, COCO RLE-encoded binary masks. All masks
      are expected to be the same size.
    image_widths: pandas Series of mask widths.
    image_heights: pandas Series of mask heights.

  Returns:
    A tuple (masks, boxes):
      masks: np.ndarray of shape [N, height, width] holding the decoded masks;
        for the None values, zero arrays of the size of the first valid mask
        are created.
      boxes: np.ndarray of shape [N, 4] with the normalized enclosing box
        [YMin, XMin, YMax, XMax] of every mask; all zeros for None values.
    If the input contains only None values, masks is a uint8 zero array of
    shape [N, 1, 1] and boxes is a float32 zero array of shape [N, 4].
  """
  first_valid = segments.first_valid_index()
  if first_valid is None:
    # It does not matter which size we pick since no masks will ever be
    # evaluated.
    num_segments = segments.shape[0]
    return (np.zeros((num_segments, 1, 1), dtype=np.uint8),
            np.zeros((num_segments, 4), dtype=np.float32))

  # Placeholder size for missing masks, taken from the first valid row.
  empty_mask_shape = [
      1, int(image_heights[first_valid]), int(image_widths[first_valid])
  ]

  segment_masks = []
  segment_boxes = []
  for segment, im_width, im_height in zip(segments, image_widths,
                                          image_heights):
    if pd.isnull(segment):
      segment_masks.append(np.zeros(empty_mask_shape, dtype=np.uint8))
      segment_boxes.append(np.zeros((1, 4)))
      continue

    rle_encoded_mask = zlib.decompress(base64.b64decode(segment))
    decoding_dict = {
        # Cast to int, as is done for the placeholder size above: the size
        # columns can hold floats (e.g. when other rows contain NaN).
        'size': [int(im_height), int(im_width)],
        'counts': rle_encoded_mask
    }
    mask_tensor = coco_mask.decode(decoding_dict)
    segment_masks.append(np.expand_dims(mask_tensor, 0))
    segment_boxes.append(np.expand_dims(_to_normalized_box(mask_tensor), 0))

  return (np.concatenate(segment_masks, axis=0),
          np.concatenate(segment_boxes, axis=0))


def merge_boxes_and_masks(box_data, mask_data):
  """Outer-joins box annotations with mask annotations.

  Args:
    box_data: pandas DataFrame with box annotations.
    mask_data: pandas DataFrame with mask annotations.

  Returns:
    A pandas DataFrame with the outer join of both inputs on the columns
    LabelName, ImageID, XMin, XMax, YMin, YMax and IsGroupOf; rows present in
    only one of the inputs are kept, with missing values in the columns that
    come from the other input.
  """
  return pd.merge(
      box_data,
      mask_data,
      how='outer',
      on=['LabelName', 'ImageID', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf'])


def _map_labels_to_classes(labels, class_label_map):
  """Maps a pandas Series of label names to a numpy array of class ids.

  Args:
    labels: pandas Series of string label names.
    class_label_map: Class labelmap from string label name to an integer.

  Returns:
    A numpy array with one entry of `class_label_map` per entry of `labels`.
  """
  # Index the map explicitly instead of passing it to `Series.map`, so that an
  # unknown label fails loudly instead of silently turning into NaN.
  return labels.map(lambda name: class_label_map[name]).to_numpy()


def build_groundtruth_dictionary(data, class_label_map):
  """Builds a groundtruth dictionary from groundtruth data in CSV file.

  Args:
    data: Pandas DataFrame with the groundtruth data for a single image.
    class_label_map: Class labelmap from string label name to an integer.

  Returns:
    A dictionary with keys suitable for passing to
    OpenImagesDetectionChallengeEvaluator.add_single_ground_truth_image_info:
        standard_fields.InputDataFields.groundtruth_boxes: numpy array of
          shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of the
          format [ymin, xmin, ymax, xmax], taken verbatim from the YMin, XMin,
          YMax and XMax columns of the rows that have a box (non-null XMin).
        standard_fields.InputDataFields.groundtruth_classes: integer numpy
          array of shape [num_boxes] containing the groundtruth classes of the
          boxes, as given by `class_label_map`.
        standard_fields.InputDataFields.groundtruth_group_of: integer numpy
          array of shape [num_boxes] built from the IsGroupOf column, denoting
          whether a groundtruth box contains a group of instances.
        standard_fields.InputDataFields.groundtruth_image_classes: integer 1D
          numpy array containing the classes of all image-level labels (rows
          with a non-null ConfidenceImageLabel).
        standard_fields.InputDataFields.groundtruth_instance_masks: only
          present if `data` has a Mask column; numpy array with one decoded
          mask per groundtruth box.
  """
  data_location = data[data.XMin.notnull()]
  data_labels = data[data.ConfidenceImageLabel.notnull()]

  input_fields = standard_fields.InputDataFields
  # `to_numpy()` replaces `as_matrix()`, which was removed in pandas 1.0.
  dictionary = {
      input_fields.groundtruth_boxes:
          data_location[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy(),
      input_fields.groundtruth_classes:
          _map_labels_to_classes(data_location['LabelName'], class_label_map),
      input_fields.groundtruth_group_of:
          data_location['IsGroupOf'].to_numpy().astype(int),
      input_fields.groundtruth_image_classes:
          _map_labels_to_classes(data_labels['LabelName'], class_label_map),
  }

  if 'Mask' in data_location:
    # Groundtruth boxes come from the CSV columns, so the boxes derived from
    # the masks are not needed here.
    segments, _ = _decode_raw_data_into_masks_and_boxes(
        data_location['Mask'], data_location['ImageWidth'],
        data_location['ImageHeight'])
    dictionary[input_fields.groundtruth_instance_masks] = segments

  return dictionary


def build_predictions_dictionary(data, class_label_map):
  """Builds a predictions dictionary from predictions data in CSV file.

  Args:
    data: Pandas DataFrame with the predictions data for a single image.
    class_label_map: Class labelmap from string label name to an integer.

  Returns:
    Dictionary with keys suitable for passing to
    OpenImagesDetectionChallengeEvaluator.add_single_detected_image_info:
        standard_fields.DetectionResultFields.detection_boxes: numpy array of
          shape [num_boxes, 4] containing `num_boxes` detection boxes of the
          format [ymin, xmin, ymax, xmax]. If `data` has a Mask column, the
          boxes are the normalized enclosing boxes of the decoded masks;
          otherwise they are taken verbatim from the YMin, XMin, YMax and XMax
          columns.
        standard_fields.DetectionResultFields.detection_scores: numpy array of
          shape [num_boxes] containing detection scores for the boxes, taken
          from the Score column.
        standard_fields.DetectionResultFields.detection_classes: integer numpy
          array of shape [num_boxes] containing the detection classes of the
          boxes, as given by `class_label_map`.
        standard_fields.DetectionResultFields.detection_masks: only present if
          `data` has a Mask column; numpy array with one decoded mask per
          detection.
  """
  result_fields = standard_fields.DetectionResultFields
  # `to_numpy()` replaces `as_matrix()`, which was removed in pandas 1.0.
  dictionary = {
      result_fields.detection_classes:
          _map_labels_to_classes(data['LabelName'], class_label_map),
      result_fields.detection_scores:
          data['Score'].to_numpy(),
  }

  if 'Mask' in data:
    segments, boxes = _decode_raw_data_into_masks_and_boxes(
        data['Mask'], data['ImageWidth'], data['ImageHeight'])
    dictionary[result_fields.detection_masks] = segments
    dictionary[result_fields.detection_boxes] = boxes
  else:
    dictionary[result_fields.detection_boxes] = data[[
        'YMin', 'XMin', 'YMax', 'XMax'
    ]].to_numpy()

  return dictionary