# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for the COCO Instance AP metric."""

from absl import logging
import numpy as np
import tensorflow as tf

from deeplab2.evaluation import coco_instance_ap
from deeplab2.evaluation import test_utils

# See the definition of the color names at:
#   https://en.wikipedia.org/wiki/Web_colors.
_CLASS_COLOR_MAP = {
    (0, 0, 0): 0,
    (0, 0, 255): 1,  # Person (blue).
    (255, 0, 0): 2,  # Bear (red).
    (0, 255, 0): 3,  # Tree (lime).
    (255, 0, 255): 4,  # Bird (fuchsia).
    (0, 255, 255): 5,  # Sky (aqua).
    (255, 255, 0): 6,  # Cat (yellow).
}


def combine_maps(semantic_map, instance_map, label_divisor):
  combined_map = instance_map + semantic_map * label_divisor
  return tf.cast(combined_map, tf.int32)


class CocoInstanceApTest(tf.test.TestCase):

  def test_evaluates_single_image(self):
    groundtruth_boxes = [
        [0.25, 0.4, 0.75, 1.0],
    ]
    groundtruth_classes = [8]
    groundtruth_masks = [[
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 1, 1, 1],
        [0, 0, 0, 0, 0],
    ]]
    groundtruth_is_crowd = [False]

    detection_masks = [[
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 0, 0, 0],
    ]]
    detection_scores = [0.8]
    detection_classes = [8]

    groundtruth_boxes = tf.constant(groundtruth_boxes, dtype=tf.float32)
    groundtruth_classes = tf.constant(groundtruth_classes, dtype=tf.int32)
    groundtruth_masks = tf.constant(groundtruth_masks, dtype=tf.uint8)
    groundtruth_is_crowd = tf.constant(groundtruth_is_crowd, dtype=tf.bool)

    detection_masks = tf.constant(detection_masks, dtype=tf.uint8)
    detection_scores = tf.constant(detection_scores, dtype=tf.float32)
    detection_classes = tf.constant(detection_classes, dtype=tf.int32)

    metric_obj = coco_instance_ap.InstanceAveragePrecision()
    metric_obj.update_state(groundtruth_boxes, groundtruth_classes,
                            groundtruth_masks, groundtruth_is_crowd,
                            detection_masks, detection_scores,
                            detection_classes)
    result = metric_obj.result().numpy()

    # The IoU for the foreground match is 0.8. So it is a TP for 7/10 of the
    # IoU thresholds.
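    # The 12 entries follow the standard COCO summary order: AP, AP@.50,
    # AP@.75, AP for small/medium/large areas, then AR@1, AR@10, AR@100 and
    # AR for small/medium/large areas. A value of -1 means no objects fall in
    # that area range (the tiny masks here only populate the "small" range).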
    expected_result = [0.7, 1, 1, 0.7, -1, -1, 0.7, 0.7, 0.7, 0.7, -1, -1]
    np.testing.assert_array_almost_equal(result, expected_result)


class PanopticInstanceApTest(tf.test.TestCase):

  def test_evaluates_single_image(self):
    num_classes = 3
    things_list = [1, 2]
    label_divisor = 256
    ignore_label = 0
    instance_class_map = {
        0: 0,
        47: 1,
        97: 1,
        133: 1,
        150: 1,
        174: 1,
        198: 2,
        215: 1,
        244: 1,
        255: 1,
    }
    gt_instances, gt_classes = test_utils.panoptic_segmentation_with_class_map(
        'team_gt_instance.png', instance_class_map)
    gt_panoptic = combine_maps(gt_classes, gt_instances, label_divisor)

    pred_classes = test_utils.read_segmentation_with_rgb_color_map(
        'team_pred_class.png', _CLASS_COLOR_MAP)
    pred_instances = test_utils.read_test_image(
        'team_pred_instance.png', image_format='L')
    pred_panoptic = combine_maps(pred_classes, pred_instances, label_divisor)

    semantic_probability = tf.ones(
        tf.concat([tf.shape(pred_panoptic), [num_classes]], 0))
    instance_score_map = tf.ones(tf.shape(pred_panoptic))

    metric_obj = coco_instance_ap.PanopticInstanceAveragePrecision(
        num_classes, things_list, label_divisor, ignore_label)
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            instance_score_map)
    result = metric_obj.result().numpy()
    logging.info('result = %s', result)

    expected_result = [
        0.2549, 0.9356, 0.1215, -1.0, 0.2399, 0.501, 0.0812, 0.2688, 0.2688,
        -1.0, 0.2583, 0.5
    ]
    np.testing.assert_almost_equal(result, expected_result, decimal=4)

  def test_evaluates_with_scores(self):
    num_classes = 3
    things_list = list(range(num_classes))
    label_divisor = 256
    ignore_label = 0
    gt_classes = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    pred_classes = tf.constant([
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)

    instances = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 3, 3],
        [0, 0, 3, 3],
    ], tf.int32)

    gt_panoptic = combine_maps(gt_classes, instances, label_divisor)
    pred_panoptic = combine_maps(pred_classes, instances, label_divisor)

    semantic_probability = tf.constant([
        [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [1, 1, 0, 0],
            [1, 1, 0, 0],
        ],
        [
            [1, 1, 1, 1],
            [1, 1, 1, 1],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
        ],
        [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 1, 1],
            [0, 0, 1, 1],
        ],
    ], tf.float32)
    semantic_probability = tf.transpose(semantic_probability, [1, 2, 0])

    # This score map gives higher score to the incorrect instance.
    bad_instance_scores = tf.constant([
        [0.4, 0.4, 0.9, 0.9],
        [0.4, 0.4, 0.9, 0.9],
        [0.0, 0.0, 0.8, 0.8],
        [0.0, 0.0, 0.8, 0.8],
    ], tf.float32)
    metric_obj = coco_instance_ap.PanopticInstanceAveragePrecision(
        num_classes, things_list, label_divisor, ignore_label)
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            bad_instance_scores)
    bad_result = metric_obj.result().numpy()
    logging.info('bad_result = %s', bad_result)
    expected_bad_result = [
        0.5025, 0.5025, 0.5025, 0.5025, -1., -1., 0.25, 0.75, 0.75, 0.75, -1.,
        -1.
    ]
    np.testing.assert_almost_equal(bad_result, expected_bad_result, decimal=4)

    # This score map gives lower score to the incorrect instance.
    good_instance_scores = tf.constant([
        [0.9, 0.9, 0.4, 0.4],
        [0.9, 0.9, 0.4, 0.4],
        [0.0, 0.0, 0.8, 0.8],
        [0.0, 0.0, 0.8, 0.8],
    ], tf.float32)
    metric_obj.reset_states()
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            good_instance_scores)
    good_result = metric_obj.result().numpy()
    logging.info('good_result = %s', good_result)

    # Since the correct instance(s) have higher score, the "good" scores should
    # give a result with higher AP.
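    # Assuming the standard COCO summary ordering, AR@1 (the seventh entry)
    # also rises from 0.25 to 0.75, since the top-scoring class-1 detection is
    # now the correct instance.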
    expected_good_result = [
        0.75248, 0.75248, 0.75248, 0.75248, -1, -1, 0.75, 0.75, 0.75, 0.75,
        -1, -1
    ]
    np.testing.assert_almost_equal(
        good_result, expected_good_result, decimal=4)

  def test_ignores_crowds(self):
    num_classes = 3
    things_list = list(range(num_classes))
    label_divisor = 256
    ignore_label = 0
    gt_classes = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    pred_classes = tf.constant([
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)

    instances = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 3, 3],
        [0, 0, 3, 3],
    ], tf.int32)
    is_crowd_map = tf.math.equal(instances, 2)

    gt_panoptic = combine_maps(gt_classes, instances, label_divisor)
    pred_panoptic = combine_maps(pred_classes, instances, label_divisor)

    semantic_probability = tf.ones(
        tf.concat([tf.shape(pred_panoptic), [num_classes]], 0))
    instance_score_map = tf.ones(tf.shape(pred_panoptic))

    metric_obj = coco_instance_ap.PanopticInstanceAveragePrecision(
        num_classes, things_list, label_divisor, ignore_label)
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            instance_score_map, is_crowd_map)
    result = metric_obj.result().numpy()
    logging.info('result = %s', result)

    # Expect perfect results (for the quantities that have an AP value),
    # because the only mistake is a "crowd" instance.
    expected_result = [1., 1., 1., 1., -1., -1., 1., 1., 1., 1., -1., -1.]
    np.testing.assert_almost_equal(result, expected_result, decimal=4)

  def test_ignores_stuff(self):
    num_classes = 4
    things_list = [3]
    label_divisor = 256
    ignore_label = 0
    gt_classes = tf.constant([
        [3, 3, 2, 2],
        [3, 3, 2, 2],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    pred_classes = tf.constant([
        [3, 3, 1, 1],
        [3, 3, 1, 1],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)

    instances = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 3, 3],
        [0, 0, 3, 3],
    ], tf.int32)

    gt_panoptic = combine_maps(gt_classes, instances, label_divisor)
    pred_panoptic = combine_maps(pred_classes, instances, label_divisor)

    semantic_probability = tf.ones(
        tf.concat([tf.shape(pred_panoptic), [num_classes]], 0))
    instance_score_map = tf.ones(tf.shape(pred_panoptic))

    metric_obj = coco_instance_ap.PanopticInstanceAveragePrecision(
        num_classes, things_list, label_divisor, ignore_label)
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            instance_score_map)
    result = metric_obj.result().numpy()
    logging.info('result = %s', result)

    # Expect perfect results (for the quantities that have an AP value),
    # because the mistakes are all in "stuff" classes.
    expected_result = [1., 1., 1., 1., -1., -1., 1., 1., 1., 1., -1., -1.]
    np.testing.assert_almost_equal(result, expected_result, decimal=4)


if __name__ == '__main__':
  tf.test.main()