# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the COCO Instance AP metric."""
from absl import logging
import numpy as np
import tensorflow as tf
from deeplab2.evaluation import coco_instance_ap
from deeplab2.evaluation import test_utils
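
# Note: the 12-element expected result vectors checked below are assumed to
# follow the standard pycocotools COCOeval.stats ordering:
#   [AP, AP@0.50, AP@0.75, AP (small), AP (medium), AP (large),
#    AR@1, AR@10, AR@100, AR (small), AR (medium), AR (large)],
# where -1 is reported for area ranges that contain no objects.
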
# See the definition of the color names at:
# https://en.wikipedia.org/wiki/Web_colors.
_CLASS_COLOR_MAP = {
    (0, 0, 0): 0,
    (0, 0, 255): 1,  # Person (blue).
    (255, 0, 0): 2,  # Bear (red).
    (0, 255, 0): 3,  # Tree (lime).
    (255, 0, 255): 4,  # Bird (fuchsia).
    (0, 255, 255): 5,  # Sky (aqua).
    (255, 255, 0): 6,  # Cat (yellow).
}
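

# The helper below encodes each pixel's panoptic label as
# semantic_class * label_divisor + instance_id. For example, with
# label_divisor=256, a pixel of semantic class 2 and instance id 3 is
# encoded as 2 * 256 + 3 = 515.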
def combine_maps(semantic_map, instance_map, label_divisor):
  """Combines a semantic map and an instance map into a panoptic label map."""
  combined_map = instance_map + semantic_map * label_divisor
  return tf.cast(combined_map, tf.int32)


class CocoInstanceApTest(tf.test.TestCase):

  def test_evaluates_single_image(self):
    groundtruth_boxes = [
        [0.25, 0.4, 0.75, 1.0],
    ]
    groundtruth_classes = [8]
    groundtruth_masks = [[
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 1, 1, 1],
        [0, 0, 0, 0, 0],
    ]]
    groundtruth_is_crowd = [False]
    detection_masks = [[
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 0, 0, 0],
    ]]
    detection_scores = [0.8]
    detection_classes = [8]

    groundtruth_boxes = tf.constant(groundtruth_boxes, dtype=tf.float32)
    groundtruth_classes = tf.constant(groundtruth_classes, dtype=tf.int32)
    groundtruth_masks = tf.constant(groundtruth_masks, dtype=tf.uint8)
    groundtruth_is_crowd = tf.constant(groundtruth_is_crowd, dtype=tf.bool)
    detection_masks = tf.constant(detection_masks, dtype=tf.uint8)
    detection_scores = tf.constant(detection_scores, dtype=tf.float32)
    detection_classes = tf.constant(detection_classes, dtype=tf.int32)
    metric_obj = coco_instance_ap.InstanceAveragePrecision()
    metric_obj.update_state(groundtruth_boxes, groundtruth_classes,
                            groundtruth_masks, groundtruth_is_crowd,
                            detection_masks, detection_scores,
                            detection_classes)
    result = metric_obj.result().numpy()
    # The prediction and groundtruth masks overlap in 4 pixels and their union
    # covers 5 pixels, so the IoU of the foreground match is 4 / 5 = 0.8. The
    # detection is therefore a TP at 7 of the 10 IoU thresholds
    # (0.50, 0.55, ..., 0.80).
    expected_result = [0.7, 1, 1, 0.7, -1, -1, 0.7, 0.7, 0.7, 0.7, -1, -1]
    np.testing.assert_array_almost_equal(result, expected_result)


class PanopticInstanceApTest(tf.test.TestCase):

  def test_evaluates_single_image(self):
    num_classes = 3
    things_list = [1, 2]
    label_divisor = 256
    ignore_label = 0
    instance_class_map = {
        0: 0,
        47: 1,
        97: 1,
        133: 1,
        150: 1,
        174: 1,
        198: 2,
        215: 1,
        244: 1,
        255: 1,
    }
    gt_instances, gt_classes = test_utils.panoptic_segmentation_with_class_map(
        'team_gt_instance.png', instance_class_map)
    gt_panoptic = combine_maps(gt_classes, gt_instances, label_divisor)

    pred_classes = test_utils.read_segmentation_with_rgb_color_map(
        'team_pred_class.png', _CLASS_COLOR_MAP)
    pred_instances = test_utils.read_test_image(
        'team_pred_instance.png', image_format='L')
    pred_panoptic = combine_maps(pred_classes, pred_instances, label_divisor)

    semantic_probability = tf.ones(
        tf.concat([tf.shape(pred_panoptic), [num_classes]], 0))
    instance_score_map = tf.ones(tf.shape(pred_panoptic))

    metric_obj = coco_instance_ap.PanopticInstanceAveragePrecision(
        num_classes, things_list, label_divisor, ignore_label)
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            instance_score_map)
    result = metric_obj.result().numpy()
    logging.info('result = %s', result)
    expected_result = [
        0.2549, 0.9356, 0.1215, -1.0, 0.2399, 0.501, 0.0812, 0.2688, 0.2688,
        -1.0, 0.2583, 0.5
    ]
    np.testing.assert_almost_equal(result, expected_result, decimal=4)

  def test_evaluates_with_scores(self):
    num_classes = 3
    things_list = list(range(num_classes))
    label_divisor = 256
    ignore_label = 0
    gt_classes = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    pred_classes = tf.constant([
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    instances = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 3, 3],
        [0, 0, 3, 3],
    ], tf.int32)

    gt_panoptic = combine_maps(gt_classes, instances, label_divisor)
    pred_panoptic = combine_maps(pred_classes, instances, label_divisor)
    semantic_probability = tf.constant([
        [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [1, 1, 0, 0],
            [1, 1, 0, 0],
        ],
        [
            [1, 1, 1, 1],
            [1, 1, 1, 1],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
        ],
        [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 1, 1],
            [0, 0, 1, 1],
        ],
    ], tf.float32)
    # Convert the class-major [num_classes, height, width] tensor above into
    # the [height, width, num_classes] layout used for per-pixel probabilities.
    semantic_probability = tf.transpose(semantic_probability, [1, 2, 0])

    # This score map gives a higher score to the incorrect instance.
    bad_instance_scores = tf.constant([
        [0.4, 0.4, 0.9, 0.9],
        [0.4, 0.4, 0.9, 0.9],
        [0.0, 0.0, 0.8, 0.8],
        [0.0, 0.0, 0.8, 0.8],
    ], tf.float32)
    metric_obj = coco_instance_ap.PanopticInstanceAveragePrecision(
        num_classes, things_list, label_divisor, ignore_label)
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            bad_instance_scores)
    bad_result = metric_obj.result().numpy()
    logging.info('bad_result = %s', bad_result)
    expected_bad_result = [
        0.5025, 0.5025, 0.5025, 0.5025, -1., -1., 0.25, 0.75, 0.75, 0.75, -1.,
        -1.
    ]
    np.testing.assert_almost_equal(bad_result, expected_bad_result, decimal=4)

    # This score map gives a lower score to the incorrect instance.
    good_instance_scores = tf.constant([
        [0.9, 0.9, 0.4, 0.4],
        [0.9, 0.9, 0.4, 0.4],
        [0.0, 0.0, 0.8, 0.8],
        [0.0, 0.0, 0.8, 0.8],
    ], tf.float32)
    metric_obj.reset_states()
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            good_instance_scores)
    good_result = metric_obj.result().numpy()
    logging.info('good_result = %s', good_result)
    # Since the correct instance(s) have a higher score, the "good" scores
    # should give a result with higher AP.
    expected_good_result = [
        0.75248, 0.75248, 0.75248, 0.75248, -1, -1, 0.75, 0.75, 0.75, 0.75, -1,
        -1
    ]
    np.testing.assert_almost_equal(good_result, expected_good_result, decimal=4)

  def test_ignores_crowds(self):
    num_classes = 3
    things_list = list(range(num_classes))
    label_divisor = 256
    ignore_label = 0
    gt_classes = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    pred_classes = tf.constant([
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    instances = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 3, 3],
        [0, 0, 3, 3],
    ], tf.int32)
    is_crowd_map = tf.math.equal(instances, 2)

    gt_panoptic = combine_maps(gt_classes, instances, label_divisor)
    pred_panoptic = combine_maps(pred_classes, instances, label_divisor)

    semantic_probability = tf.ones(
        tf.concat([tf.shape(pred_panoptic), [num_classes]], 0))
    instance_score_map = tf.ones(tf.shape(pred_panoptic))

    metric_obj = coco_instance_ap.PanopticInstanceAveragePrecision(
        num_classes, things_list, label_divisor, ignore_label)
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            instance_score_map, is_crowd_map)
    result = metric_obj.result().numpy()
    logging.info('result = %s', result)
    # Expect perfect results (for the quantities that have an AP value),
    # because the only mistake is on a "crowd" instance.
    expected_result = [1., 1., 1., 1., -1., -1., 1., 1., 1., 1., -1., -1.]
    np.testing.assert_almost_equal(result, expected_result, decimal=4)

  def test_ignores_stuff(self):
    num_classes = 4
    things_list = [3]
    label_divisor = 256
    ignore_label = 0
    gt_classes = tf.constant([
        [3, 3, 2, 2],
        [3, 3, 2, 2],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    pred_classes = tf.constant([
        [3, 3, 1, 1],
        [3, 3, 1, 1],
        [0, 0, 2, 2],
        [0, 0, 2, 2],
    ], tf.int32)
    instances = tf.constant([
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [0, 0, 3, 3],
        [0, 0, 3, 3],
    ], tf.int32)

    gt_panoptic = combine_maps(gt_classes, instances, label_divisor)
    pred_panoptic = combine_maps(pred_classes, instances, label_divisor)

    semantic_probability = tf.ones(
        tf.concat([tf.shape(pred_panoptic), [num_classes]], 0))
    instance_score_map = tf.ones(tf.shape(pred_panoptic))

    metric_obj = coco_instance_ap.PanopticInstanceAveragePrecision(
        num_classes, things_list, label_divisor, ignore_label)
    metric_obj.update_state(gt_panoptic, pred_panoptic, semantic_probability,
                            instance_score_map)
    result = metric_obj.result().numpy()
    logging.info('result = %s', result)
    # Expect perfect results (for the quantities that have an AP value),
    # because the mistakes are all in "stuff" classes.
    expected_result = [1., 1., 1., 1., -1., -1., 1., 1., 1., 1., -1., -1.]
    np.testing.assert_almost_equal(result, expected_result, decimal=4)


if __name__ == '__main__':
  tf.test.main()