|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Tests for segmentation_tracking_quality.""" |
|
|
|
import numpy as np |
|
import tensorflow as tf |
|
|
|
from deeplab2.evaluation import segmentation_and_tracking_quality as stq |
|
|
|
|
|
def _compute_metric_and_compare(metric, ground_truth, prediction,
                                expected_result):
  """Runs one update/result/reset cycle on `metric` and checks every output.

  Feeds a single-frame ground truth / prediction pair into the metric under
  sequence id 1, then asserts that the scalar STQ, AQ and IoU values (and
  their single-sequence list counterparts) match `expected_result`, which is
  the triple (STQ, AQ, IoU). The metric is reset afterwards so it can be
  reused across sub-tests.
  """
  gt_tensor = tf.convert_to_tensor(ground_truth)
  pred_tensor = tf.convert_to_tensor(prediction)
  metric.update_state(gt_tensor, pred_tensor, 1)
  result = metric.result()
  metric.reset_states()

  expected_stq, expected_aq, expected_iou = expected_result
  scalar_expectations = {
      'STQ': expected_stq,
      'AQ': expected_aq,
      'IoU': expected_iou,
  }
  # Each scalar also has a *_per_seq variant holding one entry per sequence;
  # a single update under one sequence id yields exactly one entry.
  for key, value in scalar_expectations.items():
    np.testing.assert_almost_equal(result[key], value)
    np.testing.assert_almost_equal(result[key + '_per_seq'], [value])
  np.testing.assert_almost_equal(result['ID_per_seq'], [1])
  np.testing.assert_almost_equal(result['Length_per_seq'], [1])
|
|
|
|
|
class STQualityTest(tf.test.TestCase):
  """Tests for the STQuality (segmentation and tracking quality) metric."""

  def test_complex_example(self):
    """Checks STQ over a 3-frame 8x8 sequence mixing stuff and one thing."""
    n_classes = 3
    ignore_label = 255
    # Class 2 is the only "thing" class; classes 0 and 1 are "stuff".
    things_list = [2]
    # Offset used to pack (semantic, instance) into a single panoptic id:
    # panoptic = semantic * max_instances_per_category + instance.
    max_instances_per_category = 1000

    # Per-frame semantic ground truth (3 frames, 8x8).
    ground_truth_semantic_1 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 2, 0, 1, 1, 1],
                                        [0, 2, 2, 2, 2, 1, 1, 1],
                                        [2, 2, 2, 2, 2, 2, 1, 1],
                                        [2, 2, 2, 2, 2, 2, 2, 1],
                                        [2, 2, 2, 2, 2, 2, 2, 1],
                                        [2, 2, 2, 2, 2, 2, 1, 1]])
    ground_truth_semantic_2 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 2, 0, 0, 1, 1, 0, 0],
                                        [2, 2, 2, 1, 1, 1, 1, 0],
                                        [2, 2, 2, 2, 1, 1, 1, 1],
                                        [2, 2, 2, 2, 2, 1, 1, 1],
                                        [2, 2, 2, 2, 2, 1, 1, 1],
                                        [2, 2, 2, 2, 1, 1, 1, 1]])
    ground_truth_semantic_3 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 0, 0, 0, 0, 0],
                                        [2, 0, 1, 1, 1, 0, 0, 0],
                                        [2, 2, 1, 1, 1, 1, 0, 0],
                                        [2, 2, 2, 1, 1, 1, 1, 0],
                                        [2, 2, 2, 1, 1, 1, 1, 1],
                                        [2, 2, 2, 1, 1, 1, 1, 1]])
    ground_truth_semantic = np.stack([
        ground_truth_semantic_1, ground_truth_semantic_2,
        ground_truth_semantic_3
    ])

    # Per-frame instance ids; instance 2 tracks the single "thing" region,
    # stuff pixels carry instance id 0.
    ground_truth_instance_1 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 2, 0, 0, 0, 0],
                                        [0, 2, 2, 2, 2, 0, 0, 0],
                                        [2, 2, 2, 2, 2, 2, 0, 0],
                                        [2, 2, 2, 2, 2, 2, 2, 0],
                                        [2, 2, 2, 2, 2, 2, 2, 0],
                                        [2, 2, 2, 2, 2, 2, 0, 0]])
    ground_truth_instance_2 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 2, 0, 0, 0, 0, 0, 0],
                                        [2, 2, 2, 0, 0, 0, 0, 0],
                                        [2, 2, 2, 2, 0, 0, 0, 0],
                                        [2, 2, 2, 2, 2, 0, 0, 0],
                                        [2, 2, 2, 2, 2, 0, 0, 0],
                                        [2, 2, 2, 2, 0, 0, 0, 0]])
    ground_truth_instance_3 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 0, 0, 0, 0, 0],
                                        [0, 0, 0, 0, 0, 0, 0, 0],
                                        [2, 0, 0, 0, 0, 0, 0, 0],
                                        [2, 2, 0, 0, 0, 0, 0, 0],
                                        [2, 2, 2, 0, 0, 0, 0, 0],
                                        [2, 2, 2, 0, 0, 0, 0, 0],
                                        [2, 2, 2, 0, 0, 0, 0, 0]])

    ground_truth_instance = np.stack([
        ground_truth_instance_1, ground_truth_instance_2,
        ground_truth_instance_3
    ])
    # Pack semantic + instance into panoptic ids.
    ground_truth = (ground_truth_semantic * max_instances_per_category
                    + ground_truth_instance)

    # Predictions deliberately deviate from ground truth in both semantics
    # and instance association so AQ and IoU are non-trivial.
    prediction_semantic_1 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 1, 0, 0],
                                      [0, 0, 0, 2, 2, 1, 1, 1],
                                      [0, 2, 2, 2, 2, 2, 1, 1],
                                      [2, 2, 2, 2, 2, 2, 2, 1],
                                      [2, 2, 2, 2, 2, 2, 2, 1],
                                      [2, 2, 2, 2, 2, 2, 2, 1]])
    prediction_semantic_2 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 1, 1, 0, 0],
                                      [0, 2, 2, 2, 1, 1, 1, 1],
                                      [2, 2, 2, 2, 1, 1, 1, 1],
                                      [2, 2, 2, 2, 2, 1, 1, 1],
                                      [2, 2, 2, 2, 2, 2, 1, 1],
                                      [2, 2, 2, 2, 2, 1, 1, 1]])
    prediction_semantic_3 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 1, 0, 0, 0],
                                      [0, 0, 1, 1, 1, 1, 0, 0],
                                      [2, 2, 2, 1, 1, 1, 0, 0],
                                      [2, 2, 2, 1, 1, 1, 1, 1],
                                      [2, 2, 2, 2, 1, 1, 1, 1],
                                      [2, 2, 2, 2, 1, 1, 1, 1]])
    prediction_semantic = np.stack(
        [prediction_semantic_1, prediction_semantic_2, prediction_semantic_3])

    # Predicted instance ids include a spurious instance 1 in frames 1-2,
    # exercising the ID-association part of AQ.
    prediction_instance_1 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 2, 2, 0, 0, 0],
                                      [0, 2, 2, 2, 2, 1, 0, 0],
                                      [2, 2, 2, 2, 2, 1, 1, 0],
                                      [2, 2, 2, 2, 1, 1, 1, 0],
                                      [2, 2, 2, 2, 1, 1, 1, 0]])
    prediction_instance_2 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 2, 2, 2, 0, 0, 0, 0],
                                      [2, 2, 2, 2, 0, 0, 0, 0],
                                      [2, 2, 2, 2, 2, 0, 0, 0],
                                      [2, 2, 2, 2, 1, 1, 0, 0],
                                      [2, 2, 2, 2, 1, 0, 0, 0]])
    prediction_instance_3 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [2, 2, 2, 0, 0, 0, 0, 0],
                                      [2, 2, 2, 0, 0, 0, 0, 0],
                                      [2, 2, 2, 2, 0, 0, 0, 0],
                                      [2, 2, 2, 2, 0, 0, 0, 0]])
    prediction_instance = np.stack(
        [prediction_instance_1, prediction_instance_2, prediction_instance_3])
    prediction = (prediction_semantic * max_instances_per_category
                  + prediction_instance)

    # 256 * 256 is the offset the metric uses internally to pair ground-truth
    # and prediction ids; it must exceed any packed panoptic id.
    stq_metric = stq.STQuality(
        n_classes, things_list, ignore_label, max_instances_per_category,
        256 * 256)

    # Feed the three frames one by one under the same sequence id (1), as a
    # video stream would.
    for i in range(3):
      stq_metric.update_state(
          tf.convert_to_tensor(ground_truth[i, ...], dtype=tf.int32),
          tf.convert_to_tensor(prediction[i, ...], dtype=tf.int32),
          1)

    result = stq_metric.result()

    # Expected values were derived by hand for this fixture; STQ is the
    # geometric mean of AQ and IoU.
    np.testing.assert_almost_equal(result['STQ'], 0.66841773352)
    np.testing.assert_almost_equal(result['AQ'], 0.55366581415)
    np.testing.assert_almost_equal(result['IoU'], 0.8069529580309542)
    np.testing.assert_almost_equal(result['STQ_per_seq'], [0.66841773352])
    np.testing.assert_almost_equal(result['AQ_per_seq'], [0.55366581415])
    np.testing.assert_almost_equal(result['IoU_per_seq'], [0.8069529580309542])
    np.testing.assert_almost_equal(result['ID_per_seq'], [1])
    # One sequence of three frames was processed.
    np.testing.assert_almost_equal(result['Length_per_seq'], [3])

  def test_basic_examples(self):
    """Checks STQ on tiny 1-D tracks covering the basic failure modes."""
    n_classes = 2
    ignore_label = 255
    # Class 0 is the "thing" class; class 1 is "stuff".
    things_list = [0]
    max_instances_per_category = 1000

    # A single 5-pixel track: class 0, instance 1 everywhere.
    ground_truth_track = np.array([[1, 1, 1, 1, 1]])

    stq_metric = stq.STQuality(
        n_classes, things_list, ignore_label, max_instances_per_category,
        256 * 256)

    with self.subTest('Example 0'):
      # Perfect prediction.
      predicted_track = np.array([[1, 1, 1, 1, 1]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [1.0, 1.0, 1.0])

    with self.subTest('Example 1'):
      # Track split into two predicted ids: AQ drops, IoU stays perfect.
      predicted_track = np.array([[1, 1, 2, 2, 2]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [0.72111026, 0.52, 1.0])

    with self.subTest('Example 2'):
      # Milder split (4/5 of the track keeps one id).
      predicted_track = np.array([[1, 2, 2, 2, 2]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [0.82462113, 0.68, 1.0])

    with self.subTest('Example 3'):
      # Every pixel gets a distinct id: worst-case association.
      predicted_track = np.array([[1, 2, 3, 4, 5]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [0.447213596, 0.2, 1.0])

    with self.subTest('Example 4'):
      # Alternating ids; same AQ as a contiguous 2/3 split.
      predicted_track = np.array([[1, 2, 1, 2, 2]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [0.72111026, 0.52, 1.0])

    with self.subTest('Example 5'):
      # First pixel misclassified as stuff (class 1): IoU is hit too.
      predicted_track = (
          np.array([[0, 1, 1, 1, 1]]) +
          np.array([[1, 0, 0, 0, 0]]) * max_instances_per_category)
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [0.50596443, 0.64, 0.4])

    # Ground truth with an ignored/void first pixel (instance id 0 in the
    # thing class).
    ground_truth_track = np.array([[0, 1, 1, 1, 1, 1]])

    with self.subTest('Example 6'):
      predicted_track = np.array([[1, 1, 1, 1, 1, 1]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [1.0, 1.0, 1.0])

    with self.subTest('Example 7'):
      predicted_track = np.array([[2, 2, 2, 2, 1, 1]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [0.72111026, 0.52, 1.0])

    with self.subTest('Example 8'):
      # Combined id split and a stuff misclassification mid-track.
      predicted_track = (
          np.array([[2, 2, 0, 1, 1, 1]]) +
          np.array([[0, 0, 1, 0, 0, 0]]) * max_instances_per_category)
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track,
                                  [0.40824829, 0.4, 5.0 / 12.0])

    # Ground truth whose first pixel is the stuff class (class 1).
    ground_truth_track = (
        np.array([[0, 1, 1, 1, 1]]) +
        np.array([[1, 0, 0, 0, 0]]) * max_instances_per_category)

    with self.subTest('Example 9'):
      # Prediction labels everything as the thing track.
      predicted_track = np.array([[1, 1, 1, 1, 1]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track, [0.56568542, 0.8, 0.4])

    with self.subTest('Example 10'):
      predicted_track = np.array([[2, 2, 2, 1, 1]])
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track,
                                  [0.42426407, 0.45, 0.4])

    with self.subTest('Example 11'):
      predicted_track = (
          np.array([[2, 2, 0, 1, 1]]) +
          np.array([[0, 0, 1, 0, 0]]) * max_instances_per_category)
      _compute_metric_and_compare(stq_metric, ground_truth_track,
                                  predicted_track,
                                  [0.3, 0.3, 0.3])
|
|
|
|
|
# Standard TensorFlow test entry point: discovers and runs the test cases.
if __name__ == '__main__':
  tf.test.main()
|
|