"""Defines the top-level interface for evaluating segmentations.""" |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import abc |
|
import numpy as np |
|
import six |
|
|
|
|
|
_EPSILON = 1e-10 |
|
|
|
|
|
def realdiv_maybe_zero(x, y):
  """Element-wise x / y, returning 0 wherever y is (near) zero."""
  return np.where(
      np.less(np.abs(y), _EPSILON), np.zeros_like(x), np.divide(x, y))
|
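# Illustrative example of the zero guard above (hand-computed, not part of the
# original module): realdiv_maybe_zero(np.array([1., 3.]), np.array([2., 0.]))
# returns array([0.5, 0. ]). Note that NumPy may still emit a divide-by-zero
# RuntimeWarning for the discarded np.divide branch.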
|
|
@six.add_metaclass(abc.ABCMeta)
class SegmentationMetric(object):
  """Abstract base class for computing segmentation metrics.

  Subclasses will implement both:
  1. Comparing the predicted segmentation for an image with the groundtruth.
  2. Computing the final metric over a set of images.
  These are often done as separate steps, because intermediate values other
  than the metric itself must be accumulated across images; the actual metric
  value is computed from these accumulations only after all the images have
  been compared.

  A simple usage would be:

    metric = MetricImplementation(...)
    for <image>, <groundtruth> in evaluation_set:
      <prediction> = run_segmentation(<image>)
      metric.compare_and_accumulate(<prediction>, <groundtruth>)
    print(metric.result())
  """
|
  def __init__(self, num_categories, ignored_label, max_instances_per_category,
               offset):
    """Base initialization for SegmentationMetric.

    Args:
      num_categories: The number of segmentation categories (or "classes") in
        the dataset.
      ignored_label: A category id that is ignored in evaluation, e.g. the void
        label as defined in the COCO panoptic segmentation dataset.
      max_instances_per_category: The maximum number of instances for each
        category, used to ensure unique instance labels.
      offset: The maximum number of unique labels. Ground-truth labels are
        multiplied by this value to generate unique ids for individual regions
        of overlap between groundtruth and predicted segments.
    """
    self.num_categories = num_categories
    self.ignored_label = ignored_label
    self.max_instances_per_category = max_instances_per_category
    self.offset = offset
    self.reset()
|
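  # (Illustrative reading of `offset`, following the docstring above: a
  # subclass can form a unique id for each region of overlap as
  # groundtruth_label * offset + predicted_label, which is collision-free as
  # long as all labels are below `offset`. The exact fusion scheme is left to
  # each subclass.)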
|
  def _naively_combine_labels(self, category_array, instance_array):
    """Naively creates a combined label array from categories and instances."""
    return (category_array.astype(np.uint32) * self.max_instances_per_category +
            instance_array.astype(np.uint32))
|
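  # Worked example of the packing arithmetic above (values are illustrative):
  # with max_instances_per_category = 256, category 3 / instance 2 combine to
  # 3 * 256 + 2 = 770, which is distinct from category 2 / instance 3
  # (2 * 256 + 3 = 515) as long as instance ids stay below 256.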
|
  @abc.abstractmethod
  def compare_and_accumulate(
      self, groundtruth_category_array, groundtruth_instance_array,
      predicted_category_array, predicted_instance_array):
    """Compares predicted segmentation with groundtruth, accumulates its metric.

    It is not assumed that instance ids are unique across different categories.
    See for example combine_semantic_and_instance_predictions.py in the
    official PanopticAPI evaluation code for issues to consider when fusing
    category and instance labels.

    For instances of the ignored category, instance id 0 means "void" and the
    remaining instance ids denote crowd instances.

    Args:
      groundtruth_category_array: A 2D numpy uint16 array of groundtruth
        per-pixel category labels.
      groundtruth_instance_array: A 2D numpy uint16 array of groundtruth
        instance labels.
      predicted_category_array: A 2D numpy uint16 array of predicted per-pixel
        category labels.
      predicted_instance_array: A 2D numpy uint16 array of predicted instance
        labels.

    Returns:
      The value of the metric over all comparisons done so far, including this
      one, as a float scalar.
    """
    raise NotImplementedError('Must be implemented in subclasses.')
|
  @abc.abstractmethod
  def result(self):
    """Computes the metric over all comparisons done so far."""
    raise NotImplementedError('Must be implemented in subclasses.')
|
  @abc.abstractmethod
  def detailed_results(self, is_thing=None):
    """Computes and returns the detailed final metric results.

    Args:
      is_thing: A boolean array of length `num_categories`. The entry
        `is_thing[category_id]` is True iff that category is a "thing" category
        instead of "stuff."

    Returns:
      A dictionary with a breakdown of metrics and/or metric factors by things,
      stuff, and all categories.
    """
    raise NotImplementedError('Must be implemented in subclasses.')
|
  @abc.abstractmethod
  def result_per_category(self):
    """For supported metrics, returns individual per-category metric values.

    Returns:
      A numpy array of shape `[self.num_categories]`, where the value at index
      `i` is the metric value over only that category.
    """
    raise NotImplementedError('Must be implemented in subclasses.')
|
  def print_detailed_results(self, is_thing=None, print_digits=3):
    """Prints out a detailed breakdown of metric results.

    Args:
      is_thing: A boolean array of length `num_categories`. The entry
        `is_thing[category_id]` is True iff that category is a "thing" category
        instead of "stuff."
      print_digits: Number of significant digits to print in computed metrics.
    """
    raise NotImplementedError('Must be implemented in subclasses.')
|
  @abc.abstractmethod
  def merge(self, other_instance):
    """Combines the accumulated results of another instance into self.

    The following two cases should put `metric_a` into an equivalent state.

    Case 1 (with merge):

      metric_a = MetricsSubclass(...)
      metric_a.compare_and_accumulate(<comparison 1>)
      metric_a.compare_and_accumulate(<comparison 2>)

      metric_b = MetricsSubclass(...)
      metric_b.compare_and_accumulate(<comparison 3>)
      metric_b.compare_and_accumulate(<comparison 4>)

      metric_a.merge(metric_b)

    Case 2 (without merge):

      metric_a = MetricsSubclass(...)
      metric_a.compare_and_accumulate(<comparison 1>)
      metric_a.compare_and_accumulate(<comparison 2>)
      metric_a.compare_and_accumulate(<comparison 3>)
      metric_a.compare_and_accumulate(<comparison 4>)

    Args:
      other_instance: Another compatible instance of the same metric subclass.
    """
    raise NotImplementedError('Must be implemented in subclasses.')
|
  @abc.abstractmethod
  def reset(self):
    """Resets the accumulation to the metric class's state at initialization.

    Note that this function will be called in SegmentationMetric.__init__.
    """
    raise NotImplementedError('Must be implemented in subclasses.')
|
|
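# ------------------------------------------------------------------------------
# Minimal sketch of a concrete subclass, showing how the abstract interface
# above fits together. This toy "per-category pixel accuracy" is hypothetical
# and illustrative only: it is not one of the metrics shipped with this module,
# and the class name is invented for the example.
# ------------------------------------------------------------------------------


class _ExamplePixelAccuracy(SegmentationMetric):
  """Toy metric: per-category accuracy over non-ignored groundtruth pixels."""

  def compare_and_accumulate(
      self, groundtruth_category_array, groundtruth_instance_array,
      predicted_category_array, predicted_instance_array):
    # Instance arrays are unused; this toy metric is purely semantic.
    valid = groundtruth_category_array != self.ignored_label
    for category in range(self.num_categories):
      in_category = valid & (groundtruth_category_array == category)
      self.total_per_category[category] += int(np.sum(in_category))
      self.correct_per_category[category] += int(
          np.sum(in_category & (predicted_category_array == category)))
    return self.result()

  def result(self):
    # Mean of the per-category accuracies; categories absent from the
    # groundtruth contribute 0 via realdiv_maybe_zero.
    return float(np.mean(self.result_per_category()))

  def result_per_category(self):
    return realdiv_maybe_zero(self.correct_per_category,
                              self.total_per_category)

  def detailed_results(self, is_thing=None):
    # A real implementation would break results down by thing/stuff categories;
    # this sketch only reports the aggregate.
    return {'All': self.result()}

  def merge(self, other_instance):
    self.correct_per_category += other_instance.correct_per_category
    self.total_per_category += other_instance.total_per_category

  def reset(self):
    self.correct_per_category = np.zeros(self.num_categories, dtype=np.int64)
    self.total_per_category = np.zeros(self.num_categories, dtype=np.int64)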