|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Tensorflow code for working with object instances in segmentation.""" |
|
|
|
from typing import Iterable |
|
from typing import Optional |
|
from typing import Tuple |
|
from typing import Union |
|
|
|
import tensorflow as tf |
|
|
|
|
|
def instances_without_ignore_categories(panoptic_labels: tf.Tensor,
                                        ignore_categories: Union[tf.Tensor,
                                                                 Iterable[int]],
                                        panoptic_divisor: Union[tf.Tensor,
                                                                int] = 256):
  """Determines which instances to keep after ignoring a set of categories.

  Args:
    panoptic_labels: An integer tensor of panoptic labels. Any rank is
      accepted, e.g. a `[height, width]` label map or a 1D vector of unique
      label values. Each element is `category * panoptic_divisor + instance`.
    ignore_categories: An iterable or tensor of integer category labels.
      Labels whose category portion is in the ignore set are marked as not
      kept in the result.
    panoptic_divisor: The divisor used to multiply the category label when
      constructing panoptic labels, as an integer or integer scalar tensor.

  Returns:
    A boolean tensor with the same shape as `panoptic_labels`. It is True where
    the label belongs to an instance that will be kept, or equivalently is
    *not* ignored.
  """
  ignore_categories = tf.convert_to_tensor(
      ignore_categories, dtype=panoptic_labels.dtype)
  panoptic_divisor = tf.convert_to_tensor(
      panoptic_divisor, dtype=panoptic_labels.dtype)

  instance_category = tf.math.floordiv(panoptic_labels, panoptic_divisor)

  # Compare every label's category against every ignored category along a new
  # trailing axis. Using the last axis (rather than axis 1) keeps the result
  # correct for inputs of any rank, not only for 1D label vectors.
  category_matches = tf.equal(
      tf.expand_dims(instance_category, -1),
      tf.reshape(ignore_categories, [-1]))
  instance_is_ignored = tf.math.reduce_any(category_matches, axis=-1)

  return tf.math.logical_not(instance_is_ignored)
|
|
|
|
|
def _broadcast_over_instances(t, num_instances):
  """Repeats `t` along a new trailing axis of length `num_instances`.

  Args:
    t: The tensor to repeat.
    num_instances: Size of the new last dimension, as an integer or integer
      scalar tensor.

  Returns:
    A tensor of shape `tf.shape(t) + [num_instances]` in which every slice
    along the last axis equals `t`.
  """
  with_trailing_axis = tf.expand_dims(t, axis=-1)
  target_shape = tf.concat([tf.shape(t), [num_instances]], axis=0)
  return tf.broadcast_to(with_trailing_axis, target_shape)
|
|
|
|
|
def instance_boxes_from_masks(
    panoptic_labels: tf.Tensor,
    ignore_categories: Optional[Union[tf.Tensor, Iterable[int]]] = None,
    panoptic_divisor: Union[tf.Tensor, int] = 256
) -> Tuple[tf.Tensor, tf.Tensor]:
  """Finds the bounding boxes around instances, given a panoptic label map.

  Args:
    panoptic_labels: An integer tensor of panoptic labels of shape `[height,
      width]`. Each element will be `category * panoptic_divisor + instance` for
      a pixel.
    ignore_categories: An iterable or tensor of integer category labels.
      Instances where the category portion of the label in `panoptic_labels` is
      in the ignore set will not be included in the results. If None, no
      instance is filtered out.
    panoptic_divisor: The divisor used to multiply the category label when
      constructing panoptic labels, as an integer or integer scalar tensor.

  Returns:
    A tuple of tensors (unique_labels, box_coords).
    unique_labels: A tensor of each possible non-ignored label value in
      `panoptic_labels`, in the same order as the boxes.
    box_coords: A float32 tensor of shape `[num_labels, 4]`. Each row is one
      box as `[ymin, xmin, ymax, xmax]`, where the max coordinates are one past
      the last pixel row/column covered by the instance.
  """
  label_shape = tf.shape(panoptic_labels)
  height = label_shape[0]
  width = label_shape[1]
  num_pixels = height * width

  # Per-pixel coordinates, flattened in the same row-major order as the labels.
  x_coord, y_coord = tf.meshgrid(
      tf.range(width, dtype=tf.float32), tf.range(height, dtype=tf.float32))
  flat_x = tf.reshape(x_coord, [num_pixels])
  flat_y = tf.reshape(y_coord, [num_pixels])

  unique_labels, flat_instance_index = tf.unique(
      tf.reshape(panoptic_labels, [num_pixels]))
  num_instances = tf.size(unique_labels)

  # Reduce the coordinates per instance with segment ops. This avoids building
  # `[height, width, num_instances]` intermediates; every segment is non-empty
  # because the segment ids come from `tf.unique` on these same pixels.
  y_min = tf.math.unsorted_segment_min(flat_y, flat_instance_index,
                                       num_instances)
  x_min = tf.math.unsorted_segment_min(flat_x, flat_instance_index,
                                       num_instances)
  # The +1 makes the max edge exclusive.
  y_max = tf.math.unsorted_segment_max(flat_y, flat_instance_index,
                                       num_instances) + 1
  x_max = tf.math.unsorted_segment_max(flat_x, flat_instance_index,
                                       num_instances) + 1

  box_coords = tf.stack([y_min, x_min, y_max, x_max], axis=1)

  if ignore_categories is not None:
    # Drop the labels (and their boxes) whose category is in the ignore set.
    instance_is_kept = instances_without_ignore_categories(
        unique_labels, ignore_categories, panoptic_divisor)

    unique_labels = tf.boolean_mask(unique_labels, instance_is_kept)
    box_coords = tf.boolean_mask(box_coords, instance_is_kept)

  return unique_labels, box_coords
|
|
|
|
|
def per_instance_masks(panoptic_labels: tf.Tensor,
                       instance_panoptic_labels: tf.Tensor,
                       out_dtype: tf.dtypes.DType = tf.bool) -> tf.Tensor:
  """Builds one mask per instance, stacked along the first dimension.

  Args:
    panoptic_labels: The panoptic label map to build masks from.
    instance_panoptic_labels: The panoptic label value of each instance for
      which a mask is wanted.
    out_dtype: The dtype the equality result is cast to.

  Returns:
    A tensor whose slice `i` along the first axis marks the elements of
    `panoptic_labels` equal to the ith entry of `instance_panoptic_labels`.
  """
  instance_count = tf.size(instance_panoptic_labels)
  # Shape the labels as [num_instances, 1, 1] so they broadcast against the
  # label map, which gets a leading axis of size 1.
  labels_per_slice = tf.reshape(instance_panoptic_labels,
                                [instance_count, 1, 1])
  label_map = tf.expand_dims(panoptic_labels, axis=0)
  is_member = tf.math.equal(label_map, labels_per_slice)
  return tf.cast(is_member, out_dtype)
|
|
|
|
|
def _average_per_instance(map_tensor: tf.Tensor, panoptic_labels: tf.Tensor,
                          instance_panoptic_labels: tf.Tensor,
                          instance_area: tf.Tensor) -> tf.Tensor:
  """Averages the values of `map_tensor` over the pixels of each instance.

  Args:
    map_tensor: The per-pixel values to average.
    panoptic_labels: The panoptic label map assigning each pixel to an
      instance.
    instance_panoptic_labels: The panoptic label of each instance to average
      over.
    instance_area: The per-instance divisor for the summed values, in the same
      order as `instance_panoptic_labels`.

  Returns:
    A tensor with one value per instance: the sum of `map_tensor` over the
    instance's pixels divided by the matching `instance_area` entry.
  """
  value_dtype = map_tensor.dtype
  membership = per_instance_masks(panoptic_labels, instance_panoptic_labels)

  # Keep a pixel's value only in the slice of the instance it belongs to and
  # use zero elsewhere, so a plain sum per slice gives the instance total.
  instance_count = tf.size(instance_panoptic_labels)
  zeros_per_instance = tf.zeros([instance_count, 1, 1], dtype=value_dtype)
  values_in_instance = tf.where(membership, tf.expand_dims(map_tensor, axis=0),
                                zeros_per_instance)

  per_instance_sum = tf.math.reduce_sum(values_in_instance, axis=[1, 2])
  area = tf.cast(instance_area, value_dtype)
  return tf.divide(per_instance_sum, area)
|
|
|
|
|
|
|
def per_instance_semantic_probabilities(
    panoptic_labels: tf.Tensor,
    instance_panoptic_labels: tf.Tensor,
    instance_area: tf.Tensor,
    semantic_probability: tf.Tensor,
    panoptic_divisor: Union[tf.Tensor, int],
    ignore_label: Union[tf.Tensor, int]) -> tf.Tensor:
  """Mean probability for the semantic label of each unique instance.

  For every pixel, the probability of that pixel's own semantic class is looked
  up in `semantic_probability`; pixels with the ignore label contribute zero.
  These per-pixel values are then averaged over each instance.

  Args:
    panoptic_labels: Panoptic label map, where each element is
      `semantic_label * panoptic_divisor + instance_label`.
    instance_panoptic_labels: The panoptic label of each instance to score.
    instance_area: The per-instance divisor used for the average, in the same
      order as `instance_panoptic_labels`.
    semantic_probability: Per-pixel class probabilities, indexed as
      `[y, x, semantic_label]`.
    panoptic_divisor: Integer scalar divisor used to construct the panoptic
      labels. Converted to an int32 tensor.
    ignore_label: Integer scalar "ignore" semantic label. Converted to an int32
      tensor.

  Returns:
    A tensor with one averaged probability per instance.
  """
  divisor = tf.convert_to_tensor(panoptic_divisor, dtype=tf.int32)
  ignored_class = tf.convert_to_tensor(ignore_label, dtype=tf.int32)
  semantic_label_map = tf.math.floordiv(panoptic_labels, divisor)

  map_shape = tf.shape(semantic_label_map)
  height, width = map_shape[0], map_shape[1]
  num_pixels = height * width

  # The ignore label is replaced by class 0 only so the lookup index is usable
  # for those pixels; their probability is overwritten with zero below.
  flat_classes = tf.reshape(semantic_label_map, [num_pixels])
  lookup_classes = tf.where(
      tf.equal(flat_classes, ignored_class), 0, flat_classes)

  # One (y, x, class) index triple per pixel, in row-major pixel order.
  col_grid, row_grid = tf.meshgrid(tf.range(width), tf.range(height))
  flat_rows = tf.reshape(row_grid, [num_pixels])
  flat_cols = tf.reshape(col_grid, [num_pixels])
  lookup_index = tf.stack([flat_rows, flat_cols, lookup_classes], axis=1)

  flat_probability = tf.gather_nd(semantic_probability, lookup_index)
  pixel_probability = tf.reshape(flat_probability, [height, width])
  pixel_probability = tf.where(
      tf.equal(semantic_label_map, ignored_class), 0.0, pixel_probability)

  return _average_per_instance(pixel_probability, panoptic_labels,
                               instance_panoptic_labels, instance_area)
|
|
|
|
|
def combined_instance_scores(
    panoptic_labels: tf.Tensor, semantic_probability: tf.Tensor,
    instance_score_map: tf.Tensor, panoptic_divisor: Union[tf.Tensor, int],
    ignore_label: Union[tf.Tensor, int]) -> Tuple[tf.Tensor, tf.Tensor]:
  """Combines (with a product) predicted semantic and instance probabilities.

  Args:
    panoptic_labels: A 2D integer tensor of panoptic format labels (each pixel
      entry is `semantic_label * panoptic_divisor + instance_label`).
    semantic_probability: A 3D float tensor, where the 3rd dimension is over
      semantic labels, and each spatial location holds the discrete
      distribution of the probabilities of the semantic classes.
    instance_score_map: A 2D float tensor, where the pixels for an instance
      hold the probability of that being an instance.
    panoptic_divisor: Integer scalar divisor/multiplier used to construct the
      panoptic labels.
    ignore_label: Integer scalar, for the "ignore" semantic label in the
      panoptic labels.

  Returns:
    A tuple of instance labels and the combined scores for those instances, each
    as a 1D tensor. Instances whose semantic label equals `ignore_label` are
    left out of both.
  """
  divisor = tf.convert_to_tensor(panoptic_divisor, dtype=tf.int32)
  ignored_class = tf.convert_to_tensor(ignore_label, dtype=tf.int32)

  # Every distinct panoptic label is one instance; its pixel count is its area.
  flat_labels = tf.reshape(panoptic_labels, [tf.size(panoptic_labels)])
  all_labels, _, all_areas = tf.unique_with_counts(flat_labels)

  # Drop the instances that carry the ignore semantic label.
  all_semantic_labels = tf.math.floordiv(all_labels, divisor)
  is_scored = tf.not_equal(all_semantic_labels, ignored_class)
  instance_panoptic_labels = tf.boolean_mask(all_labels, is_scored)
  instance_area = tf.boolean_mask(all_areas, is_scored)

  semantic_scores = per_instance_semantic_probabilities(
      panoptic_labels, instance_panoptic_labels, instance_area,
      semantic_probability, divisor, ignored_class)
  instance_scores = _average_per_instance(
      instance_score_map, panoptic_labels, instance_panoptic_labels,
      instance_area)

  return instance_panoptic_labels, semantic_scores * instance_scores
|
|
|
|
|
def per_instance_is_crowd(is_crowd_map: tf.Tensor, id_map: tf.Tensor,
                          output_ids: tf.Tensor) -> tf.Tensor:
  """Determines the per-instance is_crowd value from a boolean is_crowd map.

  Args:
    is_crowd_map: A 2D boolean tensor. Where it is True, the instance in that
      region is a "crowd" instance. It is assumed that all pixels in an instance
      will have the same value in this map.
    id_map: A 2D integer tensor, with the instance id label at each pixel.
    output_ids: A 1D integer vector tensor, the per-instance ids for which to
      output the is_crowd values.

  Returns:
    A 1D boolean vector tensor, with the per-instance is_crowd value. The ith
    element of the return value will be the is_crowd result for the segment
    with the ith element of the output_ids argument. Note that an id in
    `output_ids` that does not occur in `id_map` produces no output element, so
    the alignment only holds when every requested id is present.
  """
  crowd_per_pixel = tf.reshape(is_crowd_map, [-1])
  id_per_pixel = tf.reshape(id_map, [-1])

  # Collapse the per-pixel values to one value per distinct id by scattering
  # each pixel's value into the slot of its id. This relies on the documented
  # assumption that is_crowd is constant within an instance.
  # NOTE(review): confirm how tf.scatter_nd combines duplicate indices for
  # boolean updates.
  unique_ids, pixel_to_unique = tf.unique(id_per_pixel)
  scatter_index = tf.expand_dims(pixel_to_unique, axis=1)
  unique_is_crowd = tf.scatter_nd(scatter_index, crowd_per_pixel,
                                  tf.shape(unique_ids))

  # Pairwise comparison of requested ids (rows) against the ids found in the
  # map (columns); the column of each True entry is the slot to read.
  requested = tf.expand_dims(output_ids, axis=1)
  available = tf.expand_dims(unique_ids, axis=0)
  match_coords = tf.where(tf.math.equal(requested, available))
  slot_per_output = match_coords[:, 1]
  return tf.gather(unique_is_crowd, slot_per_output)
|
|