|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""This file contains code to get a sample from a dataset.""" |
|
|
|
import functools |
|
|
|
import numpy as np |
|
import tensorflow as tf |
|
|
|
from deeplab2 import common |
|
from deeplab2.data import dataset_utils |
|
from deeplab2.data.preprocessing import input_preprocessing as preprocessing |
|
|
|
|
|
def _compute_gaussian_from_std(sigma):
  """Computes an unnormalized 2D Gaussian kernel and its side length.

  The kernel is a square of side `int(6 * sigma + 3)` whose Gaussian is
  centered at coordinate `3 * sigma + 1` along both axes. The values are not
  normalized to sum to one; the Gaussian evaluates to 1.0 exactly at its
  center coordinate.

  Args:
    sigma: The standard deviation of the Gaussian (a positive number).

  Returns:
    A tuple (gaussian, size), where gaussian is a float64 np.ndarray of shape
    [size, size] and size is the integer side length of the kernel.
  """
  size = int(6 * sigma + 3)
  # `np.float` was only an alias of the builtin `float`; it was deprecated in
  # NumPy 1.20 and removed in NumPy 1.24. `np.float64` is the same dtype.
  x = np.arange(size, dtype=np.float64)
  # Column vector, so that the expression below broadcasts to [size, size].
  y = x[:, np.newaxis]
  x0, y0 = 3 * sigma + 1, 3 * sigma + 1
  return np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2)), size
|
|
|
|
|
class PanopticSampleGenerator:
  """This class generates samples from images and labels.

  An instance is a callable that takes a dictionary of raw tensors (image,
  label, sizes and optional neighboring frames), runs the configured
  preprocessing, and returns a dictionary with the preprocessed image and the
  ground-truth targets derived from the panoptic label (semantic label,
  instance centers, center offsets, loss weights, ...).
  """

  def __init__(self,
               dataset_info,
               is_training,
               crop_size,
               min_resize_value=None,
               max_resize_value=None,
               resize_factor=None,
               min_scale_factor=1.,
               max_scale_factor=1.,
               scale_factor_step_size=0,
               autoaugment_policy_name=None,
               only_semantic_annotations=False,
               thing_id_mask_annotations=False,
               max_thing_id=128,
               sigma=8,
               focus_small_instances=None):
    """Initializes the panoptic segmentation generator.

    Args:
      dataset_info: A dictionary with the following keys.
        - `name`: String, dataset name.
        - `ignore_label`: Integer, ignore label.
        - `class_has_instances_list`: A list of integers indicating which
          class has instance annotations.
        - `panoptic_label_divisor`: Integer, panoptic label divisor.
        - `num_classes`: Integer, number of classes.
        - `is_video_dataset`: Boolean, is video dataset or not.
      is_training: Boolean, is training mode or not.
      crop_size: Image crop size [height, width].
      min_resize_value: A 2-tuple of (height, width), desired minimum value
        after resize. If a single element is given, then height and width share
        the same value. None, empty or having 0 indicates no minimum value will
        be used.
      max_resize_value: A 2-tuple of (height, width), maximum allowed value
        after resize. If a single element is given, then height and width
        share the same value. None, empty or having 0 indicates no maximum
        value will be used.
      resize_factor: Resized dimensions are multiple of factor plus one.
      min_scale_factor: Minimum scale factor for random scale augmentation.
      max_scale_factor: Maximum scale factor for random scale augmentation.
      scale_factor_step_size: The step size from min scale factor to max scale
        factor. The input is randomly scaled based on the value of
        (min_scale_factor, max_scale_factor, scale_factor_step_size).
      autoaugment_policy_name: String, autoaugment policy name. See
        autoaugment_policy.py for available policies.
      only_semantic_annotations: An optional flag indicating whether the model
        needs only semantic annotations (default: False).
      thing_id_mask_annotations: An optional flag indicating whether the model
        needs thing_id_mask annotations. When `thing_id_mask_annotations` is
        True, we will additionally return mask annotation for each `thing`
        instance, encoded with a unique thing_id. This ground-truth annotation
        could be used to learn a better segmentation mask for each instance.
        `thing_id` indicates the number of unique thing-ID to each instance in
        an image, starting the counting from 0 (default: False).
      max_thing_id: The maximum number of possible thing instances per image. It
        is used together when thing_id_mask_annotations = True, representing the
        maximum thing ID encoded in the thing_id_mask. (default: 128).
      sigma: The standard deviation of the Gaussian used to encode the center
        keypoint (default: 8).
      focus_small_instances: An optional dict that defines how to deal with
        small instances (default: None):
        - `threshold`: An integer defining the threshold pixel number for an
          instance to be considered small.
        - `weight`: A number that defines the loss weight for small instances.
    """
    self._dataset_info = dataset_info
    self._ignore_label = self._dataset_info['ignore_label']
    self._only_semantic_annotations = only_semantic_annotations
    self._sigma = sigma
    # Defaults that disable the small-instance re-weighting: no instance has
    # an area below 0, so the weight is never applied.
    self._instance_area_threshold = 0
    self._small_instance_weight = 1.0
    self._thing_id_mask_annotations = thing_id_mask_annotations
    self._max_thing_id = max_thing_id
    self._is_training = is_training
    # The label handed to the preprocessing function is the panoptic label, so
    # the ignore value passed along is the ignore label scaled by the divisor.
    self._preprocessing_fn = functools.partial(
        preprocessing.preprocess_image_and_label,
        crop_height=crop_size[0],
        crop_width=crop_size[1],
        min_resize_value=min_resize_value,
        max_resize_value=max_resize_value,
        resize_factor=resize_factor,
        min_scale_factor=min_scale_factor,
        max_scale_factor=max_scale_factor,
        scale_factor_step_size=scale_factor_step_size,
        autoaugment_policy_name=autoaugment_policy_name,
        ignore_label=self._ignore_label *
        self._dataset_info['panoptic_label_divisor'],
        is_training=self._is_training)

    if focus_small_instances is not None:
      self._instance_area_threshold = focus_small_instances['threshold']
      self._small_instance_weight = focus_small_instances['weight']

    self._gaussian, self._gaussian_size = _compute_gaussian_from_std(
        self._sigma)
    # Stored flattened, because it is used as the `updates` argument of a
    # scatter op whose indices enumerate the kernel window row by row.
    self._gaussian = tf.cast(tf.reshape(self._gaussian, [-1]), tf.float32)

  def __call__(self, sample_dict):
    """Gets a sample.

    Args:
      sample_dict: A dictionary with the following keys and values:
        - `image`: A tensor of shape [image_height, image_width, 3].
        - `image_name`: String, image name.
        - `label`: A tensor of shape [label_height, label_width, 1] or None.
        - `height`: An integer specifying the height of the image.
        - `width`: An integer specifying the width of the image.
        - `sequence`: An optional string specifying the sequence name.
        - `prev_image`: An optional tensor of the same shape as `image`.
        - `prev_label`: An optional tensor of the same shape as `label`.
        - `next_image`: An optional next-frame tensor of the shape of `image`.
        - `next_label`: An optional next-frame tensor of the shape of `label`.

    Returns:
      sample: A dictionary storing required data for panoptic segmentation.
    """
    return self.call(**sample_dict)

  def call(self,
           image,
           image_name,
           label,
           height,
           width,
           sequence='',
           prev_image=None,
           prev_label=None,
           next_image=None,
           next_label=None):
    """Gets a sample.

    Args:
      image: A tensor of shape [image_height, image_width, 3].
      image_name: String, image name.
      label: A tensor of shape [label_height, label_width, 1] or None.
      height: An integer specifying the height of the image.
      width: An integer specifying the width of the image.
      sequence: An optional string specifying the sequence name.
      prev_image: An optional tensor of shape [image_height, image_width, 3].
      prev_label: An optional tensor of shape [label_height, label_width, 1].
      next_image: An optional tensor of shape [image_height, image_width, 3].
      next_label: An optional tensor of shape [label_height, label_width, 1].

    Returns:
      sample: A dictionary storing required data for panoptic segmentation.

    Raises:
      ValueError: An error occurs when the label shape is invalid.
      NotImplementedError: An error occurs when thing_id_mask_annotations comes
        together with prev_image, prev_label, next_image or next_label, which
        is not currently implemented.
    """
    if label is not None:
      label.get_shape().assert_is_compatible_with(
          tf.TensorShape([None, None, 1]))
      # Keep the labels as they were before preprocessing; they are returned
      # as the raw ground truth when not training.
      original_label = tf.cast(label, dtype=tf.int32, name='original_label')
      if next_label is not None:
        original_next_label = tf.cast(
            next_label, dtype=tf.int32, name='original_next_label')

    # The preprocessing function only has `prev_*` slots for a second frame,
    # so the next frame (when given) is passed through those slots.
    if next_image is not None:
      resized_image, image, label, next_image, next_label = (
          self._preprocessing_fn(
              image, label, prev_image=next_image, prev_label=next_label))
    else:
      resized_image, image, label, prev_image, prev_label = (
          self._preprocessing_fn(
              image, label, prev_image=prev_image, prev_label=prev_label))
    sample = {
        common.IMAGE: image
    }
    # A neighboring frame is stacked onto the current image along channels.
    if prev_image is not None:
      sample[common.IMAGE] = tf.concat([image, prev_image], axis=2)
    if next_image is not None:
      sample[common.NEXT_IMAGE] = next_image
      sample[common.IMAGE] = tf.concat([image, next_image], axis=2)
    if label is not None:
      semantic_label, panoptic_label, thing_mask, crowd_region = (
          dataset_utils.get_semantic_and_panoptic_label(
              self._dataset_info, label, self._ignore_label))
      sample[common.GT_SEMANTIC_KEY] = tf.squeeze(semantic_label, axis=2)
      semantic_weights = tf.ones_like(semantic_label, dtype=tf.float32)
      sample[common.SEMANTIC_LOSS_WEIGHT_KEY] = tf.squeeze(
          semantic_weights, axis=2)
      sample[common.GT_IS_CROWD] = tf.squeeze(crowd_region, axis=2)

      if not self._only_semantic_annotations:
        # Note this stores the preprocessed `label`, not the `panoptic_label`
        # returned by get_semantic_and_panoptic_label.
        sample[common.GT_PANOPTIC_KEY] = tf.squeeze(label, axis=2)

        # The center loss is computed on all non-crowd, non-ignore pixels.
        non_crowd_and_non_ignore_regions = tf.logical_and(
            tf.logical_not(crowd_region),
            tf.not_equal(semantic_label, self._ignore_label))
        sample[common.CENTER_LOSS_WEIGHT_KEY] = tf.squeeze(tf.cast(
            non_crowd_and_non_ignore_regions, tf.float32), axis=2)

        # The regression loss is computed on non-crowd thing pixels only.
        non_crowd_things = tf.logical_and(
            tf.logical_not(crowd_region), thing_mask)
        sample[common.REGRESSION_LOSS_WEIGHT_KEY] = tf.squeeze(tf.cast(
            non_crowd_things, tf.float32), axis=2)

        prev_panoptic_label = None
        next_panoptic_label = None
        if prev_label is not None:
          _, prev_panoptic_label, _, _ = (
              dataset_utils.get_semantic_and_panoptic_label(
                  self._dataset_info, prev_label, self._ignore_label))
        if next_label is not None:
          _, next_panoptic_label, _, _ = (
              dataset_utils.get_semantic_and_panoptic_label(
                  self._dataset_info, next_label, self._ignore_label))
        # The semantic loss weights are overwritten here with the version
        # that up-weights small instances.
        (sample[common.GT_INSTANCE_CENTER_KEY],
         sample[common.GT_INSTANCE_REGRESSION_KEY],
         sample[common.SEMANTIC_LOSS_WEIGHT_KEY],
         prev_center_map,
         frame_center_offsets,
         next_offset) = self._generate_gt_center_and_offset(
             panoptic_label, semantic_weights, prev_panoptic_label,
             next_panoptic_label)

        sample[common.GT_INSTANCE_REGRESSION_KEY] = tf.cast(
            sample[common.GT_INSTANCE_REGRESSION_KEY], tf.float32)

        if next_label is not None:
          sample[common.GT_NEXT_INSTANCE_REGRESSION_KEY] = tf.cast(
              next_offset, tf.float32)
          # Only pixels with a non-zero next-frame offset get a loss weight.
          sample[common.NEXT_REGRESSION_LOSS_WEIGHT_KEY] = tf.cast(
              tf.greater(tf.reduce_sum(tf.abs(next_offset), axis=2), 0),
              tf.float32)

        # The center map and the semantic weights come back with a trailing
        # singleton channel; the offsets keep their two channels.
        sample[common.GT_INSTANCE_CENTER_KEY] = tf.squeeze(
            sample[common.GT_INSTANCE_CENTER_KEY], axis=2)
        sample[common.SEMANTIC_LOSS_WEIGHT_KEY] = tf.squeeze(
            sample[common.SEMANTIC_LOSS_WEIGHT_KEY], axis=2)

        if prev_label is not None:
          sample[common.GT_FRAME_OFFSET_KEY] = frame_center_offsets
          sample[common.GT_FRAME_OFFSET_KEY] = tf.cast(
              sample[common.GT_FRAME_OFFSET_KEY], tf.float32)
          # A pixel has a frame offset if either offset component is non-zero.
          frame_offsets_present = tf.logical_or(
              tf.not_equal(frame_center_offsets[..., 0], 0),
              tf.not_equal(frame_center_offsets[..., 1], 0))
          sample[common.FRAME_REGRESSION_LOSS_WEIGHT_KEY] = tf.cast(
              frame_offsets_present, tf.float32)
          if self._is_training:
            # During training the noisy previous-frame center heatmap is
            # appended to the input as an additional channel.
            sample[common.IMAGE] = tf.concat(
                [sample[common.IMAGE], prev_center_map], axis=2)

        if self._thing_id_mask_annotations:
          if any([prev_image is not None,
                  prev_label is not None,
                  next_image is not None,
                  next_label is not None]):
            raise NotImplementedError(
                'Current implementation of Max-DeepLab does not support '
                + 'prev_image, prev_label, next_image, or next_label.')
          thing_id_mask, thing_id_class = (
              self._generate_thing_id_mask_and_class(
                  panoptic_label, non_crowd_things))
          sample[common.GT_THING_ID_MASK_KEY] = tf.squeeze(
              thing_id_mask, axis=2)
          sample[common.GT_THING_ID_CLASS_KEY] = thing_id_class

    if not self._is_training:
      # Extra, non-augmented information that is only emitted outside of
      # training (resized image, names, raw sizes and raw labels).
      sample[common.RESIZED_IMAGE] = resized_image
      sample[common.IMAGE_NAME] = image_name
      sample[common.GT_SIZE_RAW] = tf.stack([height, width], axis=0)
      if self._dataset_info['is_video_dataset']:
        sample[common.SEQUENCE_ID] = sequence

      if label is not None:
        orig_semantic_label, _, _, orig_crowd_region = (
            dataset_utils.get_semantic_and_panoptic_label(
                self._dataset_info, original_label, self._ignore_label))
        sample[common.GT_SEMANTIC_RAW] = tf.squeeze(orig_semantic_label, axis=2)
        if not self._only_semantic_annotations:
          sample[common.GT_PANOPTIC_RAW] = tf.squeeze(original_label, axis=2)
        # Squeeze only the channel axis, like every other label above. A bare
        # tf.squeeze would also drop a height or width of size 1.
        sample[common.GT_IS_CROWD_RAW] = tf.squeeze(orig_crowd_region, axis=2)
        if next_label is not None:
          sample[common.GT_NEXT_PANOPTIC_RAW] = tf.squeeze(
              original_next_label, axis=2)
    return sample

  def _center_padding(self):
    """Returns the padding of the canvas the center Gaussians are drawn on.

    Returns:
      A tuple (pad_begin, pad_total) of Python integers. pad_begin is the
      padding added before each spatial axis, pad_total is the padding added
      to each spatial axis in total (before plus after).
    """
    pad_begin = int(round(3 * self._sigma + 1))
    pad_end = int(round(3 * self._sigma + 2))
    return pad_begin, pad_begin + pad_end

  def _scatter_gaussian(self, center, center_x, center_y):
    """Draws the Gaussian kernel for one center onto the padded canvas.

    The canvas is padded by `3 * sigma + 1` at the beginning of each axis,
    which is also where the Gaussian peaks inside its kernel. A center given
    in unpadded coordinates is therefore the upper-left corner of the kernel
    window in padded coordinates.

    Args:
      center: A tf.Tensor of shape [height + pad_total, width + pad_total].
      center_x: An integer scalar tensor, x coordinate of the center in
        unpadded image coordinates.
      center_y: An integer scalar tensor, y coordinate of the center in
        unpadded image coordinates.

    Returns:
      The canvas with the element-wise maximum of its previous content and
      the Gaussian kernel placed at the given center.
    """
    upper_left = center_x, center_y
    bottom_right = (upper_left[0] + self._gaussian_size,
                    upper_left[1] + self._gaussian_size)

    indices_x, indices_y = tf.meshgrid(
        tf.range(upper_left[0], bottom_right[0]),
        tf.range(upper_left[1], bottom_right[1]))
    # [gaussian_size**2, 2] list of (y, x) positions, in the same row-major
    # order as the flattened kernel in self._gaussian.
    indices = tf.transpose(
        tf.stack([tf.reshape(indices_y, [-1]),
                  tf.reshape(indices_x, [-1])]))

    return tf.tensor_scatter_nd_max(
        center, indices, self._gaussian, name='center_scatter')

  def _generate_thing_id_mask_and_class(self,
                                        panoptic_label,
                                        non_crowd_things):
    """Generates the ground-truth thing-ID masks and their class labels.

    It computes thing-ID mask and class with unique ID for each thing instance.
    `thing_id` indicates the number of unique thing-ID to each instance in an
    image, starting the counting from 0. Each pixel in thing_id_mask is labeled
    with the corresponding thing-ID.

    Args:
      panoptic_label: A tf.Tensor of shape [height, width, 1].
      non_crowd_things: A tf.Tensor of shape [height, width, 1], indicating
        non-crowd and thing-class regions.

    Returns:
      thing_id_mask: A tf.Tensor of shape [height, width, 1]. It assigns each
        non-crowd thing instance a unique mask-ID label, starting from 0.
        Unassigned pixels are set to -1.
      thing_id_class: A tf.Tensor of shape [max_thing_id]. It contains semantic
        ID of each instance assigned to thing_id_mask. The remaining
        (max_thing_id - num_things) elements are set to -1.

    Raises:
      The checks below are `tf.debugging.Assert` ops, so a violation surfaces
      as a TensorFlow assertion failure (tf.errors.InvalidArgumentError) when
      the op runs, not as a Python ValueError:
        - the region of a kept panoptic ID contains stuff or crowd pixels.
        - thing_count is greater or equal to self._max_thing_id.
    """
    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))
    thing_id_mask = -tf.ones_like(panoptic_label)
    thing_id_class = -tf.ones(self._max_thing_id)
    thing_count = 0
    for panoptic_id in unique_ids:
      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
      # Skip IDs of the ignore label and IDs whose instance part is 0, i.e.
      # panoptic_id == semantic_id * panoptic_label_divisor (presumably stuff
      # classes and crowd regions).
      if (semantic_id == self._dataset_info['ignore_label'] or
          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
        continue

      assert_stuff_crowd = tf.debugging.Assert(
          tf.reduce_all(non_crowd_things[panoptic_label == panoptic_id]),
          ['thing-ID mask here must not contain stuff or crowd region.'])
      # Route panoptic_id through the assertion so that the check is executed
      # before the ID is used.
      with tf.control_dependencies([assert_stuff_crowd]):
        panoptic_id = tf.identity(panoptic_id)

      thing_id_mask = tf.where(panoptic_label == panoptic_id,
                               thing_count, thing_id_mask)

      assert_thing_count = tf.debugging.Assert(
          thing_count < self._max_thing_id,
          ['thing_count must be smaller than self._max_thing_id.'])
      with tf.control_dependencies([assert_thing_count]):
        thing_count = tf.identity(thing_count)

      thing_id_class = tf.tensor_scatter_nd_update(
          thing_id_class, [[thing_count]], [semantic_id])
      thing_count += 1
    return thing_id_mask, thing_id_class

  def _generate_prev_centers_with_noise(self,
                                        panoptic_label,
                                        offset_noise_factor=0.05,
                                        false_positive_rate=0.2,
                                        false_positive_noise_factor=0.05):
    """Generates noisy center predictions for the previous frame.

    Args:
      panoptic_label: A tf.Tensor of shape [height, width, 1].
      offset_noise_factor: An optional float defining the maximum fraction of
        the object size that is used to displace the previous center.
      false_positive_rate: An optional float indicating at which probability
        false positives should be added.
      false_positive_noise_factor: An optional float defining the maximum
        fraction of the object size that is used to displace the false positive
        center.

    Returns:
      A tuple (center, unique_ids, ids_to_center_x, ids_to_center_y):
        - center: A tf.Tensor of shape [height, width, 1], the noisy center
          heatmap.
        - unique_ids: A tf.Tensor of shape [N] with the unique panoptic IDs of
          `panoptic_label`.
        - ids_to_center_x: An int32 tf.Tensor of shape [N], the noisy center x
          coordinate for each entry of unique_ids (0 for skipped IDs).
        - ids_to_center_y: An int32 tf.Tensor of shape [N], the noisy center y
          coordinate for each entry of unique_ids (0 for skipped IDs).
    """
    height = tf.shape(panoptic_label)[0]
    width = tf.shape(panoptic_label)[1]

    # The canvas is padded so that a full Gaussian kernel fits around every
    # pixel of the image; the padding is cropped away again at the end.
    center_pad_begin, center_pad = self._center_padding()

    center = tf.zeros((height + center_pad, width + center_pad))
    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))
    ids_to_center_x = tf.zeros_like(unique_ids, dtype=tf.int32)
    ids_to_center_y = tf.zeros_like(unique_ids, dtype=tf.int32)

    for panoptic_id in unique_ids:
      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
      # Skip IDs of the ignore label and IDs whose instance part is 0
      # (presumably stuff classes and crowd regions).
      if (semantic_id == self._dataset_info['ignore_label'] or
          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
        continue

      # Rows of mask_index are the coordinates of the instance pixels along
      # each axis of panoptic_label (row 0 is y, row 1 is x).
      mask_index = tf.cast(
          tf.transpose(tf.where(panoptic_label == panoptic_id)), tf.float32)
      centers = tf.reduce_mean(mask_index, axis=1)
      bbox_size = (
          tf.reduce_max(mask_index, axis=1) - tf.reduce_min(mask_index, axis=1))

      # Displace the mass center by Gaussian noise scaled by the extent of
      # the instance's bounding box.
      center_y = (
          centers[0] + tf.random.normal([], dtype=tf.float32) *
          offset_noise_factor * bbox_size[0])
      center_x = (
          centers[1] + tf.random.normal([], dtype=tf.float32) *
          offset_noise_factor * bbox_size[1])

      # Round to pixel coordinates and clip to the image.
      center_x = tf.minimum(
          tf.maximum(tf.cast(tf.round(center_x), tf.int32), 0), width - 1)
      center_y = tf.minimum(
          tf.maximum(tf.cast(tf.round(center_y), tf.int32), 0), height - 1)

      id_index = tf.where(tf.equal(panoptic_id, unique_ids))
      ids_to_center_x = tf.tensor_scatter_nd_update(
          ids_to_center_x, id_index, tf.expand_dims(center_x, axis=0))
      ids_to_center_y = tf.tensor_scatter_nd_update(
          ids_to_center_y, id_index, tf.expand_dims(center_y, axis=0))

      center = self._scatter_gaussian(center, center_x, center_y)

      # Candidate false positive: the noisy center displaced a second time.
      center_y = (
          tf.cast(center_y, dtype=tf.float32) +
          tf.random.normal([], dtype=tf.float32) * false_positive_noise_factor *
          bbox_size[0])
      center_x = (
          tf.cast(center_x, dtype=tf.float32) +
          tf.random.normal([], dtype=tf.float32) * false_positive_noise_factor *
          bbox_size[1])

      center_x = tf.minimum(
          tf.maximum(tf.cast(tf.round(center_x), tf.int32), 0), width - 1)
      center_y = tf.minimum(
          tf.maximum(tf.cast(tf.round(center_y), tf.int32), 0), height - 1)

      # Blend in the false positive with probability false_positive_rate: the
      # 0/1 factor either keeps the canvas as is or switches to the canvas
      # with the extra Gaussian.
      center = center + tf.cast(
          tf.random.uniform([], dtype=tf.float32) < false_positive_rate,
          tf.float32) * (
              self._scatter_gaussian(center, center_x, center_y) - center)

    center = center[center_pad_begin:(center_pad_begin + height),
                    center_pad_begin:(center_pad_begin + width)]
    center = tf.expand_dims(center, -1)
    return center, unique_ids, ids_to_center_x, ids_to_center_y

  def _generate_gt_center_and_offset(self,
                                     panoptic_label,
                                     semantic_weights,
                                     prev_panoptic_label=None,
                                     next_panoptic_label=None):
    """Generates the ground-truth center and offset from the panoptic labels.

    Additionally, the per-pixel weights for the semantic branch are increased
    for small instances. In case, prev_panoptic_label is passed, it also
    computes the previous center heatmap with random noise and the offsets
    between center maps.

    Args:
      panoptic_label: A tf.Tensor of shape [height, width, 1].
      semantic_weights: A tf.Tensor of shape [height, width, 1].
      prev_panoptic_label: An optional tf.Tensor of shape [height, width, 1].
      next_panoptic_label: An optional tf.Tensor of shape [height, width, 1].

    Returns:
      A tuple (center, offsets, weights, prev_center, frame_offset*,
      next_offset*) with each being a tf.Tensor of shape [height, width, 1 (2*)].
      Offsets store (y, x) in their last dimension.
      If prev_panoptic_label is None, prev_center and frame_offset are None.
      If next_panoptic_label is None, next_offset is None.
    """
    height = tf.shape(panoptic_label)[0]
    width = tf.shape(panoptic_label)[1]

    # The canvas is padded so that a full Gaussian kernel fits around every
    # pixel of the image; the padding is cropped away again at the end.
    center_pad_begin, center_pad = self._center_padding()

    center = tf.zeros((height + center_pad, width + center_pad))
    offset_x = tf.zeros((height, width, 1), dtype=tf.int32)
    offset_y = tf.zeros((height, width, 1), dtype=tf.int32)
    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))

    prev_center = None
    frame_offsets = None
    # The tensors below are created unconditionally, even when no previous or
    # next frame is given, so that every tensor updated inside the loop over
    # `unique_ids` already exists before the loop starts.
    frame_offset_x = tf.zeros((height, width, 1), dtype=tf.int32)
    frame_offset_y = tf.zeros((height, width, 1), dtype=tf.int32)

    next_offset = None
    next_offset_y = tf.zeros((height, width, 1), dtype=tf.int32)
    next_offset_x = tf.zeros((height, width, 1), dtype=tf.int32)

    if prev_panoptic_label is not None:
      (prev_center, prev_unique_ids, prev_centers_x, prev_centers_y
      ) = self._generate_prev_centers_with_noise(prev_panoptic_label)

    for panoptic_id in unique_ids:
      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
      # Skip IDs of the ignore label and IDs whose instance part is 0
      # (presumably stuff classes and crowd regions).
      if (semantic_id == self._dataset_info['ignore_label'] or
          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
        continue

      # Rows of mask_index are the coordinates of the instance pixels along
      # each axis of panoptic_label (row 0 is y, row 1 is x).
      mask_index = tf.transpose(tf.where(panoptic_label == panoptic_id))
      mask_y_index = mask_index[0]
      mask_x_index = mask_index[1]

      next_mask_index = None
      next_mask_y_index = None
      next_mask_x_index = None
      if next_panoptic_label is not None:
        next_mask_index = tf.transpose(
            tf.where(next_panoptic_label == panoptic_id))
        next_mask_y_index = next_mask_index[0]
        next_mask_x_index = next_mask_index[1]

      # Number of pixels of the instance. tf.size yields a scalar, which is
      # what a conditional needs; tf.shape would yield a one-element vector
      # holding the same number.
      instance_area = tf.size(mask_x_index)
      if instance_area < self._instance_area_threshold:
        semantic_weights = tf.where(panoptic_label == panoptic_id,
                                    self._small_instance_weight,
                                    semantic_weights)

      centers = tf.reduce_mean(tf.cast(mask_index, tf.float32), axis=1)

      center_x = tf.cast(tf.round(centers[1]), tf.int32)
      center_y = tf.cast(tf.round(centers[0]), tf.int32)

      center = self._scatter_gaussian(center, center_x, center_y)
      # Every pixel of the instance stores the offset to its instance center.
      offset_y = tf.tensor_scatter_nd_update(
          offset_y,
          tf.transpose(mask_index),
          center_y - tf.cast(mask_y_index, tf.int32),
          name='offset_y_scatter')
      offset_x = tf.tensor_scatter_nd_update(
          offset_x,
          tf.transpose(mask_index),
          center_x - tf.cast(mask_x_index, tf.int32),
          name='offset_x_scatter')
      if prev_panoptic_label is not None:
        mask = tf.equal(prev_unique_ids, panoptic_id)
        # Only instances that also exist in the previous frame get an offset
        # from their current pixels to the previous (noisy) center.
        if tf.math.count_nonzero(mask) > 0:
          prev_center_x = prev_centers_x[mask]
          prev_center_y = prev_centers_y[mask]

          frame_offset_y = tf.tensor_scatter_nd_update(
              frame_offset_y,
              tf.transpose(mask_index),
              prev_center_y - tf.cast(mask_y_index, tf.int32),
              name='frame_offset_y_scatter')
          frame_offset_x = tf.tensor_scatter_nd_update(
              frame_offset_x,
              tf.transpose(mask_index),
              prev_center_x - tf.cast(mask_x_index, tf.int32),
              name='frame_offset_x_scatter')
      if next_panoptic_label is not None:
        # Pixels of the same instance in the next frame store the offset to
        # the center of the instance in the current frame.
        next_offset_y = tf.tensor_scatter_nd_update(
            next_offset_y,
            tf.transpose(next_mask_index),
            center_y - tf.cast(next_mask_y_index, tf.int32),
            name='next_offset_y_scatter')
        next_offset_x = tf.tensor_scatter_nd_update(
            next_offset_x,
            tf.transpose(next_mask_index),
            center_x - tf.cast(next_mask_x_index, tf.int32),
            name='next_offset_x_scatter')

    offset = tf.concat([offset_y, offset_x], axis=2)
    center = center[center_pad_begin:(center_pad_begin + height),
                    center_pad_begin:(center_pad_begin + width)]
    center = tf.expand_dims(center, -1)
    if prev_panoptic_label is not None:
      frame_offsets = tf.concat([frame_offset_y, frame_offset_x], axis=2)
    if next_panoptic_label is not None:
      next_offset = tf.concat([next_offset_y, next_offset_x], axis=2)
    return (center, offset, semantic_weights, prev_center, frame_offsets,
            next_offset)
|
|