Spaces:

akhaliq
/

deeplab2

Runtime error

deeplab2 / data /sample_generator.py

akhaliq3

spaces demo

506da10 almost 4 years ago

29.8 kB

	# coding=utf-8
	# Copyright 2021 The Deeplab2 Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""This file contains code to get a sample from a dataset."""

	import functools

	import numpy as np
	import tensorflow as tf

	from deeplab2 import common
	from deeplab2.data import dataset_utils
	from deeplab2.data.preprocessing import input_preprocessing as preprocessing


	def _compute_gaussian_from_std(sigma):
	  """Computes an unnormalized 2D Gaussian kernel and its side length.

	  The kernel is a square of side `int(6 * sigma + 3)`. The Gaussian is centered
	  at coordinate `3 * sigma + 1` along both axes, where its value is 1.0.

	  Args:
	    sigma: A non-zero number, the standard deviation of the Gaussian.

	  Returns:
	    A tuple (gaussian, size): gaussian is a float64 np.ndarray of shape
	    [size, size] and size is the integer side length of the kernel.
	  """
	  size = int(6 * sigma + 3)
	  # `np.float` was only an alias of the builtin float (float64) and has been
	  # removed from NumPy 1.24 onwards, so spell the dtype explicitly.
	  x = np.arange(size, dtype=np.float64)
	  # Column vector; broadcasting x against y below yields the 2D grid.
	  y = x[:, np.newaxis]
	  x0, y0 = 3 * sigma + 1, 3 * sigma + 1
	  return np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2)), size


	class PanopticSampleGenerator:
	  """This class generates samples from images and labels.

	  An instance is callable: it takes a dictionary of dataset tensors and returns
	  a dictionary holding the preprocessed image together with the ground-truth
	  targets and loss weights that are built in `call`.
	  """

	  def __init__(self,
	               dataset_info,
	               is_training,
	               crop_size,
	               min_resize_value=None,
	               max_resize_value=None,
	               resize_factor=None,
	               min_scale_factor=1.,
	               max_scale_factor=1.,
	               scale_factor_step_size=0,
	               autoaugment_policy_name=None,
	               only_semantic_annotations=False,
	               thing_id_mask_annotations=False,
	               max_thing_id=128,
	               sigma=8,
	               focus_small_instances=None):
	    """Initializes the panoptic segmentation generator.

	    Args:
	      dataset_info: A dictionary with the following keys.
	        - `name`: String, dataset name.
	        - `ignore_label`: Integer, ignore label.
	        - `class_has_instances_list`: A list of integers indicating which
	          class has instance annotations.
	        - `panoptic_label_divisor`: Integer, panoptic label divisor.
	        - `num_classes`: Integer, number of classes.
	        - `is_video_dataset`: Boolean, is video dataset or not.
	      is_training: Boolean, is training mode or not.
	      crop_size: Image crop size [height, width].
	      min_resize_value: A 2-tuple of (height, width), desired minimum value
	        after resize. If a single element is given, then height and width share
	        the same value. None, empty or having 0 indicates no minimum value will
	        be used.
	      max_resize_value: A 2-tuple of (height, width), maximum allowed value
	        after resize. If a single element is given, then height and width
	        share the same value. None, empty or having 0 indicates no maximum
	        value will be used.
	      resize_factor: Resized dimensions are multiple of factor plus one.
	      min_scale_factor: Minimum scale factor for random scale augmentation.
	      max_scale_factor: Maximum scale factor for random scale augmentation.
	      scale_factor_step_size: The step size from min scale factor to max scale
	        factor. The input is randomly scaled based on the value of
	        (min_scale_factor, max_scale_factor, scale_factor_step_size).
	      autoaugment_policy_name: String, autoaugment policy name. See
	        autoaugment_policy.py for available policies.
	      only_semantic_annotations: An optional flag indicating whether the model
	        needs only semantic annotations (default: False).
	      thing_id_mask_annotations: An optional flag indicating whether the model
	        needs thing_id_mask annotations. When `thing_id_mask_annotations` is
	        True, we will additionally return mask annotation for each `thing`
	        instance, encoded with a unique thing_id. This ground-truth annotation
	        could be used to learn a better segmentation mask for each instance.
	        `thing_id` indicates the number of unique thing-ID to each instance in
	        an image, starting the counting from 0 (default: False).
	      max_thing_id: The maximum number of possible thing instances per image. It
	        is used together when thing_id_mask_annotations = True, representing the
	        maximum thing ID encoded in the thing_id_mask. (default: 128).
	      sigma: The standard deviation of the Gaussian used to encode the center
	        keypoint (default: 8).
	      focus_small_instances: An optional dict that defines how to deal with
	        small instances (default: None):
	        -`threshold`: An integer defining the threshold pixel number for an
	          instance to be considered small.
	        -`weight`: A number that defines the loss weight for small instances.
	    """
	    self._dataset_info = dataset_info
	    self._ignore_label = self._dataset_info['ignore_label']
	    self._only_semantic_annotations = only_semantic_annotations
	    self._sigma = sigma
	    # Defaults that leave the semantic loss weights untouched; overridden below
	    # when `focus_small_instances` is given.
	    self._instance_area_threshold = 0
	    self._small_instance_weight = 1.0
	    self._thing_id_mask_annotations = thing_id_mask_annotations
	    self._max_thing_id = max_thing_id
	    self._is_training = is_training
	    # Bind every static preprocessing option once; `call` then only supplies the
	    # per-sample tensors.
	    self._preprocessing_fn = functools.partial(
	        preprocessing.preprocess_image_and_label,
	        crop_height=crop_size[0],
	        crop_width=crop_size[1],
	        min_resize_value=min_resize_value,
	        max_resize_value=max_resize_value,
	        resize_factor=resize_factor,
	        min_scale_factor=min_scale_factor,
	        max_scale_factor=max_scale_factor,
	        scale_factor_step_size=scale_factor_step_size,
	        autoaugment_policy_name=autoaugment_policy_name,
	        ignore_label=self._ignore_label *
	        self._dataset_info['panoptic_label_divisor'],
	        is_training=self._is_training)

	    if focus_small_instances is not None:
	      self._instance_area_threshold = focus_small_instances['threshold']
	      self._small_instance_weight = focus_small_instances['weight']

	    self._gaussian, self._gaussian_size = _compute_gaussian_from_std(
	        self._sigma)
	    # Flattened so it can be used directly as the `updates` argument of the
	    # scatter op that draws a center onto the heatmap.
	    self._gaussian = tf.cast(tf.reshape(self._gaussian, [-1]), tf.float32)

	  def __call__(self, sample_dict):
	    """Gets a sample.

	    Args:
	      sample_dict: A dictionary with the following keys and values:
	        - `image`: A tensor of shape [image_height, image_width, 3].
	        - `image_name`: String, image name.
	        - `label`: A tensor of shape [label_height, label_width, 1] or None.
	        - `height`: An integer specifying the height of the image.
	        - `width`: An integer specifying the width of the image.
	        - `sequence`: An optional string specifying the sequence name.
	        - `prev_image`: An optional tensor of the same shape as `image`.
	        - `prev_label`: An optional tensor of the same shape as `label`.
	        - `next_image`: An optional next-frame tensor of the shape of `image`.
	        - `next_label`: An optional next-frame tensor of the shape of `label`.

	    Returns:
	      sample: A dictionary storing required data for panoptic segmentation.
	    """
	    return self.call(**sample_dict)

	  def call(self,
	           image,
	           image_name,
	           label,
	           height,
	           width,
	           sequence='',
	           prev_image=None,
	           prev_label=None,
	           next_image=None,
	           next_label=None):
	    """Gets a sample.

	    Args:
	      image: A tensor of shape [image_height, image_width, 3].
	      image_name: String, image name.
	      label: A tensor of shape [label_height, label_width, 1] or None.
	      height: An integer specifying the height of the image.
	      width: An integer specifying the width of the image.
	      sequence: An optional string specifying the sequence name.
	      prev_image: An optional tensor of shape [image_height, image_width, 3].
	      prev_label: An optional tensor of shape [label_height, label_width, 1].
	      next_image: An optional tensor of shape [image_height, image_width, 3].
	      next_label: An optional tensor of shape [label_height, label_width, 1].

	    Returns:
	      sample: A dictionary storing required data for panoptic segmentation.

	    Raises:
	      ValueError: An error occurs when the label shape is invalid.
	      NotImplementedError: An error occurs when thing_id_mask_annotations comes
	        together with prev_image, prev_label, next_image or next_label, which
	        is not currently implemented.
	    """
	    if label is not None:
	      label.get_shape().assert_is_compatible_with(
	          tf.TensorShape([None, None, 1]))
	      # Keep the unprocessed label; it is exported for evaluation below.
	      original_label = tf.cast(label, dtype=tf.int32, name='original_label')
	    if next_label is not None:
	      original_next_label = tf.cast(
	          next_label, dtype=tf.int32, name='original_next_label')
	    # Reusing the preprocessing function for both next and prev samples.
	    if next_image is not None:
	      resized_image, image, label, next_image, next_label = (
	          self._preprocessing_fn(
	              image, label, prev_image=next_image, prev_label=next_label))
	    else:
	      resized_image, image, label, prev_image, prev_label = (
	          self._preprocessing_fn(
	              image, label, prev_image=prev_image, prev_label=prev_label))
	    sample = {
	        common.IMAGE: image
	    }
	    # A neighbouring frame, when present, is stacked along the channel axis.
	    if prev_image is not None:
	      sample[common.IMAGE] = tf.concat([image, prev_image], axis=2)
	    if next_image is not None:
	      sample[common.NEXT_IMAGE] = next_image
	      sample[common.IMAGE] = tf.concat([image, next_image], axis=2)
	    if label is not None:
	      # Panoptic label for crowd regions will be ignore_label.
	      semantic_label, panoptic_label, thing_mask, crowd_region = (
	          dataset_utils.get_semantic_and_panoptic_label(
	              self._dataset_info, label, self._ignore_label))
	      sample[common.GT_SEMANTIC_KEY] = tf.squeeze(semantic_label, axis=2)
	      semantic_weights = tf.ones_like(semantic_label, dtype=tf.float32)
	      sample[common.SEMANTIC_LOSS_WEIGHT_KEY] = tf.squeeze(
	          semantic_weights, axis=2)
	      sample[common.GT_IS_CROWD] = tf.squeeze(crowd_region, axis=2)

	      if not self._only_semantic_annotations:
	        # The sample will have the original label including crowd regions.
	        sample[common.GT_PANOPTIC_KEY] = tf.squeeze(label, axis=2)
	        # Compute center loss for all non-crowd and non-ignore pixels.
	        non_crowd_and_non_ignore_regions = tf.logical_and(
	            tf.logical_not(crowd_region),
	            tf.not_equal(semantic_label, self._ignore_label))
	        sample[common.CENTER_LOSS_WEIGHT_KEY] = tf.squeeze(tf.cast(
	            non_crowd_and_non_ignore_regions, tf.float32), axis=2)
	        # Compute regression loss only for thing pixels that are not crowd.
	        non_crowd_things = tf.logical_and(
	            tf.logical_not(crowd_region), thing_mask)
	        sample[common.REGRESSION_LOSS_WEIGHT_KEY] = tf.squeeze(tf.cast(
	            non_crowd_things, tf.float32), axis=2)

	        prev_panoptic_label = None
	        next_panoptic_label = None
	        if prev_label is not None:
	          _, prev_panoptic_label, _, _ = (
	              dataset_utils.get_semantic_and_panoptic_label(
	                  self._dataset_info, prev_label, self._ignore_label))
	        if next_label is not None:
	          _, next_panoptic_label, _, _ = (
	              dataset_utils.get_semantic_and_panoptic_label(
	                  self._dataset_info, next_label, self._ignore_label))
	        (sample[common.GT_INSTANCE_CENTER_KEY],
	         sample[common.GT_INSTANCE_REGRESSION_KEY],
	         sample[common.SEMANTIC_LOSS_WEIGHT_KEY],
	         prev_center_map,
	         frame_center_offsets,
	         next_offset) = self._generate_gt_center_and_offset(
	             panoptic_label, semantic_weights, prev_panoptic_label,
	             next_panoptic_label)

	        sample[common.GT_INSTANCE_REGRESSION_KEY] = tf.cast(
	            sample[common.GT_INSTANCE_REGRESSION_KEY], tf.float32)

	        if next_label is not None:
	          sample[common.GT_NEXT_INSTANCE_REGRESSION_KEY] = tf.cast(
	              next_offset, tf.float32)
	          # Only pixels with a non-zero next-frame offset get a loss weight.
	          sample[common.NEXT_REGRESSION_LOSS_WEIGHT_KEY] = tf.cast(
	              tf.greater(tf.reduce_sum(tf.abs(next_offset), axis=2), 0),
	              tf.float32)

	        # Only squeeze center map and semantic loss weights, as regression map
	        # has two channels (x and y offsets).
	        sample[common.GT_INSTANCE_CENTER_KEY] = tf.squeeze(
	            sample[common.GT_INSTANCE_CENTER_KEY], axis=2)
	        sample[common.SEMANTIC_LOSS_WEIGHT_KEY] = tf.squeeze(
	            sample[common.SEMANTIC_LOSS_WEIGHT_KEY], axis=2)

	        if prev_label is not None:
	          sample[common.GT_FRAME_OFFSET_KEY] = frame_center_offsets
	          sample[common.GT_FRAME_OFFSET_KEY] = tf.cast(
	              sample[common.GT_FRAME_OFFSET_KEY], tf.float32)
	          # A pixel carries a frame offset if either component is non-zero.
	          frame_offsets_present = tf.logical_or(
	              tf.not_equal(frame_center_offsets[..., 0], 0),
	              tf.not_equal(frame_center_offsets[..., 1], 0))
	          sample[common.FRAME_REGRESSION_LOSS_WEIGHT_KEY] = tf.cast(
	              frame_offsets_present, tf.float32)
	          if self._is_training:
	            # During training the noisy previous-frame center heatmap is
	            # appended to the input as an extra channel.
	            sample[common.IMAGE] = tf.concat(
	                [sample[common.IMAGE], prev_center_map], axis=2)

	        if self._thing_id_mask_annotations:
	          if any([prev_image is not None,
	                  prev_label is not None,
	                  next_image is not None,
	                  next_label is not None]):
	            raise NotImplementedError(
	                'Current implementation of Max-DeepLab does not support '
	                'prev_image, prev_label, next_image, or next_label.')
	          thing_id_mask, thing_id_class = (
	              self._generate_thing_id_mask_and_class(
	                  panoptic_label, non_crowd_things))
	          sample[common.GT_THING_ID_MASK_KEY] = tf.squeeze(
	              thing_id_mask, axis=2)
	          sample[common.GT_THING_ID_CLASS_KEY] = thing_id_class

	    if not self._is_training:
	      # Resized image is only used during visualization.
	      sample[common.RESIZED_IMAGE] = resized_image
	      sample[common.IMAGE_NAME] = image_name
	      sample[common.GT_SIZE_RAW] = tf.stack([height, width], axis=0)
	      if self._dataset_info['is_video_dataset']:
	        sample[common.SEQUENCE_ID] = sequence
	      # Keep original labels for evaluation.
	      if label is not None:
	        orig_semantic_label, _, _, orig_crowd_region = (
	            dataset_utils.get_semantic_and_panoptic_label(
	                self._dataset_info, original_label, self._ignore_label))
	        sample[common.GT_SEMANTIC_RAW] = tf.squeeze(orig_semantic_label, axis=2)
	        if not self._only_semantic_annotations:
	          sample[common.GT_PANOPTIC_RAW] = tf.squeeze(original_label, axis=2)
	          # Squeeze only the channel axis, like every other label above, so
	          # that a spatial dimension of size 1 is never dropped by accident.
	          sample[common.GT_IS_CROWD_RAW] = tf.squeeze(
	              orig_crowd_region, axis=2)
	          if next_label is not None:
	            sample[common.GT_NEXT_PANOPTIC_RAW] = tf.squeeze(
	                original_next_label, axis=2)
	    return sample

	  def _generate_thing_id_mask_and_class(self,
	                                        panoptic_label,
	                                        non_crowd_things):
	    """Generates the ground-truth thing-ID masks and their class labels.

	    It computes thing-ID mask and class with unique ID for each thing instance.
	    `thing_id` indicates the number of unique thing-ID to each instance in an
	    image, starting the counting from 0. Each pixel in thing_id_mask is labeled
	    with the corresponding thing-ID.

	    Args:
	      panoptic_label: A tf.Tensor of shape [height, width, 1].
	      non_crowd_things: A tf.Tensor of shape [height, width, 1], indicating
	        non-crowd and thing-class regions.

	    Returns:
	      thing_id_mask: A tf.Tensor of shape [height, width, 1]. It assigns each
	        non-crowd thing instance a unique mask-ID label, starting from 0.
	        Unassigned pixels are set to -1.
	      thing_id_class: A tf.Tensor of shape [max_thing_id]. It contains semantic
	        ID of each instance assigned to thing_id_mask. The remaining
	        (max_thing_id - num_things) elements are set to -1.

	    Raises:
	      tf.errors.InvalidArgumentError: Raised by tf.debugging.Assert when the
	        ops are executed, if the thing-ID mask contains stuff or crowd region,
	        or if thing_count is greater or equal to self._max_thing_id.
	    """
	    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))
	    thing_id_mask = -tf.ones_like(panoptic_label)
	    thing_id_class = -tf.ones(self._max_thing_id)
	    thing_count = 0
	    for panoptic_id in unique_ids:
	      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
	      # Filter out IDs that are not thing instances (i.e., IDs for ignore_label,
	      # stuff classes or crowd). Stuff classes and crowd regions both have IDs
	      # of the form panoptic_id = semantic_id * label_divisor (i.e., instance id
	      # = 0)
	      if (semantic_id == self._dataset_info['ignore_label'] or
	          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
	        continue

	      assert_stuff_crowd = tf.debugging.Assert(
	          tf.reduce_all(non_crowd_things[panoptic_label == panoptic_id]),
	          ['thing-ID mask here must not contain stuff or crowd region.'])
	      # Routing the ID through tf.identity under the control dependency makes
	      # the ops below depend on the assertion.
	      with tf.control_dependencies([assert_stuff_crowd]):
	        panoptic_id = tf.identity(panoptic_id)

	      thing_id_mask = tf.where(panoptic_label == panoptic_id,
	                               thing_count, thing_id_mask)

	      assert_thing_count = tf.debugging.Assert(
	          thing_count < self._max_thing_id,
	          ['thing_count must be smaller than self._max_thing_id.'])
	      with tf.control_dependencies([assert_thing_count]):
	        thing_count = tf.identity(thing_count)

	      thing_id_class = tf.tensor_scatter_nd_update(
	          thing_id_class, [[thing_count]], [semantic_id])
	      thing_count += 1
	    return thing_id_mask, thing_id_class

	  def _add_center_gaussian(self, center, center_x, center_y):
	    """Draws the precomputed Gaussian for one center onto a padded heatmap.

	    Args:
	      center: A 2D float tf.Tensor, the center heatmap padded as done by the
	        callers in this class.
	      center_x: A scalar int32 tf.Tensor, x coordinate of the center in the
	        unpadded image.
	      center_y: A scalar int32 tf.Tensor, y coordinate of the center in the
	        unpadded image.

	    Returns:
	      The heatmap after taking the element-wise maximum with the Gaussian
	      window.
	    """
	    # Due to the padding with center_pad_begin in center, the computed center
	    # becomes the upper left corner in the center tensor.
	    indices_x, indices_y = tf.meshgrid(
	        tf.range(center_x, center_x + self._gaussian_size),
	        tf.range(center_y, center_y + self._gaussian_size))
	    indices = tf.transpose(
	        tf.stack([tf.reshape(indices_y, [-1]),
	                  tf.reshape(indices_x, [-1])]))

	    return tf.tensor_scatter_nd_max(
	        center, indices, self._gaussian, name='center_scatter')

	  def _generate_prev_centers_with_noise(self,
	                                        panoptic_label,
	                                        offset_noise_factor=0.05,
	                                        false_positive_rate=0.2,
	                                        false_positive_noise_factor=0.05):
	    """Generates noisy center predictions for the previous frame.

	    Args:
	      panoptic_label: A tf.Tensor of shape [height, width, 1].
	      offset_noise_factor: An optional float defining the maximum fraction of
	        the object size that is used to displace the previous center.
	      false_positive_rate: An optional float indicating at which probability
	        false positives should be added.
	      false_positive_noise_factor: An optional float defining the maximum
	        fraction of the object size that is used to displace the false positive
	        center.

	    Returns:
	      A tuple (center, unique_ids, ids_to_center_x, ids_to_center_y):
	        - center: A tf.Tensor of shape [height, width, 1], the noisy center
	          heatmap.
	        - unique_ids: A 1D tf.Tensor with the N unique IDs found in
	          panoptic_label.
	        - ids_to_center_x, ids_to_center_y: int32 tf.Tensors of shape [N],
	          aligned with unique_ids, holding the noisy center coordinates.
	          Entries of IDs that were filtered out stay 0.
	    """
	    height = tf.shape(panoptic_label)[0]
	    width = tf.shape(panoptic_label)[1]

	    # Pad center to make boundary handling easier.
	    center_pad_begin = int(round(3 * self._sigma + 1))
	    center_pad_end = int(round(3 * self._sigma + 2))
	    center_pad = center_pad_begin + center_pad_end

	    center = tf.zeros((height + center_pad, width + center_pad))
	    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))
	    ids_to_center_x = tf.zeros_like(unique_ids, dtype=tf.int32)
	    ids_to_center_y = tf.zeros_like(unique_ids, dtype=tf.int32)

	    for panoptic_id in unique_ids:
	      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
	      # Filter out IDs that should be ignored, are stuff classes or crowd.
	      # Stuff classes and crowd regions both have IDs of the form panoptic_id =
	      # semantic_id * label_divisor
	      if (semantic_id == self._dataset_info['ignore_label'] or
	          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
	        continue

	      # Convert [[y0, x0, 0], ...] to [[y0, ...], [x0, ...], [0, ...]].
	      mask_index = tf.cast(
	          tf.transpose(tf.where(panoptic_label == panoptic_id)), tf.float32)
	      centers = tf.reduce_mean(mask_index, axis=1)
	      bbox_size = (
	          tf.reduce_max(mask_index, axis=1) - tf.reduce_min(mask_index, axis=1))

	      # Add noise.
	      center_y = (
	          centers[0] + tf.random.normal([], dtype=tf.float32) *
	          offset_noise_factor * bbox_size[0])
	      center_x = (
	          centers[1] + tf.random.normal([], dtype=tf.float32) *
	          offset_noise_factor * bbox_size[1])

	      # Clip the displaced center to the image.
	      center_x = tf.minimum(
	          tf.maximum(tf.cast(tf.round(center_x), tf.int32), 0), width - 1)
	      center_y = tf.minimum(
	          tf.maximum(tf.cast(tf.round(center_y), tf.int32), 0), height - 1)

	      id_index = tf.where(tf.equal(panoptic_id, unique_ids))
	      ids_to_center_x = tf.tensor_scatter_nd_update(
	          ids_to_center_x, id_index, tf.expand_dims(center_x, axis=0))
	      ids_to_center_y = tf.tensor_scatter_nd_update(
	          ids_to_center_y, id_index, tf.expand_dims(center_y, axis=0))

	      center = self._add_center_gaussian(center, center_x, center_y)
	      # Generate false positives.
	      center_y = (
	          tf.cast(center_y, dtype=tf.float32) +
	          tf.random.normal([], dtype=tf.float32) * false_positive_noise_factor *
	          bbox_size[0])
	      center_x = (
	          tf.cast(center_x, dtype=tf.float32) +
	          tf.random.normal([], dtype=tf.float32) * false_positive_noise_factor *
	          bbox_size[1])

	      center_x = tf.minimum(
	          tf.maximum(tf.cast(tf.round(center_x), tf.int32), 0), width - 1)
	      center_y = tf.minimum(
	          tf.maximum(tf.cast(tf.round(center_y), tf.int32), 0), height - 1)
	      # Draw a sample to decide whether to add a false positive or not.
	      center = center + tf.cast(
	          tf.random.uniform([], dtype=tf.float32) < false_positive_rate,
	          tf.float32) * (
	              self._add_center_gaussian(center, center_x, center_y) - center)

	    # Remove the padding again.
	    center = center[center_pad_begin:(center_pad_begin + height),
	                    center_pad_begin:(center_pad_begin + width)]
	    center = tf.expand_dims(center, -1)
	    return center, unique_ids, ids_to_center_x, ids_to_center_y

	  def _generate_gt_center_and_offset(self,
	                                     panoptic_label,
	                                     semantic_weights,
	                                     prev_panoptic_label=None,
	                                     next_panoptic_label=None):
	    """Generates the ground-truth center and offset from the panoptic labels.

	    Additionally, the per-pixel weights for the semantic branch are increased
	    for small instances. In case, prev_panoptic_label is passed, it also
	    computes the previous center heatmap with random noise and the offsets
	    between center maps.

	    Args:
	      panoptic_label: A tf.Tensor of shape [height, width, 1].
	      semantic_weights: A tf.Tensor of shape [height, width, 1].
	      prev_panoptic_label: An optional tf.Tensor of shape [height, width, 1].
	      next_panoptic_label: An optional tf.Tensor of shape [height, width, 1].

	    Returns:
	      A tuple (center, offsets, weights, prev_center, frame_offset*,
	      next_offset*) with each being a tf.Tensor of shape [height, width, 1
	      (2*)]. Offsets are stored as (y, x) along the last axis.
	      If prev_panoptic_label is None, prev_center and frame_offset are None.
	      If next_panoptic_label is None, next_offset is None.
	    """
	    height = tf.shape(panoptic_label)[0]
	    width = tf.shape(panoptic_label)[1]

	    # Pad center to make boundary handling easier.
	    center_pad_begin = int(round(3 * self._sigma + 1))
	    center_pad_end = int(round(3 * self._sigma + 2))
	    center_pad = center_pad_begin + center_pad_end

	    center = tf.zeros((height + center_pad, width + center_pad))
	    offset_x = tf.zeros((height, width, 1), dtype=tf.int32)
	    offset_y = tf.zeros((height, width, 1), dtype=tf.int32)
	    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))

	    prev_center = None
	    frame_offsets = None
	    # Due to loop handling in tensorflow, these variables had to be defined for
	    # all cases.
	    frame_offset_x = tf.zeros((height, width, 1), dtype=tf.int32)
	    frame_offset_y = tf.zeros((height, width, 1), dtype=tf.int32)

	    # Next-frame instance offsets.
	    next_offset = None
	    next_offset_y = tf.zeros((height, width, 1), dtype=tf.int32)
	    next_offset_x = tf.zeros((height, width, 1), dtype=tf.int32)

	    if prev_panoptic_label is not None:
	      (prev_center, prev_unique_ids, prev_centers_x, prev_centers_y
	      ) = self._generate_prev_centers_with_noise(prev_panoptic_label)

	    for panoptic_id in unique_ids:
	      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
	      # Filter out IDs that should be ignored, are stuff classes or crowd.
	      # Stuff classes and crowd regions both have IDs of the form panoptic_id =
	      # semantic_id * label_divisor
	      if (semantic_id == self._dataset_info['ignore_label'] or
	          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
	        continue

	      # Convert [[y0, x0, 0], ...] to [[y0, ...], [x0, ...], [0, ...]].
	      mask_index = tf.transpose(tf.where(panoptic_label == panoptic_id))
	      mask_y_index = mask_index[0]
	      mask_x_index = mask_index[1]

	      next_mask_index = None
	      next_mask_y_index = None
	      next_mask_x_index = None
	      if next_panoptic_label is not None:
	        next_mask_index = tf.transpose(
	            tf.where(next_panoptic_label == panoptic_id))
	        next_mask_y_index = next_mask_index[0]
	        next_mask_x_index = next_mask_index[1]

	      # Number of pixels of this instance. tf.size gives a scalar, so the
	      # comparison below is a scalar predicate (tf.shape would give a
	      # one-element vector instead).
	      instance_area = tf.size(mask_x_index)
	      if instance_area < self._instance_area_threshold:
	        semantic_weights = tf.where(panoptic_label == panoptic_id,
	                                    self._small_instance_weight,
	                                    semantic_weights)

	      centers = tf.reduce_mean(tf.cast(mask_index, tf.float32), axis=1)

	      center_x = tf.cast(tf.round(centers[1]), tf.int32)
	      center_y = tf.cast(tf.round(centers[0]), tf.int32)

	      center = self._add_center_gaussian(center, center_x, center_y)
	      # Every pixel of the instance stores the offset to its own center.
	      offset_y = tf.tensor_scatter_nd_update(
	          offset_y,
	          tf.transpose(mask_index),
	          center_y - tf.cast(mask_y_index, tf.int32),
	          name='offset_y_scatter')
	      offset_x = tf.tensor_scatter_nd_update(
	          offset_x,
	          tf.transpose(mask_index),
	          center_x - tf.cast(mask_x_index, tf.int32),
	          name='offset_x_scatter')
	      if prev_panoptic_label is not None:
	        mask = tf.equal(prev_unique_ids, panoptic_id)
	        # Frame offsets exist only for instances that are also present in the
	        # previous frame.
	        if tf.math.count_nonzero(mask) > 0:
	          prev_center_x = prev_centers_x[mask]
	          prev_center_y = prev_centers_y[mask]

	          frame_offset_y = tf.tensor_scatter_nd_update(
	              frame_offset_y,
	              tf.transpose(mask_index),
	              prev_center_y - tf.cast(mask_y_index, tf.int32),
	              name='frame_offset_y_scatter')
	          frame_offset_x = tf.tensor_scatter_nd_update(
	              frame_offset_x,
	              tf.transpose(mask_index),
	              prev_center_x - tf.cast(mask_x_index, tf.int32),
	              name='frame_offset_x_scatter')
	      if next_panoptic_label is not None:
	        # Pixels of the same instance in the next frame point back to the
	        # current-frame center.
	        next_offset_y = tf.tensor_scatter_nd_update(
	            next_offset_y,
	            tf.transpose(next_mask_index),
	            center_y - tf.cast(next_mask_y_index, tf.int32),
	            name='next_offset_y_scatter')
	        next_offset_x = tf.tensor_scatter_nd_update(
	            next_offset_x,
	            tf.transpose(next_mask_index),
	            center_x - tf.cast(next_mask_x_index, tf.int32),
	            name='next_offset_x_scatter')

	    offset = tf.concat([offset_y, offset_x], axis=2)
	    # Remove the padding again.
	    center = center[center_pad_begin:(center_pad_begin + height),
	                    center_pad_begin:(center_pad_begin + width)]
	    center = tf.expand_dims(center, -1)
	    if prev_panoptic_label is not None:
	      frame_offsets = tf.concat([frame_offset_y, frame_offset_x], axis=2)
	    if next_panoptic_label is not None:
	      next_offset = tf.concat([next_offset_y, next_offset_x], axis=2)
	    return (center, offset, semantic_weights, prev_center, frame_offsets,
	            next_offset)