"""Prepare the data used for FEELVOS training/evaluation.""" |
|
import tensorflow as tf |
|
|
|
from deeplab.core import feature_extractor |
|
from deeplab.core import preprocess_utils |
|
|
|
|
|
|
|
_PROB_OF_FLIP = 0.5 |
|
|
|
get_random_scale = preprocess_utils.get_random_scale |
|
randomly_scale_image_and_label = ( |
|
preprocess_utils.randomly_scale_image_and_label) |
|
|
|
|
|
def preprocess_image_and_label(image, |
|
label, |
|
crop_height, |
|
crop_width, |
|
min_resize_value=None, |
|
max_resize_value=None, |
|
resize_factor=None, |
|
min_scale_factor=1., |
|
max_scale_factor=1., |
|
scale_factor_step_size=0, |
|
ignore_label=255, |
|
is_training=True, |
|
model_variant=None): |
|
"""Preprocesses the image and label. |
|
|
|
Args: |
|
image: Input image. |
|
label: Ground truth annotation label. |
|
crop_height: The height value used to crop the image and label. |
|
crop_width: The width value used to crop the image and label. |
|
min_resize_value: Desired size of the smaller image side. |
|
max_resize_value: Maximum allowed size of the larger image side. |
|
resize_factor: Resized dimensions are multiple of factor plus one. |
|
min_scale_factor: Minimum scale factor value. |
|
max_scale_factor: Maximum scale factor value. |
|
scale_factor_step_size: The step size from min scale factor to max scale |
|
factor. The input is randomly scaled based on the value of |
|
(min_scale_factor, max_scale_factor, scale_factor_step_size). |
|
ignore_label: The label value which will be ignored for training and |
|
evaluation. |
|
is_training: If the preprocessing is used for training or not. |
|
model_variant: Model variant (string) for choosing how to mean-subtract the |
|
images. See feature_extractor.network_map for supported model variants. |
|
|
|
Returns: |
|
original_image: Original image (could be resized). |
|
processed_image: Preprocessed image. |
|
label: Preprocessed ground truth segmentation label. |
|
|
|
Raises: |
|
ValueError: Ground truth label not provided during training. |
|
""" |
|
if is_training and label is None: |
|
raise ValueError('During training, label must be provided.') |
|
if model_variant is None: |
|
tf.logging.warning('Default mean-subtraction is performed. Please specify ' |
|
'a model_variant. See feature_extractor.network_map for ' |
|
'supported model variants.') |
|
|
|
|
|
original_image = image |
|
|
|
processed_image = tf.cast(image, tf.float32) |
|
|
|
if label is not None: |
|
label = tf.cast(label, tf.int32) |
|
|
|
|
|
if min_resize_value is not None or max_resize_value is not None: |
|
[processed_image, label] = ( |
|
preprocess_utils.resize_to_range( |
|
image=processed_image, |
|
label=label, |
|
min_size=min_resize_value, |
|
max_size=max_resize_value, |
|
factor=resize_factor, |
|
align_corners=True)) |
|
|
|
original_image = tf.identity(processed_image) |
|
|
|
|
|
scale = get_random_scale( |
|
min_scale_factor, max_scale_factor, scale_factor_step_size) |
|
processed_image, label = randomly_scale_image_and_label( |
|
processed_image, label, scale) |
|
|
|
processed_image.set_shape([None, None, 3]) |
|
|
|
if crop_height is not None and crop_width is not None: |
|
|
|
image_shape = tf.shape(processed_image) |
|
image_height = image_shape[0] |
|
image_width = image_shape[1] |
|
|
|
target_height = image_height + tf.maximum(crop_height - image_height, 0) |
|
target_width = image_width + tf.maximum(crop_width - image_width, 0) |
|
|
|
|
|
mean_pixel = tf.reshape( |
|
feature_extractor.mean_pixel(model_variant), [1, 1, 3]) |
|
processed_image = preprocess_utils.pad_to_bounding_box( |
|
processed_image, 0, 0, target_height, target_width, mean_pixel) |
|
|
|
if label is not None: |
|
label = preprocess_utils.pad_to_bounding_box( |
|
label, 0, 0, target_height, target_width, ignore_label) |
|
|
|
|
|
if is_training and label is not None: |
|
processed_image, label = preprocess_utils.random_crop( |
|
[processed_image, label], crop_height, crop_width) |
|
|
|
processed_image.set_shape([crop_height, crop_width, 3]) |
|
|
|
if label is not None: |
|
label.set_shape([crop_height, crop_width, 1]) |
|
|
|
if is_training: |
|
|
|
processed_image, label, _ = preprocess_utils.flip_dim( |
|
[processed_image, label], _PROB_OF_FLIP, dim=1) |
|
|
|
return original_image, processed_image, label |
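

# Illustrative usage sketch (added for documentation; not part of the original
# module). The frame shape, the 465x465 crop, and the scale range below are
# assumptions chosen for demonstration; in the real pipeline the inputs come
# from the dataset reader.
def _example_preprocess_single_frame():
  """Builds preprocessing ops for one hypothetical video frame."""
  image = tf.zeros([480, 854, 3], dtype=tf.uint8)  # dummy RGB frame
  label = tf.zeros([480, 854, 1], dtype=tf.uint8)  # dummy segmentation mask
  return preprocess_image_and_label(
      image,
      label,
      crop_height=465,
      crop_width=465,
      min_scale_factor=0.5,
      max_scale_factor=2.0,
      scale_factor_step_size=0.25,
      is_training=True,
      model_variant='xception_65')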


def preprocess_images_and_labels_consistently(images,
                                              labels,
                                              crop_height,
                                              crop_width,
                                              min_resize_value=None,
                                              max_resize_value=None,
                                              resize_factor=None,
                                              min_scale_factor=1.,
                                              max_scale_factor=1.,
                                              scale_factor_step_size=0,
                                              ignore_label=255,
                                              is_training=True,
                                              model_variant=None):
  """Preprocesses images and labels in a consistent way.

  Similar to preprocess_image_and_label, but works on a list of images and a
  list of labels, uses the same crop coordinates for all of them, and either
  flips all images and labels or none of them.

  Args:
    images: List of input images.
    labels: List of ground truth annotation labels.
    crop_height: The height value used to crop the images and labels.
    crop_width: The width value used to crop the images and labels.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: Whether the preprocessing is used for training.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_images: Original images (could be resized).
    processed_images: Preprocessed images.
    labels: Preprocessed ground truth segmentation labels.

  Raises:
    ValueError: Ground truth labels not provided during training.
  """
  if is_training and labels is None:
    raise ValueError('During training, labels must be provided.')
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map for '
                       'supported model variants.')
  if labels is not None:
    assert len(images) == len(labels)
  num_imgs = len(images)

  # Keep a reference to the original images.
  original_images = images

  processed_images = [tf.cast(image, tf.float32) for image in images]

  if labels is not None:
    labels = [tf.cast(label, tf.int32) for label in labels]

  # Resize all images and labels to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    processed_images, labels = zip(*[
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True) for processed_image, label
        in zip(processed_images, labels)])
    # The `original_images` become the resized images.
    original_images = [tf.identity(processed_image)
                       for processed_image in processed_images]

  # Data augmentation by random scaling, with a single scale factor shared by
  # all images and labels.
  scale = get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_images, labels = zip(
      *[randomly_scale_image_and_label(processed_image, label, scale)
        for processed_image, label in zip(processed_images, labels)])

  for processed_image in processed_images:
    processed_image.set_shape([None, None, 3])

  if crop_height is not None and crop_width is not None:
    # Pad images and labels to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_images[0])
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad the images with the mean pixel value and the labels with
    # ignore_label.
    mean_pixel = tf.reshape(
        feature_extractor.mean_pixel(model_variant), [1, 1, 3])
    processed_images = [preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)
                        for processed_image in processed_images]

    if labels is not None:
      labels = [preprocess_utils.pad_to_bounding_box(
          label, 0, 0, target_height, target_width, ignore_label)
                for label in labels]

    # Randomly crop all images and labels with the same crop window.
    if is_training and labels is not None:
      cropped = preprocess_utils.random_crop(
          processed_images + labels, crop_height, crop_width)
      assert len(cropped) == 2 * num_imgs
      processed_images = cropped[:num_imgs]
      labels = cropped[num_imgs:]

    for processed_image in processed_images:
      processed_image.set_shape([crop_height, crop_width, 3])

    if labels is not None:
      for label in labels:
        label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip all images and labels together, or none of
    # them. The last element returned by flip_dim is the is_flipped flag.
    res = preprocess_utils.flip_dim(
        list(processed_images + labels), _PROB_OF_FLIP, dim=1)
    maybe_flipped = res[:-1]
    assert len(maybe_flipped) == 2 * num_imgs
    processed_images = maybe_flipped[:num_imgs]
    labels = maybe_flipped[num_imgs:]

  return original_images, processed_images, labels
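

# Illustrative usage sketch (added for documentation; not part of the original
# module). FEELVOS-style training samples several frames from one video; the
# 3-frame setup and shapes below are assumptions for demonstration. All frames
# share one random scale, one crop window, and one flip decision.
def _example_preprocess_video_frames():
  """Builds consistent preprocessing ops for three hypothetical frames."""
  frames = [tf.zeros([480, 854, 3], dtype=tf.uint8) for _ in range(3)]
  masks = [tf.zeros([480, 854, 1], dtype=tf.uint8) for _ in range(3)]
  return preprocess_images_and_labels_consistently(
      frames,
      masks,
      crop_height=465,
      crop_width=465,
      min_scale_factor=0.5,
      max_scale_factor=2.0,
      scale_factor_step_size=0.25,
      is_training=True,
      model_variant='xception_65')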