|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""This file contains functions to preprocess images and labels.""" |
|
|
|
import tensorflow as tf |
|
|
|
from deeplab2.data.preprocessing import autoaugment_utils |
|
from deeplab2.data.preprocessing import preprocess_utils |
|
|
|
|
|
|
|
_PROB_OF_FLIP = 0.5 |
|
|
|
_MEAN_PIXEL = [127.5, 127.5, 127.5] |
|
|
|
|
|
def _pad_image_and_label(image, label, offset_height, offset_width, |
|
target_height, target_width, ignore_label=None): |
|
"""Pads the image and the label to the given size. |
|
|
|
Args: |
|
image: A tf.Tensor of shape [height, width, channels]. |
|
label: A tf.Tensor of shape [height, width, 1] or None. |
|
offset_height: The number of rows of zeros to add on top of the image and |
|
label. |
|
offset_width: The number of columns of zeros to add on the left of the image |
|
and label. |
|
target_height: The total height after padding. |
|
target_width: The total width after padding. |
|
ignore_label: The ignore_label for the label. Must only be set when label is |
|
given. |
|
|
|
Returns: |
|
The padded image and label as a tuple (padded_image, padded_label). |
|
|
|
Raises: |
|
tf.errors.InvalidArgumentError: An error occurs if the padding configuration |
|
is invalid. |
|
ValueError: An error occurs if label is given without an ignore_label. |
|
""" |
|
height = tf.shape(image)[0] |
|
width = tf.shape(image)[1] |
|
original_dtype = image.dtype |
|
if original_dtype not in (tf.float32, tf.float64): |
|
image = tf.cast(image, tf.float32) |
|
|
|
bottom_padding = target_height - offset_height - height |
|
right_padding = target_width - offset_width - width |
|
|
|
assert_bottom_padding = tf.assert_greater( |
|
bottom_padding, -1, |
|
'The padding configuration is not valid. Please either increase the ' |
|
'target size or reduce the padding offset.') |
|
assert_right_padding = tf.assert_greater( |
|
right_padding, -1, 'The padding configuration is not valid. Please either' |
|
' increase the target size or reduce the padding offset.') |
|
with tf.control_dependencies([assert_bottom_padding, assert_right_padding]): |
|
paddings = [[offset_height, bottom_padding], [offset_width, right_padding], |
|
[0, 0]] |
|
|
|
image = image - _MEAN_PIXEL |
|
image = tf.pad(image, paddings) |
|
image = image + _MEAN_PIXEL |
|
image = tf.cast(image, original_dtype) |
|
|
|
if label is not None: |
|
if ignore_label is None: |
|
raise ValueError( |
|
'If a label is given, the ignore label must be set too.') |
|
label = tf.pad(label, paddings, constant_values=ignore_label) |
|
|
|
return image, label |
|
|
|
|
|
def _update_max_resize_value(max_resize_value, crop_size, is_inference=False): |
|
"""Checks and may update max_resize_value. |
|
|
|
Args: |
|
max_resize_value: A 2-tuple of (height, width), maximum allowed value |
|
after resize. If a single element is given, then height and width |
|
share the same value. None, empty or having 0 indicates no maximum value |
|
will be used. |
|
crop_size: A 2-tuple of (height, width), crop size used. |
|
is_inference: Boolean, whether the model is performing inference or not. |
|
|
|
Returns: |
|
Updated max_resize_value. |
|
""" |
|
max_resize_value = preprocess_utils.process_resize_value(max_resize_value) |
|
if max_resize_value is None and is_inference: |
|
|
|
|
|
max_resize_value = crop_size |
|
|
|
if max_resize_value is None: |
|
return None |
|
|
|
if max_resize_value[0] > crop_size[0] or max_resize_value[1] > crop_size[1]: |
|
raise ValueError( |
|
'Maximum resize value provided (%s) exceeds model crop size (%s)' % |
|
(max_resize_value, crop_size)) |
|
return max_resize_value |
|
|
|
|
|
def preprocess_image_and_label(image, |
|
label, |
|
crop_height, |
|
crop_width, |
|
prev_image=None, |
|
prev_label=None, |
|
min_resize_value=None, |
|
max_resize_value=None, |
|
resize_factor=None, |
|
min_scale_factor=1., |
|
max_scale_factor=1., |
|
scale_factor_step_size=0, |
|
ignore_label=None, |
|
is_training=True, |
|
autoaugment_policy_name=None): |
|
"""Preprocesses the image and label. |
|
|
|
Args: |
|
image: A tf.Tensor containing the image with shape [height, width, 3]. |
|
label: A tf.Tensor containing the label with shape [height, width, 1] or |
|
None. |
|
crop_height: The height value used to crop the image and label. |
|
crop_width: The width value used to crop the image and label. |
|
prev_image: An optional tensor of shape [image_height, image_width, 3]. |
|
prev_label: An optional tensor of shape [label_height, label_width, 1]. |
|
min_resize_value: A 2-tuple of (height, width), desired minimum value |
|
after resize. If a single element is given, then height and width share |
|
the same value. None, empty or having 0 indicates no minimum value will |
|
be used. |
|
max_resize_value: A 2-tuple of (height, width), maximum allowed value |
|
after resize. If a single element is given, then height and width |
|
share the same value. None, empty or having 0 indicates no maximum value |
|
will be used. |
|
resize_factor: Resized dimensions are multiple of factor plus one. |
|
min_scale_factor: Minimum scale factor for random scale augmentation. |
|
max_scale_factor: Maximum scale factor for random scale augmentation. |
|
scale_factor_step_size: The step size from min scale factor to max scale |
|
factor. The input is randomly scaled based on the value of |
|
(min_scale_factor, max_scale_factor, scale_factor_step_size). |
|
ignore_label: The label value which will be ignored for training and |
|
evaluation. |
|
is_training: If the preprocessing is used for training or not. |
|
autoaugment_policy_name: String, autoaugment policy name. See |
|
autoaugment_policy.py for available policies. |
|
|
|
Returns: |
|
resized_image: The resized input image without other augmentations as a |
|
tf.Tensor. |
|
processed_image: The preprocessed image as a tf.Tensor. |
|
label: The preprocessed groundtruth segmentation label as a tf.Tensor. |
|
|
|
Raises: |
|
ValueError: Ground truth label not provided during training. |
|
""" |
|
if is_training and label is None: |
|
raise ValueError('During training, label must be provided.') |
|
|
|
image.get_shape().assert_is_compatible_with(tf.TensorShape([None, None, 3])) |
|
|
|
|
|
resized_image = image |
|
if prev_image is not None: |
|
image = tf.concat([image, prev_image], axis=2) |
|
processed_image = tf.cast(image, tf.float32) |
|
processed_prev_image = None |
|
|
|
if label is not None: |
|
label.get_shape().assert_is_compatible_with(tf.TensorShape([None, None, 1])) |
|
if prev_label is not None: |
|
label = tf.concat([label, prev_label], axis=2) |
|
label = tf.cast(label, tf.int32) |
|
|
|
|
|
if any([min_resize_value, max_resize_value, not is_training]): |
|
max_resize_value = _update_max_resize_value( |
|
max_resize_value, |
|
crop_size=(crop_height, crop_width), |
|
is_inference=not is_training) |
|
|
|
processed_image, label = ( |
|
preprocess_utils.resize_to_range( |
|
image=processed_image, |
|
label=label, |
|
min_size=min_resize_value, |
|
max_size=max_resize_value, |
|
factor=resize_factor, |
|
align_corners=True)) |
|
if prev_image is None: |
|
resized_image = tf.identity(processed_image) |
|
else: |
|
resized_image, _ = tf.split(processed_image, 2, axis=2) |
|
|
|
if prev_image is not None: |
|
processed_image, processed_prev_image = tf.split(processed_image, 2, axis=2) |
|
|
|
if prev_label is not None: |
|
label, prev_label = tf.split(label, 2, axis=2) |
|
|
|
if not is_training: |
|
image_height = tf.shape(processed_image)[0] |
|
image_width = tf.shape(processed_image)[1] |
|
|
|
offset_height = 0 |
|
offset_width = 0 |
|
processed_image, label = _pad_image_and_label(processed_image, label, |
|
offset_height, offset_width, |
|
crop_height, crop_width, |
|
ignore_label) |
|
processed_image.set_shape([crop_height, crop_width, 3]) |
|
if label is not None: |
|
label.set_shape([crop_height, crop_width, 1]) |
|
if prev_image is not None: |
|
processed_prev_image, prev_label = _pad_image_and_label( |
|
processed_prev_image, prev_label, offset_height, offset_width, |
|
crop_height, crop_width, ignore_label) |
|
processed_prev_image.set_shape([crop_height, crop_width, 3]) |
|
if prev_label is not None: |
|
prev_label.set_shape([crop_height, crop_width, 1]) |
|
return (resized_image, processed_image, label, processed_prev_image, |
|
prev_label) |
|
|
|
|
|
scale = preprocess_utils.get_random_scale( |
|
min_scale_factor, max_scale_factor, scale_factor_step_size) |
|
processed_image, label = preprocess_utils.randomly_scale_image_and_label( |
|
processed_image, label, scale) |
|
if processed_prev_image is not None: |
|
(processed_prev_image, |
|
prev_label) = preprocess_utils.randomly_scale_image_and_label( |
|
processed_prev_image, prev_label, scale) |
|
|
|
|
|
if autoaugment_policy_name: |
|
processed_image, label = _autoaugment_helper( |
|
processed_image, label, ignore_label, autoaugment_policy_name) |
|
if processed_prev_image is not None: |
|
processed_prev_image, prev_label = _autoaugment_helper( |
|
processed_prev_image, prev_label, ignore_label, |
|
autoaugment_policy_name) |
|
|
|
|
|
image_height = tf.shape(processed_image)[0] |
|
image_width = tf.shape(processed_image)[1] |
|
target_height = image_height + tf.maximum(crop_height - image_height, 0) |
|
target_width = image_width + tf.maximum(crop_width - image_width, 0) |
|
|
|
|
|
def _uniform_offset(margin): |
|
return tf.random.uniform( |
|
[], minval=0, maxval=tf.maximum(margin, 1), dtype=tf.int32) |
|
|
|
offset_height = _uniform_offset(crop_height - image_height) |
|
offset_width = _uniform_offset(crop_width - image_width) |
|
processed_image, label = _pad_image_and_label(processed_image, label, |
|
offset_height, offset_width, |
|
target_height, target_width, |
|
ignore_label) |
|
if processed_prev_image is not None: |
|
processed_prev_image, prev_label = _pad_image_and_label( |
|
processed_prev_image, prev_label, offset_height, offset_width, |
|
target_height, target_width, ignore_label) |
|
|
|
if processed_prev_image is not None: |
|
(processed_image, label, processed_prev_image, |
|
prev_label) = preprocess_utils.random_crop( |
|
[processed_image, label, processed_prev_image, prev_label], |
|
crop_height, crop_width) |
|
|
|
(processed_image, label, processed_prev_image, prev_label, |
|
_) = preprocess_utils.flip_dim( |
|
[processed_image, label, processed_prev_image, prev_label], |
|
_PROB_OF_FLIP, |
|
dim=1) |
|
else: |
|
processed_image, label = preprocess_utils.random_crop( |
|
[processed_image, label], crop_height, crop_width) |
|
|
|
processed_image, label, _ = preprocess_utils.flip_dim( |
|
[processed_image, label], _PROB_OF_FLIP, dim=1) |
|
|
|
return resized_image, processed_image, label, processed_prev_image, prev_label |
|
|
|
|
|
def _autoaugment_helper(image, label, ignore_label, policy_name): |
|
image = tf.cast(image, tf.uint8) |
|
label = tf.cast(label, tf.int32) |
|
image, label = autoaugment_utils.distort_image_with_autoaugment( |
|
image, label, ignore_label, policy_name) |
|
image = tf.cast(image, tf.float32) |
|
return image, label |
|
|