#import collections
import collections.abc as collections
import cv2
import math
import numpy as np
import numbers
import random
import torch
from imgaug import augmenters as iaa
import matplotlib
import matplotlib.cm
import mono.utils.weather_aug_utils as wa
"""
Provides a set of Pytorch transforms that use OpenCV instead of PIL (Pytorch default)
for image manipulation.
"""
class Compose(object):
    # Composes transforms, e.g.: Compose([RandomHorizontalFlip(prob=0.5), ToTensor()])
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
for t in self.transforms:
images, labels, intrinsics, cam_models, normals, other_labels, transform_paras = t(images, labels, intrinsics, cam_models, normals, other_labels, transform_paras)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
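
# A minimal usage sketch (illustrative only, not part of the training pipeline): the _demo_*
# helpers in this file are hypothetical, and the mean/std/intrinsic values below are assumptions.
def _demo_compose_pipeline():
    img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)   # fake BGR image
    depth = np.random.rand(480, 640).astype(np.float64)              # fake depth label
    intrinsic = [500.0, 500.0, 320.0, 240.0]                         # [fx, fy, cx, cy]
    pipeline = Compose([
        BGR2RGB(),
        ToTensor(),
        Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
    ])
    images, labels, intrinsics, cam_models, normals, other_labels, paras = pipeline(
        [img], [depth], [intrinsic], transform_paras={})
    # images[0] is a normalized float tensor of shape [3, 480, 640]
    return images[0], labels[0], intrinsics[0]
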
class ToTensor(object):
# Converts numpy.ndarray (H x W x C) to a torch.FloatTensor of shape (C x H x W).
def __init__(self, **kwargs):
return
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
        if not isinstance(images, list) or not isinstance(labels, list) or not isinstance(intrinsics, list):
            raise (RuntimeError("transform.ToTensor() only handles lists of images/labels/intrinsics."))
        if len(images) != len(intrinsics):
            raise (RuntimeError("The numbers of images and intrinsics do not match."))
        if not isinstance(images[0], np.ndarray) or not isinstance(labels[0], np.ndarray):
            raise (RuntimeError("transform.ToTensor() only handles np.ndarray for the input and label "
                                "[e.g. data read by cv2.imread()].\n"))
        if not isinstance(intrinsics[0], list):
            raise (RuntimeError("transform.ToTensor() only handles a list for the camera intrinsics"))
        if len(images[0].shape) > 3 or len(images[0].shape) < 2:
            raise (RuntimeError("transform.ToTensor() only handles images (np.ndarray) with 2 or 3 dims.\n"))
        if len(labels[0].shape) > 3 or len(labels[0].shape) < 2:
            raise (RuntimeError("transform.ToTensor() only handles labels (np.ndarray) with 2 or 3 dims.\n"))
        if len(intrinsics[0]) > 4 or len(intrinsics[0]) < 3:
            raise (RuntimeError("transform.ToTensor() only handles intrinsics (list) of length 3 or 4.\n"))
for i, img in enumerate(images):
if len(img.shape) == 2:
img = np.expand_dims(img, axis=2)
images[i] = torch.from_numpy(img.transpose((2, 0, 1))).float()
for i, lab in enumerate(labels):
if len(lab.shape) == 2:
lab = np.expand_dims(lab, axis=0)
labels[i] = torch.from_numpy(lab).float()
for i, intrinsic in enumerate(intrinsics):
if len(intrinsic) == 3:
intrinsic = [intrinsic[0],] + intrinsic
intrinsics[i] = torch.tensor(intrinsic, dtype=torch.float)
if cam_models is not None:
for i, cam_model in enumerate(cam_models):
cam_models[i] = torch.from_numpy(cam_model.transpose((2, 0, 1))).float() if cam_model is not None else None
if normals is not None:
for i, normal in enumerate(normals):
normals[i] = torch.from_numpy(normal.transpose((2, 0, 1))).float()
if other_labels is not None:
for i, lab in enumerate(other_labels):
if len(lab.shape) == 2:
lab = np.expand_dims(lab, axis=0)
other_labels[i] = torch.from_numpy(lab).float()
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class Normalize(object):
# Normalize tensor with mean and standard deviation along channel: channel = (channel - mean) / std
def __init__(self, mean, std=None, **kwargs):
if std is None:
assert len(mean) > 0
else:
assert len(mean) == len(std)
self.mean = torch.tensor(mean).float()[:, None, None]
self.std = torch.tensor(std).float()[:, None, None] if std is not None \
else torch.tensor([1.0, 1.0, 1.0]).float()[:, None, None]
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
# if self.std is None:
# # for t, m in zip(image, self.mean):
# # t.sub(m)
# image = image - self.mean
# if ref_images is not None:
# for i, ref_i in enumerate(ref_images):
# ref_images[i] = ref_i - self.mean
# else:
# # for t, m, s in zip(image, self.mean, self.std):
# # t.sub(m).div(s)
# image = (image - self.mean) / self.std
# if ref_images is not None:
# for i, ref_i in enumerate(ref_images):
# ref_images[i] = (ref_i - self.mean) / self.std
for i, img in enumerate(images):
img = torch.div((img - self.mean), self.std)
images[i] = img
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class ResizeCanonical(object):
"""
Resize the input to the canonical space first, then resize the input with random sampled size.
In the first stage, we assume the distance holds while the camera model varies.
In the second stage, we aim to simulate the observation in different distance. The camera will move along the optical axis.
Args:
images: list of RGB images.
labels: list of depth/disparity labels.
other labels: other labels, such as instance segmentations, semantic segmentations...
"""
def __init__(self, **kwargs):
self.ratio_range = kwargs['ratio_range']
self.canonical_focal = kwargs['focal_length']
self.crop_size = kwargs['crop_size']
def random_on_canonical_transform(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0
to_canonical_ratio = self.canonical_focal / ori_focal
to_scale_ratio = to_random_ratio
resize_ratio = to_canonical_ratio * to_random_ratio
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [self.canonical_focal, self.canonical_focal, intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
# number of other labels may be less than that of image
label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# scale the label and camera intrinsics
label = label / to_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, to_scale_ratio
def random_on_crop_transform(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
crop_h, crop_w = self.crop_size
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0
to_canonical_ratio = self.canonical_focal / ori_focal
# random resize based on the last crop size
proposal_reshape_h = int(crop_h * to_random_ratio + 0.5)
proposal_reshape_w = int(crop_w * to_random_ratio + 0.5)
resize_ratio_h = proposal_reshape_h / ori_h
resize_ratio_w = proposal_reshape_w / ori_w
resize_ratio = min(resize_ratio_h, resize_ratio_w) # resize based on the long edge
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
to_scale_ratio = resize_ratio / to_canonical_ratio
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [self.canonical_focal, self.canonical_focal, intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
# number of other labels may be less than that of image
label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# scale the label and camera intrinsics
label = label / to_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, to_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
        assert labels[0].dtype == np.float64
target_focal = (intrinsics[0][0] + intrinsics[0][1]) / 2.0
target_to_canonical_ratio = self.canonical_focal / target_focal
target_img_shape = images[0].shape
to_random_ratio = random.uniform(self.ratio_range[0], self.ratio_range[1])
to_scale_ratio = 0.0
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model, to_scale_ratio = self.random_on_canonical_transform(
img, label, intrinsic, cam_model, to_random_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
        if normals is not None:
reshape_h, reshape_w, _ = images[0].shape
for i, normal in enumerate(normals):
normals[i] = cv2.resize(normal, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
        if other_labels is not None:
# other labels are like semantic segmentations, instance segmentations, instance planes segmentations...
#resize_ratio = target_to_canonical_ratio * to_scale_ratio
#reshape_h = int(target_img_shape[0] * resize_ratio + 0.5)
#reshape_w = int(target_img_shape[1] * resize_ratio + 0.5)
reshape_h, reshape_w, _ = images[0].shape
for i, other_label_i in enumerate(other_labels):
other_labels[i] = cv2.resize(other_label_i, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
if transform_paras is not None:
transform_paras.update(label_scale_factor = 1.0/to_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
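
# Sketch of the two-stage canonical resize (illustrative; numbers below are assumptions):
# with fx = 750 and canonical focal length 1000, the image is first resized by 1000/750 to map
# the camera to the canonical space (depth unchanged), then by a random ratio r = 1.5 that
# mimics moving the camera along the optical axis, which divides the depth label by r.
def _demo_resize_canonical():
    img = np.zeros((300, 400, 3), dtype=np.uint8)
    depth = np.full((300, 400), 9.0, dtype=np.float64)
    intrinsic = [750.0, 750.0, 200.0, 150.0]
    t = ResizeCanonical(ratio_range=(1.5, 1.5), focal_length=1000.0, crop_size=(512, 960))
    paras = {}
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras=paras)
    # total resize ratio = (1000 / 750) * 1.5 = 2.0, so images[0].shape == (600, 800, 3),
    # labels[0] == 9.0 / 1.5 == 6.0 and paras['label_scale_factor'] == 1 / 1.5
    return images[0].shape, labels[0][0, 0], paras['label_scale_factor']
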
class LabelScaleCononical(object):
"""
To solve the ambiguity observation for the mono branch, i.e. different focal length (object size) with the same depth, cameras are
mapped to a cononical space. To mimic this, we set the focal length to a canonical one and scale the depth value. NOTE: resize the image based on the ratio can also solve this ambiguity.
Args:
images: list of RGB images.
labels: list of depth/disparity labels.
other labels: other labels, such as instance segmentations, semantic segmentations...
"""
def __init__(self, **kwargs):
self.canonical_focal = kwargs['focal_length']
def _get_scale_ratio(self, intrinsic):
target_focal_x = intrinsic[0]
label_scale_ratio = self.canonical_focal / target_focal_x
pose_scale_ratio = 1.0
return label_scale_ratio, pose_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
#assert labels[0].dtype == np.float
label_scale_ratio = None
pose_scale_ratio = None
for i in range(len(intrinsics)):
img_i = images[i]
label_i = labels[i] if i < len(labels) else None
intrinsic_i = intrinsics[i].copy()
cam_model_i = cam_models[i] if cam_models is not None and i < len(cam_models) else None
label_scale_ratio, pose_scale_ratio = self._get_scale_ratio(intrinsic_i)
# adjust the focal length, map the current camera to the canonical space
intrinsics[i] = [intrinsic_i[0]*label_scale_ratio, intrinsic_i[1]*label_scale_ratio, intrinsic_i[2], intrinsic_i[3]]
# scale the label to the canonical space
if label_i is not None:
labels[i] = label_i * label_scale_ratio
if cam_model_i is not None:
# As the focal length is adjusted (canonical focal length), the camera model should be re-built.
ori_h, ori_w, _ = img_i.shape
cam_models[i] = build_camera_model(ori_h, ori_w, intrinsics[i])
if transform_paras is not None:
transform_paras.update(label_scale_factor = label_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
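
# Sketch of the canonical label scaling (illustrative; values are assumptions): with fx = 500
# and a canonical focal length of 1000, depth values are multiplied by 1000 / 500 = 2 and the
# factor is recorded, so metric depth can later be recovered by dividing it back out.
def _demo_label_scale_canonical():
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    depth = np.full((480, 640), 10.0, dtype=np.float64)
    intrinsic = [500.0, 500.0, 320.0, 240.0]
    t = LabelScaleCononical(focal_length=1000.0)
    paras = {}
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras=paras)
    # labels[0] is now 20.0 everywhere; intrinsics[0][:2] == [1000.0, 1000.0];
    # paras['label_scale_factor'] == 2.0
    return labels[0][0, 0], intrinsics[0], paras['label_scale_factor']
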
class ResizeKeepRatio(object):
"""
Resize and pad to a given size. Hold the aspect ratio.
This resizing assumes that the camera model remains unchanged.
Args:
resize_size: predefined output size.
"""
def __init__(self, resize_size, padding=None, ignore_label=-1, **kwargs):
if isinstance(resize_size, int):
self.resize_h = resize_size
self.resize_w = resize_size
elif isinstance(resize_size, collections.Iterable) and len(resize_size) == 2 \
and isinstance(resize_size[0], int) and isinstance(resize_size[1], int) \
and resize_size[0] > 0 and resize_size[1] > 0:
self.resize_h = resize_size[0]
self.resize_w = resize_size[1]
else:
raise (RuntimeError("crop size error.\n"))
if padding is None:
self.padding = padding
elif isinstance(padding, list):
if all(isinstance(i, numbers.Number) for i in padding):
self.padding = padding
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if len(padding) != 3:
raise (RuntimeError("padding channel is not equal with 3\n"))
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if isinstance(ignore_label, int):
self.ignore_label = ignore_label
else:
raise (RuntimeError("ignore_label should be an integer number\n"))
self.crop_size = kwargs['crop_size']
self.canonical_focal = kwargs['focal_length']
def main_data_transform(self, image, label, intrinsic, cam_model, resize_ratio, padding, to_scale_ratio):
"""
Resize data first and then do the padding.
'label' will be scaled.
"""
h, w, _ = image.shape
reshape_h = int(resize_ratio * h)
reshape_w = int(resize_ratio * w)
pad_h, pad_w, pad_h_half, pad_w_half = padding
# resize
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
# padding
image = cv2.copyMakeBorder(
image,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.padding)
if label is not None:
# label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
label = resize_depth_preserve(label, (reshape_h, reshape_w))
label = cv2.copyMakeBorder(
label,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
# scale the label
label = label / to_scale_ratio
        # Resize, adjust principal point
if intrinsic is not None:
intrinsic[2] = intrinsic[2] * resize_ratio
intrinsic[3] = intrinsic[3] * resize_ratio
if cam_model is not None:
#cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
cam_model = cv2.copyMakeBorder(
cam_model,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
        # Pad, adjust the principal point
if intrinsic is not None:
intrinsic[2] = intrinsic[2] + pad_w_half
intrinsic[3] = intrinsic[3] + pad_h_half
return image, label, intrinsic, cam_model
def get_label_scale_factor(self, image, intrinsic, resize_ratio):
ori_h, ori_w, _ = image.shape
crop_h, crop_w = self.crop_size
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0 #intrinsic[0] #
to_canonical_ratio = self.canonical_focal / ori_focal
to_scale_ratio = resize_ratio / to_canonical_ratio
return to_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
target_h, target_w, _ = images[0].shape
resize_ratio_h = self.resize_h / target_h
resize_ratio_w = self.resize_w / target_w
resize_ratio = min(resize_ratio_h, resize_ratio_w)
reshape_h = int(resize_ratio * target_h)
reshape_w = int(resize_ratio * target_w)
pad_h = max(self.resize_h - reshape_h, 0)
pad_w = max(self.resize_w - reshape_w, 0)
pad_h_half = int(pad_h / 2)
pad_w_half = int(pad_w / 2)
pad_info = [pad_h, pad_w, pad_h_half, pad_w_half]
to_scale_ratio = self.get_label_scale_factor(images[0], intrinsics[0], resize_ratio)
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model = self.main_data_transform(
img, label, intrinsic, cam_model, resize_ratio, pad_info, to_scale_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
if normals is not None:
for i, normal in enumerate(normals):
normal = cv2.resize(normal, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# pad
normals[i] = cv2.copyMakeBorder(
normal,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=0)
if other_labels is not None:
for i, other_lab in enumerate(other_labels):
# resize
other_lab = cv2.resize(other_lab, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# pad
other_labels[i] = cv2.copyMakeBorder(
other_lab,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
if transform_paras is not None:
transform_paras.update(pad=[pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half])
if 'label_scale_factor' in transform_paras:
transform_paras['label_scale_factor'] = transform_paras['label_scale_factor'] * 1.0 / to_scale_ratio
else:
transform_paras.update(label_scale_factor=1.0/to_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
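
# Sketch of resize-and-pad to a fixed network input (illustrative; sizes are assumptions):
# a 480x640 image is resized so the limiting side fits 512x1088, padded symmetrically, and the
# padding plus the label scale factor are stored in transform_paras for later un-padding/re-scaling.
def _demo_resize_keep_ratio():
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    depth = np.random.rand(480, 640).astype(np.float64)
    intrinsic = [500.0, 500.0, 320.0, 240.0]
    t = ResizeKeepRatio(resize_size=(512, 1088), padding=[0, 0, 0], ignore_label=-1,
                        crop_size=(512, 1088), focal_length=1000.0)
    paras = {}
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras=paras)
    # images[0].shape == (512, 1088, 3); paras['pad'] holds [top, bottom, left, right]
    return images[0].shape, paras
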
class KeepResizeCanoSize(object):
"""
Resize and pad to a given size. Hold the aspect ratio.
This resizing assumes that the camera model remains unchanged.
Args:
resize_size: predefined output size.
"""
def __init__(self, resize_size, padding=None, ignore_label=-1, **kwargs):
if isinstance(resize_size, int):
self.resize_h = resize_size
self.resize_w = resize_size
elif isinstance(resize_size, collections.Iterable) and len(resize_size) == 2 \
and isinstance(resize_size[0], int) and isinstance(resize_size[1], int) \
and resize_size[0] > 0 and resize_size[1] > 0:
self.resize_h = resize_size[0]
self.resize_w = resize_size[1]
else:
raise (RuntimeError("crop size error.\n"))
if padding is None:
self.padding = padding
elif isinstance(padding, list):
if all(isinstance(i, numbers.Number) for i in padding):
self.padding = padding
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if len(padding) != 3:
raise (RuntimeError("padding channel is not equal with 3\n"))
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if isinstance(ignore_label, int):
self.ignore_label = ignore_label
else:
raise (RuntimeError("ignore_label should be an integer number\n"))
self.crop_size = kwargs['crop_size']
self.canonical_focal = kwargs['focal_length']
def main_data_transform(self, image, label, intrinsic, cam_model, resize_ratio, padding, to_scale_ratio):
"""
Resize data first and then do the padding.
'label' will be scaled.
"""
h, w, _ = image.shape
reshape_h = int(resize_ratio * h)
reshape_w = int(resize_ratio * w)
pad_h, pad_w, pad_h_half, pad_w_half = padding
# resize
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
# padding
image = cv2.copyMakeBorder(
image,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.padding)
if label is not None:
# label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
label = resize_depth_preserve(label, (reshape_h, reshape_w))
label = cv2.copyMakeBorder(
label,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
# scale the label
label = label / to_scale_ratio
        # Resize, adjust principal point
if intrinsic is not None:
intrinsic[2] = intrinsic[2] * resize_ratio
intrinsic[3] = intrinsic[3] * resize_ratio
if cam_model is not None:
#cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
cam_model = cv2.copyMakeBorder(
cam_model,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
        # Pad, adjust the principal point
if intrinsic is not None:
intrinsic[2] = intrinsic[2] + pad_w_half
intrinsic[3] = intrinsic[3] + pad_h_half
return image, label, intrinsic, cam_model
# def get_label_scale_factor(self, image, intrinsic, resize_ratio):
# ori_h, ori_w, _ = image.shape
# crop_h, crop_w = self.crop_size
# ori_focal = intrinsic[0] #(intrinsic[0] + intrinsic[1]) / 2.0
# to_canonical_ratio = self.canonical_focal / ori_focal
# to_scale_ratio = resize_ratio / to_canonical_ratio
# return to_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
target_h, target_w, _ = images[0].shape
ori_focal = intrinsics[0][0]
to_canonical_ratio = self.canonical_focal / ori_focal
resize_ratio = to_canonical_ratio
reshape_h = int(resize_ratio * target_h)
reshape_w = int(resize_ratio * target_w)
        pad_h = 32 - reshape_h % 32 if reshape_h % 32 != 0 else 0
        pad_w = 32 - reshape_w % 32 if reshape_w % 32 != 0 else 0
pad_h_half = int(pad_h / 2)
pad_w_half = int(pad_w / 2)
pad_info = [pad_h, pad_w, pad_h_half, pad_w_half]
to_scale_ratio = 1.0
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model = self.main_data_transform(
img, label, intrinsic, cam_model, resize_ratio, pad_info, to_scale_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
if normals is not None:
for i, normal in enumerate(normals):
# resize
normal = cv2.resize(normal, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# pad
normals[i] = cv2.copyMakeBorder(
normal,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=0)
if other_labels is not None:
for i, other_lab in enumerate(other_labels):
# resize
other_lab = cv2.resize(other_lab, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# pad
other_labels[i] = cv2.copyMakeBorder(
other_lab,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
if transform_paras is not None:
transform_paras.update(pad=[pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half])
if 'label_scale_factor' in transform_paras:
transform_paras['label_scale_factor'] = transform_paras['label_scale_factor'] * 1.0 / to_scale_ratio
else:
transform_paras.update(label_scale_factor=1.0/to_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class RandomCrop(object):
"""Crops the given ndarray image (H*W*C or H*W).
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is made.
"""
def __init__(self, crop_size, crop_type='center', padding=None, ignore_label=-1, **kwargs):
if isinstance(crop_size, int):
self.crop_h = crop_size
self.crop_w = crop_size
elif isinstance(crop_size, collections.Iterable) and len(crop_size) == 2 \
and isinstance(crop_size[0], int) and isinstance(crop_size[1], int) \
and crop_size[0] > 0 and crop_size[1] > 0:
self.crop_h = crop_size[0]
self.crop_w = crop_size[1]
else:
raise (RuntimeError("crop size error.\n"))
if crop_type == 'center' or crop_type == 'rand' or crop_type=='rand_in_field':
self.crop_type = crop_type
else:
raise (RuntimeError("crop type error: rand | center | rand_in_field \n"))
if padding is None:
self.padding = padding
elif isinstance(padding, list):
if all(isinstance(i, numbers.Number) for i in padding):
self.padding = padding
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if len(padding) != 3:
raise (RuntimeError("padding channel is not equal with 3\n"))
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if isinstance(ignore_label, int):
self.ignore_label = ignore_label
else:
raise (RuntimeError("ignore_label should be an integer number\n"))
def cal_padding_paras(self, h, w):
# padding if current size is not satisfied
pad_h = max(self.crop_h - h, 0)
pad_w = max(self.crop_w - w, 0)
pad_h_half = int(pad_h / 2)
pad_w_half = int(pad_w / 2)
return pad_h, pad_w, pad_h_half, pad_w_half
def cal_cropping_paras(self, h, w, intrinsic):
u0 = intrinsic[2]
v0 = intrinsic[3]
if self.crop_type == 'rand':
h_min = 0
h_max = h - self.crop_h
w_min = 0
w_max = w - self.crop_w
elif self.crop_type == 'center':
h_min = (h - self.crop_h) / 2
h_max = (h - self.crop_h) / 2
w_min = (w - self.crop_w) / 2
w_max = (w - self.crop_w) / 2
else: # rand in field
h_min = min(max(0, v0 - 0.75*self.crop_h), h-self.crop_h)
h_max = min(max(v0 - 0.25*self.crop_h, 0), h-self.crop_h)
w_min = min(max(0, u0 - 0.75*self.crop_w), w-self.crop_w)
w_max = min(max(u0 - 0.25*self.crop_w, 0), w-self.crop_w)
h_off = random.randint(int(h_min), int(h_max))
w_off = random.randint(int(w_min), int(w_max))
return h_off, w_off
def main_data_transform(self, image, label, intrinsic, cam_model,
pad_h, pad_w, pad_h_half, pad_w_half, h_off, w_off):
# padding if current size is not satisfied
if pad_h > 0 or pad_w > 0:
if self.padding is None:
raise (RuntimeError("depthtransform.Crop() need padding while padding argument is None\n"))
image = cv2.copyMakeBorder(image, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.padding)
if label is not None:
label = cv2.copyMakeBorder(label, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
if cam_model is not None:
cam_model = cv2.copyMakeBorder(cam_model, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
# cropping
image = image[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if label is not None:
label = label[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if cam_model is not None:
cam_model = cam_model[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if intrinsic is not None:
intrinsic[2] = intrinsic[2] + pad_w_half - w_off
intrinsic[3] = intrinsic[3] + pad_h_half - h_off
return image, label, intrinsic, cam_model
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
if 'random_crop_size' in transform_paras and transform_paras['random_crop_size'] is not None \
and (transform_paras['random_crop_size'][0] + transform_paras['random_crop_size'][1] > 500):
self.crop_h = int(transform_paras['random_crop_size'][0].item())
self.crop_w = int(transform_paras['random_crop_size'][1].item())
target_img = images[0]
target_h, target_w, _ = target_img.shape
target_intrinsic = intrinsics[0]
pad_h, pad_w, pad_h_half, pad_w_half = self.cal_padding_paras(target_h, target_w)
h_off, w_off = self.cal_cropping_paras(target_h+pad_h, target_w+pad_w, target_intrinsic)
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i].copy() if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model = self.main_data_transform(
img, label, intrinsic, cam_model,
pad_h, pad_w, pad_h_half, pad_w_half, h_off, w_off)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
pad=[pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half]
if normals is not None:
for i, normal in enumerate(normals):
# padding if current size is not satisfied
normal = cv2.copyMakeBorder(normal, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=0)
normals[i] = normal[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if other_labels is not None:
for i, other_lab in enumerate(other_labels):
# padding if current size is not satisfied
other_lab = cv2.copyMakeBorder(other_lab, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
other_labels[i] = other_lab[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if transform_paras is not None:
transform_paras.update(dict(pad=pad))
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
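
# Sketch of a center crop (illustrative; sizes are assumptions): cropping shifts the principal
# point by the crop offset, so the returned intrinsics stay consistent with the cropped image.
def _demo_center_crop():
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    depth = np.random.rand(480, 640).astype(np.float64)
    intrinsic = [500.0, 500.0, 320.0, 240.0]
    t = RandomCrop(crop_size=(448, 448), crop_type='center', padding=[0, 0, 0], ignore_label=-1)
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras={})
    # images[0].shape == (448, 448, 3); cx moves from 320 to 320 - (640 - 448) / 2 == 224
    return images[0].shape, intrinsics[0]
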
class RandomResize(object):
"""
Random resize the image. During this process, the camera model is hold, and thus the depth label is scaled.
Args:
images: list of RGB images.
labels: list of depth/disparity labels.
other labels: other labels, such as instance segmentations, semantic segmentations...
"""
def __init__(self, ratio_range=(0.85, 1.15), prob=0.5, is_lidar=True, **kwargs):
self.ratio_range = ratio_range
self.is_lidar = is_lidar
self.prob = prob
def random_resize(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
resize_ratio = to_random_ratio
label_scale_ratio = 1.0 / resize_ratio
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [intrinsic[0], intrinsic[1], intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
if self.is_lidar:
label = resize_depth_preserve(label, (reshape_h, reshape_w))
else:
label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# scale the label
label = label * label_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, label_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
        assert labels[0].dtype == np.float64
# target_focal = (intrinsics[0][0] + intrinsics[0][1]) / 2.0
# target_to_canonical_ratio = self.canonical_focal / target_focal
# target_img_shape = images[0].shape
prob = random.uniform(0, 1)
if prob < self.prob:
to_random_ratio = random.uniform(self.ratio_range[0], self.ratio_range[1])
else:
to_random_ratio = 1.0
label_scale_ratio = 0.0
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i].copy() if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model, label_scale_ratio = self.random_resize(
img, label, intrinsic, cam_model, to_random_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic.copy()
if cam_model is not None:
cam_models[i] = cam_model
        if normals is not None:
reshape_h, reshape_w, _ = images[0].shape
for i, norm in enumerate(normals):
normals[i] = cv2.resize(norm, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
        if other_labels is not None:
# other labels are like semantic segmentations, instance segmentations, instance planes segmentations...
#resize_ratio = target_to_canonical_ratio * to_scale_ratio
#reshape_h = int(target_img_shape[0] * resize_ratio + 0.5)
#reshape_w = int(target_img_shape[1] * resize_ratio + 0.5)
reshape_h, reshape_w, _ = images[0].shape
for i, other_label_i in enumerate(other_labels):
other_labels[i] = cv2.resize(other_label_i, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
if transform_paras is not None:
if 'label_scale_factor' in transform_paras:
transform_paras['label_scale_factor'] = transform_paras['label_scale_factor'] * label_scale_ratio
else:
transform_paras.update(label_scale_factor = label_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
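
# Sketch of the random resize (illustrative; numbers are assumptions): the camera model is held
# fixed, so enlarging the image by r makes objects look closer; the depth label is therefore
# multiplied by 1 / r to keep the observation geometrically consistent.
def _demo_random_resize():
    img = np.zeros((240, 320, 3), dtype=np.uint8)
    depth = np.full((240, 320), 8.0, dtype=np.float64)
    intrinsic = [400.0, 400.0, 160.0, 120.0]
    t = RandomResize(ratio_range=(2.0, 2.0), prob=1.0, is_lidar=False)
    paras = {}
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras=paras)
    # with the ratio fixed to 2: images[0].shape == (480, 640, 3), labels[0] == 4.0,
    # and paras['label_scale_factor'] == 0.5
    return images[0].shape, labels[0][0, 0], paras['label_scale_factor']
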
class RandomEdgeMask(object):
"""
Random mask the input and labels.
Args:
images: list of RGB images.
labels: list of depth/disparity labels.
other labels: other labels, such as instance segmentations, semantic segmentations...
"""
def __init__(self, mask_maxsize=32, prob=0.5, rgb_invalid=[0,0,0], label_invalid=-1,**kwargs):
self.mask_maxsize = mask_maxsize
self.prob = prob
self.rgb_invalid = rgb_invalid
self.label_invalid = label_invalid
def mask_edge(self, image, mask_edgesize, mask_value):
H, W = image.shape[0], image.shape[1]
# up
image[0:mask_edgesize[0], :, ...] = mask_value
# down
image[H-mask_edgesize[1]:H, :, ...] = mask_value
# left
image[:, 0:mask_edgesize[2], ...] = mask_value
# right
image[:, W-mask_edgesize[3]:W, ...] = mask_value
return image
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
        assert labels[0].dtype == np.float64
prob = random.uniform(0, 1)
if prob > self.prob:
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
mask_edgesize = random.sample(range(self.mask_maxsize), 4) #[up, down, left, right]
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
img = self.mask_edge(img, mask_edgesize, self.rgb_invalid)
images[i] = img
if label is not None:
label = self.mask_edge(label, mask_edgesize, self.label_invalid)
labels[i] = label
        if normals is not None:
for i, normal in enumerate(normals):
normals[i] = self.mask_edge(normal, mask_edgesize, mask_value=0)
        if other_labels is not None:
# other labels are like semantic segmentations, instance segmentations, instance planes segmentations...
for i, other_label_i in enumerate(other_labels):
other_labels[i] = self.mask_edge(other_label_i, mask_edgesize, self.label_invalid)
if transform_paras is not None:
pad = transform_paras['pad'] if 'pad' in transform_paras else [0,0,0,0]
new_pad = [max(mask_edgesize[0], pad[0]), max(mask_edgesize[1], pad[1]), max(mask_edgesize[2], pad[2]), max(mask_edgesize[3], pad[3])]
transform_paras.update(dict(pad=new_pad))
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class AdjustSize(object):
"""Crops the given ndarray image (H*W*C or H*W).
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is made.
"""
def __init__(self, padding=None, ignore_label=-1, **kwargs):
if padding is None:
self.padding = padding
elif isinstance(padding, list):
if all(isinstance(i, numbers.Number) for i in padding):
self.padding = padding
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if len(padding) != 3:
raise (RuntimeError("padding channel is not equal with 3\n"))
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if isinstance(ignore_label, int):
self.ignore_label = ignore_label
else:
raise (RuntimeError("ignore_label should be an integer number\n"))
def get_pad_paras(self, h, w):
        pad_h = 32 - h % 32 if h % 32 != 0 else 0
        pad_w = 32 - w % 32 if w % 32 != 0 else 0
pad_h_half = int(pad_h // 2)
pad_w_half = int(pad_w // 2)
return pad_h, pad_w, pad_h_half, pad_w_half
def main_data_transform(self, image, label, intrinsic, cam_model):
h, w, _ = image.shape
pad_h, pad_w, pad_h_half, pad_w_half = self.get_pad_paras(h=h, w=w)
if pad_h > 0 or pad_w > 0:
if self.padding is None:
raise (RuntimeError("depthtransform.Crop() need padding while padding argument is None\n"))
image = cv2.copyMakeBorder(image, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.padding)
if label is not None:
label = cv2.copyMakeBorder(label, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
if cam_model is not None:
cam_model = cv2.copyMakeBorder(cam_model, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
if intrinsic is not None:
intrinsic[2] = intrinsic[2] + pad_w_half
intrinsic[3] = intrinsic[3] + pad_h_half
pad=[pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half]
return image, label, intrinsic, cam_model, pad
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
target_img = images[0]
target_h, target_w, _ = target_img.shape
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model, pad = self.main_data_transform(
img, label, intrinsic, cam_model)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
if transform_paras is not None:
transform_paras.update(dict(pad=pad))
if normals is not None:
pad_h, pad_w, pad_h_half, pad_w_half = self.get_pad_paras(h=target_h, w=target_w)
for i, normal in enumerate(normals):
normals[i] = cv2.copyMakeBorder(normal, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=0)
if other_labels is not None:
pad_h, pad_w, pad_h_half, pad_w_half = self.get_pad_paras(h=target_h, w=target_w)
for i, other_lab in enumerate(other_labels):
other_labels[i] = cv2.copyMakeBorder(other_lab, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
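
# Sketch of the final size adjustment (illustrative; the input size is an assumption): the image
# is padded so that H and W become multiples of 32, as required by typical encoder-decoder backbones.
def _demo_adjust_size():
    img = np.zeros((481, 641, 3), dtype=np.uint8)
    depth = np.random.rand(481, 641).astype(np.float64)
    intrinsic = [500.0, 500.0, 320.0, 240.0]
    t = AdjustSize(padding=[0, 0, 0], ignore_label=-1)
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras={})
    # images[0].shape == (512, 672, 3); paras['pad'] holds [top, bottom, left, right]
    return images[0].shape, paras['pad']
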
class RandomHorizontalFlip(object):
def __init__(self, prob=0.5, **kwargs):
self.p = prob
def main_data_transform(self, image, label, intrinsic, cam_model, rotate):
if rotate:
image = cv2.flip(image, 1)
if label is not None:
label = cv2.flip(label, 1)
            if intrinsic is not None:
                h, w, _ = image.shape
                # a horizontal flip mirrors only the x-coordinate of the principal point; cy stays unchanged
                intrinsic[2] = w - intrinsic[2]
if cam_model is not None:
cam_model = cv2.flip(cam_model, 1)
cam_model[:, :, 0] = cam_model[:, :, 0] * -1
cam_model[:, :, 2] = cam_model[:, :, 2] * -1
return image, label, intrinsic, cam_model
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
rotate = random.random() > self.p
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model = self.main_data_transform(
img, label, intrinsic, cam_model, rotate)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
if normals is not None:
for i, normal in enumerate(normals):
if rotate:
normal = cv2.flip(normal, 1)
normal[:, :, 0] = -normal[:, :, 0] # NOTE: check the direction of normal coordinates axis, this is used in https://github.com/baegwangbin/surface_normal_uncertainty
normals[i] = normal
if other_labels is not None:
for i, other_lab in enumerate(other_labels):
if rotate:
other_lab = cv2.flip(other_lab, 1)
other_labels[i] = other_lab
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class RandomBlur(object):
def __init__(self,
aver_kernal=(2, 10),
motion_kernal=(5, 15),
angle=[-80, 80],
prob=0.3,
**kwargs):
        average_blur = iaa.AverageBlur(k=aver_kernal)
        motion_blur = iaa.MotionBlur(k=motion_kernal, angle=angle)
        zoom_blur = iaa.imgcorruptlike.ZoomBlur(severity=1)
        self.prob = prob
        self.blurs = [average_blur, motion_blur, zoom_blur]
def blur(self, imgs, id):
blur_mtd = self.blurs[id]
return blur_mtd(images=imgs)
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
prob = random.random()
if prob < self.prob:
id = random.randint(0, len(self.blurs)-1)
images = self.blur(images, id)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class RGBCompresion(object):
def __init__(self, prob=0.1, compression=(0, 50), **kwargs):
self.rgb_compress = iaa.Sequential(
[
iaa.JpegCompression(compression=compression),
],
random_order=True,
)
self.prob = prob
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
if random.random() < self.prob:
images = self.rgb_compress(images=images)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class RGB2BGR(object):
# Converts image from RGB order to BGR order, for model initialized from Caffe
def __init__(self, **kwargs):
return
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
for i, img in enumerate(images):
images[i] = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class BGR2RGB(object):
# Converts image from BGR order to RGB order, for model initialized from Pytorch
def __init__(self, **kwargs):
return
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
for i, img in enumerate(images):
images[i] = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class PhotoMetricDistortion(object):
"""Apply photometric distortion to image sequentially, every transformation
is applied with a probability of 0.5. The position of random contrast is in
second or second to last.
1. random brightness
2. random contrast (mode 0)
3. convert color from BGR to HSV
4. random saturation
5. random hue
6. convert color from HSV to BGR
7. random contrast (mode 1)
    Args:
        brightness_delta (int): delta of brightness.
        contrast_range (tuple): range of contrast.
        saturation_range (tuple): range of saturation.
        hue_delta (int): delta of hue.
        to_gray_prob (float): probability of converting the image to grayscale.
        distortion_prob (float): probability of applying each distortion.
    """
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18,
to_gray_prob=0.3,
distortion_prob=0.3,
**kwargs):
self.brightness_delta = brightness_delta
self.contrast_lower, self.contrast_upper = contrast_range
self.saturation_lower, self.saturation_upper = saturation_range
self.hue_delta = hue_delta
self.gray_aug = iaa.Grayscale(alpha=(0.8, 1.0))
self.to_gray_prob = to_gray_prob
self.distortion_prob = distortion_prob
def convert(self, img, alpha=1.0, beta=0.0):
"""Multiple with alpha and add beat with clip."""
img = img.astype(np.float32) * alpha + beta
img = np.clip(img, 0, 255)
return img.astype(np.uint8)
def brightness(self, img, beta, do):
"""Brightness distortion."""
if do:
# beta = random.uniform(-self.brightness_delta,
# self.brightness_delta)
img = self.convert(
img,
beta=beta)
return img
def contrast(self, img, alpha, do):
"""Contrast distortion."""
if do:
#alpha = random.uniform(self.contrast_lower, self.contrast_upper)
img = self.convert(
img,
alpha=alpha)
return img
def saturation(self, img, alpha, do):
"""Saturation distortion."""
if do:
# alpha = random.uniform(self.saturation_lower,
# self.saturation_upper)
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img[:, :, 1] = self.convert(
img[:, :, 1],
alpha=alpha)
img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
return img
def hue(self, img, rand_hue, do):
"""Hue distortion."""
if do:
# rand_hue = random.randint(-self.hue_delta, self.hue_delta)
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img[:, :, 0] = (img[:, :, 0].astype(int) + rand_hue) % 180
img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
return img
def rgb2gray(self, img):
img = self.gray_aug(image=img)
return img
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
"""Call function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
brightness_beta = random.uniform(-self.brightness_delta, self.brightness_delta)
brightness_do = random.random() < self.distortion_prob
contrast_alpha = random.uniform(self.contrast_lower, self.contrast_upper)
contrast_do = random.random() < self.distortion_prob
saturate_alpha = random.uniform(self.saturation_lower, self.saturation_upper)
saturate_do = random.random() < self.distortion_prob
rand_hue = random.randint(-self.hue_delta, self.hue_delta)
rand_hue_do = random.random() < self.distortion_prob
        # mode == 1 --> do random contrast first
        # mode == 0 --> do random contrast last
        mode = 1 if random.random() > 0.5 else 0
for i, img in enumerate(images):
if random.random() < self.to_gray_prob:
img = self.rgb2gray(img)
else:
# random brightness
img = self.brightness(img, brightness_beta, brightness_do)
if mode == 1:
img = self.contrast(img, contrast_alpha, contrast_do)
# random saturation
img = self.saturation(img, saturate_alpha, saturate_do)
# random hue
img = self.hue(img, rand_hue, rand_hue_do)
# random contrast
if mode == 0:
img = self.contrast(img, contrast_alpha, contrast_do)
images[i] = img
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
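
# Sketch of the photometric augmentation (illustrative; probabilities are the defaults): each
# sub-transform fires independently with probability distortion_prob, or the whole image is
# converted to grayscale with probability to_gray_prob; labels and intrinsics pass through untouched.
def _demo_photometric_distortion():
    img = np.random.randint(0, 255, (120, 160, 3), dtype=np.uint8)
    depth = np.random.rand(120, 160).astype(np.float64)
    intrinsic = [500.0, 500.0, 80.0, 60.0]
    t = PhotoMetricDistortion(to_gray_prob=0.3, distortion_prob=0.3)
    images, labels, intrinsics, _, _, _, _ = t([img], [depth], [intrinsic])
    # images[0] keeps its shape and dtype; only pixel values change
    return images[0].shape, images[0].dtype
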
class Weather(object):
"""Apply the following weather augmentations to data.
Args:
prob (float): probability to enforce the weather augmentation.
"""
def __init__(self,
prob=0.3,
**kwargs):
snow = iaa.FastSnowyLandscape(
lightness_threshold=[50, 100],
lightness_multiplier=(1.2, 2)
)
cloud = iaa.Clouds()
fog = iaa.Fog()
snow_flakes = iaa.Snowflakes(flake_size=(0.2, 0.4), speed=(0.001, 0.03)) #iaa.imgcorruptlike.Snow(severity=2)#
rain = iaa.Rain(speed=(0.1, 0.3), drop_size=(0.1, 0.3))
# rain_drops = RainDrop_Augmentor()
self.aug_list = [
snow, cloud, fog, snow_flakes, rain,
#wa.add_sun_flare, wa.darken, wa.random_brightness,
]
self.prob = prob
    def aug_with_weather(self, imgs, aug_id):
        weather = self.aug_list[aug_id]
        if aug_id < 5:
            return weather(images=imgs)
        else:
            return weather(imgs)
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
"""Call function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
if random.random() < self.prob:
select_id = np.random.randint(0, high=len(self.aug_list))
images = self.aug_with_weather(images, select_id)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
def resize_depth_preserve(depth, shape):
"""
Resizes depth map preserving all valid depth pixels
Multiple downsampled points can be assigned to the same pixel.
Parameters
----------
depth : np.array [h,w]
Depth map
shape : tuple (H,W)
Output shape
Returns
-------
    depth : np.array [H,W]
        Resized depth map
"""
# Store dimensions and reshapes to single column
depth = np.squeeze(depth)
h, w = depth.shape
x = depth.reshape(-1)
# Create coordinate grid
uv = np.mgrid[:h, :w].transpose(1, 2, 0).reshape(-1, 2)
# Filters valid points
idx = x > 0
crd, val = uv[idx], x[idx]
# Downsamples coordinates
crd[:, 0] = (crd[:, 0] * (shape[0] / h) + 0.5).astype(np.int32)
crd[:, 1] = (crd[:, 1] * (shape[1] / w) + 0.5).astype(np.int32)
# Filters points inside image
idx = (crd[:, 0] < shape[0]) & (crd[:, 1] < shape[1])
crd, val = crd[idx], val[idx]
# Creates downsampled depth image and assigns points
depth = np.zeros(shape)
depth[crd[:, 0], crd[:, 1]] = val
# Return resized depth map
return depth
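
# Sketch of sparse-depth resizing (illustrative): valid (non-zero) depth pixels are re-scattered
# into the target resolution instead of being interpolated, so no fake depths are created at
# object boundaries, at the cost of a sparser output map.
def _demo_resize_depth_preserve():
    depth = np.zeros((100, 100), dtype=np.float64)
    depth[10, 10] = 5.0
    depth[50, 80] = 12.0
    small = resize_depth_preserve(depth, (50, 50))
    # small.shape == (50, 50); exactly the two valid values survive, at the downscaled coordinates
    return small.shape, small[small > 0]
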
def gray_to_colormap(img, cmap='rainbow', max_value=None):
"""
Transfer gray map to matplotlib colormap
"""
assert img.ndim == 2
img[img<0] = 0
mask_invalid = img < 1e-10
    if max_value is None:
        img = img / (img.max() + 1e-8)
    else:
        img = img / (max_value + 1e-8)
    norm = matplotlib.colors.Normalize(vmin=0, vmax=1.1)
    cmap_m = matplotlib.cm.get_cmap(cmap)
    mapper = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap_m)
    colormap = (mapper.to_rgba(img)[:, :, :3] * 255).astype(np.uint8)
colormap[mask_invalid] = 0
return colormap
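
# Sketch of depth visualization (illustrative): maps a single-channel depth array to an RGB
# colormap image for qualitative inspection; invalid (near-zero) pixels are rendered black.
def _demo_gray_to_colormap():
    depth = np.random.rand(120, 160).astype(np.float32) * 50.0
    color = gray_to_colormap(depth, cmap='rainbow')
    # color.shape == (120, 160, 3) with dtype uint8
    return color.shape, color.dtype
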
class LiDarResizeCanonical(object):
"""
Resize the input to the canonical space first, then resize the input with random sampled size.
In the first stage, we assume the distance holds while the camera model varies.
In the second stage, we aim to simulate the observation in different distance. The camera will move along the optical axis.
"""
def __init__(self, **kwargs):
self.ratio_range = kwargs['ratio_range']
self.canonical_focal = kwargs['focal_length']
self.crop_size = kwargs['crop_size']
def random_on_canonical_transform(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0
to_canonical_ratio = self.canonical_focal / ori_focal
to_scale_ratio = to_random_ratio
resize_ratio = to_canonical_ratio * to_random_ratio
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [self.canonical_focal, self.canonical_focal, intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
# number of other labels may be less than that of image
#label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
label = resize_depth_preserve(label, (reshape_h, reshape_w))
# scale the label and camera intrinsics
label = label / to_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, to_scale_ratio
def random_on_crop_transform(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
crop_h, crop_w = self.crop_size
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0
to_canonical_ratio = self.canonical_focal / ori_focal
# random resize based on the last crop size
proposal_reshape_h = int(crop_h * to_random_ratio + 0.5)
proposal_reshape_w = int(crop_w * to_random_ratio + 0.5)
resize_ratio_h = proposal_reshape_h / ori_h
resize_ratio_w = proposal_reshape_w / ori_w
resize_ratio = min(resize_ratio_h, resize_ratio_w) # resize based on the long edge
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
to_scale_ratio = resize_ratio / to_canonical_ratio
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [self.canonical_focal, self.canonical_focal, intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
# number of other labels may be less than that of image
# label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
label = resize_depth_preserve(label, (reshape_h, reshape_w))
# scale the label and camera intrinsics
label = label / to_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, to_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
        assert labels[0].dtype == np.float64
target_focal = (intrinsics[0][0] + intrinsics[0][1]) / 2.0
target_to_canonical_ratio = self.canonical_focal / target_focal
target_img_shape = images[0].shape
to_random_ratio = random.uniform(self.ratio_range[0], self.ratio_range[1])
to_scale_ratio = 0
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model, to_scale_ratio = self.random_on_canonical_transform(
img, label, intrinsic, cam_model, to_random_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
        if normals is not None:
reshape_h, reshape_w, _ = images[0].shape
for i, normal in enumerate(normals):
normals[i] = cv2.resize(normal, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
        if other_labels is not None:
# other labels are like semantic segmentations, instance segmentations, instance planes segmentations...
# resize_ratio = target_to_canonical_ratio * to_random_ratio
# reshape_h = int(target_img_shape[0] * resize_ratio + 0.5)
# reshape_w = int(target_img_shape[1] * resize_ratio + 0.5)
reshape_h, reshape_w, _ = images[0].shape
for i, other_label_i in enumerate(other_labels):
other_labels[i] = cv2.resize(other_label_i, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
if transform_paras is not None:
transform_paras.update(label_scale_factor = 1.0/to_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
def build_camera_model(H: int, W: int, intrinsics: list) -> np.ndarray:
    """
    Encode the camera intrinsic parameters (focal length and principal point) into a 4-channel map.
    """
fx, fy, u0, v0 = intrinsics
f = (fx + fy) / 2.0
    # principal point location
x_row = np.arange(0, W).astype(np.float32)
x_row_center_norm = (x_row - u0) / W
x_center = np.tile(x_row_center_norm, (H, 1)) # [H, W]
y_col = np.arange(0, H).astype(np.float32)
y_col_center_norm = (y_col - v0) / H
y_center = np.tile(y_col_center_norm, (W, 1)).T
# FoV
fov_x = np.arctan(x_center / (f / W))
fov_y = np.arctan(y_center/ (f / H))
cam_model = np.stack([x_center, y_center, fov_x, fov_y], axis=2)
return cam_model
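
# Sketch of the camera-model encoding (illustrative; the intrinsics are assumptions): every pixel
# stores its normalized offset from the principal point plus an approximate viewing angle, which
# lets the network reason about the field of view independently of the image resolution.
def _demo_build_camera_model():
    cam = build_camera_model(480, 640, [1000.0, 1000.0, 320.0, 240.0])
    # cam.shape == (480, 640, 4): channels are [x_center, y_center, fov_x, fov_y];
    # at the principal point the first two channels are ~0
    return cam.shape, cam[240, 320, :2]
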
if __name__ == '__main__':
img = cv2.imread('/mnt/mldb/raw/62b3ed3455e805efcb28c74b/NuScenes/data_test/samples/CAM_FRONT/n008-2018-08-01-15-34-25-0400__CAM_FRONT__1533152214512404.jpg', -1)
H, W, _ = img.shape
label = img[:, :, 0]
intrinsic = [1000, 1000, W//2, H//2]
for i in range(20):
weather_aug = Weather(prob=1.0)
        img_aug, label_aug, intrinsic_aug, cam_models_aug, normals_aug, other_labels_aug, transform_paras = weather_aug([img, ], [label, ], [intrinsic, ])
cv2.imwrite(f'test_aug_{i}.jpg', img_aug[0])
print('Done')