#import collections
import collections.abc as collections
import cv2
import math
import numpy as np
import numbers
import random
import torch
from imgaug import augmenters as iaa
import matplotlib
import matplotlib.cm
import mono.utils.weather_aug_utils as wa
"""
Provides a set of Pytorch transforms that use OpenCV instead of PIL (Pytorch default)
for image manipulation.
"""
class Compose(object):
    # Composes transforms, e.g.: Compose([RandomHorizontalFlip(prob=0.5), ToTensor()])
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
for t in self.transforms:
images, labels, intrinsics, cam_models, normals, other_labels, transform_paras = t(images, labels, intrinsics, cam_models, normals, other_labels, transform_paras)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
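
# A minimal usage sketch (illustrative only, not part of the training pipeline): the _demo_*
# helpers in this file are hypothetical, and the mean/std/intrinsic values below are assumptions.
def _demo_compose_pipeline():
    img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)   # fake BGR image
    depth = np.random.rand(480, 640).astype(np.float64)              # fake depth label
    intrinsic = [500.0, 500.0, 320.0, 240.0]                         # [fx, fy, cx, cy]
    pipeline = Compose([
        BGR2RGB(),
        ToTensor(),
        Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
    ])
    images, labels, intrinsics, cam_models, normals, other_labels, paras = pipeline(
        [img], [depth], [intrinsic], transform_paras={})
    # images[0] is a normalized float tensor of shape [3, 480, 640]
    return images[0], labels[0], intrinsics[0]
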
class ToTensor(object):
# Converts numpy.ndarray (H x W x C) to a torch.FloatTensor of shape (C x H x W).
def __init__(self, **kwargs):
return
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
        if not isinstance(images, list) or not isinstance(labels, list) or not isinstance(intrinsics, list):
            raise (RuntimeError("transform.ToTensor() only handles lists of images/labels/intrinsics."))
        if len(images) != len(intrinsics):
            raise (RuntimeError("The numbers of images and intrinsics do not match."))
        if not isinstance(images[0], np.ndarray) or not isinstance(labels[0], np.ndarray):
            raise (RuntimeError("transform.ToTensor() only handles np.ndarray for the input and label "
                                "[e.g. data read by cv2.imread()].\n"))
        if not isinstance(intrinsics[0], list):
            raise (RuntimeError("transform.ToTensor() only handles a list for the camera intrinsics"))
        if len(images[0].shape) > 3 or len(images[0].shape) < 2:
            raise (RuntimeError("transform.ToTensor() only handles images (np.ndarray) with 2 or 3 dims.\n"))
        if len(labels[0].shape) > 3 or len(labels[0].shape) < 2:
            raise (RuntimeError("transform.ToTensor() only handles labels (np.ndarray) with 2 or 3 dims.\n"))
        if len(intrinsics[0]) > 4 or len(intrinsics[0]) < 3:
            raise (RuntimeError("transform.ToTensor() only handles intrinsics (list) of length 3 or 4.\n"))
for i, img in enumerate(images):
if len(img.shape) == 2:
img = np.expand_dims(img, axis=2)
images[i] = torch.from_numpy(img.transpose((2, 0, 1))).float()
for i, lab in enumerate(labels):
if len(lab.shape) == 2:
lab = np.expand_dims(lab, axis=0)
labels[i] = torch.from_numpy(lab).float()
for i, intrinsic in enumerate(intrinsics):
if len(intrinsic) == 3:
intrinsic = [intrinsic[0],] + intrinsic
intrinsics[i] = torch.tensor(intrinsic, dtype=torch.float)
if cam_models is not None:
for i, cam_model in enumerate(cam_models):
cam_models[i] = torch.from_numpy(cam_model.transpose((2, 0, 1))).float() if cam_model is not None else None
if normals is not None:
for i, normal in enumerate(normals):
normals[i] = torch.from_numpy(normal.transpose((2, 0, 1))).float()
if other_labels is not None:
for i, lab in enumerate(other_labels):
if len(lab.shape) == 2:
lab = np.expand_dims(lab, axis=0)
other_labels[i] = torch.from_numpy(lab).float()
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class Normalize(object):
# Normalize tensor with mean and standard deviation along channel: channel = (channel - mean) / std
def __init__(self, mean, std=None, **kwargs):
if std is None:
assert len(mean) > 0
else:
assert len(mean) == len(std)
self.mean = torch.tensor(mean).float()[:, None, None]
self.std = torch.tensor(std).float()[:, None, None] if std is not None \
else torch.tensor([1.0, 1.0, 1.0]).float()[:, None, None]
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
# if self.std is None:
# # for t, m in zip(image, self.mean):
# # t.sub(m)
# image = image - self.mean
# if ref_images is not None:
# for i, ref_i in enumerate(ref_images):
# ref_images[i] = ref_i - self.mean
# else:
# # for t, m, s in zip(image, self.mean, self.std):
# # t.sub(m).div(s)
# image = (image - self.mean) / self.std
# if ref_images is not None:
# for i, ref_i in enumerate(ref_images):
# ref_images[i] = (ref_i - self.mean) / self.std
for i, img in enumerate(images):
img = torch.div((img - self.mean), self.std)
images[i] = img
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class ResizeCanonical(object):
"""
Resize the input to the canonical space first, then resize the input with random sampled size.
In the first stage, we assume the distance holds while the camera model varies.
In the second stage, we aim to simulate the observation in different distance. The camera will move along the optical axis.
Args:
images: list of RGB images.
labels: list of depth/disparity labels.
other labels: other labels, such as instance segmentations, semantic segmentations...
"""
def __init__(self, **kwargs):
self.ratio_range = kwargs['ratio_range']
self.canonical_focal = kwargs['focal_length']
self.crop_size = kwargs['crop_size']
def random_on_canonical_transform(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0
to_canonical_ratio = self.canonical_focal / ori_focal
to_scale_ratio = to_random_ratio
resize_ratio = to_canonical_ratio * to_random_ratio
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [self.canonical_focal, self.canonical_focal, intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
# number of other labels may be less than that of image
label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# scale the label and camera intrinsics
label = label / to_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, to_scale_ratio
def random_on_crop_transform(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
crop_h, crop_w = self.crop_size
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0
to_canonical_ratio = self.canonical_focal / ori_focal
# random resize based on the last crop size
proposal_reshape_h = int(crop_h * to_random_ratio + 0.5)
proposal_reshape_w = int(crop_w * to_random_ratio + 0.5)
resize_ratio_h = proposal_reshape_h / ori_h
resize_ratio_w = proposal_reshape_w / ori_w
resize_ratio = min(resize_ratio_h, resize_ratio_w) # resize based on the long edge
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
to_scale_ratio = resize_ratio / to_canonical_ratio
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [self.canonical_focal, self.canonical_focal, intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
# number of other labels may be less than that of image
label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# scale the label and camera intrinsics
label = label / to_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, to_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
        assert labels[0].dtype == np.float64
target_focal = (intrinsics[0][0] + intrinsics[0][1]) / 2.0
target_to_canonical_ratio = self.canonical_focal / target_focal
target_img_shape = images[0].shape
to_random_ratio = random.uniform(self.ratio_range[0], self.ratio_range[1])
to_scale_ratio = 0.0
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model, to_scale_ratio = self.random_on_canonical_transform(
img, label, intrinsic, cam_model, to_random_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
        if normals is not None:
reshape_h, reshape_w, _ = images[0].shape
for i, normal in enumerate(normals):
normals[i] = cv2.resize(normal, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
        if other_labels is not None:
# other labels are like semantic segmentations, instance segmentations, instance planes segmentations...
#resize_ratio = target_to_canonical_ratio * to_scale_ratio
#reshape_h = int(target_img_shape[0] * resize_ratio + 0.5)
#reshape_w = int(target_img_shape[1] * resize_ratio + 0.5)
reshape_h, reshape_w, _ = images[0].shape
for i, other_label_i in enumerate(other_labels):
other_labels[i] = cv2.resize(other_label_i, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
if transform_paras is not None:
transform_paras.update(label_scale_factor = 1.0/to_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
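
# Sketch of the two-stage canonical resize (illustrative; numbers below are assumptions):
# with fx = 750 and canonical focal length 1000, the image is first resized by 1000/750 to map
# the camera to the canonical space (depth unchanged), then by a random ratio r = 1.5 that
# mimics moving the camera along the optical axis, which divides the depth label by r.
def _demo_resize_canonical():
    img = np.zeros((300, 400, 3), dtype=np.uint8)
    depth = np.full((300, 400), 9.0, dtype=np.float64)
    intrinsic = [750.0, 750.0, 200.0, 150.0]
    t = ResizeCanonical(ratio_range=(1.5, 1.5), focal_length=1000.0, crop_size=(512, 960))
    paras = {}
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras=paras)
    # total resize ratio = (1000 / 750) * 1.5 = 2.0, so images[0].shape == (600, 800, 3),
    # labels[0] == 9.0 / 1.5 == 6.0 and paras['label_scale_factor'] == 1 / 1.5
    return images[0].shape, labels[0][0, 0], paras['label_scale_factor']
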
class LabelScaleCononical(object):
"""
To solve the ambiguity observation for the mono branch, i.e. different focal length (object size) with the same depth, cameras are
mapped to a cononical space. To mimic this, we set the focal length to a canonical one and scale the depth value. NOTE: resize the image based on the ratio can also solve this ambiguity.
Args:
images: list of RGB images.
labels: list of depth/disparity labels.
other labels: other labels, such as instance segmentations, semantic segmentations...
"""
def __init__(self, **kwargs):
self.canonical_focal = kwargs['focal_length']
def _get_scale_ratio(self, intrinsic):
target_focal_x = intrinsic[0]
label_scale_ratio = self.canonical_focal / target_focal_x
pose_scale_ratio = 1.0
return label_scale_ratio, pose_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
#assert labels[0].dtype == np.float
label_scale_ratio = None
pose_scale_ratio = None
for i in range(len(intrinsics)):
img_i = images[i]
label_i = labels[i] if i < len(labels) else None
intrinsic_i = intrinsics[i].copy()
cam_model_i = cam_models[i] if cam_models is not None and i < len(cam_models) else None
label_scale_ratio, pose_scale_ratio = self._get_scale_ratio(intrinsic_i)
# adjust the focal length, map the current camera to the canonical space
intrinsics[i] = [intrinsic_i[0]*label_scale_ratio, intrinsic_i[1]*label_scale_ratio, intrinsic_i[2], intrinsic_i[3]]
# scale the label to the canonical space
if label_i is not None:
labels[i] = label_i * label_scale_ratio
if cam_model_i is not None:
# As the focal length is adjusted (canonical focal length), the camera model should be re-built.
ori_h, ori_w, _ = img_i.shape
cam_models[i] = build_camera_model(ori_h, ori_w, intrinsics[i])
if transform_paras is not None:
transform_paras.update(label_scale_factor = label_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
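
# Sketch of the canonical label scaling (illustrative; values are assumptions): with fx = 500
# and a canonical focal length of 1000, depth values are multiplied by 1000 / 500 = 2 and the
# factor is recorded, so metric depth can later be recovered by dividing it back out.
def _demo_label_scale_canonical():
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    depth = np.full((480, 640), 10.0, dtype=np.float64)
    intrinsic = [500.0, 500.0, 320.0, 240.0]
    t = LabelScaleCononical(focal_length=1000.0)
    paras = {}
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras=paras)
    # labels[0] is now 20.0 everywhere; intrinsics[0][:2] == [1000.0, 1000.0];
    # paras['label_scale_factor'] == 2.0
    return labels[0][0, 0], intrinsics[0], paras['label_scale_factor']
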
class ResizeKeepRatio(object):
"""
Resize and pad to a given size. Hold the aspect ratio.
This resizing assumes that the camera model remains unchanged.
Args:
resize_size: predefined output size.
"""
def __init__(self, resize_size, padding=None, ignore_label=-1, **kwargs):
if isinstance(resize_size, int):
self.resize_h = resize_size
self.resize_w = resize_size
elif isinstance(resize_size, collections.Iterable) and len(resize_size) == 2 \
and isinstance(resize_size[0], int) and isinstance(resize_size[1], int) \
and resize_size[0] > 0 and resize_size[1] > 0:
self.resize_h = resize_size[0]
self.resize_w = resize_size[1]
else:
raise (RuntimeError("crop size error.\n"))
if padding is None:
self.padding = padding
elif isinstance(padding, list):
if all(isinstance(i, numbers.Number) for i in padding):
self.padding = padding
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if len(padding) != 3:
raise (RuntimeError("padding channel is not equal with 3\n"))
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if isinstance(ignore_label, int):
self.ignore_label = ignore_label
else:
raise (RuntimeError("ignore_label should be an integer number\n"))
self.crop_size = kwargs['crop_size']
self.canonical_focal = kwargs['focal_length']
def main_data_transform(self, image, label, intrinsic, cam_model, resize_ratio, padding, to_scale_ratio):
"""
Resize data first and then do the padding.
'label' will be scaled.
"""
h, w, _ = image.shape
reshape_h = int(resize_ratio * h)
reshape_w = int(resize_ratio * w)
pad_h, pad_w, pad_h_half, pad_w_half = padding
# resize
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
# padding
image = cv2.copyMakeBorder(
image,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.padding)
if label is not None:
# label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
label = resize_depth_preserve(label, (reshape_h, reshape_w))
label = cv2.copyMakeBorder(
label,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
# scale the label
label = label / to_scale_ratio
        # Resize, adjust principal point
if intrinsic is not None:
intrinsic[2] = intrinsic[2] * resize_ratio
intrinsic[3] = intrinsic[3] * resize_ratio
if cam_model is not None:
#cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
cam_model = cv2.copyMakeBorder(
cam_model,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
        # Pad, adjust the principal point
if intrinsic is not None:
intrinsic[2] = intrinsic[2] + pad_w_half
intrinsic[3] = intrinsic[3] + pad_h_half
return image, label, intrinsic, cam_model
def get_label_scale_factor(self, image, intrinsic, resize_ratio):
ori_h, ori_w, _ = image.shape
crop_h, crop_w = self.crop_size
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0 #intrinsic[0] #
to_canonical_ratio = self.canonical_focal / ori_focal
to_scale_ratio = resize_ratio / to_canonical_ratio
return to_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
target_h, target_w, _ = images[0].shape
resize_ratio_h = self.resize_h / target_h
resize_ratio_w = self.resize_w / target_w
resize_ratio = min(resize_ratio_h, resize_ratio_w)
reshape_h = int(resize_ratio * target_h)
reshape_w = int(resize_ratio * target_w)
pad_h = max(self.resize_h - reshape_h, 0)
pad_w = max(self.resize_w - reshape_w, 0)
pad_h_half = int(pad_h / 2)
pad_w_half = int(pad_w / 2)
pad_info = [pad_h, pad_w, pad_h_half, pad_w_half]
to_scale_ratio = self.get_label_scale_factor(images[0], intrinsics[0], resize_ratio)
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model = self.main_data_transform(
img, label, intrinsic, cam_model, resize_ratio, pad_info, to_scale_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
if normals is not None:
for i, normal in enumerate(normals):
normal = cv2.resize(normal, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# pad
normals[i] = cv2.copyMakeBorder(
normal,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=0)
if other_labels is not None:
for i, other_lab in enumerate(other_labels):
# resize
other_lab = cv2.resize(other_lab, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# pad
other_labels[i] = cv2.copyMakeBorder(
other_lab,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
if transform_paras is not None:
transform_paras.update(pad=[pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half])
if 'label_scale_factor' in transform_paras:
transform_paras['label_scale_factor'] = transform_paras['label_scale_factor'] * 1.0 / to_scale_ratio
else:
transform_paras.update(label_scale_factor=1.0/to_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
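
# Sketch of resize-and-pad to a fixed network input (illustrative; sizes are assumptions):
# a 480x640 image is resized so the limiting side fits 512x1088, padded symmetrically, and the
# padding plus the label scale factor are stored in transform_paras for later un-padding/re-scaling.
def _demo_resize_keep_ratio():
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    depth = np.random.rand(480, 640).astype(np.float64)
    intrinsic = [500.0, 500.0, 320.0, 240.0]
    t = ResizeKeepRatio(resize_size=(512, 1088), padding=[0, 0, 0], ignore_label=-1,
                        crop_size=(512, 1088), focal_length=1000.0)
    paras = {}
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras=paras)
    # images[0].shape == (512, 1088, 3); paras['pad'] holds [top, bottom, left, right]
    return images[0].shape, paras
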
class KeepResizeCanoSize(object):
"""
Resize and pad to a given size. Hold the aspect ratio.
This resizing assumes that the camera model remains unchanged.
Args:
resize_size: predefined output size.
"""
def __init__(self, resize_size, padding=None, ignore_label=-1, **kwargs):
if isinstance(resize_size, int):
self.resize_h = resize_size
self.resize_w = resize_size
elif isinstance(resize_size, collections.Iterable) and len(resize_size) == 2 \
and isinstance(resize_size[0], int) and isinstance(resize_size[1], int) \
and resize_size[0] > 0 and resize_size[1] > 0:
self.resize_h = resize_size[0]
self.resize_w = resize_size[1]
else:
raise (RuntimeError("crop size error.\n"))
if padding is None:
self.padding = padding
elif isinstance(padding, list):
if all(isinstance(i, numbers.Number) for i in padding):
self.padding = padding
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if len(padding) != 3:
raise (RuntimeError("padding channel is not equal with 3\n"))
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if isinstance(ignore_label, int):
self.ignore_label = ignore_label
else:
raise (RuntimeError("ignore_label should be an integer number\n"))
self.crop_size = kwargs['crop_size']
self.canonical_focal = kwargs['focal_length']
def main_data_transform(self, image, label, intrinsic, cam_model, resize_ratio, padding, to_scale_ratio):
"""
Resize data first and then do the padding.
'label' will be scaled.
"""
h, w, _ = image.shape
reshape_h = int(resize_ratio * h)
reshape_w = int(resize_ratio * w)
pad_h, pad_w, pad_h_half, pad_w_half = padding
# resize
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
# padding
image = cv2.copyMakeBorder(
image,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.padding)
if label is not None:
# label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
label = resize_depth_preserve(label, (reshape_h, reshape_w))
label = cv2.copyMakeBorder(
label,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
# scale the label
label = label / to_scale_ratio
        # Resize, adjust principal point
if intrinsic is not None:
intrinsic[2] = intrinsic[2] * resize_ratio
intrinsic[3] = intrinsic[3] * resize_ratio
if cam_model is not None:
#cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
cam_model = cv2.copyMakeBorder(
cam_model,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
        # Pad, adjust the principal point
if intrinsic is not None:
intrinsic[2] = intrinsic[2] + pad_w_half
intrinsic[3] = intrinsic[3] + pad_h_half
return image, label, intrinsic, cam_model
# def get_label_scale_factor(self, image, intrinsic, resize_ratio):
# ori_h, ori_w, _ = image.shape
# crop_h, crop_w = self.crop_size
# ori_focal = intrinsic[0] #(intrinsic[0] + intrinsic[1]) / 2.0
# to_canonical_ratio = self.canonical_focal / ori_focal
# to_scale_ratio = resize_ratio / to_canonical_ratio
# return to_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
target_h, target_w, _ = images[0].shape
ori_focal = intrinsics[0][0]
to_canonical_ratio = self.canonical_focal / ori_focal
resize_ratio = to_canonical_ratio
reshape_h = int(resize_ratio * target_h)
reshape_w = int(resize_ratio * target_w)
        pad_h = 32 - reshape_h % 32 if reshape_h % 32 != 0 else 0
        pad_w = 32 - reshape_w % 32 if reshape_w % 32 != 0 else 0
pad_h_half = int(pad_h / 2)
pad_w_half = int(pad_w / 2)
pad_info = [pad_h, pad_w, pad_h_half, pad_w_half]
to_scale_ratio = 1.0
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model = self.main_data_transform(
img, label, intrinsic, cam_model, resize_ratio, pad_info, to_scale_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
if normals is not None:
for i, normal in enumerate(normals):
# resize
normal = cv2.resize(normal, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# pad
normals[i] = cv2.copyMakeBorder(
normal,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=0)
if other_labels is not None:
for i, other_lab in enumerate(other_labels):
# resize
other_lab = cv2.resize(other_lab, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# pad
other_labels[i] = cv2.copyMakeBorder(
other_lab,
pad_h_half,
pad_h - pad_h_half,
pad_w_half,
pad_w - pad_w_half,
cv2.BORDER_CONSTANT,
value=self.ignore_label)
if transform_paras is not None:
transform_paras.update(pad=[pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half])
if 'label_scale_factor' in transform_paras:
transform_paras['label_scale_factor'] = transform_paras['label_scale_factor'] * 1.0 / to_scale_ratio
else:
transform_paras.update(label_scale_factor=1.0/to_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class RandomCrop(object):
"""Crops the given ndarray image (H*W*C or H*W).
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is made.
"""
def __init__(self, crop_size, crop_type='center', padding=None, ignore_label=-1, **kwargs):
if isinstance(crop_size, int):
self.crop_h = crop_size
self.crop_w = crop_size
elif isinstance(crop_size, collections.Iterable) and len(crop_size) == 2 \
and isinstance(crop_size[0], int) and isinstance(crop_size[1], int) \
and crop_size[0] > 0 and crop_size[1] > 0:
self.crop_h = crop_size[0]
self.crop_w = crop_size[1]
else:
raise (RuntimeError("crop size error.\n"))
if crop_type == 'center' or crop_type == 'rand' or crop_type=='rand_in_field':
self.crop_type = crop_type
else:
raise (RuntimeError("crop type error: rand | center | rand_in_field \n"))
if padding is None:
self.padding = padding
elif isinstance(padding, list):
if all(isinstance(i, numbers.Number) for i in padding):
self.padding = padding
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if len(padding) != 3:
raise (RuntimeError("padding channel is not equal with 3\n"))
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if isinstance(ignore_label, int):
self.ignore_label = ignore_label
else:
raise (RuntimeError("ignore_label should be an integer number\n"))
def cal_padding_paras(self, h, w):
# padding if current size is not satisfied
pad_h = max(self.crop_h - h, 0)
pad_w = max(self.crop_w - w, 0)
pad_h_half = int(pad_h / 2)
pad_w_half = int(pad_w / 2)
return pad_h, pad_w, pad_h_half, pad_w_half
def cal_cropping_paras(self, h, w, intrinsic):
u0 = intrinsic[2]
v0 = intrinsic[3]
if self.crop_type == 'rand':
h_min = 0
h_max = h - self.crop_h
w_min = 0
w_max = w - self.crop_w
elif self.crop_type == 'center':
h_min = (h - self.crop_h) / 2
h_max = (h - self.crop_h) / 2
w_min = (w - self.crop_w) / 2
w_max = (w - self.crop_w) / 2
else: # rand in field
h_min = min(max(0, v0 - 0.75*self.crop_h), h-self.crop_h)
h_max = min(max(v0 - 0.25*self.crop_h, 0), h-self.crop_h)
w_min = min(max(0, u0 - 0.75*self.crop_w), w-self.crop_w)
w_max = min(max(u0 - 0.25*self.crop_w, 0), w-self.crop_w)
h_off = random.randint(int(h_min), int(h_max))
w_off = random.randint(int(w_min), int(w_max))
return h_off, w_off
def main_data_transform(self, image, label, intrinsic, cam_model,
pad_h, pad_w, pad_h_half, pad_w_half, h_off, w_off):
# padding if current size is not satisfied
if pad_h > 0 or pad_w > 0:
if self.padding is None:
raise (RuntimeError("depthtransform.Crop() need padding while padding argument is None\n"))
image = cv2.copyMakeBorder(image, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.padding)
if label is not None:
label = cv2.copyMakeBorder(label, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
if cam_model is not None:
cam_model = cv2.copyMakeBorder(cam_model, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
# cropping
image = image[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if label is not None:
label = label[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if cam_model is not None:
cam_model = cam_model[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if intrinsic is not None:
intrinsic[2] = intrinsic[2] + pad_w_half - w_off
intrinsic[3] = intrinsic[3] + pad_h_half - h_off
return image, label, intrinsic, cam_model
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
if 'random_crop_size' in transform_paras and transform_paras['random_crop_size'] is not None \
and (transform_paras['random_crop_size'][0] + transform_paras['random_crop_size'][1] > 500):
self.crop_h = int(transform_paras['random_crop_size'][0].item())
self.crop_w = int(transform_paras['random_crop_size'][1].item())
target_img = images[0]
target_h, target_w, _ = target_img.shape
target_intrinsic = intrinsics[0]
pad_h, pad_w, pad_h_half, pad_w_half = self.cal_padding_paras(target_h, target_w)
h_off, w_off = self.cal_cropping_paras(target_h+pad_h, target_w+pad_w, target_intrinsic)
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i].copy() if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model = self.main_data_transform(
img, label, intrinsic, cam_model,
pad_h, pad_w, pad_h_half, pad_w_half, h_off, w_off)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
pad=[pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half]
if normals is not None:
for i, normal in enumerate(normals):
# padding if current size is not satisfied
normal = cv2.copyMakeBorder(normal, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=0)
normals[i] = normal[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if other_labels is not None:
for i, other_lab in enumerate(other_labels):
# padding if current size is not satisfied
other_lab = cv2.copyMakeBorder(other_lab, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
other_labels[i] = other_lab[h_off:h_off+self.crop_h, w_off:w_off+self.crop_w]
if transform_paras is not None:
transform_paras.update(dict(pad=pad))
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
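
# Sketch of a center crop (illustrative; sizes are assumptions): cropping shifts the principal
# point by the crop offset, so the returned intrinsics stay consistent with the cropped image.
def _demo_center_crop():
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    depth = np.random.rand(480, 640).astype(np.float64)
    intrinsic = [500.0, 500.0, 320.0, 240.0]
    t = RandomCrop(crop_size=(448, 448), crop_type='center', padding=[0, 0, 0], ignore_label=-1)
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras={})
    # images[0].shape == (448, 448, 3); cx moves from 320 to 320 - (640 - 448) / 2 == 224
    return images[0].shape, intrinsics[0]
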
class RandomResize(object):
"""
Random resize the image. During this process, the camera model is hold, and thus the depth label is scaled.
Args:
images: list of RGB images.
labels: list of depth/disparity labels.
other labels: other labels, such as instance segmentations, semantic segmentations...
"""
def __init__(self, ratio_range=(0.85, 1.15), prob=0.5, is_lidar=True, **kwargs):
self.ratio_range = ratio_range
self.is_lidar = is_lidar
self.prob = prob
def random_resize(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
resize_ratio = to_random_ratio
label_scale_ratio = 1.0 / resize_ratio
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [intrinsic[0], intrinsic[1], intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
if self.is_lidar:
label = resize_depth_preserve(label, (reshape_h, reshape_w))
else:
label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
# scale the label
label = label * label_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, label_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
        assert labels[0].dtype == np.float64
# target_focal = (intrinsics[0][0] + intrinsics[0][1]) / 2.0
# target_to_canonical_ratio = self.canonical_focal / target_focal
# target_img_shape = images[0].shape
prob = random.uniform(0, 1)
if prob < self.prob:
to_random_ratio = random.uniform(self.ratio_range[0], self.ratio_range[1])
else:
to_random_ratio = 1.0
label_scale_ratio = 0.0
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i].copy() if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model, label_scale_ratio = self.random_resize(
img, label, intrinsic, cam_model, to_random_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic.copy()
if cam_model is not None:
cam_models[i] = cam_model
        if normals is not None:
reshape_h, reshape_w, _ = images[0].shape
for i, norm in enumerate(normals):
normals[i] = cv2.resize(norm, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
        if other_labels is not None:
# other labels are like semantic segmentations, instance segmentations, instance planes segmentations...
#resize_ratio = target_to_canonical_ratio * to_scale_ratio
#reshape_h = int(target_img_shape[0] * resize_ratio + 0.5)
#reshape_w = int(target_img_shape[1] * resize_ratio + 0.5)
reshape_h, reshape_w, _ = images[0].shape
for i, other_label_i in enumerate(other_labels):
other_labels[i] = cv2.resize(other_label_i, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
if transform_paras is not None:
if 'label_scale_factor' in transform_paras:
transform_paras['label_scale_factor'] = transform_paras['label_scale_factor'] * label_scale_ratio
else:
transform_paras.update(label_scale_factor = label_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
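
# Sketch of the random resize (illustrative; numbers are assumptions): the camera model is held
# fixed, so enlarging the image by r makes objects look closer; the depth label is therefore
# multiplied by 1 / r to keep the observation geometrically consistent.
def _demo_random_resize():
    img = np.zeros((240, 320, 3), dtype=np.uint8)
    depth = np.full((240, 320), 8.0, dtype=np.float64)
    intrinsic = [400.0, 400.0, 160.0, 120.0]
    t = RandomResize(ratio_range=(2.0, 2.0), prob=1.0, is_lidar=False)
    paras = {}
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras=paras)
    # with the ratio fixed to 2: images[0].shape == (480, 640, 3), labels[0] == 4.0,
    # and paras['label_scale_factor'] == 0.5
    return images[0].shape, labels[0][0, 0], paras['label_scale_factor']
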
class RandomEdgeMask(object):
"""
Random mask the input and labels.
Args:
images: list of RGB images.
labels: list of depth/disparity labels.
other labels: other labels, such as instance segmentations, semantic segmentations...
"""
def __init__(self, mask_maxsize=32, prob=0.5, rgb_invalid=[0,0,0], label_invalid=-1,**kwargs):
self.mask_maxsize = mask_maxsize
self.prob = prob
self.rgb_invalid = rgb_invalid
self.label_invalid = label_invalid
def mask_edge(self, image, mask_edgesize, mask_value):
H, W = image.shape[0], image.shape[1]
# up
image[0:mask_edgesize[0], :, ...] = mask_value
# down
image[H-mask_edgesize[1]:H, :, ...] = mask_value
# left
image[:, 0:mask_edgesize[2], ...] = mask_value
# right
image[:, W-mask_edgesize[3]:W, ...] = mask_value
return image
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
        assert labels[0].dtype == np.float64
prob = random.uniform(0, 1)
if prob > self.prob:
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
mask_edgesize = random.sample(range(self.mask_maxsize), 4) #[up, down, left, right]
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
img = self.mask_edge(img, mask_edgesize, self.rgb_invalid)
images[i] = img
if label is not None:
label = self.mask_edge(label, mask_edgesize, self.label_invalid)
labels[i] = label
        if normals is not None:
for i, normal in enumerate(normals):
normals[i] = self.mask_edge(normal, mask_edgesize, mask_value=0)
        if other_labels is not None:
# other labels are like semantic segmentations, instance segmentations, instance planes segmentations...
for i, other_label_i in enumerate(other_labels):
other_labels[i] = self.mask_edge(other_label_i, mask_edgesize, self.label_invalid)
if transform_paras is not None:
pad = transform_paras['pad'] if 'pad' in transform_paras else [0,0,0,0]
new_pad = [max(mask_edgesize[0], pad[0]), max(mask_edgesize[1], pad[1]), max(mask_edgesize[2], pad[2]), max(mask_edgesize[3], pad[3])]
transform_paras.update(dict(pad=new_pad))
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class AdjustSize(object):
"""Crops the given ndarray image (H*W*C or H*W).
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is made.
"""
def __init__(self, padding=None, ignore_label=-1, **kwargs):
if padding is None:
self.padding = padding
elif isinstance(padding, list):
if all(isinstance(i, numbers.Number) for i in padding):
self.padding = padding
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if len(padding) != 3:
raise (RuntimeError("padding channel is not equal with 3\n"))
else:
raise (RuntimeError("padding in Crop() should be a number list\n"))
if isinstance(ignore_label, int):
self.ignore_label = ignore_label
else:
raise (RuntimeError("ignore_label should be an integer number\n"))
def get_pad_paras(self, h, w):
        pad_h = 32 - h % 32 if h % 32 != 0 else 0
        pad_w = 32 - w % 32 if w % 32 != 0 else 0
pad_h_half = int(pad_h // 2)
pad_w_half = int(pad_w // 2)
return pad_h, pad_w, pad_h_half, pad_w_half
def main_data_transform(self, image, label, intrinsic, cam_model):
h, w, _ = image.shape
pad_h, pad_w, pad_h_half, pad_w_half = self.get_pad_paras(h=h, w=w)
if pad_h > 0 or pad_w > 0:
if self.padding is None:
raise (RuntimeError("depthtransform.Crop() need padding while padding argument is None\n"))
image = cv2.copyMakeBorder(image, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.padding)
if label is not None:
label = cv2.copyMakeBorder(label, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
if cam_model is not None:
cam_model = cv2.copyMakeBorder(cam_model, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
if intrinsic is not None:
intrinsic[2] = intrinsic[2] + pad_w_half
intrinsic[3] = intrinsic[3] + pad_h_half
pad=[pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half]
return image, label, intrinsic, cam_model, pad
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
target_img = images[0]
target_h, target_w, _ = target_img.shape
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model, pad = self.main_data_transform(
img, label, intrinsic, cam_model)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
if transform_paras is not None:
transform_paras.update(dict(pad=pad))
if normals is not None:
pad_h, pad_w, pad_h_half, pad_w_half = self.get_pad_paras(h=target_h, w=target_w)
for i, normal in enumerate(normals):
normals[i] = cv2.copyMakeBorder(normal, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=0)
if other_labels is not None:
pad_h, pad_w, pad_h_half, pad_w_half = self.get_pad_paras(h=target_h, w=target_w)
for i, other_lab in enumerate(other_labels):
other_labels[i] = cv2.copyMakeBorder(other_lab, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=self.ignore_label)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
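
# Sketch of the final size adjustment (illustrative; the input size is an assumption): the image
# is padded so that H and W become multiples of 32, as required by typical encoder-decoder backbones.
def _demo_adjust_size():
    img = np.zeros((481, 641, 3), dtype=np.uint8)
    depth = np.random.rand(481, 641).astype(np.float64)
    intrinsic = [500.0, 500.0, 320.0, 240.0]
    t = AdjustSize(padding=[0, 0, 0], ignore_label=-1)
    images, labels, intrinsics, _, _, _, paras = t([img], [depth], [intrinsic], transform_paras={})
    # images[0].shape == (512, 672, 3); paras['pad'] holds [top, bottom, left, right]
    return images[0].shape, paras['pad']
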
class RandomHorizontalFlip(object):
def __init__(self, prob=0.5, **kwargs):
self.p = prob
def main_data_transform(self, image, label, intrinsic, cam_model, rotate):
if rotate:
image = cv2.flip(image, 1)
if label is not None:
label = cv2.flip(label, 1)
            if intrinsic is not None:
                h, w, _ = image.shape
                # a horizontal flip mirrors only the x-coordinate of the principal point; cy stays unchanged
                intrinsic[2] = w - intrinsic[2]
if cam_model is not None:
cam_model = cv2.flip(cam_model, 1)
cam_model[:, :, 0] = cam_model[:, :, 0] * -1
cam_model[:, :, 2] = cam_model[:, :, 2] * -1
return image, label, intrinsic, cam_model
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
rotate = random.random() > self.p
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model = self.main_data_transform(
img, label, intrinsic, cam_model, rotate)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
if normals is not None:
for i, normal in enumerate(normals):
if rotate:
normal = cv2.flip(normal, 1)
normal[:, :, 0] = -normal[:, :, 0] # NOTE: check the direction of normal coordinates axis, this is used in https://github.com/baegwangbin/surface_normal_uncertainty
normals[i] = normal
if other_labels is not None:
for i, other_lab in enumerate(other_labels):
if rotate:
other_lab = cv2.flip(other_lab, 1)
other_labels[i] = other_lab
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class RandomBlur(object):
def __init__(self,
aver_kernal=(2, 10),
motion_kernal=(5, 15),
angle=[-80, 80],
prob=0.3,
**kwargs):
        average_blur = iaa.AverageBlur(k=aver_kernal)
        motion_blur = iaa.MotionBlur(k=motion_kernal, angle=angle)
        zoom_blur = iaa.imgcorruptlike.ZoomBlur(severity=1)
        self.prob = prob
        self.blurs = [average_blur, motion_blur, zoom_blur]
def blur(self, imgs, id):
blur_mtd = self.blurs[id]
return blur_mtd(images=imgs)
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
prob = random.random()
if prob < self.prob:
id = random.randint(0, len(self.blurs)-1)
images = self.blur(images, id)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class RGBCompresion(object):
def __init__(self, prob=0.1, compression=(0, 50), **kwargs):
self.rgb_compress = iaa.Sequential(
[
iaa.JpegCompression(compression=compression),
],
random_order=True,
)
self.prob = prob
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
if random.random() < self.prob:
images = self.rgb_compress(images=images)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class RGB2BGR(object):
# Converts image from RGB order to BGR order, for model initialized from Caffe
def __init__(self, **kwargs):
return
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
for i, img in enumerate(images):
images[i] = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class BGR2RGB(object):
# Converts image from BGR order to RGB order, for model initialized from Pytorch
def __init__(self, **kwargs):
return
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
for i, img in enumerate(images):
images[i] = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
class PhotoMetricDistortion(object):
"""Apply photometric distortion to image sequentially, every transformation
is applied with a probability of 0.5. The position of random contrast is in
second or second to last.
1. random brightness
2. random contrast (mode 0)
3. convert color from BGR to HSV
4. random saturation
5. random hue
6. convert color from HSV to BGR
7. random contrast (mode 1)
    Args:
        brightness_delta (int): delta of brightness.
        contrast_range (tuple): range of contrast.
        saturation_range (tuple): range of saturation.
        hue_delta (int): delta of hue.
        to_gray_prob (float): probability of converting the image to grayscale.
        distortion_prob (float): probability of applying each distortion.
    """
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18,
to_gray_prob=0.3,
distortion_prob=0.3,
**kwargs):
self.brightness_delta = brightness_delta
self.contrast_lower, self.contrast_upper = contrast_range
self.saturation_lower, self.saturation_upper = saturation_range
self.hue_delta = hue_delta
self.gray_aug = iaa.Grayscale(alpha=(0.8, 1.0))
self.to_gray_prob = to_gray_prob
self.distortion_prob = distortion_prob
def convert(self, img, alpha=1.0, beta=0.0):
"""Multiple with alpha and add beat with clip."""
img = img.astype(np.float32) * alpha + beta
img = np.clip(img, 0, 255)
return img.astype(np.uint8)
def brightness(self, img, beta, do):
"""Brightness distortion."""
if do:
# beta = random.uniform(-self.brightness_delta,
# self.brightness_delta)
img = self.convert(
img,
beta=beta)
return img
def contrast(self, img, alpha, do):
"""Contrast distortion."""
if do:
#alpha = random.uniform(self.contrast_lower, self.contrast_upper)
img = self.convert(
img,
alpha=alpha)
return img
def saturation(self, img, alpha, do):
"""Saturation distortion."""
if do:
# alpha = random.uniform(self.saturation_lower,
# self.saturation_upper)
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img[:, :, 1] = self.convert(
img[:, :, 1],
alpha=alpha)
img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
return img
def hue(self, img, rand_hue, do):
"""Hue distortion."""
if do:
# rand_hue = random.randint(-self.hue_delta, self.hue_delta)
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img[:, :, 0] = (img[:, :, 0].astype(int) + rand_hue) % 180
img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
return img
def rgb2gray(self, img):
img = self.gray_aug(image=img)
return img
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
"""Call function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
brightness_beta = random.uniform(-self.brightness_delta, self.brightness_delta)
brightness_do = random.random() < self.distortion_prob
contrast_alpha = random.uniform(self.contrast_lower, self.contrast_upper)
contrast_do = random.random() < self.distortion_prob
saturate_alpha = random.uniform(self.saturation_lower, self.saturation_upper)
saturate_do = random.random() < self.distortion_prob
rand_hue = random.randint(-self.hue_delta, self.hue_delta)
rand_hue_do = random.random() < self.distortion_prob
        # mode == 1 --> do random contrast first
        # mode == 0 --> do random contrast last
        mode = 1 if random.random() > 0.5 else 0
for i, img in enumerate(images):
if random.random() < self.to_gray_prob:
img = self.rgb2gray(img)
else:
# random brightness
img = self.brightness(img, brightness_beta, brightness_do)
if mode == 1:
img = self.contrast(img, contrast_alpha, contrast_do)
# random saturation
img = self.saturation(img, saturate_alpha, saturate_do)
# random hue
img = self.hue(img, rand_hue, rand_hue_do)
# random contrast
if mode == 0:
img = self.contrast(img, contrast_alpha, contrast_do)
images[i] = img
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
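
# Sketch of the photometric augmentation (illustrative; probabilities are the defaults): each
# sub-transform fires independently with probability distortion_prob, or the whole image is
# converted to grayscale with probability to_gray_prob; labels and intrinsics pass through untouched.
def _demo_photometric_distortion():
    img = np.random.randint(0, 255, (120, 160, 3), dtype=np.uint8)
    depth = np.random.rand(120, 160).astype(np.float64)
    intrinsic = [500.0, 500.0, 80.0, 60.0]
    t = PhotoMetricDistortion(to_gray_prob=0.3, distortion_prob=0.3)
    images, labels, intrinsics, _, _, _, _ = t([img], [depth], [intrinsic])
    # images[0] keeps its shape and dtype; only pixel values change
    return images[0].shape, images[0].dtype
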
class Weather(object):
"""Apply the following weather augmentations to data.
Args:
prob (float): probability to enforce the weather augmentation.
"""
def __init__(self,
prob=0.3,
**kwargs):
snow = iaa.FastSnowyLandscape(
lightness_threshold=[50, 100],
lightness_multiplier=(1.2, 2)
)
cloud = iaa.Clouds()
fog = iaa.Fog()
snow_flakes = iaa.Snowflakes(flake_size=(0.2, 0.4), speed=(0.001, 0.03)) #iaa.imgcorruptlike.Snow(severity=2)#
rain = iaa.Rain(speed=(0.1, 0.3), drop_size=(0.1, 0.3))
# rain_drops = RainDrop_Augmentor()
self.aug_list = [
snow, cloud, fog, snow_flakes, rain,
#wa.add_sun_flare, wa.darken, wa.random_brightness,
]
self.prob = prob
    def aug_with_weather(self, imgs, aug_id):
        weather = self.aug_list[aug_id]
        if aug_id < 5:
            return weather(images=imgs)
        else:
            return weather(imgs)
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
"""Call function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
if random.random() < self.prob:
select_id = np.random.randint(0, high=len(self.aug_list))
images = self.aug_with_weather(images, select_id)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
def resize_depth_preserve(depth, shape):
"""
Resizes depth map preserving all valid depth pixels
Multiple downsampled points can be assigned to the same pixel.
Parameters
----------
depth : np.array [h,w]
Depth map
shape : tuple (H,W)
Output shape
Returns
-------
    depth : np.array [H,W]
        Resized depth map
"""
# Store dimensions and reshapes to single column
depth = np.squeeze(depth)
h, w = depth.shape
x = depth.reshape(-1)
# Create coordinate grid
uv = np.mgrid[:h, :w].transpose(1, 2, 0).reshape(-1, 2)
# Filters valid points
idx = x > 0
crd, val = uv[idx], x[idx]
# Downsamples coordinates
crd[:, 0] = (crd[:, 0] * (shape[0] / h) + 0.5).astype(np.int32)
crd[:, 1] = (crd[:, 1] * (shape[1] / w) + 0.5).astype(np.int32)
# Filters points inside image
idx = (crd[:, 0] < shape[0]) & (crd[:, 1] < shape[1])
crd, val = crd[idx], val[idx]
# Creates downsampled depth image and assigns points
depth = np.zeros(shape)
depth[crd[:, 0], crd[:, 1]] = val
# Return resized depth map
return depth
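
# Sketch of sparse-depth resizing (illustrative): valid (non-zero) depth pixels are re-scattered
# into the target resolution instead of being interpolated, so no fake depths are created at
# object boundaries, at the cost of a sparser output map.
def _demo_resize_depth_preserve():
    depth = np.zeros((100, 100), dtype=np.float64)
    depth[10, 10] = 5.0
    depth[50, 80] = 12.0
    small = resize_depth_preserve(depth, (50, 50))
    # small.shape == (50, 50); exactly the two valid values survive, at the downscaled coordinates
    return small.shape, small[small > 0]
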
def gray_to_colormap(img, cmap='rainbow', max_value=None):
"""
Transfer gray map to matplotlib colormap
"""
assert img.ndim == 2
img[img<0] = 0
mask_invalid = img < 1e-10
    if max_value is None:
        img = img / (img.max() + 1e-8)
    else:
        img = img / (max_value + 1e-8)
    norm = matplotlib.colors.Normalize(vmin=0, vmax=1.1)
    cmap_m = matplotlib.cm.get_cmap(cmap)
    mapper = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap_m)
    colormap = (mapper.to_rgba(img)[:, :, :3] * 255).astype(np.uint8)
colormap[mask_invalid] = 0
return colormap
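
# Sketch of depth visualization (illustrative): maps a single-channel depth array to an RGB
# colormap image for qualitative inspection; invalid (near-zero) pixels are rendered black.
def _demo_gray_to_colormap():
    depth = np.random.rand(120, 160).astype(np.float32) * 50.0
    color = gray_to_colormap(depth, cmap='rainbow')
    # color.shape == (120, 160, 3) with dtype uint8
    return color.shape, color.dtype
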
class LiDarResizeCanonical(object):
"""
Resize the input to the canonical space first, then resize the input with random sampled size.
In the first stage, we assume the distance holds while the camera model varies.
In the second stage, we aim to simulate the observation in different distance. The camera will move along the optical axis.
"""
def __init__(self, **kwargs):
self.ratio_range = kwargs['ratio_range']
self.canonical_focal = kwargs['focal_length']
self.crop_size = kwargs['crop_size']
def random_on_canonical_transform(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0
to_canonical_ratio = self.canonical_focal / ori_focal
to_scale_ratio = to_random_ratio
resize_ratio = to_canonical_ratio * to_random_ratio
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [self.canonical_focal, self.canonical_focal, intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
# number of other labels may be less than that of image
#label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
label = resize_depth_preserve(label, (reshape_h, reshape_w))
# scale the label and camera intrinsics
label = label / to_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, to_scale_ratio
def random_on_crop_transform(self, image, label, intrinsic, cam_model, to_random_ratio):
ori_h, ori_w, _ = image.shape
crop_h, crop_w = self.crop_size
ori_focal = (intrinsic[0] + intrinsic[1]) / 2.0
to_canonical_ratio = self.canonical_focal / ori_focal
# random resize based on the last crop size
proposal_reshape_h = int(crop_h * to_random_ratio + 0.5)
proposal_reshape_w = int(crop_w * to_random_ratio + 0.5)
resize_ratio_h = proposal_reshape_h / ori_h
resize_ratio_w = proposal_reshape_w / ori_w
resize_ratio = min(resize_ratio_h, resize_ratio_w) # resize based on the long edge
reshape_h = int(ori_h * resize_ratio + 0.5)
reshape_w = int(ori_w * resize_ratio + 0.5)
to_scale_ratio = resize_ratio / to_canonical_ratio
image = cv2.resize(image, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
if intrinsic is not None:
intrinsic = [self.canonical_focal, self.canonical_focal, intrinsic[2]*resize_ratio, intrinsic[3]*resize_ratio]
if label is not None:
# number of other labels may be less than that of image
# label = cv2.resize(label, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
label = resize_depth_preserve(label, (reshape_h, reshape_w))
# scale the label and camera intrinsics
label = label / to_scale_ratio
if cam_model is not None:
# Should not directly resize the cam_model.
# Camera model should be resized in 'to canonical' stage, while it holds in 'random resizing' stage.
# cam_model = cv2.resize(cam_model, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_LINEAR)
cam_model = build_camera_model(reshape_h, reshape_w, intrinsic)
return image, label, intrinsic, cam_model, to_scale_ratio
def __call__(self, images, labels, intrinsics, cam_models=None, normals=None, other_labels=None, transform_paras=None):
assert len(images[0].shape) == 3 and len(labels[0].shape) == 2
        assert labels[0].dtype == np.float64
target_focal = (intrinsics[0][0] + intrinsics[0][1]) / 2.0
target_to_canonical_ratio = self.canonical_focal / target_focal
target_img_shape = images[0].shape
to_random_ratio = random.uniform(self.ratio_range[0], self.ratio_range[1])
to_scale_ratio = 0
for i in range(len(images)):
img = images[i]
label = labels[i] if i < len(labels) else None
intrinsic = intrinsics[i] if i < len(intrinsics) else None
cam_model = cam_models[i] if cam_models is not None and i < len(cam_models) else None
img, label, intrinsic, cam_model, to_scale_ratio = self.random_on_canonical_transform(
img, label, intrinsic, cam_model, to_random_ratio)
images[i] = img
if label is not None:
labels[i] = label
if intrinsic is not None:
intrinsics[i] = intrinsic
if cam_model is not None:
cam_models[i] = cam_model
        if normals is not None:
reshape_h, reshape_w, _ = images[0].shape
for i, normal in enumerate(normals):
normals[i] = cv2.resize(normal, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
        if other_labels is not None:
# other labels are like semantic segmentations, instance segmentations, instance planes segmentations...
# resize_ratio = target_to_canonical_ratio * to_random_ratio
# reshape_h = int(target_img_shape[0] * resize_ratio + 0.5)
# reshape_w = int(target_img_shape[1] * resize_ratio + 0.5)
reshape_h, reshape_w, _ = images[0].shape
for i, other_label_i in enumerate(other_labels):
other_labels[i] = cv2.resize(other_label_i, dsize=(reshape_w, reshape_h), interpolation=cv2.INTER_NEAREST)
if transform_paras is not None:
transform_paras.update(label_scale_factor = 1.0/to_scale_ratio)
return images, labels, intrinsics, cam_models, normals, other_labels, transform_paras
def build_camera_model(H: int, W: int, intrinsics: list) -> np.ndarray:
    """
    Encode the camera intrinsic parameters (focal length and principal point) into a 4-channel map.
    """
fx, fy, u0, v0 = intrinsics
f = (fx + fy) / 2.0
    # principal point location
x_row = np.arange(0, W).astype(np.float32)
x_row_center_norm = (x_row - u0) / W
x_center = np.tile(x_row_center_norm, (H, 1)) # [H, W]
y_col = np.arange(0, H).astype(np.float32)
y_col_center_norm = (y_col - v0) / H
y_center = np.tile(y_col_center_norm, (W, 1)).T
# FoV
fov_x = np.arctan(x_center / (f / W))
fov_y = np.arctan(y_center/ (f / H))
cam_model = np.stack([x_center, y_center, fov_x, fov_y], axis=2)
return cam_model
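
# Sketch of the camera-model encoding (illustrative; the intrinsics are assumptions): every pixel
# stores its normalized offset from the principal point plus an approximate viewing angle, which
# lets the network reason about the field of view independently of the image resolution.
def _demo_build_camera_model():
    cam = build_camera_model(480, 640, [1000.0, 1000.0, 320.0, 240.0])
    # cam.shape == (480, 640, 4): channels are [x_center, y_center, fov_x, fov_y];
    # at the principal point the first two channels are ~0
    return cam.shape, cam[240, 320, :2]
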
if __name__ == '__main__':
img = cv2.imread('/mnt/mldb/raw/62b3ed3455e805efcb28c74b/NuScenes/data_test/samples/CAM_FRONT/n008-2018-08-01-15-34-25-0400__CAM_FRONT__1533152214512404.jpg', -1)
H, W, _ = img.shape
label = img[:, :, 0]
intrinsic = [1000, 1000, W//2, H//2]
for i in range(20):
weather_aug = Weather(prob=1.0)
        img_aug, label_aug, intrinsic_aug, cam_models_aug, normals_aug, other_labels_aug, transform_paras = weather_aug([img, ], [label, ], [intrinsic, ])
cv2.imwrite(f'test_aug_{i}.jpg', img_aug[0])
print('Done')