# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import math
import random

import cv2
import numpy as np


class RandomFliplr(object):
    """Horizontal flip of the sample with given probability.
    """

    def __init__(self, probability=0.5):
        """Init.

        Args:
            probability (float, optional): Flip probability. Defaults to 0.5.
        """
        self.__probability = probability

    def __call__(self, sample):
        """Flip every entry of the sample left-right, or none of them.

        Args:
            sample (dict): sample whose values expose a ``shape`` attribute.
                Modified in place.

        Returns:
            dict: the same sample object.
        """
        # One draw per sample, so all entries are flipped together.
        if random.random() < self.__probability:
            for k, v in sample.items():
                # Only entries with at least two dimensions are flipped.
                if len(v.shape) >= 2:
                    # copy() turns the flipped view into a standalone array.
                    sample[k] = np.fliplr(v).copy()

        return sample


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    The entries "image", "disparity" and "mask" are replaced in place when an
    upscale is required; otherwise the sample is left untouched.

    Args:
        sample (dict): sample with "image", "disparity" and "mask" entries
        size (tuple): minimum size as (height, width)
        image_interpolation_method (int, optional): OpenCV interpolation flag
            used for the image. Defaults to cv2.INTER_AREA.

    Returns:
        tuple: new size as (height, width)
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        # Already large enough. Return the (unchanged) size so that both
        # paths return the documented tuple rather than the sample dict.
        return tuple(shape)

    # A single scale factor (the larger of the two) keeps the aspect ratio
    # while making both dimensions reach the requested minimum.
    scale = max(size[0] / shape[0], size[1] / shape[1])

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # cv2.resize expects (width, height), hence the reversed shape.
    target = tuple(shape[::-1])

    # resize
    sample["image"] = cv2.resize(
        sample["image"], target, interpolation=image_interpolation_method
    )

    sample["disparity"] = cv2.resize(
        sample["disparity"], target, interpolation=cv2.INTER_NEAREST
    )

    # The mask is converted to float32 for cv2.resize and back to bool after.
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        target,
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)


class RandomCrop(object):
    """Get a random crop of the sample with the given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_if_needed=False,
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): output width
            height (int): output height
            resize_if_needed (bool, optional): If True, sample might be
                upsampled to ensure that a crop of size (width, height) is
                possible. Defaults to False.
            image_interpolation_method (int, optional): OpenCV interpolation
                flag used for the image when upsampling is needed.
                Defaults to cv2.INTER_AREA.
        """
        # Stored as (height, width) to match array shape ordering.
        self.__size = (height, width)
        self.__resize_if_needed = resize_if_needed
        self.__image_interpolation_method = image_interpolation_method

    def __call__(self, sample):
        """Crop all entries with two or more dimensions to the same window.

        Args:
            sample (dict): sample containing at least "disparity". Modified
                in place.

        Returns:
            dict: the same sample object.

        Raises:
            Exception: if the crop is larger than the sample and
                resize_if_needed is False.
        """
        shape = sample["disparity"].shape

        if self.__size[0] > shape[0] or self.__size[1] > shape[1]:
            if self.__resize_if_needed:
                shape = apply_min_size(
                    sample, self.__size, self.__image_interpolation_method
                )
            else:
                raise Exception(
                    "Output size {} bigger than input size {}.".format(
                        self.__size, shape
                    )
                )

        # NOTE: the offset is drawn from NumPy's global RNG, whereas
        # RandomFliplr uses the stdlib `random` module.
        offset = (
            np.random.randint(shape[0] - self.__size[0] + 1),
            np.random.randint(shape[1] - self.__size[1] + 1),
        )

        for k, v in sample.items():
            # "code" and "basis" entries are never cropped.
            if k == "code" or k == "basis":
                continue

            if len(sample[k].shape) >= 2:
                sample[k] = v[
                    offset[0]: offset[0] + self.__size[0],
                    offset[1]: offset[1] + self.__size[1],
                ]

        return sample


class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
        letter_box=False,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this
                parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given
                size.
                "upper_bound": Output will be at max as large as the given
                size. (Output size might be smaller than given size.)
                "minimal": Scale as least as possible. (Output size might be
                smaller than given size.)
                Defaults to "lower_bound".
            image_interpolation_method (int, optional):
                OpenCV interpolation flag used for the image.
                Defaults to cv2.INTER_AREA.
            letter_box (bool, optional):
                True: After resizing, zero-pad each resized array towards
                (height, width) using make_letter_box.
                Defaults to False.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method
        self.__letter_box = letter_box

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Snap x to a multiple of 'ensure_multiple_of' within the bounds.

        Args:
            x (float): value to constrain
            min_val (int, optional): lower bound. Defaults to 0.
            max_val (int, optional): upper bound, or None for no upper
                bound. Defaults to None.

        Returns:
            int: the nearest multiple; rounded down instead if the nearest
            one exceeds max_val, rounded up instead if the result is below
            min_val.
        """
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        """Compute the output size for an input of the given size.

        Args:
            width (int): input width
            height (int): input height

        Returns:
            tuple: (new_width, new_height)

        Raises:
            ValueError: if resize_method is not one of "lower_bound",
                "upper_bound" or "minimal".
        """
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            # Collapse both scales to a single one so the aspect ratio holds.
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as least as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def make_letter_box(self, sample):
        """Zero-pad an array so that it reaches (height, width).

        Args:
            sample (np.ndarray): array whose first two dimensions are
                (rows, columns). Despite the name this is a single array,
                not the sample dict.

        Returns:
            np.ndarray: the padded array.
        """
        pad_h = self.__height - sample.shape[0]
        pad_w = self.__width - sample.shape[1]

        # When the padding is odd the extra pixel goes to the bottom/right;
        # using the same floor-halved amount on both sides would leave the
        # output one pixel short of the target size.
        top = pad_h // 2
        bottom = pad_h - top
        left = pad_w // 2
        right = pad_w - left

        sample = cv2.copyMakeBorder(
            sample, top, bottom, left, right, cv2.BORDER_CONSTANT, None, 0)
        return sample

    def __call__(self, sample):
        """Resize the image and, if resize_target is set, the targets.

        Args:
            sample (dict): sample containing "image" and optionally
                "disparity", "depth" and "mask". Modified in place.

        Returns:
            dict: the same sample object.
        """
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__letter_box:
            sample["image"] = self.make_letter_box(sample["image"])

        if self.__resize_target:
            # Targets use nearest-neighbour interpolation.
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    sample["disparity"] = self.make_letter_box(
                        sample["disparity"])

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width, height),
                    interpolation=cv2.INTER_NEAREST
                )

                if self.__letter_box:
                    sample["depth"] = self.make_letter_box(sample["depth"])

            # The mask is optional, like the other targets; samples without
            # one are passed through instead of raising KeyError.
            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    # Padded pixels are 0 and therefore False after the cast.
                    sample["mask"] = self.make_letter_box(sample["mask"])

                sample["mask"] = sample["mask"].astype(bool)

        return sample


class ResizeFixed(object):
    """Resize image, disparity and mask to a fixed size.
    """

    def __init__(self, size):
        """Init.

        Args:
            size (tuple): output size. It is reversed before being passed to
                cv2.resize (which takes (width, height)), so it is expected
                as (height, width).
        """
        self.__size = size

    def __call__(self, sample):
        """Resize "image", "disparity" and "mask" in place.

        Args:
            sample (dict): sample with "image", "disparity" and "mask".

        Returns:
            dict: the same sample object.
        """
        sample["image"] = cv2.resize(
            sample["image"], self.__size[::-1], interpolation=cv2.INTER_LINEAR
        )

        sample["disparity"] = cv2.resize(
            sample["disparity"], self.__size[::-1],
            interpolation=cv2.INTER_NEAREST
        )

        sample["mask"] = cv2.resize(
            sample["mask"].astype(np.float32),
            self.__size[::-1],
            interpolation=cv2.INTER_NEAREST,
        )
        sample["mask"] = sample["mask"].astype(bool)

        return sample


class Rescale(object):
    """Rescale target values to the interval [0, max_val].
    If input is constant, values are set to max_val / 2.
    """

    def __init__(self, max_val=1.0, use_mask=True):
        """Init.

        Args:
            max_val (float, optional): Max output value. Defaults to 1.0.
            use_mask (bool, optional): Only operate on valid pixels
                (mask == True). Defaults to True.
        """
        self.__max_val = max_val
        self.__use_mask = use_mask

    def __call__(self, sample):
        """Rescale sample["disparity"] in place over the selected pixels.

        Args:
            sample (dict): sample with "disparity" and, when use_mask is
                True, "mask".

        Returns:
            dict: the same sample object; unchanged if no pixel is selected.
        """
        disp = sample["disparity"]

        if self.__use_mask:
            mask = sample["mask"]
        else:
            # Builtin bool: the `np.bool` alias was removed in NumPy 1.24.
            mask = np.ones_like(disp, dtype=bool)

        if np.sum(mask) == 0:
            return sample

        min_val = np.min(disp[mask])
        max_val = np.max(disp[mask])

        if max_val > min_val:
            sample["disparity"][mask] = (
                (disp[mask] - min_val) / (max_val - min_val) * self.__max_val
            )
        else:
            # Constant input: no range to stretch, so use the midpoint.
            sample["disparity"][mask] = np.ones_like(
                disp[mask]) * self.__max_val / 2.0

        return sample


# mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
class NormalizeImage(object):
    """Normalize image by given mean and std.
    """

    def __init__(self, mean, std):
        """Init.

        Args:
            mean: value subtracted from the image.
            std: value the centred image is divided by.
        """
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        """Replace sample["image"] with (image - mean) / std.

        Args:
            sample (dict): sample with an "image" entry.

        Returns:
            dict: the same sample object.
        """
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class DepthToDisparity(object):
    """Convert depth to disparity. Removes depth from sample.
    """

    def __init__(self, eps=1e-4):
        """Init.

        Args:
            eps (float, optional): depth values below this threshold are
                treated as invalid. Defaults to 1e-4.
        """
        self.__eps = eps

    def __call__(self, sample):
        """Add "disparity" (1 / depth) and delete "depth".

        Pixels with depth below eps are set to False in sample["mask"] and
        get a disparity of 0.

        Args:
            sample (dict): sample with "depth" and "mask". Modified in place.

        Returns:
            dict: the same sample object.
        """
        assert "depth" in sample

        sample["mask"][sample["depth"] < self.__eps] = False

        sample["disparity"] = np.zeros_like(sample["depth"])
        sample["disparity"][sample["depth"] >= self.__eps] = (
            1.0 / sample["depth"][sample["depth"] >= self.__eps]
        )

        del sample["depth"]

        return sample


class DisparityToDepth(object):
    """Convert disparity to depth. Removes disparity from sample.
    """

    def __init__(self, eps=1e-4):
        """Init.

        Args:
            eps (float, optional): absolute disparity values below this
                threshold are treated as invalid. Defaults to 1e-4.
        """
        self.__eps = eps

    def __call__(self, sample):
        """Add "depth" (1 / |disparity|) and delete "disparity".

        Pixels whose absolute disparity is below eps are set to False in
        sample["mask"] and get a depth of 0.

        Args:
            sample (dict): sample with "disparity" and "mask". Modified in
                place.

        Returns:
            dict: the same sample object.
        """
        assert "disparity" in sample

        # The sign is discarded: depth is computed from the magnitude.
        disp = np.abs(sample["disparity"])
        sample["mask"][disp < self.__eps] = False

        sample["depth"] = np.zeros_like(disp)
        sample["depth"][disp >= self.__eps] = (
            1.0 / disp[disp >= self.__eps]
        )

        del sample["disparity"]

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        """Convert the entries to contiguous float32 arrays.

        The image's last axis is moved to the front (presumably HWC to CHW;
        confirm against the data loader). "mask", "disparity" and "depth"
        are converted only when present.

        Args:
            sample (dict): sample with a 3-dimensional "image". Modified in
                place.

        Returns:
            dict: the same sample object.
        """
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "disparity" in sample:
            disparity = sample["disparity"].astype(np.float32)
            sample["disparity"] = np.ascontiguousarray(disparity)

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        return sample