"""
Common photometric transforms for data augmentation.

Every transform is a small callable class: configure it once in ``__init__``
and apply it to an image with ``transform(image)``. Images are assumed to
hold intensities in the 0~255 range unless stated otherwise.
"""
import numpy as np
from PIL import Image
from torchvision import transforms as transforms
import cv2


# List all the available augmentations
available_augmentations = [
    "additive_gaussian_noise",
    "additive_speckle_noise",
    "random_brightness",
    "random_contrast",
    "additive_shade",
    "motion_blur",
]


class additive_gaussian_noise(object):
    """Add zero-mean gaussian noise with a randomly drawn standard deviation.

    Args:
        stddev_range: ``[low, high]`` bounds the noise stddev is sampled from
            uniformly on every call. Defaults to ``[5, 95]``.
    """

    def __init__(self, stddev_range=None):
        # If std is not given, use the default setting
        if stddev_range is None:
            self.stddev_range = [5, 95]
        else:
            self.stddev_range = stddev_range

    def __call__(self, input_image):
        """Return ``input_image`` plus gaussian noise, clipped to [0, 255]."""
        # Get the noise stddev
        stddev = np.random.uniform(self.stddev_range[0], self.stddev_range[1])
        noise = np.random.normal(0.0, stddev, size=input_image.shape)
        noisy_image = (input_image + noise).clip(0.0, 255.0)
        return noisy_image


class additive_speckle_noise(object):
    """Set randomly chosen pixels to pure black (0) or pure white (255).

    Args:
        prob_range: ``[low, high]`` bounds of the per-call probability with
            which a pixel is forced to 0 (and, independently of that mass,
            to 255). Defaults to ``[0.0, 0.005]``.
    """

    def __init__(self, prob_range=None):
        # If prob range is not given, use the default setting
        if prob_range is None:
            self.prob_range = [0.0, 0.005]
        else:
            self.prob_range = prob_range

    def __call__(self, input_image):
        """Return a copy of ``input_image`` with speckles; input is untouched."""
        # Sample
        prob = np.random.uniform(self.prob_range[0], self.prob_range[1])
        sample = np.random.uniform(0.0, 1.0, size=input_image.shape)

        # Get the mask: the lowest `prob` mass turns black, the highest white
        mask0 = sample <= prob
        mask1 = sample >= (1 - prob)

        # Mask the image (here we assume the image ranges from 0~255)
        noisy = input_image.copy()
        noisy[mask0] = 0.0
        noisy[mask1] = 255.0
        return noisy


class random_brightness(object):
    """Random brightness change, delegated to torchvision's ``ColorJitter``.

    Args:
        brightness: Jitter strength forwarded to
            ``transforms.ColorJitter(brightness=...)``. Defaults to 0.5.
    """

    def __init__(self, brightness=None):
        # If the brightness is not given, use the default setting
        if brightness is None:
            self.brightness = 0.5
        else:
            self.brightness = brightness

        # Initialize the transformer
        self.transform = transforms.ColorJitter(brightness=self.brightness)

    def __call__(self, input_image):
        """Return the jittered image as a numpy array.

        Numpy inputs are cast to ``uint8`` before the PIL conversion; any
        other input is handed to the transform as-is.
        """
        # Convert to PIL image
        if isinstance(input_image, np.ndarray):
            input_image = Image.fromarray(input_image.astype(np.uint8))
        return np.array(self.transform(input_image))


class random_contrast(object):
    """Random contrast change, delegated to torchvision's ``ColorJitter``.

    Args:
        contrast: Jitter strength forwarded to
            ``transforms.ColorJitter(contrast=...)``. Defaults to 0.5.
    """

    def __init__(self, contrast=None):
        # If the contrast is not given, use the default setting
        if contrast is None:
            self.contrast = 0.5
        else:
            self.contrast = contrast

        # Initialize the transformer
        self.transform = transforms.ColorJitter(contrast=self.contrast)

    def __call__(self, input_image):
        """Return the jittered image as a numpy array.

        Numpy inputs are cast to ``uint8`` before the PIL conversion; any
        other input is handed to the transform as-is.
        """
        # Convert to PIL image
        if isinstance(input_image, np.ndarray):
            input_image = Image.fromarray(input_image.astype(np.uint8))
        return np.array(self.transform(input_image))


class additive_shade(object):
    """Darken or brighten the image with a blurred mask of random ellipses.

    Args:
        nb_ellipses: Number of filled ellipses drawn into the shade mask.
        transparency_range: ``[low, high]`` bounds of the shade strength,
            sampled uniformly per call. Negative values brighten the shaded
            area instead of darkening it. Defaults to ``[-0.5, 0.8]``.
        kernel_size_range: ``[low, high)`` bounds of the gaussian blur kernel
            size applied to the mask. Defaults to ``[250, 350]``.
    """

    def __init__(self, nb_ellipses=20, transparency_range=None,
                 kernel_size_range=None):
        self.nb_ellipses = nb_ellipses
        if transparency_range is None:
            self.transparency_range = [-0.5, 0.8]
        else:
            self.transparency_range = transparency_range

        if kernel_size_range is None:
            self.kernel_size_range = [250, 350]
        else:
            self.kernel_size_range = kernel_size_range

    def __call__(self, input_image):
        """Return the shaded image, clipped to [0, 255].

        Accepts ``(H, W)`` images as well as images with trailing channel
        axes such as ``(H, W, C)``; the same shade is applied to every
        channel. The output has the same shape as the input.
        """
        # TODO: decide whether the input should be converted to a numpy
        # array first.
        min_dim = min(input_image.shape[:2]) / 4
        mask = np.zeros(input_image.shape[:2], np.uint8)
        for _ in range(self.nb_ellipses):
            ax = int(max(np.random.rand() * min_dim, min_dim / 5))
            ay = int(max(np.random.rand() * min_dim, min_dim / 5))
            max_rad = max(ax, ay)
            # Keep the centre at least `max_rad` away from every border
            x = np.random.randint(max_rad, input_image.shape[1] - max_rad)
            y = np.random.randint(max_rad, input_image.shape[0] - max_rad)
            angle = np.random.rand() * 90
            cv2.ellipse(mask, (x, y), (ax, ay), angle, 0, 360, 255, -1)

        transparency = np.random.uniform(*self.transparency_range)
        kernel_size = np.random.randint(*self.kernel_size_range)

        # kernel_size has to be odd
        if (kernel_size % 2) == 0:
            kernel_size += 1
        mask = cv2.GaussianBlur(mask.astype(np.float32),
                                (kernel_size, kernel_size), 0)

        # Per-pixel multiplicative factor of shape (H, W)
        shade = 1 - transparency * mask / 255.0
        # Append singleton axes so the factor broadcasts over any trailing
        # (channel) axes; the previous `input_image[..., None]` form only
        # broadcast correctly for 2-D inputs.
        shade = shade.reshape(shade.shape + (1,) * (input_image.ndim - 2))
        shaded = np.clip(input_image * shade, 0, 255)

        return np.reshape(shaded, input_image.shape)


class motion_blur(object):
    """Blur the image along a random direction (h / v / either diagonal).

    Args:
        max_kernel_size: Controls the upper bound of the randomly drawn
            (odd) blur kernel size. Values below 1 result in a 1x1 kernel,
            i.e. no blur.
    """

    def __init__(self, max_kernel_size=10):
        self.max_kernel_size = max_kernel_size

    def __call__(self, input_image):
        """Return the blurred image with the same shape as the input."""
        # Either vertical, horizontal or diagonal blur
        mode = np.random.choice(["h", "v", "diag_down", "diag_up"])

        # Clamp the exclusive bound to >= 1: np.random.randint(0, 0) raises
        # ValueError, which used to happen for max_kernel_size == 0.
        # NOTE(review): round() rounds halves to even, so the default
        # max_kernel_size=10 gives a bound of 6 and kernel sizes up to 11.
        # Confirm whether exceeding max_kernel_size is intended.
        half_bound = max(1, int(round((self.max_kernel_size + 1) / 2)))
        ksize = np.random.randint(0, half_bound) * 2 + 1  # always odd
        center = int((ksize - 1) / 2)

        # Binary line through the kernel centre along the chosen direction
        kernel = np.zeros((ksize, ksize))
        if mode == "h":
            kernel[center, :] = 1.0
        elif mode == "v":
            kernel[:, center] = 1.0
        elif mode == "diag_down":
            kernel = np.eye(ksize)
        elif mode == "diag_up":
            kernel = np.flip(np.eye(ksize), 0)

        # Weight the line with a centred 2-D gaussian, then normalise to sum 1
        var = ksize * ksize / 16.0
        grid = np.repeat(np.arange(ksize)[:, np.newaxis], ksize, axis=-1)
        gaussian = np.exp(
            -(np.square(grid - center) + np.square(grid.T - center))
            / (2.0 * var)
        )
        kernel *= gaussian
        kernel /= np.sum(kernel)
        blurred = cv2.filter2D(input_image, -1, kernel)

        # Restore the input shape in case the filter changed it
        return np.reshape(blurred, input_image.shape)


class normalize_image(object):
    """Image normalization to the range [0, 1]."""

    def __init__(self):
        # Divisor applied to every pixel; assumes 0~255 input intensities
        self.normalize_value = 255

    def __call__(self, input_image):
        """Return ``input_image / 255`` as a ``float32`` array."""
        return (input_image / self.normalize_value).astype(np.float32)