"""Image functional utils.

Stateless helpers that operate on ndarray images (and on bounding boxes).
"""
import collections
import collections.abc
import numbers
from functools import wraps

import cv2
import numpy as np
import torch
from PIL import Image
from scipy.ndimage import gaussian_filter

__numpy_type_map = {
    'float64': torch.DoubleTensor,
    'float32': torch.FloatTensor,
    'float16': torch.HalfTensor,
    'int64': torch.LongTensor,
    'int32': torch.IntTensor,
    'int16': torch.ShortTensor,
    'uint16': torch.ShortTensor,
    'int8': torch.CharTensor,
    'uint8': torch.ByteTensor,
}

# Fraction of the corrupted elements that become "salt" for each impulse
# noise mode; the remainder becomes "pepper".
_SALT_VS_PEPPER = {'salt': 1, 'pepper': 0, 's&p': 0.5}

# NOTE: every function should receive an ndarray-like image, laid out as
# H x W x C or H x W.
# Open question from the original author: if every output were kept as
# (height, width, channel), could to_tensor be dropped? Answer: no.


def preserve_channel_dim(func):
    """Preserve dummy channel dim.

    Decorator: when the input image has shape (..., 1) with 3 dimensions and
    the wrapped function returns a 2-D array, the trailing channel axis is
    added back to the result.
    """
    @wraps(func)
    def wrapped_function(img, *args, **kwargs):
        shape = img.shape
        result = func(img, *args, **kwargs)
        if len(shape) == 3 and shape[-1] == 1 and len(result.shape) == 2:
            result = np.expand_dims(result, axis=-1)
        return result

    return wrapped_function


def _is_tensor_image(img):
    """Return True if ``img`` is a torch tensor with exactly 3 dimensions."""
    return torch.is_tensor(img) and img.ndimension() == 3


def _is_numpy_image(img):
    """Return True if ``img`` is a numpy ndarray with 2 or 3 dimensions."""
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def _parse_padding(padding):
    """Expand ``padding`` into a ``(left, top, right, bottom)`` tuple.

    Args:
        padding (int or sequence): a single integer (same padding on every
            side), a pair ``(left/right, top/bottom)`` or a 4-element
            sequence ``(left, top, right, bottom)``.

    Raises:
        ValueError: if ``padding`` is none of the supported forms.
    """
    if isinstance(padding, numbers.Integral):
        return padding, padding, padding, padding
    if (isinstance(padding, collections.abc.Iterable)
            and isinstance(padding, collections.abc.Sized)):
        if len(padding) == 2:
            horizontal, vertical = padding
            return horizontal, vertical, horizontal, vertical
        if len(padding) == 4:
            left, top, right, bottom = padding
            return left, top, right, bottom
    raise ValueError(
        'padding should be an integer or a sequence of length 2 or 4, '
        'but got {}'.format(padding))


def _clip_to_dtype_range(values, dtype):
    """Clip ``values`` to the valid range of ``dtype`` and cast the result.

    Floating point images are clipped to [0, 1] and returned as float32;
    integer images are clipped to the limits of ``dtype`` and cast back to it.
    """
    if np.issubdtype(dtype, np.floating):
        return np.clip(values, 0, 1).astype(np.float32)
    info = np.iinfo(dtype)
    return np.clip(values, info.min, info.max).astype(dtype)


def _set_random_elements(img, count, value):
    """Set ``count`` randomly chosen elements of ``img`` to ``value`` in place.

    One random index is drawn per axis (including the channel axis when there
    is one), so single array elements are modified rather than whole pixels.
    """
    if count <= 0 or img.size == 0:
        return
    # ``high`` is exclusive in np.random.randint, so pass the axis length
    # itself to make the last index reachable.
    coords = tuple(np.random.randint(0, dim, count) for dim in img.shape)
    img[coords] = value


def to_tensor(img):
    """Convert a numpy.ndarray image to a torch tensor.

    uint8 images are divided by 255, uint16 images by 65535. float32/float64
    images are not rescaled (they are expected to be in [0, 1] already) and
    are returned as float32. A 2-D image gets a channel axis appended, and
    the result is transposed from (H, W, C) to (C, H, W).

    Args:
        img (numpy.ndarray): image to be converted to tensor.

    Returns:
        Tensor: float32 tensor of shape (C, H, W).

    Raises:
        TypeError: if ``img`` is not a 2-D/3-D ndarray or its dtype is not
            supported.
    """
    if not _is_numpy_image(img):
        raise TypeError('data should be numpy ndarray. but got {}'.format(type(img)))

    if img.ndim == 2:
        img = img[:, :, None]

    if img.dtype == np.uint8:
        img = img.astype(np.float32) / 255
    elif img.dtype == np.uint16:
        img = img.astype(np.float32) / 65535
    elif img.dtype in [np.float32, np.float64]:
        img = img.astype(np.float32)
    else:
        raise TypeError('{} is not support'.format(img.dtype))

    return torch.from_numpy(img.transpose((2, 0, 1)))


def to_pil_image(tensor):
    """Not implemented yet; currently returns None."""
    # TODO
    pass


def to_tiff_image(tensor):
    """Not implemented yet; currently returns None."""
    # TODO
    pass


def normalize(tensor, mean, std, inplace=False):
    """Normalize a tensor image with mean and standard deviation.

    .. note::
        This transform acts out of place by default, i.e., it does not
        mutate the input tensor.

    See :class:`~torchsat.transforms.Normalize` for more details.

    Args:
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace (bool): if True, modify ``tensor`` itself instead of a clone.

    Returns:
        Tensor: Normalized Tensor image.

    Raises:
        TypeError: if ``tensor`` is not a 3-D torch tensor.
    """
    if not _is_tensor_image(tensor):
        raise TypeError('tensor is not a torch image.')

    if not inplace:
        tensor = tensor.clone()

    mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device)
    std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device)
    tensor.sub_(mean[:, None, None]).div_(std[:, None, None])
    return tensor


def noise(img, mode='gaussian', percent=0.02):
    """Add random noise to an image and return a new array.

    The input array is never modified.

    TODO: Not good for uint16 data

    Args:
        img (ndarray): input image.
        mode (str): one of ``'gaussian'``, ``'salt'``, ``'pepper'`` or
            ``'s&p'``. The historical misspelling ``'gaussain'`` is accepted
            as an alias of ``'gaussian'``.
        percent (float): fraction of the array elements that is corrupted in
            the impulse modes (``'salt'``, ``'pepper'``, ``'s&p'``). Unused
            for ``'gaussian'``.

    Returns:
        ndarray: noisy image with the same dtype as ``img``.

    Raises:
        ValueError: if ``mode`` is not supported.
    """
    original_dtype = img.dtype
    is_float = np.issubdtype(original_dtype, np.floating)

    if mode == 'gaussain':
        # Backward compatibility with the misspelled former default value.
        mode = 'gaussian'

    if mode == 'gaussian':
        mean = 0
        var = 0.1
        # NOTE(review): a standard deviation would normally be var ** 0.5;
        # the original scaling is kept so the noise level does not change.
        sigma = var * 0.5
        gauss = np.random.normal(mean, sigma, img.shape)
        if is_float:
            noisy = np.clip(img.astype(np.float64) + gauss, 0, 1)
        else:
            # Scale the unit-range noise to the value range of the dtype.
            max_value = np.iinfo(original_dtype).max
            noisy = np.clip(img.astype(np.float64) + gauss * max_value, 0, max_value)
    elif mode in _SALT_VS_PEPPER:
        s_vs_p = _SALT_VS_PEPPER[mode]
        noisy = img.copy()  # do not corrupt the caller's array
        if s_vs_p > 0:
            num_salt = int(np.ceil(percent * img.size * s_vs_p))
            salt_value = 1 if is_float else np.iinfo(original_dtype).max
            _set_random_elements(noisy, num_salt, salt_value)
        if s_vs_p < 1:
            num_pepper = int(np.ceil(percent * img.size * (1. - s_vs_p)))
            _set_random_elements(noisy, num_pepper, 0)
    else:
        raise ValueError('not support mode for {}'.format(mode))

    return noisy.astype(original_dtype)


def gaussian_blur(img, kernel_size):
    """Blur ``img`` with a square Gaussian kernel of side ``kernel_size``."""
    # When sigma=0, it is computed as `sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`
    return cv2.GaussianBlur(img, (kernel_size, kernel_size), sigmaX=0)


def adjust_brightness(img, value=0):
    """Add ``value`` to every element of ``img`` and clip to the valid range.

    Floating point images are clipped to [0, 1] and returned as float32;
    integer images are clipped to the limits of their dtype, which is kept.

    Args:
        img (ndarray): input image.
        value (number): offset added to every element.

    Returns:
        ndarray: the adjusted image.
    """
    return _clip_to_dtype_range(img.astype(np.float64) + value, img.dtype)


def adjust_contrast(img, factor):
    """Multiply every element of ``img`` by ``factor`` and clip the result.

    Floating point images are clipped to [0, 1] and returned as float32;
    integer images are clipped to the limits of their dtype, which is kept.

    Args:
        img (ndarray): input image.
        factor (number): multiplier applied to every element.

    Returns:
        ndarray: the adjusted image.
    """
    return _clip_to_dtype_range(img.astype(np.float64) * factor, img.dtype)


def adjust_saturation():
    """Not implemented yet; currently returns None."""
    # TODO
    pass


def adjust_hue():
    """Not implemented yet; currently returns None."""
    # TODO
    pass


def to_grayscale(img, output_channels=1):
    """Convert the input ndarray image to a gray scale image.

    A 2-D image is used as is, a 3-channel image is converted with
    ``cv2.COLOR_RGB2GRAY`` and any other channel count is averaged over the
    channel axis.

    Args:
        img (ndarray): the input ndarray image.
        output_channels (int): output gray image channel count (default: 1).
            When it is not 1 the gray image is repeated along a new trailing
            channel axis.

    Returns:
        ndarray: gray scale ndarray image.
    """
    if img.ndim == 2:
        gray_img = img
    elif img.shape[2] == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray_img = np.mean(img, axis=2)
        gray_img = gray_img.astype(img.dtype)

    if output_channels != 1:
        gray_img = np.tile(gray_img, (output_channels, 1, 1))
        gray_img = np.transpose(gray_img, [1, 2, 0])

    return gray_img


def shift(img, top, left):
    """Translate ``img`` by ``left`` columns and ``top`` rows, keeping its size."""
    (h, w) = img.shape[0:2]
    matrix = np.float32([[1, 0, left], [0, 1, top]])
    dst = cv2.warpAffine(img, matrix, (w, h))
    return dst


def rotate(img, angle, center=None, scale=1.0):
    """Rotate ``img`` by ``angle`` around ``center``, keeping its size.

    Args:
        img (ndarray): input image.
        angle (number): rotation angle passed to ``cv2.getRotationMatrix2D``.
        center (tuple, optional): rotation center ``(x, y)``. Defaults to the
            image center.
        scale (float): isotropic scale factor.

    Returns:
        ndarray: the rotated image.
    """
    (h, w) = img.shape[:2]

    if center is None:
        center = (w / 2, h / 2)

    M = cv2.getRotationMatrix2D(center, angle, scale)
    rotated = cv2.warpAffine(img, M, (w, h))
    return rotated


def resize(img, size, interpolation=Image.BILINEAR):
    """Resize the image.

    TODO: (from the original author) after the opencv resize the image ends
    up in the 0~1 range.

    NOTE(review): ``interpolation`` is forwarded unchanged to ``cv2.resize``
    while the default is a PIL constant. The integer value of
    ``Image.BILINEAR`` appears to coincide with ``cv2.INTER_CUBIC`` rather
    than ``cv2.INTER_LINEAR`` - confirm which filter is intended. The default
    is left untouched so existing callers get the same results.

    Args:
        img (ndarray): the input ndarray image.
        size (int or iterable): the target size. If an integer, height and
            width are both resized to it; otherwise it should be a tuple
            ``(height, width)`` or a list ``[height, width]``.
        interpolation: the interpolation flag passed to ``cv2.resize``
            (default: ``Image.BILINEAR``).

    Raises:
        TypeError: ``img`` is not a 2-D/3-D ndarray.
        ValueError: ``size`` is neither an integer nor an iterable of
            length 2.

    Returns:
        ndarray: the resized image.
    """
    if not _is_numpy_image(img):
        raise TypeError('img shoud be ndarray image [w, h, c] or [w, h], but got {}'.format(type(img)))
    size_is_pair = (isinstance(size, collections.abc.Iterable)
                    and isinstance(size, collections.abc.Sized)
                    and len(size) == 2)
    if not (isinstance(size, int) or size_is_pair):
        raise ValueError('size should be intger or iterable vaiable(length is 2), but got {}'.format(type(size)))

    if isinstance(size, int):
        height, width = (size, size)
    else:
        height, width = (size[0], size[1])

    # cv2 expects the target size as (width, height).
    return cv2.resize(img, (width, height), interpolation=interpolation)


def pad(img, padding, fill=0, padding_mode='constant'):
    """Pad the spatial borders of an image.

    Args:
        img (ndarray): 2-D or 3-D image; the channel axis is never padded.
            Arrays with any other number of dimensions are returned
            unchanged.
        padding (int or sequence): a single integer, a pair
            ``(left/right, top/bottom)`` or ``(left, top, right, bottom)``.
        fill (number): fill value, only used when ``padding_mode`` is
            ``'constant'``.
        padding_mode (str): ``mode`` argument forwarded to ``np.pad``.

    Returns:
        ndarray: the padded image.

    Raises:
        ValueError: if ``padding`` has an unsupported form.
    """
    pad_left, pad_top, pad_right, pad_bottom = _parse_padding(padding)

    if img.ndim not in (2, 3):
        return img

    pad_width = [(pad_top, pad_bottom), (pad_left, pad_right)]
    if img.ndim == 3:
        pad_width.append((0, 0))

    if padding_mode == 'constant':
        return np.pad(img, pad_width, mode=padding_mode, constant_values=fill)
    return np.pad(img, pad_width, mode=padding_mode)


def crop(img, top, left, height, width):
    """Crop a rectangular region out of an image.

    Args:
        img (ndarray): image to be cropped.
        top (int): row of the top edge of the crop.
        left (int): column of the left edge of the crop.
        height (int): cropped height.
        width (int): cropped width.

    Returns:
        ndarray: ``img[top:top + height, left:left + width]``.

    Raises:
        TypeError: ``img`` is not a 2-D/3-D ndarray.
        ValueError: one of the arguments is negative, or the crop window
            exceeds the image.
    """
    if not _is_numpy_image(img):
        raise TypeError('the input image should be numpy ndarray with dimension 2 or 3.'
                        'but got {}'.format(type(img))
                        )

    if width < 0 or height < 0 or left < 0 or top < 0:
        raise ValueError('the input left, top, width, height should be greater than 0'
                         'but got left={}, top={} width={} height={}'.format(left, top, width, height)
                         )

    img_height, img_width = img.shape[:2]
    if (left + width) > img_width or (top + height) > img_height:
        raise ValueError('the input crop width and height should be small or '
                         'equal to image width and height. ')

    # Slicing the two leading axes keeps the channel axis (if any) intact.
    return img[top:(top + height), left:(left + width)]


def center_crop(img, output_size):
    """Crop the central region of an image.

    Args:
        img (ndarray): input image.
        output_size (number or sequence): the output image size. If a
            sequence, it should be ``[h, w]``.

    Raises:
        ValueError: the requested size is larger than the image.

    Returns:
        ndarray: the cropped image.
    """
    if img.ndim == 2:
        img_height, img_width = img.shape
    else:
        img_height, img_width, _ = img.shape

    if isinstance(output_size, numbers.Number):
        output_size = (int(output_size), int(output_size))
    if output_size[0] > img_height or output_size[1] > img_width:
        raise ValueError('the output_size should not greater than image size, but got {}'.format(output_size))

    target_height, target_width = output_size

    top = int(round((img_height - target_height) / 2))
    left = int(round((img_width - target_width) / 2))

    return crop(img, top, left, target_height, target_width)


def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR):
    """Crop ``img`` with :func:`crop`, then resize the crop with :func:`resize`."""
    img = crop(img, top, left, height, width)
    img = resize(img, size, interpolation)
    return img


def vflip(img):
    """Flip the image vertically (``cv2.flip`` with flip code 0)."""
    return cv2.flip(img, 0)


def hflip(img):
    """Flip the image horizontally (``cv2.flip`` with flip code 1)."""
    return cv2.flip(img, 1)


def flip(img, flip_code):
    """Flip the image with ``cv2.flip`` using the given ``flip_code``."""
    return cv2.flip(img, flip_code)


def elastic_transform(image, alpha, sigma, alpha_affine, interpolation=cv2.INTER_LINEAR,
                      border_mode=cv2.BORDER_REFLECT_101, random_state=None, approximate=False):
    """Elastic deformation of images as described in [Simard2003]_ (with modifications).

    Based on https://gist.github.com/erniejunior/601cdf56d2b424757de5

    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
         Convolutional Neural Networks applied to Visual Document Analysis", in
         Proc. of the International Conference on Document Analysis and
         Recognition, 2003.

    A random affine warp is applied first, followed by a smoothed random
    displacement field.

    Args:
        image (ndarray): input image.
        alpha (number): scale of the displacement field.
        sigma (number): sigma of the Gaussian used to smooth the field.
        alpha_affine (number): half-width of the uniform jitter applied to
            the three control points of the affine warp.
        interpolation: OpenCV interpolation flag.
        border_mode: OpenCV border mode.
        random_state (np.random.RandomState, optional): random source. When
            None a generator seeded with 1234 is created, so repeated calls
            produce the same deformation.
        approximate (bool): smooth with a fixed 17x17 ``cv2.GaussianBlur``
            kernel instead of ``scipy.ndimage.gaussian_filter``.

    Returns:
        ndarray: the deformed image.
    """
    if random_state is None:
        random_state = np.random.RandomState(1234)

    height, width = image.shape[:2]

    # Random affine
    center_square = np.float32((height, width)) // 2
    square_size = min((height, width)) // 3
    alpha = float(alpha)
    sigma = float(sigma)
    alpha_affine = float(alpha_affine)

    pts1 = np.float32([center_square + square_size,
                       [center_square[0] + square_size, center_square[1] - square_size],
                       center_square - square_size])
    pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, size=pts1.shape).astype(np.float32)
    matrix = cv2.getAffineTransform(pts1, pts2)

    image = cv2.warpAffine(image, matrix, (width, height), flags=interpolation, borderMode=border_mode)

    if approximate:
        # Approximate computation smooth displacement map with a large enough kernel.
        # On large images (512+) this is approximately 2X times faster
        dx = (random_state.rand(height, width).astype(np.float32) * 2 - 1)
        cv2.GaussianBlur(dx, (17, 17), sigma, dst=dx)
        dx *= alpha

        dy = (random_state.rand(height, width).astype(np.float32) * 2 - 1)
        cv2.GaussianBlur(dy, (17, 17), sigma, dst=dy)
        dy *= alpha
    else:
        dx = np.float32(gaussian_filter((random_state.rand(height, width) * 2 - 1), sigma) * alpha)
        dy = np.float32(gaussian_filter((random_state.rand(height, width) * 2 - 1), sigma) * alpha)

    x, y = np.meshgrid(np.arange(width), np.arange(height))

    mapx = np.float32(x + dx)
    mapy = np.float32(y + dy)

    return cv2.remap(image, mapx, mapy, interpolation, borderMode=border_mode)


def bbox_shift(bboxes, top, left):
    """Not implemented yet; currently returns None."""
    pass


def bbox_vflip(bboxes, img_height):
    """Vertically flip the bboxes.

    Every y coordinate becomes ``img_height - y`` and the two y columns are
    swapped so that the smaller value stays in column 1.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4], columns
            ``[x1, y1, x2, y2]``.
        img_height (number): height of the image the boxes belong to.

    Returns:
        ndarray: the flipped boxes (a new array).
    """
    flipped = bboxes.copy()
    flipped[..., 1::2] = img_height - bboxes[..., 1::2]
    flipped = flipped[..., [0, 3, 2, 1]]
    return flipped


def bbox_hflip(bboxes, img_width):
    """Horizontally flip the bboxes.

    Every x coordinate becomes ``img_width - x`` and the two x columns are
    swapped so that the smaller value stays in column 0.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4], columns
            ``[x1, y1, x2, y2]``.
        img_width (number): width of the image the boxes belong to.

    Returns:
        ndarray: the flipped boxes (a new array).
    """
    flipped = bboxes.copy()
    flipped[..., 0::2] = img_width - bboxes[..., 0::2]
    flipped = flipped[..., [2, 1, 0, 3]]
    return flipped


def bbox_resize(bboxes, img_size, target_size):
    """Rescale the bboxes to match a resized image.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4].
        img_size (tuple): the image height and width.
        target_size (int or tuple): the target size. If a tuple, it should
            be (height, width).

    Returns:
        ndarray: the boxes with x columns scaled by the width ratio and
        y columns scaled by the height ratio.
    """
    if isinstance(target_size, numbers.Number):
        target_size = (target_size, target_size)

    ratio_height = target_size[0] / img_size[0]
    ratio_width = target_size[1] / img_size[1]

    return bboxes * np.array([ratio_width, ratio_height, ratio_width, ratio_height])


def bbox_crop(bboxes, top, left, height, width):
    """Clip the bboxes to a crop window and express them relative to it.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4].
        top (int): row of the top edge of the crop.
        left (int): column of the left edge of the crop.
        height (int): cropped height.
        width (int): cropped width.

    Returns:
        ndarray: the clipped, translated boxes (a new array).
    """
    croped_bboxes = bboxes.copy()

    right = width + left
    bottom = height + top

    croped_bboxes[..., 0::2] = bboxes[..., 0::2].clip(left, right) - left
    croped_bboxes[..., 1::2] = bboxes[..., 1::2].clip(top, bottom) - top

    return croped_bboxes


def bbox_pad(bboxes, padding):
    """Translate the bboxes to account for padding added to the image.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4].
        padding (int or sequence): a single integer, a pair
            ``(left/right, top/bottom)`` or ``(left, top, right, bottom)``.

    Returns:
        ndarray: the boxes shifted by the left and top padding (a new array).

    Raises:
        ValueError: if ``padding`` has an unsupported form.
    """
    # Only the left/top padding moves the origin, so right/bottom are unused.
    pad_left, pad_top, _, _ = _parse_padding(padding)

    pad_bboxes = bboxes.copy()
    pad_bboxes[..., 0::2] = bboxes[..., 0::2] + pad_left
    pad_bboxes[..., 1::2] = bboxes[..., 1::2] + pad_top

    return pad_bboxes