# ImageEnhancement / torchvision_x_functional.py
# Author: chenzhicun
# Commit: "Initialize web demo." (ec08fea)
import collections
import collections.abc
import numbers
from functools import wraps

import cv2
import numpy as np
import torch
from PIL import Image
from scipy.ndimage.filters import gaussian_filter
# Map numpy dtype names to the matching CPU torch tensor constructors
# (legacy typed-tensor API).
# NOTE(review): 'uint16' maps to ShortTensor (signed 16-bit) because torch
# has no unsigned 16-bit tensor type — values above 32767 would wrap;
# confirm this loss is acceptable.
__numpy_type_map = {
    'float64': torch.DoubleTensor,
    'float32': torch.FloatTensor,
    'float16': torch.HalfTensor,
    'int64': torch.LongTensor,
    'int32': torch.IntTensor,
    'int16': torch.ShortTensor,
    'uint16': torch.ShortTensor,
    'int8': torch.CharTensor,
    'uint8': torch.ByteTensor,
}
'''image functional utils
'''
# NOTE: all the functions should receive an ndarray-like image, shaped W x H x C or W x H
# (translated) If every output were arranged as (height, width, channel),
# could to_tensor be skipped? -- No.
def preserve_channel_dim(func):
    """Decorator that keeps a trailing dummy channel axis.

    If the wrapped function receives an (H, W, 1) image but returns a
    plain 2-D array, the singleton channel axis is re-attached so the
    output shape convention matches the input.
    """
    @wraps(func)
    def wrapped_function(img, *args, **kwargs):
        in_shape = img.shape
        out = func(img, *args, **kwargs)
        lost_channel = len(in_shape) == 3 and in_shape[-1] == 1 and len(out.shape) == 2
        if lost_channel:
            out = np.expand_dims(out, axis=-1)
        return out
    return wrapped_function
def _is_tensor_image(img):
return torch.is_tensor(img) and img.ndimension() == 3
def _is_numpy_image(img):
return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
def to_tensor(img):
    '''Convert a numpy image (H x W x C or H x W) to a float32 torch tensor (C x H x W).

    uint8 input is divided by 255, uint16 by 65535; float input is assumed
    to already lie in [0, 1] and is only cast to float32.

    Arguments:
        img {numpy.ndarray} -- image to be converted to tensor.

    Raises:
        TypeError -- if img is not a 2-D/3-D ndarray or has an unsupported dtype.
    '''
    if not _is_numpy_image(img):
        raise TypeError('data should be numpy ndarray. but got {}'.format(type(img)))

    if img.ndim == 2:
        # promote grayscale to a single-channel image
        img = img[:, :, None]

    if img.dtype == np.uint8:
        img = img.astype(np.float32) / 255
    elif img.dtype == np.uint16:
        img = img.astype(np.float32) / 65535
    elif img.dtype in [np.float32, np.float64]:
        # already normalized; only cast (the old code divided by 1 for no effect)
        img = img.astype(np.float32)
    else:
        raise TypeError('{} is not support'.format(img.dtype))

    # HWC -> CHW, the layout torch expects
    img = torch.from_numpy(img.transpose((2, 0, 1)))
    return img
def to_pil_image(tensor):
    """Convert a tensor to a PIL image. Not implemented yet."""
    # TODO
    pass
def to_tiff_image(tensor):
    """Convert a tensor to a TIFF image. Not implemented yet."""
    # TODO
    pass
def normalize(tensor, mean, std, inplace=False):
    """Normalize a tensor image channel-wise: (x - mean) / std.

    .. note::
        Out of place by default: the input tensor is left untouched
        unless ``inplace`` is True.

    Args:
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace (bool): normalize the input tensor itself when True.

    Returns:
        Tensor: Normalized Tensor image.
    """
    if not _is_tensor_image(tensor):
        raise TypeError('tensor is not a torch image.')

    target = tensor if inplace else tensor.clone()
    mean_t = torch.as_tensor(mean, dtype=target.dtype, device=target.device)
    std_t = torch.as_tensor(std, dtype=target.dtype, device=target.device)
    target.sub_(mean_t[:, None, None]).div_(std_t[:, None, None])
    return target
def noise(img, mode='gaussain', percent=0.02):
    """Apply random noise to an ndarray image.

    Fixes over the original version:
    - the default mode is the misspelling 'gaussain', which matched no
      branch and made every default call raise ValueError; it is now
      accepted as an alias for 'gaussian';
    - ``np.float`` (removed in NumPy >= 1.24) replaced with ``np.float64``;
    - salt/pepper modes no longer mutate the caller's array;
    - leftover debug prints removed.

    Arguments:
        img {ndarray} -- input image, 2-D or 3-D.

    Keyword Arguments:
        mode {str} -- 'gaussian' (alias 'gaussain'), 'salt', 'pepper'
            or 's&p' (default: {'gaussain'})
        percent {float} -- fraction of pixels hit by salt/pepper noise
            (default: {0.02})

    Returns:
        ndarray -- noisy image with the same dtype as the input.

    TODO: Not good for uint16 data
    """
    original_dtype = img.dtype

    def _random_coords(count):
        # one random index array per axis, as a tuple for fancy indexing
        return tuple(np.random.randint(0, i - 1, int(count)) for i in img.shape)

    if mode in ('gaussian', 'gaussain'):
        mean = 0
        var = 0.1
        sigma = var * 0.5
        gauss = np.random.normal(mean, sigma, img.shape)
        if img.dtype not in [np.float32, np.float64]:
            # scale unit-range noise up to the integer dtype's range
            gauss = gauss * np.iinfo(img.dtype).max
            img = np.clip(img.astype(np.float64) + gauss, 0, np.iinfo(img.dtype).max)
        else:
            img = np.clip(img.astype(np.float64) + gauss, 0, 1)
    elif mode == 'salt':
        img = img.copy()  # do not mutate the caller's array
        num_salt = np.ceil(percent * img.size)
        coords = _random_coords(num_salt)
        if img.dtype in [np.float32, np.float64]:
            img[coords] = 1
        else:
            img[coords] = np.iinfo(img.dtype).max
    elif mode == 'pepper':
        img = img.copy()
        num_pepper = np.ceil(percent * img.size)
        img[_random_coords(num_pepper)] = 0
    elif mode == 's&p':
        img = img.copy()
        s_vs_p = 0.5
        # Salt mode
        num_salt = np.ceil(percent * img.size * s_vs_p)
        coords = _random_coords(num_salt)
        if img.dtype in [np.float32, np.float64]:
            img[coords] = 1
        else:
            img[coords] = np.iinfo(img.dtype).max
        # Pepper mode
        num_pepper = np.ceil(percent * img.size * (1. - s_vs_p))
        img[_random_coords(num_pepper)] = 0
    else:
        raise ValueError('not support mode for {}'.format(mode))

    noisy = img.astype(original_dtype)
    return noisy
def gaussian_blur(img, kernel_size):
    """Blur the image with a square Gaussian kernel.

    sigmaX is passed as 0, so OpenCV derives it from the kernel size:
    ``sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8``.
    """
    ksize = (kernel_size, kernel_size)
    return cv2.GaussianBlur(img, ksize, sigmaX=0)
def adjust_brightness(img, value=0):
    """Add a constant brightness offset to the image.

    Float images are clipped to [0, 1] and returned as float32; integer
    images are clipped to the full range of their dtype and keep it.

    Fixes over the original version: ``np.float``/``np.float128`` are
    gone from modern NumPy (AttributeError), and the old code passed an
    ``np.iinfo`` object to ``astype``, which raises TypeError for every
    integer image.

    Arguments:
        img {ndarray} -- input image.

    Keyword Arguments:
        value {number} -- brightness offset, may be negative (default: {0})

    Returns:
        ndarray -- brightness-adjusted image.
    """
    if np.issubdtype(img.dtype, np.floating):
        dtype_min, dtype_max = 0, 1
        dtype = np.float32
    else:
        info = np.iinfo(img.dtype)
        dtype_min, dtype_max = info.min, info.max
        dtype = img.dtype  # keep the integer dtype of the input
    result = np.clip(img.astype(np.float64) + value, dtype_min, dtype_max).astype(dtype)
    return result
def adjust_contrast(img, factor):
    """Scale the image by a contrast factor.

    Float images are clipped to [0, 1] and returned as float32; integer
    images are clipped to the full range of their dtype and keep it.

    Fixes over the original version: ``np.float``/``np.float128`` are
    gone from modern NumPy (AttributeError), and the old code passed an
    ``np.iinfo`` object to ``astype``, which raises TypeError for every
    integer image.

    Arguments:
        img {ndarray} -- input image.
        factor {number} -- multiplicative contrast factor.

    Returns:
        ndarray -- contrast-adjusted image.
    """
    if np.issubdtype(img.dtype, np.floating):
        dtype_min, dtype_max = 0, 1
        dtype = np.float32
    else:
        info = np.iinfo(img.dtype)
        dtype_min, dtype_max = info.min, info.max
        dtype = img.dtype  # keep the integer dtype of the input
    result = np.clip(img.astype(np.float64) * factor, dtype_min, dtype_max).astype(dtype)
    return result
def adjust_saturation():
    """Adjust image saturation. Not implemented yet."""
    # TODO
    pass
def adjust_hue():
    """Adjust image hue. Not implemented yet."""
    # TODO
    pass
def to_grayscale(img, output_channels=1):
    """Convert an input ndarray image to grayscale.

    A 2-D input is used as-is; a 3-channel image goes through OpenCV's
    RGB->gray conversion; any other channel count is reduced with a plain
    mean over the channel axis.

    Arguments:
        img {ndarray} -- the input ndarray image

    Keyword Arguments:
        output_channels {int} -- output gray image channel (default: {1})

    Returns:
        ndarray -- gray scale ndarray image
    """
    if img.ndim == 2:
        gray = img
    elif img.shape[2] == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = np.mean(img, axis=2)
    gray = gray.astype(img.dtype)

    if output_channels != 1:
        # replicate the single gray plane, then move channels last
        stacked = np.tile(gray, (output_channels, 1, 1))
        gray = np.transpose(stacked, [1, 2, 0])
    return gray
def shift(img, top, left):
    """Translate the image by (top, left) pixels via an affine warp.

    Positive ``left`` moves the content right, positive ``top`` moves it
    down; uncovered areas are filled with the border default (black).
    """
    height, width = img.shape[:2]
    translation = np.float32([[1, 0, left], [0, 1, top]])
    return cv2.warpAffine(img, translation, (width, height))
def rotate(img, angle, center=None, scale=1.0):
    """Rotate the image by ``angle`` degrees around ``center``.

    Arguments:
        img {ndarray} -- input image.
        angle {number} -- rotation angle in degrees (counter-clockwise).

    Keyword Arguments:
        center {tuple} -- rotation center (x, y); image center when None.
        scale {float} -- isotropic scale factor (default: {1.0})
    """
    height, width = img.shape[:2]
    if center is None:
        center = (width / 2, height / 2)
    rotation = cv2.getRotationMatrix2D(center, angle, scale)
    return cv2.warpAffine(img, rotation, (width, height))
def resize(img, size, interpolation=Image.BILINEAR):
    '''resize the image
    TODO: after opencv resize the image becomes 0~1 (translated note)

    Fix: ``collections.Iterable`` was removed in Python 3.10; use
    ``collections.abc.Iterable`` instead.

    Arguments:
        img {ndarray} -- the input ndarray image
        size {int, iterable} -- the target size; if size is an integer, width and height
            will be resized to the same value, otherwise the size should be
            a tuple (height, width) or list [height, width]

    Keyword Arguments:
        interpolation {Image} -- the interpolation method (default: {Image.BILINEAR})

    Raises:
        TypeError -- img should be ndarray
        ValueError -- size should be integer or iterable variable and length should be 2.

    Returns:
        img -- resized ndarray image
    '''
    if not _is_numpy_image(img):
        raise TypeError('img shoud be ndarray image [w, h, c] or [w, h], but got {}'.format(type(img)))
    if not (isinstance(size, int) or (isinstance(size, collections.abc.Iterable) and len(size) == 2)):
        raise ValueError('size should be intger or iterable vaiable(length is 2), but got {}'.format(type(size)))

    if isinstance(size, int):
        height, width = (size, size)
    else:
        height, width = (size[0], size[1])

    # NOTE(review): the default is a PIL constant (Image.BILINEAR == 2), but
    # cv2.resize interprets 2 as INTER_CUBIC — confirm which behaviour is
    # intended before changing the default.
    return cv2.resize(img, (width, height), interpolation=interpolation)
def pad(img, padding, fill=0, padding_mode='constant'):
    """Pad an ndarray image on all sides.

    Fixes over the original version: ``collections.Iterable`` was removed
    in Python 3.10 (use ``collections.abc.Iterable``), and an invalid
    ``padding`` now raises ValueError instead of UnboundLocalError.

    Arguments:
        img {ndarray} -- 2-D or 3-D image to pad.
        padding {int or sequence} -- one value for all sides, a pair
            (left/right, top/bottom), or a 4-tuple (left, top, right, bottom).

    Keyword Arguments:
        fill {number} -- fill value, used only with 'constant' mode (default: {0})
        padding_mode {str} -- any mode accepted by numpy.pad (default: {'constant'})

    Returns:
        ndarray -- padded image.
    """
    if isinstance(padding, int):
        pad_left = pad_right = pad_top = pad_bottom = padding
    elif isinstance(padding, collections.abc.Iterable) and len(padding) == 2:
        pad_left = pad_right = padding[0]
        pad_bottom = pad_top = padding[1]
    elif isinstance(padding, collections.abc.Iterable) and len(padding) == 4:
        pad_left, pad_top, pad_right, pad_bottom = padding
    else:
        raise ValueError('padding should be an int or a 2/4-element sequence, but got {}'.format(padding))

    if img.ndim == 2:
        pad_width = ((pad_top, pad_bottom), (pad_left, pad_right))
    elif img.ndim == 3:
        pad_width = ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0))
    else:
        # matches the original behaviour: other ranks pass through untouched
        return img

    if padding_mode == 'constant':
        return np.pad(img, pad_width, mode=padding_mode, constant_values=fill)
    return np.pad(img, pad_width, mode=padding_mode)
def crop(img, top, left, height, width):
    '''crop image

    Arguments:
        img {ndarray} -- image to be cropped (2-D or 3-D)
        top {int} -- top coordinate of the crop window
        left {int} -- left coordinate of the crop window
        height {int} -- cropped height
        width {int} -- cropped width

    Raises:
        TypeError -- if img is not a 2-D/3-D ndarray.
        ValueError -- if any coordinate is negative or the window falls
            outside the image.
    '''
    if not _is_numpy_image(img):
        raise TypeError('the input image should be numpy ndarray with dimension 2 or 3.'
                        'but got {}'.format(type(img))
                        )

    # BUG FIX: the original condition tested ``height < 0`` twice and
    # never validated ``top``.
    if width < 0 or height < 0 or left < 0 or top < 0:
        raise ValueError('the input left, top, width, height should be greater than 0'
                         'but got left={}, top={} width={} height={}'.format(left, top, width, height)
                         )

    if img.ndim == 2:
        img_height, img_width = img.shape
    else:
        img_height, img_width, _ = img.shape
    if (left + width) > img_width or (top + height) > img_height:
        raise ValueError('the input crop width and height should be small or '
                         'equal to image width and height. ')

    if img.ndim == 2:
        return img[top:(top + height), left:(left + width)]
    elif img.ndim == 3:
        return img[top:(top + height), left:(left + width), :]
def center_crop(img, output_size):
    '''crop the central region of an image

    Arguments:
        img {ndarray} -- input image
        output_size {number or sequence} -- the output image size. if sequence, should be [h, w]

    Raises:
        ValueError -- the requested size is larger than the image.

    Returns:
        ndarray image -- the cropped ndarray image.
    '''
    img_height, img_width = img.shape[:2]

    if isinstance(output_size, numbers.Number):
        output_size = (int(output_size), int(output_size))
    if output_size[0] > img_height or output_size[1] > img_width:
        raise ValueError('the output_size should not greater than image size, but got {}'.format(output_size))

    target_height, target_width = output_size
    # center the window (rounding matches the original implementation)
    top = int(round((img_height - target_height) / 2))
    left = int(round((img_width - target_width) / 2))
    return crop(img, top, left, target_height, target_width)
def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR):
    """Crop a (top, left, height, width) region, then resize it to ``size``."""
    patch = crop(img, top, left, height, width)
    return resize(patch, size, interpolation)
def vflip(img):
    # vertical flip: mirror around the x-axis (top <-> bottom)
    return cv2.flip(img, 0)
def hflip(img):
    # horizontal flip: mirror around the y-axis (left <-> right)
    return cv2.flip(img, 1)
def flip(img, flip_code):
    # cv2.flip convention: 0 = vertical, positive = horizontal,
    # negative = both axes
    return cv2.flip(img, flip_code)
def elastic_transform(image, alpha, sigma, alpha_affine, interpolation=cv2.INTER_LINEAR,
                      border_mode=cv2.BORDER_REFLECT_101, random_state=None, approximate=False):
    """Elastic deformation of images as described in [Simard2003]_ (with modifications).
    Based on https://gist.github.com/erniejunior/601cdf56d2b424757de5
    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
    Convolutional Neural Networks applied to Visual Document Analysis", in
    Proc. of the International Conference on Document Analysis and
    Recognition, 2003.

    Args:
        image: input ndarray image (first two axes are height, width).
        alpha: scale of the displacement field.
        sigma: smoothing factor of the displacement field.
        alpha_affine: magnitude of the random affine jitter.
        interpolation / border_mode: passed through to OpenCV.
        random_state: numpy RandomState; a fixed seed (1234) is used when
            None, so default calls are deterministic.
        approximate: use a blurred approximation of the displacement field
            (faster on large images).
    """
    if random_state is None:
        # NOTE(review): fixed seed makes the default transform deterministic
        # across calls — confirm this is intended.
        random_state = np.random.RandomState(1234)
    height, width = image.shape[:2]

    # Random affine: jitter three anchor points around the image center
    # and warp the image with the resulting affine transform.
    center_square = np.float32((height, width)) // 2
    square_size = min((height, width)) // 3
    alpha = float(alpha)
    sigma = float(sigma)
    alpha_affine = float(alpha_affine)

    pts1 = np.float32([center_square + square_size, [center_square[0] + square_size, center_square[1] - square_size],
                       center_square - square_size])
    pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, size=pts1.shape).astype(np.float32)
    matrix = cv2.getAffineTransform(pts1, pts2)
    image = cv2.warpAffine(image, matrix, (width, height), flags=interpolation, borderMode=border_mode)

    if approximate:
        # Approximate computation smooth displacement map with a large enough kernel.
        # On large images (512+) this is approximately 2X times faster
        dx = (random_state.rand(height, width).astype(np.float32) * 2 - 1)
        cv2.GaussianBlur(dx, (17, 17), sigma, dst=dx)
        dx *= alpha
        dy = (random_state.rand(height, width).astype(np.float32) * 2 - 1)
        cv2.GaussianBlur(dy, (17, 17), sigma, dst=dy)
        dy *= alpha
    else:
        # exact version: smooth the random field with scipy's gaussian filter
        dx = np.float32(gaussian_filter((random_state.rand(height, width) * 2 - 1), sigma) * alpha)
        dy = np.float32(gaussian_filter((random_state.rand(height, width) * 2 - 1), sigma) * alpha)

    # remap each output pixel along the displacement field
    x, y = np.meshgrid(np.arange(width), np.arange(height))
    mapx = np.float32(x + dx)
    mapy = np.float32(y + dy)

    return cv2.remap(image, mapx, mapy, interpolation, borderMode=border_mode)
def bbox_shift(bboxes, top, left):
    """Shift bounding boxes by (top, left). Not implemented yet."""
    pass
def bbox_vflip(bboxes, img_height):
    """Vertically flip bounding boxes within an image of height ``img_height``.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4] as (xmin, ymin, xmax, ymax)
        img_height (int): height of the image the boxes live in.

    Returns:
        ndarray: flipped boxes, same shape as the input.
    """
    out = bboxes.copy()
    # mirror every y coordinate, then swap the y pair so min stays min
    out[..., 1::2] = img_height - bboxes[..., 1::2]
    out = out[..., [0, 3, 2, 1]]
    return out
def bbox_hflip(bboxes, img_width):
    """Horizontally flip bounding boxes within an image of width ``img_width``.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4] as (xmin, ymin, xmax, ymax)
        img_width (int): width of the image the boxes live in.

    Returns:
        ndarray: flipped boxes, same shape as the input.
    """
    out = bboxes.copy()
    # mirror every x coordinate, then swap the x pair so min stays min
    out[..., 0::2] = img_width - bboxes[..., 0::2]
    out = out[..., [2, 1, 0, 3]]
    return out
def bbox_resize(bboxes, img_size, target_size):
    """Scale bounding boxes from ``img_size`` to ``target_size``.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4]
        img_size (tuple): the source image (height, width)
        target_size (int, or tuple): the target size; an int means a
            square target, a tuple is (height, width).

    Returns:
        ndarray: scaled boxes (float).
    """
    if isinstance(target_size, numbers.Number):
        target_size = (target_size, target_size)

    scale_y = target_size[0] / img_size[0]
    scale_x = target_size[1] / img_size[1]
    # boxes are (x, y, x, y), so the scale pattern is (w, h, w, h)
    return bboxes * [scale_x, scale_y, scale_x, scale_y]
def bbox_crop(bboxes, top, left, height, width):
    '''clip bboxes to a crop window and shift them into its coordinates

    Arguments:
        bboxes {ndarray} -- bbox ndarray [box_nums, 4]
        top {int} -- top of the crop window
        left {int} -- left of the crop window
        height {int} -- crop height
        width {int} -- crop width
    '''
    right, bottom = left + width, top + height
    out = bboxes.copy()
    # clamp to the window, then translate to window-local coordinates
    out[..., 0::2] = bboxes[..., 0::2].clip(left, right) - left
    out[..., 1::2] = bboxes[..., 1::2].clip(top, bottom) - top
    return out
def bbox_pad(bboxes, padding):
    """Shift bounding boxes to account for image padding.

    Fixes over the original version: ``collections.Iterable`` was removed
    in Python 3.10 (use ``collections.abc.Iterable``); an invalid
    ``padding`` now raises ValueError instead of UnboundLocalError; the
    unused right/bottom pad locals were dropped (only the left/top offsets
    move box coordinates).

    Arguments:
        bboxes {ndarray} -- bbox ndarray [box_nums, 4]
        padding {int or sequence} -- same convention as ``pad``: one value
            for all sides, (left/right, top/bottom), or
            (left, top, right, bottom).

    Returns:
        ndarray -- shifted boxes.
    """
    if isinstance(padding, int):
        pad_left = pad_top = padding
    elif isinstance(padding, collections.abc.Iterable) and len(padding) == 2:
        pad_left = padding[0]
        pad_top = padding[1]
    elif isinstance(padding, collections.abc.Iterable) and len(padding) == 4:
        pad_left = padding[0]
        pad_top = padding[1]
    else:
        raise ValueError('padding should be an int or a 2/4-element sequence, but got {}'.format(padding))

    pad_bboxes = bboxes.copy()
    pad_bboxes[..., 0::2] = bboxes[..., 0::2] + pad_left
    pad_bboxes[..., 1::2] = bboxes[..., 1::2] + pad_top
    return pad_bboxes