# ImageEnhancement / torchvision_x_functional.py
# Author: chenzhicun
# Commit: "Initialize web demo." (ec08fea)
import collections
import collections.abc
import numbers
from functools import wraps

import cv2
import numpy as np
import torch
from PIL import Image
from scipy.ndimage.filters import gaussian_filter
# Map numpy dtype names to the matching CPU torch tensor constructors
# (legacy typed-tensor API).
# NOTE(review): 'uint16' maps to ShortTensor (signed 16-bit) because torch
# has no unsigned 16-bit tensor type — values above 32767 would wrap;
# confirm this loss is acceptable.
__numpy_type_map = {
    'float64': torch.DoubleTensor,
    'float32': torch.FloatTensor,
    'float16': torch.HalfTensor,
    'int64': torch.LongTensor,
    'int32': torch.IntTensor,
    'int16': torch.ShortTensor,
    'uint16': torch.ShortTensor,
    'int8': torch.CharTensor,
    'uint8': torch.ByteTensor,
}
'''image functional utils
'''
# NOTE: all the functions should receive an ndarray-like image, shaped W x H x C or W x H
# (translated) If every output were arranged as (height, width, channel),
# could to_tensor be skipped? -- No.
def preserve_channel_dim(func):
    """Decorator that keeps a trailing dummy channel axis.

    If the wrapped function receives an (H, W, 1) image but returns a
    plain 2-D array, the singleton channel axis is re-attached so the
    output shape convention matches the input.
    """
    @wraps(func)
    def wrapped_function(img, *args, **kwargs):
        in_shape = img.shape
        out = func(img, *args, **kwargs)
        lost_channel = len(in_shape) == 3 and in_shape[-1] == 1 and len(out.shape) == 2
        if lost_channel:
            out = np.expand_dims(out, axis=-1)
        return out
    return wrapped_function
def _is_tensor_image(img):
return torch.is_tensor(img) and img.ndimension() == 3
def _is_numpy_image(img):
return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
def to_tensor(img):
    '''Convert a numpy image (H x W x C or H x W) to a float32 torch tensor (C x H x W).

    uint8 input is divided by 255, uint16 by 65535; float input is assumed
    to already lie in [0, 1] and is only cast to float32.

    Arguments:
        img {numpy.ndarray} -- image to be converted to tensor.

    Raises:
        TypeError -- if img is not a 2-D/3-D ndarray or has an unsupported dtype.
    '''
    if not _is_numpy_image(img):
        raise TypeError('data should be numpy ndarray. but got {}'.format(type(img)))

    if img.ndim == 2:
        # promote grayscale to a single-channel image
        img = img[:, :, None]

    if img.dtype == np.uint8:
        img = img.astype(np.float32) / 255
    elif img.dtype == np.uint16:
        img = img.astype(np.float32) / 65535
    elif img.dtype in [np.float32, np.float64]:
        # already normalized; only cast (the old code divided by 1 for no effect)
        img = img.astype(np.float32)
    else:
        raise TypeError('{} is not support'.format(img.dtype))

    # HWC -> CHW, the layout torch expects
    img = torch.from_numpy(img.transpose((2, 0, 1)))
    return img
def to_pil_image(tensor):
    """Convert a tensor to a PIL image. Not implemented yet."""
    # TODO
    pass
def to_tiff_image(tensor):
    """Convert a tensor to a TIFF image. Not implemented yet."""
    # TODO
    pass
def normalize(tensor, mean, std, inplace=False):
    """Normalize a tensor image channel-wise: (x - mean) / std.

    .. note::
        Out of place by default: the input tensor is left untouched
        unless ``inplace`` is True.

    Args:
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace (bool): normalize the input tensor itself when True.

    Returns:
        Tensor: Normalized Tensor image.
    """
    if not _is_tensor_image(tensor):
        raise TypeError('tensor is not a torch image.')

    target = tensor if inplace else tensor.clone()
    mean_t = torch.as_tensor(mean, dtype=target.dtype, device=target.device)
    std_t = torch.as_tensor(std, dtype=target.dtype, device=target.device)
    target.sub_(mean_t[:, None, None]).div_(std_t[:, None, None])
    return target
def noise(img, mode='gaussain', percent=0.02):
    """Apply random noise to an ndarray image.

    Fixes over the original version:
    - the default mode is the misspelling 'gaussain', which matched no
      branch and made every default call raise ValueError; it is now
      accepted as an alias for 'gaussian';
    - ``np.float`` (removed in NumPy >= 1.24) replaced with ``np.float64``;
    - salt/pepper modes no longer mutate the caller's array;
    - leftover debug prints removed.

    Arguments:
        img {ndarray} -- input image, 2-D or 3-D.

    Keyword Arguments:
        mode {str} -- 'gaussian' (alias 'gaussain'), 'salt', 'pepper'
            or 's&p' (default: {'gaussain'})
        percent {float} -- fraction of pixels hit by salt/pepper noise
            (default: {0.02})

    Returns:
        ndarray -- noisy image with the same dtype as the input.

    TODO: Not good for uint16 data
    """
    original_dtype = img.dtype

    def _random_coords(count):
        # one random index array per axis, as a tuple for fancy indexing
        return tuple(np.random.randint(0, i - 1, int(count)) for i in img.shape)

    if mode in ('gaussian', 'gaussain'):
        mean = 0
        var = 0.1
        sigma = var * 0.5
        gauss = np.random.normal(mean, sigma, img.shape)
        if img.dtype not in [np.float32, np.float64]:
            # scale unit-range noise up to the integer dtype's range
            gauss = gauss * np.iinfo(img.dtype).max
            img = np.clip(img.astype(np.float64) + gauss, 0, np.iinfo(img.dtype).max)
        else:
            img = np.clip(img.astype(np.float64) + gauss, 0, 1)
    elif mode == 'salt':
        img = img.copy()  # do not mutate the caller's array
        num_salt = np.ceil(percent * img.size)
        coords = _random_coords(num_salt)
        if img.dtype in [np.float32, np.float64]:
            img[coords] = 1
        else:
            img[coords] = np.iinfo(img.dtype).max
    elif mode == 'pepper':
        img = img.copy()
        num_pepper = np.ceil(percent * img.size)
        img[_random_coords(num_pepper)] = 0
    elif mode == 's&p':
        img = img.copy()
        s_vs_p = 0.5
        # Salt mode
        num_salt = np.ceil(percent * img.size * s_vs_p)
        coords = _random_coords(num_salt)
        if img.dtype in [np.float32, np.float64]:
            img[coords] = 1
        else:
            img[coords] = np.iinfo(img.dtype).max
        # Pepper mode
        num_pepper = np.ceil(percent * img.size * (1. - s_vs_p))
        img[_random_coords(num_pepper)] = 0
    else:
        raise ValueError('not support mode for {}'.format(mode))

    noisy = img.astype(original_dtype)
    return noisy
def gaussian_blur(img, kernel_size):
    """Blur the image with a square Gaussian kernel.

    sigmaX is passed as 0, so OpenCV derives it from the kernel size:
    ``sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8``.
    """
    ksize = (kernel_size, kernel_size)
    return cv2.GaussianBlur(img, ksize, sigmaX=0)
def adjust_brightness(img, value=0):
    """Add a constant brightness offset to the image.

    Float images are clipped to [0, 1] and returned as float32; integer
    images are clipped to the full range of their dtype and keep it.

    Fixes over the original version: ``np.float``/``np.float128`` are
    gone from modern NumPy (AttributeError), and the old code passed an
    ``np.iinfo`` object to ``astype``, which raises TypeError for every
    integer image.

    Arguments:
        img {ndarray} -- input image.

    Keyword Arguments:
        value {number} -- brightness offset, may be negative (default: {0})

    Returns:
        ndarray -- brightness-adjusted image.
    """
    if np.issubdtype(img.dtype, np.floating):
        dtype_min, dtype_max = 0, 1
        dtype = np.float32
    else:
        info = np.iinfo(img.dtype)
        dtype_min, dtype_max = info.min, info.max
        dtype = img.dtype  # keep the integer dtype of the input
    result = np.clip(img.astype(np.float64) + value, dtype_min, dtype_max).astype(dtype)
    return result
def adjust_contrast(img, factor):
    """Scale the image by a contrast factor.

    Float images are clipped to [0, 1] and returned as float32; integer
    images are clipped to the full range of their dtype and keep it.

    Fixes over the original version: ``np.float``/``np.float128`` are
    gone from modern NumPy (AttributeError), and the old code passed an
    ``np.iinfo`` object to ``astype``, which raises TypeError for every
    integer image.

    Arguments:
        img {ndarray} -- input image.
        factor {number} -- multiplicative contrast factor.

    Returns:
        ndarray -- contrast-adjusted image.
    """
    if np.issubdtype(img.dtype, np.floating):
        dtype_min, dtype_max = 0, 1
        dtype = np.float32
    else:
        info = np.iinfo(img.dtype)
        dtype_min, dtype_max = info.min, info.max
        dtype = img.dtype  # keep the integer dtype of the input
    result = np.clip(img.astype(np.float64) * factor, dtype_min, dtype_max).astype(dtype)
    return result
def adjust_saturation():
    """Adjust image saturation. Not implemented yet."""
    # TODO
    pass
def adjust_hue():
    """Adjust image hue. Not implemented yet."""
    # TODO
    pass
def to_grayscale(img, output_channels=1):
    """Convert an input ndarray image to grayscale.

    A 2-D input is used as-is; a 3-channel image goes through OpenCV's
    RGB->gray conversion; any other channel count is reduced with a plain
    mean over the channel axis.

    Arguments:
        img {ndarray} -- the input ndarray image

    Keyword Arguments:
        output_channels {int} -- output gray image channel (default: {1})

    Returns:
        ndarray -- gray scale ndarray image
    """
    if img.ndim == 2:
        gray = img
    elif img.shape[2] == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = np.mean(img, axis=2)
    gray = gray.astype(img.dtype)

    if output_channels != 1:
        # replicate the single gray plane, then move channels last
        stacked = np.tile(gray, (output_channels, 1, 1))
        gray = np.transpose(stacked, [1, 2, 0])
    return gray
def shift(img, top, left):
    """Translate the image by (top, left) pixels via an affine warp.

    Positive ``left`` moves the content right, positive ``top`` moves it
    down; uncovered areas are filled with the border default (black).
    """
    height, width = img.shape[:2]
    translation = np.float32([[1, 0, left], [0, 1, top]])
    return cv2.warpAffine(img, translation, (width, height))
def rotate(img, angle, center=None, scale=1.0):
    """Rotate the image by ``angle`` degrees around ``center``.

    Arguments:
        img {ndarray} -- input image.
        angle {number} -- rotation angle in degrees (counter-clockwise).

    Keyword Arguments:
        center {tuple} -- rotation center (x, y); image center when None.
        scale {float} -- isotropic scale factor (default: {1.0})
    """
    height, width = img.shape[:2]
    if center is None:
        center = (width / 2, height / 2)
    rotation = cv2.getRotationMatrix2D(center, angle, scale)
    return cv2.warpAffine(img, rotation, (width, height))
def resize(img, size, interpolation=Image.BILINEAR):
    '''resize the image
    TODO: after opencv resize the image becomes 0~1 (translated note)

    Fix: ``collections.Iterable`` was removed in Python 3.10; use
    ``collections.abc.Iterable`` instead.

    Arguments:
        img {ndarray} -- the input ndarray image
        size {int, iterable} -- the target size; if size is an integer, width and height
            will be resized to the same value, otherwise the size should be
            a tuple (height, width) or list [height, width]

    Keyword Arguments:
        interpolation {Image} -- the interpolation method (default: {Image.BILINEAR})

    Raises:
        TypeError -- img should be ndarray
        ValueError -- size should be integer or iterable variable and length should be 2.

    Returns:
        img -- resized ndarray image
    '''
    if not _is_numpy_image(img):
        raise TypeError('img shoud be ndarray image [w, h, c] or [w, h], but got {}'.format(type(img)))
    if not (isinstance(size, int) or (isinstance(size, collections.abc.Iterable) and len(size) == 2)):
        raise ValueError('size should be intger or iterable vaiable(length is 2), but got {}'.format(type(size)))

    if isinstance(size, int):
        height, width = (size, size)
    else:
        height, width = (size[0], size[1])

    # NOTE(review): the default is a PIL constant (Image.BILINEAR == 2), but
    # cv2.resize interprets 2 as INTER_CUBIC — confirm which behaviour is
    # intended before changing the default.
    return cv2.resize(img, (width, height), interpolation=interpolation)
def pad(img, padding, fill=0, padding_mode='constant'):
    """Pad an ndarray image on all sides.

    Fixes over the original version: ``collections.Iterable`` was removed
    in Python 3.10 (use ``collections.abc.Iterable``), and an invalid
    ``padding`` now raises ValueError instead of UnboundLocalError.

    Arguments:
        img {ndarray} -- 2-D or 3-D image to pad.
        padding {int or sequence} -- one value for all sides, a pair
            (left/right, top/bottom), or a 4-tuple (left, top, right, bottom).

    Keyword Arguments:
        fill {number} -- fill value, used only with 'constant' mode (default: {0})
        padding_mode {str} -- any mode accepted by numpy.pad (default: {'constant'})

    Returns:
        ndarray -- padded image.
    """
    if isinstance(padding, int):
        pad_left = pad_right = pad_top = pad_bottom = padding
    elif isinstance(padding, collections.abc.Iterable) and len(padding) == 2:
        pad_left = pad_right = padding[0]
        pad_bottom = pad_top = padding[1]
    elif isinstance(padding, collections.abc.Iterable) and len(padding) == 4:
        pad_left, pad_top, pad_right, pad_bottom = padding
    else:
        raise ValueError('padding should be an int or a 2/4-element sequence, but got {}'.format(padding))

    if img.ndim == 2:
        pad_width = ((pad_top, pad_bottom), (pad_left, pad_right))
    elif img.ndim == 3:
        pad_width = ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0))
    else:
        # matches the original behaviour: other ranks pass through untouched
        return img

    if padding_mode == 'constant':
        return np.pad(img, pad_width, mode=padding_mode, constant_values=fill)
    return np.pad(img, pad_width, mode=padding_mode)
def crop(img, top, left, height, width):
    '''crop image

    Arguments:
        img {ndarray} -- image to be cropped (2-D or 3-D)
        top {int} -- top coordinate of the crop window
        left {int} -- left coordinate of the crop window
        height {int} -- cropped height
        width {int} -- cropped width

    Raises:
        TypeError -- if img is not a 2-D/3-D ndarray.
        ValueError -- if any coordinate is negative or the window falls
            outside the image.
    '''
    if not _is_numpy_image(img):
        raise TypeError('the input image should be numpy ndarray with dimension 2 or 3.'
                        'but got {}'.format(type(img))
                        )

    # BUG FIX: the original condition tested ``height < 0`` twice and
    # never validated ``top``.
    if width < 0 or height < 0 or left < 0 or top < 0:
        raise ValueError('the input left, top, width, height should be greater than 0'
                         'but got left={}, top={} width={} height={}'.format(left, top, width, height)
                         )

    if img.ndim == 2:
        img_height, img_width = img.shape
    else:
        img_height, img_width, _ = img.shape
    if (left + width) > img_width or (top + height) > img_height:
        raise ValueError('the input crop width and height should be small or '
                         'equal to image width and height. ')

    if img.ndim == 2:
        return img[top:(top + height), left:(left + width)]
    elif img.ndim == 3:
        return img[top:(top + height), left:(left + width), :]
def center_crop(img, output_size):
    '''crop the central region of an image

    Arguments:
        img {ndarray} -- input image
        output_size {number or sequence} -- the output image size. if sequence, should be [h, w]

    Raises:
        ValueError -- the requested size is larger than the image.

    Returns:
        ndarray image -- the cropped ndarray image.
    '''
    img_height, img_width = img.shape[:2]

    if isinstance(output_size, numbers.Number):
        output_size = (int(output_size), int(output_size))
    if output_size[0] > img_height or output_size[1] > img_width:
        raise ValueError('the output_size should not greater than image size, but got {}'.format(output_size))

    target_height, target_width = output_size
    # center the window (rounding matches the original implementation)
    top = int(round((img_height - target_height) / 2))
    left = int(round((img_width - target_width) / 2))
    return crop(img, top, left, target_height, target_width)
def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR):
    """Crop a (top, left, height, width) region, then resize it to ``size``."""
    patch = crop(img, top, left, height, width)
    return resize(patch, size, interpolation)
def vflip(img):
    # vertical flip: mirror around the x-axis (top <-> bottom)
    return cv2.flip(img, 0)
def hflip(img):
    # horizontal flip: mirror around the y-axis (left <-> right)
    return cv2.flip(img, 1)
def flip(img, flip_code):
    # cv2.flip convention: 0 = vertical, positive = horizontal,
    # negative = both axes
    return cv2.flip(img, flip_code)
def elastic_transform(image, alpha, sigma, alpha_affine, interpolation=cv2.INTER_LINEAR,
                      border_mode=cv2.BORDER_REFLECT_101, random_state=None, approximate=False):
    """Elastic deformation of images as described in [Simard2003]_ (with modifications).
    Based on https://gist.github.com/erniejunior/601cdf56d2b424757de5
    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
    Convolutional Neural Networks applied to Visual Document Analysis", in
    Proc. of the International Conference on Document Analysis and
    Recognition, 2003.

    Args:
        image: input ndarray image (first two axes are height, width).
        alpha: scale of the displacement field.
        sigma: smoothing factor of the displacement field.
        alpha_affine: magnitude of the random affine jitter.
        interpolation / border_mode: passed through to OpenCV.
        random_state: numpy RandomState; a fixed seed (1234) is used when
            None, so default calls are deterministic.
        approximate: use a blurred approximation of the displacement field
            (faster on large images).
    """
    if random_state is None:
        # NOTE(review): fixed seed makes the default transform deterministic
        # across calls — confirm this is intended.
        random_state = np.random.RandomState(1234)
    height, width = image.shape[:2]

    # Random affine: jitter three anchor points around the image center
    # and warp the image with the resulting affine transform.
    center_square = np.float32((height, width)) // 2
    square_size = min((height, width)) // 3
    alpha = float(alpha)
    sigma = float(sigma)
    alpha_affine = float(alpha_affine)

    pts1 = np.float32([center_square + square_size, [center_square[0] + square_size, center_square[1] - square_size],
                       center_square - square_size])
    pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, size=pts1.shape).astype(np.float32)
    matrix = cv2.getAffineTransform(pts1, pts2)
    image = cv2.warpAffine(image, matrix, (width, height), flags=interpolation, borderMode=border_mode)

    if approximate:
        # Approximate computation smooth displacement map with a large enough kernel.
        # On large images (512+) this is approximately 2X times faster
        dx = (random_state.rand(height, width).astype(np.float32) * 2 - 1)
        cv2.GaussianBlur(dx, (17, 17), sigma, dst=dx)
        dx *= alpha
        dy = (random_state.rand(height, width).astype(np.float32) * 2 - 1)
        cv2.GaussianBlur(dy, (17, 17), sigma, dst=dy)
        dy *= alpha
    else:
        # exact version: smooth the random field with scipy's gaussian filter
        dx = np.float32(gaussian_filter((random_state.rand(height, width) * 2 - 1), sigma) * alpha)
        dy = np.float32(gaussian_filter((random_state.rand(height, width) * 2 - 1), sigma) * alpha)

    # remap each output pixel along the displacement field
    x, y = np.meshgrid(np.arange(width), np.arange(height))
    mapx = np.float32(x + dx)
    mapy = np.float32(y + dy)

    return cv2.remap(image, mapx, mapy, interpolation, borderMode=border_mode)
def bbox_shift(bboxes, top, left):
    """Shift bounding boxes by (top, left). Not implemented yet."""
    pass
def bbox_vflip(bboxes, img_height):
    """Vertically flip bounding boxes within an image of height ``img_height``.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4] as (xmin, ymin, xmax, ymax)
        img_height (int): height of the image the boxes live in.

    Returns:
        ndarray: flipped boxes, same shape as the input.
    """
    out = bboxes.copy()
    # mirror every y coordinate, then swap the y pair so min stays min
    out[..., 1::2] = img_height - bboxes[..., 1::2]
    out = out[..., [0, 3, 2, 1]]
    return out
def bbox_hflip(bboxes, img_width):
    """Horizontally flip bounding boxes within an image of width ``img_width``.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4] as (xmin, ymin, xmax, ymax)
        img_width (int): width of the image the boxes live in.

    Returns:
        ndarray: flipped boxes, same shape as the input.
    """
    out = bboxes.copy()
    # mirror every x coordinate, then swap the x pair so min stays min
    out[..., 0::2] = img_width - bboxes[..., 0::2]
    out = out[..., [2, 1, 0, 3]]
    return out
def bbox_resize(bboxes, img_size, target_size):
    """Scale bounding boxes from ``img_size`` to ``target_size``.

    Args:
        bboxes (ndarray): bbox ndarray [box_nums, 4]
        img_size (tuple): the source image (height, width)
        target_size (int, or tuple): the target size; an int means a
            square target, a tuple is (height, width).

    Returns:
        ndarray: scaled boxes (float).
    """
    if isinstance(target_size, numbers.Number):
        target_size = (target_size, target_size)

    scale_y = target_size[0] / img_size[0]
    scale_x = target_size[1] / img_size[1]
    # boxes are (x, y, x, y), so the scale pattern is (w, h, w, h)
    return bboxes * [scale_x, scale_y, scale_x, scale_y]
def bbox_crop(bboxes, top, left, height, width):
    '''clip bboxes to a crop window and shift them into its coordinates

    Arguments:
        bboxes {ndarray} -- bbox ndarray [box_nums, 4]
        top {int} -- top of the crop window
        left {int} -- left of the crop window
        height {int} -- crop height
        width {int} -- crop width
    '''
    right, bottom = left + width, top + height
    out = bboxes.copy()
    # clamp to the window, then translate to window-local coordinates
    out[..., 0::2] = bboxes[..., 0::2].clip(left, right) - left
    out[..., 1::2] = bboxes[..., 1::2].clip(top, bottom) - top
    return out
def bbox_pad(bboxes, padding):
    """Shift bounding boxes to account for image padding.

    Fixes over the original version: ``collections.Iterable`` was removed
    in Python 3.10 (use ``collections.abc.Iterable``); an invalid
    ``padding`` now raises ValueError instead of UnboundLocalError; the
    unused right/bottom pad locals were dropped (only the left/top offsets
    move box coordinates).

    Arguments:
        bboxes {ndarray} -- bbox ndarray [box_nums, 4]
        padding {int or sequence} -- same convention as ``pad``: one value
            for all sides, (left/right, top/bottom), or
            (left, top, right, bottom).

    Returns:
        ndarray -- shifted boxes.
    """
    if isinstance(padding, int):
        pad_left = pad_top = padding
    elif isinstance(padding, collections.abc.Iterable) and len(padding) == 2:
        pad_left = padding[0]
        pad_top = padding[1]
    elif isinstance(padding, collections.abc.Iterable) and len(padding) == 4:
        pad_left = padding[0]
        pad_top = padding[1]
    else:
        raise ValueError('padding should be an int or a 2/4-element sequence, but got {}'.format(padding))

    pad_bboxes = bboxes.copy()
    pad_bboxes[..., 0::2] = bboxes[..., 0::2] + pad_left
    pad_bboxes[..., 1::2] = bboxes[..., 1::2] + pad_top
    return pad_bboxes