Spaces:

dongyi
/

MMFS

Sleeping

MMFS / utils /augmentation.py

limoran

add basic files

7e2a2a5 over 1 year ago

24.6 kB

	from packaging import version
	import random
	import numpy as np
	from PIL import Image, ImageFilter, ImageOps
	from torchvision.transforms.transforms import Lambda, Compose
	from torchvision.transforms import functional as F
	from collections.abc import Iterable
	import torch, torchvision
	import numbers
	import copy

	if version.parse(torchvision.__version__) <= version.parse('0.7.0'):
	from torchvision.transforms.transforms import _get_image_size

	def check_input_type_perform_action(input, type, action, *args, **kwargs):
	    """Apply ``action`` to ``input``, or to every element of ``input`` when it is a list.

	    Only items that match ``type`` are transformed; everything else is passed
	    through unchanged.

	    Args:
	        input: A single item or a ``list`` of items.
	        type: Class (or tuple of classes) an item must be an instance of to be
	            transformed. ``None`` means "any item that is not ``None``".
	        action: Callable invoked as ``action(item, *args, **kwargs)``.
	        *args: Extra positional arguments forwarded to ``action``.
	        **kwargs: Extra keyword arguments forwarded to ``action``.

	    Returns:
	        The transformed item, or a new list of transformed items. The list passed
	        in is left untouched so that callers can safely reuse it.
	    """

	    def _convert(item):
	        if type is None:
	            # Checked separately so that isinstance() is never called with None as the class.
	            return item if item is None else action(item, *args, **kwargs)
	        return action(item, *args, **kwargs) if isinstance(item, type) else item

	    if isinstance(input, list):
	        return [_convert(item) for item in input]
	    return _convert(input)


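	"""
	Most of the transforms below are adapted from torchvision.transforms.transforms and extended to take two inputs
	(the second of which may be a single item or a list of items), so that paired inputs are transformed together.
	"""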

	class JointCompose(object):
	    """Chain several two-input transforms into a single callable.

	    Each transform is called with the pair produced by the previous one and
	    must itself return a pair.
	    """

	    def __init__(self, transforms):
	        self.transforms = transforms

	    def __call__(self, input1, input2):
	        first, second = input1, input2
	        for step in self.transforms:
	            first, second = step(first, second)
	        return first, second


	class Grayscale(object):
	    """Convert either input to grayscale when its channel setting is 1.

	    Any channel setting other than 1 leaves the corresponding input untouched.
	    """

	    def __init__(self, input1_output_channels=1, input2_output_channels=1):
	        self.input1_output_channels = input1_output_channels
	        self.input2_output_channels = input2_output_channels

	    def __call__(self, input1, input2):
	        output1 = input1
	        if self.input1_output_channels == 1:
	            output1 = F.to_grayscale(input1, num_output_channels=self.input1_output_channels)

	        output2 = input2
	        if self.input2_output_channels == 1:
	            # Only PIL images inside input2 are converted; other items pass through.
	            output2 = check_input_type_perform_action(
	                input2, Image.Image, F.to_grayscale,
	                num_output_channels=self.input2_output_channels)

	        return output1, output2


	class Resize(object):
	    """Resize both inputs to the same size with the same interpolation.

	    Args:
	        size: An int or a two-element iterable, forwarded to ``F.resize``.
	        interpolation: Interpolation mode forwarded to ``F.resize``.
	    """

	    def __init__(self, size, interpolation=Image.BILINEAR):
	        assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)
	        self.interpolation = interpolation
	        self.size = size

	    def __call__(self, input1, input2):
	        resize_args = (self.size, self.interpolation)
	        resized1 = F.resize(input1, *resize_args)
	        # Only PIL images inside input2 are resized; other items pass through.
	        resized2 = check_input_type_perform_action(input2, Image.Image, F.resize, *resize_args)
	        return resized1, resized2


	class ScaleWidth:
	    """Scale an image to a fixed width, then make it square.

	    The image is resized so that its width equals ``target_size`` while keeping
	    the aspect ratio. A result that is taller than wide is cropped vertically
	    around its centre; a result that is shorter than wide is padded above and below.
	    """

	    def __init__(self, target_size, method=Image.BICUBIC):
	        self.target_size = target_size
	        self.method = method

	    def scalewidth(self, img):
	        src_w, src_h = img.size
	        side = self.target_size
	        new_h = int(self.target_size * src_h / src_w)
	        scaled = img.resize((side, new_h), self.method)

	        if new_h > side:
	            # Too tall: keep a band of height `side` around the vertical centre.
	            upper = new_h // 2 - self.target_size // 2
	            return scaled.crop((0, upper, self.target_size, upper + self.target_size))

	        if new_h < side:
	            # Too short: pad; the width already equals the target, so its share is zero.
	            extra_w = self.target_size - side
	            extra_h = self.target_size - new_h
	            border = (extra_w // 2, extra_h // 2,
	                      extra_w - (extra_w // 2), extra_h - (extra_h // 2))
	            return ImageOps.expand(scaled, border)

	        return scaled

	    def __call__(self, input1, input2):
	        scaled1 = self.scalewidth(input1)
	        # Only PIL images inside input2 are rescaled; other items pass through.
	        scaled2 = check_input_type_perform_action(input2, Image.Image, self.scalewidth)
	        return scaled1, scaled2


	class RandomCrop(object):
	    """Cut the same randomly placed window out of both inputs.

	    Args:
	        size: Crop size. A single number gives a square ``(size, size)`` crop;
	            otherwise it is used as ``(height, width)``.
	        padding: Optional padding forwarded to ``F.pad`` before cropping.
	        pad_if_needed: When true, images smaller than the crop size are padded.
	        fill: Fill value forwarded to ``F.pad``.
	        padding_mode: Padding mode forwarded to ``F.pad``.
	    """

	    def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'):
	        if isinstance(size, numbers.Number):
	            self.size = (int(size), int(size))
	        else:
	            self.size = size
	        self.padding = padding
	        self.pad_if_needed = pad_if_needed
	        self.fill = fill
	        self.padding_mode = padding_mode

	    @staticmethod
	    def get_params(img, output_size):
	        """Pick a random crop window.

	        Args:
	            img: Image to be cropped.
	            output_size: Expected crop size as ``(height, width)``.

	        Returns:
	            Tuple ``(top, left, height, width)`` to pass to ``F.crop``.
	        """
	        if version.parse(torchvision.__version__) <= version.parse('0.7.0'):
	            w, h = _get_image_size(img)
	        else:
	            # Newer torchvision exposes the public ``get_image_size``; the private
	            # ``_get_image_size`` only exists in some intermediate releases.
	            size_fn = getattr(F, 'get_image_size', None) or F._get_image_size
	            w, h = size_fn(img)
	        th, tw = output_size
	        if w == tw and h == th:
	            return 0, 0, h, w

	        i = random.randint(0, h - th)
	        j = random.randint(0, w - tw)
	        return i, j, th, tw

	    def pad(self, img):
	        """Apply the configured padding, plus extra padding if the image is too small."""
	        if self.padding is not None:
	            img = F.pad(img, self.padding, self.fill, self.padding_mode)

	        # pad the width if needed
	        if self.pad_if_needed and img.size[0] < self.size[1]:
	            img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
	        # pad the height if needed
	        if self.pad_if_needed and img.size[1] < self.size[0]:
	            img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)

	        return img

	    def get_crop_range(self, img):
	        """Return a random crop window for ``img`` using the configured size."""
	        return self.get_params(img, self.size)

	    def pad_and_crop(self, input, i, j, h, w):
	        """Pad ``input`` and cut out the given window."""
	        return F.crop(self.pad(input), i, j, h, w)

	    def __call__(self, input1, input2):
	        output1 = self.pad(input1)
	        # The window is chosen once, from the padded first input, and reused for input2.
	        i, j, h, w = self.get_crop_range(output1)
	        output1 = F.crop(output1, i, j, h, w)
	        output2 = check_input_type_perform_action(input2, Image.Image, self.pad_and_crop, i, j, h, w)
	        return output1, output2


	class Crop:
	    """Cut a fixed square window out of both inputs.

	    Args:
	        pos: ``(x, y)`` of the window's top-left corner.
	        size: Side length of the square window.
	    """

	    def __init__(self, pos, size):
	        self.pos = pos
	        self.size = size

	    def crop(self, img):
	        width, height = img.size
	        left, top = self.pos
	        side = self.size
	        # Images that already fit inside the window are returned untouched.
	        if width <= side and height <= side:
	            return img
	        return img.crop((left, top, left + side, top + side))

	    def __call__(self, input1, input2):
	        cropped1 = self.crop(input1)
	        # Only PIL images inside input2 are cropped; other items pass through.
	        cropped2 = check_input_type_perform_action(input2, Image.Image, self.crop)
	        return cropped1, cropped2


	class ColorJitter(object):
	    """Apply the same random brightness/contrast/saturation/hue change to both inputs.

	    Each argument is either a single non-negative number (turned into a range
	    around the neutral value) or an explicit ``[min, max]`` pair.
	    """

	    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
	        self.brightness = self._check_input(brightness, 'brightness')
	        self.contrast = self._check_input(contrast, 'contrast')
	        self.saturation = self._check_input(saturation, 'saturation')
	        self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5),
	                                     clip_first_on_zero=False)

	    def _check_input(self, value, name, center=1, bound=(0, float('inf')), clip_first_on_zero=True):
	        """Normalise one jitter setting to a ``[min, max]`` range, or ``None`` for a no-op.

	        Raises:
	            ValueError: For a negative number or a pair outside ``bound``.
	            TypeError: For anything that is neither a number nor a 2-element list/tuple.
	        """
	        if isinstance(value, numbers.Number):
	            if value < 0:
	                raise ValueError("If {} is a single number, it must be non negative.".format(name))
	            low, high = center - value, center + value
	            if clip_first_on_zero:
	                low = max(low, 0)
	            value = [low, high]
	        elif isinstance(value, (tuple, list)) and len(value) == 2:
	            in_bounds = bound[0] <= value[0] <= value[1] <= bound[1]
	            if not in_bounds:
	                raise ValueError("{} values should be between {}".format(name, bound))
	        else:
	            raise TypeError("{} should be a single number or a list/tuple with lenght 2.".format(name))

	        # A range collapsed onto the neutral value changes nothing, so it is dropped.
	        if value[0] == value[1] == center:
	            return None
	        return value

	    @staticmethod
	    def get_params(brightness, contrast, saturation, hue):
	        """Build a randomly ordered composition of the enabled adjustments.

	        Each argument is a ``[min, max]`` range or ``None`` to skip that adjustment.
	        """
	        candidates = (
	            (brightness, F.adjust_brightness),
	            (contrast, F.adjust_contrast),
	            (saturation, F.adjust_saturation),
	            (hue, F.adjust_hue),
	        )

	        steps = []
	        for bounds, adjust in candidates:
	            if bounds is None:
	                continue
	            factor = random.uniform(bounds[0], bounds[1])
	            # Defaults freeze this iteration's function and factor inside the lambda.
	            steps.append(Lambda(lambda img, _adjust=adjust, _factor=factor: _adjust(img, _factor)))

	        random.shuffle(steps)
	        return Compose(steps)

	    def __call__(self, input1, input2):
	        # One transform is sampled per call so both inputs receive identical jitter.
	        jitter = self.get_params(self.brightness, self.contrast,
	                                 self.saturation, self.hue)
	        jittered1 = jitter(input1)
	        jittered2 = check_input_type_perform_action(input2, Image.Image, jitter)
	        return jittered1, jittered2


	class RandomAffine(object):
	    """Apply one randomly sampled affine transform to both inputs.

	    Args:
	        degrees: Rotation range; a single number ``d`` means ``(-d, d)``.
	        translate: Optional pair of maximum shifts as fractions of the image size.
	        scale: Optional ``(min, max)`` range for the scale factor.
	        shear: Optional shear range: a number, a 2-element or a 4-element sequence.
	        resample: Forwarded to ``F.affine``.
	        fillcolor: Forwarded to ``F.affine``.
	    """

	    def __init__(self, degrees, translate=None, scale=None, shear=None, resample=False, fillcolor=0):
	        if not isinstance(degrees, numbers.Number):
	            assert isinstance(degrees, (tuple, list)) and len(degrees) == 2, \
	                "degrees should be a list or tuple and it must be of length 2."
	            self.degrees = degrees
	        elif degrees < 0:
	            raise ValueError("If degrees is a single number, it must be positive.")
	        else:
	            self.degrees = (-degrees, degrees)

	        if translate is not None:
	            assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
	                "translate should be a list or tuple and it must be of length 2."
	            for fraction in translate:
	                if not (0.0 <= fraction <= 1.0):
	                    raise ValueError("translation values should be between 0 and 1")
	        self.translate = translate

	        if scale is not None:
	            assert isinstance(scale, (tuple, list)) and len(scale) == 2, \
	                "scale should be a list or tuple and it must be of length 2."
	            for factor in scale:
	                if factor <= 0:
	                    raise ValueError("scale values should be positive")
	        self.scale = scale

	        if shear is None:
	            self.shear = shear
	        elif isinstance(shear, numbers.Number):
	            if shear < 0:
	                raise ValueError("If shear is a single number, it must be positive.")
	            self.shear = (-shear, shear)
	        else:
	            assert isinstance(shear, (tuple, list)) and \
	                (len(shear) == 2 or len(shear) == 4), \
	                "shear should be a list or tuple and it must be of length 2 or 4."
	            if len(shear) == 2:
	                # Only an x-axis range was given; the y-axis range is fixed at zero.
	                self.shear = [shear[0], shear[1], 0., 0.]
	            elif len(shear) == 4:
	                self.shear = list(shear)

	        self.resample = resample
	        self.fillcolor = fillcolor

	    @staticmethod
	    def get_params(degrees, translate, scale_ranges, shears, img_size):
	        """Sample the affine parameters.

	        Returns:
	            Tuple ``(angle, translations, scale, shear)``.
	        """
	        angle = random.uniform(degrees[0], degrees[1])

	        if translate is None:
	            translations = (0, 0)
	        else:
	            max_dx = translate[0] * img_size[0]
	            max_dy = translate[1] * img_size[1]
	            shift_x = np.round(random.uniform(-max_dx, max_dx))
	            shift_y = np.round(random.uniform(-max_dy, max_dy))
	            translations = (shift_x, shift_y)

	        if scale_ranges is None:
	            scale = 1.0
	        else:
	            scale = random.uniform(scale_ranges[0], scale_ranges[1])

	        if shears is None:
	            shear = 0.0
	        elif len(shears) == 2:
	            shear = [random.uniform(shears[0], shears[1]), 0.]
	        elif len(shears) == 4:
	            shear = [random.uniform(shears[0], shears[1]),
	                     random.uniform(shears[2], shears[3])]

	        return angle, translations, scale, shear

	    def __call__(self, input1, input2):
	        # Parameters are sampled once, from input1's size, and reused for input2.
	        sampled = self.get_params(self.degrees, self.translate, self.scale, self.shear, input1.size)
	        options = dict(resample=self.resample, fillcolor=self.fillcolor)
	        warped1 = F.affine(input1, *sampled, **options)
	        warped2 = check_input_type_perform_action(input2, Image.Image, F.affine, *sampled, **options)
	        return warped1, warped2


	class RandomRotation(object):
	    """Rotate both inputs by the same randomly sampled angle.

	    Args:
	        degrees: Angle range; a single number ``d`` means ``(-d, d)``.
	        resample: Forwarded to ``F.rotate``.
	        expand: Forwarded to ``F.rotate``.
	        center: Forwarded to ``F.rotate``.
	        fill: Forwarded to ``F.rotate``.
	    """

	    def __init__(self, degrees, resample=False, expand=False, center=None, fill=None):
	        if not isinstance(degrees, numbers.Number):
	            if len(degrees) != 2:
	                raise ValueError("If degrees is a sequence, it must be of len 2.")
	            self.degrees = degrees
	        elif degrees < 0:
	            raise ValueError("If degrees is a single number, it must be positive.")
	        else:
	            self.degrees = (-degrees, degrees)

	        self.resample = resample
	        self.expand = expand
	        self.center = center
	        self.fill = fill

	    @staticmethod
	    def get_params(degrees):
	        """Draw a rotation angle uniformly from ``degrees``."""
	        return random.uniform(degrees[0], degrees[1])

	    def __call__(self, input1, input2):
	        # The angle is sampled once so both inputs are rotated identically.
	        rotate_args = (self.get_params(self.degrees), self.resample, self.expand, self.center, self.fill)
	        rotated1 = F.rotate(input1, *rotate_args)
	        rotated2 = check_input_type_perform_action(input2, Image.Image, F.rotate, *rotate_args)
	        return rotated1, rotated2


	class RandomBlur:
	    """Gaussian-blur both inputs with probability ``blur_chance``."""

	    def __init__(self, blur_chance):
	        self.blur_chance = blur_chance

	    def get_params(self):
	        """Return an odd blur size between 3 and 11, or ``None`` to skip blurring."""
	        if not (self.blur_chance > random.random()):
	            return None
	        # Redraw until the value is odd.
	        while True:
	            size = random.randint(3, 12)
	            if size % 2 == 1:
	                return size

	    def blur(self, image, kernel):
	        """Blur ``image`` using ``kernel`` as the Gaussian radius."""
	        return image.filter(ImageFilter.GaussianBlur(radius=kernel))

	    def __call__(self, input1, input2):
	        size = self.get_params()
	        if size is None:
	            return input1, input2
	        blurred1 = self.blur(input1, size)
	        # Only PIL images inside input2 are blurred; other items pass through.
	        blurred2 = check_input_type_perform_action(input2, Image.Image, self.blur, size)
	        return blurred1, blurred2


	class NoiseTransform:
	    """Add the same randomly generated noise to both inputs.

	    Code is partly from
	    http://www.xiaoliangbai.com/2016/09/09/more-on-image-noise-generation and edited by Oliver.

	    Args:
	        noise_type: One of ``"gauss"``, ``"s&p"``, ``"poisson"``, ``"speckle"`` or
	            ``"band"``. Any other value makes the transform a no-op.
	        apply_prob: Probability that noise is applied on a given call.

	    NOTE(review): the noise is generated for input1's array shape; applying it to
	    an input2 image of a different shape is not handled here.
	    """

	    def __init__(self, noise_type, apply_prob=0.5):
	        self.noise_type = noise_type
	        self.apply_prob = apply_prob

	    def get_params(self, image):
	        """Sample noise parameters for ``image``.

	        Args:
	            image: Anything ``np.array`` can convert to an array with at least two axes.

	        Returns:
	            ``None`` when noise is skipped, an empty list for an unknown noise
	            type, otherwise a list of arrays understood by :meth:`apply`.
	        """
	        if random.random() < 1.0 - self.apply_prob:
	            return None

	        image_np = np.array(image)
	        if image_np.size == 0:
	            # Nothing to perturb; this also avoids empty ranges in the samplers below.
	            return None
	        # The full shape is used, so 2-D (grayscale) arrays work as well as 3-D ones.
	        shape = image_np.shape
	        kind = self.noise_type

	        if kind == "gauss":
	            std = random.uniform(0.001, 0.3)
	            return [np.random.normal(0.0, std, shape)]

	        if kind == "s&p":
	            s_vs_p = 0.5
	            amount = random.uniform(0.001, 0.01)
	            num_salt = int(np.ceil(amount * image_np.size * s_vs_p))
	            num_pepper = int(np.ceil(amount * image_np.size * (1. - s_vs_p)))
	            # randint's upper bound is exclusive, so `dim` covers every valid index.
	            salt = [np.random.randint(0, dim, num_salt) for dim in shape]
	            pepper = [np.random.randint(0, dim, num_pepper) for dim in shape]
	            return [salt, pepper]

	        if kind == "poisson":
	            noisy = np.random.poisson(image_np)
	            # Keep only the residual so that apply() adds noise to each image
	            # instead of replacing it with a noisy copy of this one.
	            return [noisy.astype(np.float64) - image_np.astype(np.float64)]

	        if kind == "speckle":
	            factor = random.uniform(0.01, 0.4)
	            return [np.random.randn(*shape) * factor]

	        if kind == "band":
	            rows, cols = shape[0], shape[1]
	            smaller_dim = min(rows, cols)
	            num_bands = random.randrange(smaller_dim // 2, smaller_dim)
	            scale = random.uniform(1.0, 10.0)

	            offset = np.zeros(shape, dtype=np.float64)
	            # Horizontal bands: whole rows, so indices come from axis 0.
	            for row in random.sample(range(rows), num_bands):
	                offset[row, ...] += random.uniform(-1, 1) * scale
	            # Vertical bands: whole columns, so indices come from axis 1.
	            for col in random.sample(range(cols), num_bands):
	                offset[:, col, ...] += random.uniform(-1, 1) * scale
	            return [offset]

	        return []

	    def apply(self, image, params):
	        """Apply previously sampled noise to ``image``.

	        Args:
	            image: Input image; it is converted to an array with ``np.array``.
	            params: Value returned by :meth:`get_params`.

	        Returns:
	            ``image`` itself when there is nothing to apply, otherwise a new
	            ``PIL.Image`` built from the result clipped to [0, 255] as ``uint8``.
	        """
	        if not params:
	            return image

	        image_np = np.array(image)
	        kind = self.noise_type
	        if kind in ("gauss", "speckle"):
	            # Both are applied multiplicatively: pixel * (1 + noise).
	            noisy = image_np + image_np * params[0]
	        elif kind == "s&p":
	            # np.array() above already made a private copy, so in-place writes are safe.
	            noisy = image_np
	            noisy[tuple(params[0])] = 255  # salt
	            noisy[tuple(params[1])] = 0    # pepper
	        elif kind in ("poisson", "band"):
	            noisy = image_np + params[0]
	        else:
	            return image

	        return Image.fromarray(np.clip(noisy, 0, 255).astype('uint8'))

	    def __call__(self, input1, input2):
	        # Parameters are sampled once from input1 and reused for input2.
	        params = self.get_params(input1)
	        output1 = self.apply(input1, params)
	        output2 = check_input_type_perform_action(input2, Image.Image, self.apply, params)
	        return output1, output2


	class MakePower2:
	    """Resize both inputs so that width and height are multiples of ``base``.

	    Despite the name, sizes are rounded to the nearest multiple of ``base``,
	    not to a power of two.

	    Args:
	        base: The number both sides must be a multiple of.
	        method: Resampling filter passed to ``img.resize``.
	    """

	    def __init__(self, base, method=Image.BICUBIC):
	        self.base = base
	        self.method = method
	        self.print_size_warning = PrintSizeWarning()

	    def apply(self, img):
	        """Return ``img`` resized to the nearest multiple-of-``base`` size.

	        The image is returned unchanged when its size already qualifies.
	        """
	        ow, oh = img.size
	        # Sides shorter than base / 2 would round down to 0, which is not a
	        # valid resize target, so every side is kept at one `base` or more.
	        h = max(self.base, int(round(oh / self.base) * self.base))
	        w = max(self.base, int(round(ow / self.base) * self.base))
	        if h == oh and w == ow:
	            return img

	        self.print_size_warning(ow, oh, w, h)
	        return img.resize((w, h), self.method)

	    def __call__(self, input1, input2):
	        output1 = self.apply(input1)
	        # Only PIL images inside input2 are resized; other items pass through.
	        output2 = check_input_type_perform_action(input2, Image.Image, self.apply)
	        return output1, output2


	class RandomHorizontalFlip(object):
	    """Mirror both inputs left-to-right together, with a given probability.

	    Args:
	        p (float): probability of the inputs being flipped. Default value is 0.5
	    """

	    def __init__(self, p=0.5):
	        self.p = p

	    def get_params(self):
	        """Decide whether this call should flip."""
	        return bool(random.random() < self.p)

	    def __call__(self, input1, input2):
	        if not self.get_params():
	            return input1, input2
	        flipped1 = F.hflip(input1)
	        # Only PIL images inside input2 are flipped; other items pass through.
	        flipped2 = check_input_type_perform_action(input2, Image.Image, F.hflip)
	        return flipped1, flipped2


	class Flip:
	    """Mirror both inputs left-to-right when ``flip`` is truthy; otherwise do nothing."""

	    def __init__(self, flip):
	        self.flip = flip

	    def transpose(self, input):
	        """Return ``input`` mirrored left-to-right."""
	        return input.transpose(Image.FLIP_LEFT_RIGHT)

	    def __call__(self, input1, input2):
	        if not self.flip:
	            return input1, input2
	        mirrored1 = input1.transpose(Image.FLIP_LEFT_RIGHT)
	        # Only PIL images inside input2 are mirrored; other items pass through.
	        mirrored2 = check_input_type_perform_action(input2, Image.Image, self.transpose)
	        return mirrored1, mirrored2


	class ToTensor(object):
	    """Convert both inputs to tensors with ``F.to_tensor``.

	    The first input is always converted. For the second input, every item that
	    is not ``None`` (or every non-``None`` element of a list) is converted.

	    See ``torchvision.transforms.functional.to_tensor`` for the accepted input
	    types and the value scaling it performs.
	    """

	    def __call__(self, input1, input2):
	        tensor1 = F.to_tensor(input1)
	        # type=None: convert anything that is not None.
	        tensor2 = check_input_type_perform_action(input2, None, F.to_tensor)
	        return tensor1, tensor2


	class Normalize(object):
	    """Normalize two tensor inputs with per-channel mean and standard deviation.

	    Each input is passed to ``F.normalize`` with its own statistics. When the
	    second input's mean or std is not given, the first input's value is reused.

	    Args:
	        first_input_mean (sequence): Per-channel means for the first input.
	        first_input_std (sequence): Per-channel standard deviations for the first input.
	        second_input_mean (sequence, optional): Per-channel means for the second input.
	        second_input_std (sequence, optional): Per-channel standard deviations for the second input.
	        inplace (bool, optional): Forwarded to ``F.normalize``.
	    """

	    def __init__(self, first_input_mean, first_input_std, second_input_mean=None, second_input_std=None, inplace=False):
	        if second_input_mean is None:
	            second_input_mean = first_input_mean
	        if second_input_std is None:
	            second_input_std = first_input_std

	        self.first_input_mean = first_input_mean
	        self.first_input_std = first_input_std
	        self.second_input_mean = second_input_mean
	        self.second_input_std = second_input_std
	        self.inplace = inplace

	    def __call__(self, tensor1, tensor2):
	        """
	        Args:
	            tensor1 (Tensor): First tensor image to be normalized.
	            tensor2: Second tensor image, or a list of them; ``None`` entries are passed through.

	        Returns:
	            tuple: The normalized first input and the normalized second input.
	        """
	        normalized1 = F.normalize(tensor1, self.first_input_mean, self.first_input_std, self.inplace)
	        normalized2 = check_input_type_perform_action(
	            tensor2, None, F.normalize,
	            self.second_input_mean, self.second_input_std, self.inplace)
	        return normalized1, normalized2


	class PrintSizeWarning:
	    """Print a notice about an adjusted image size, but only on the first call."""

	    def __init__(self):
	        self.has_printed = False

	    def __call__(self, ow, oh, w, h):
	        """Report that an image of size ``(ow, oh)`` was adjusted to ``(w, h)``."""
	        if self.has_printed:
	            return
	        template = ("The image size needs to be a multiple of 4. "
	                    "The loaded image size was (%d, %d), so it was adjusted to "
	                    "(%d, %d). This adjustment will be done to all images "
	                    "whose sizes are not multiples of 4")
	        print(template % (ow, oh, w, h))
	        self.has_printed = True


	class ImagePathToImage:
	    """Load image files into RGB PIL images.

	    The first path is always loaded. For the second input, every path that is
	    not ``None`` (or every non-``None`` element of a list) is loaded.
	    """

	    def load_img(self, path):
	        """Open the image at ``path`` and return it converted to RGB.

	        The file is opened in a ``with`` block so the handle is closed as soon
	        as the pixel data has been converted, instead of staying open until
	        garbage collection.
	        """
	        with Image.open(path) as img:
	            return img.convert('RGB')

	    def __call__(self, filename1, filename2):
	        img1 = self.load_img(filename1)
	        # type=None: load anything that is not None.
	        img2 = check_input_type_perform_action(filename2, None, self.load_img)
	        return img1, img2


	class NumpyToTensor:
	    """Load arrays saved with NumPy into float tensors.

	    The first file is always loaded. For the second input, every entry that is
	    not ``None`` (or every non-``None`` element of a list) is loaded.
	    """

	    def load_numpy(self, filename):
	        """Load one ``.npy``/``.npz`` file and return it as a float32 tensor.

	        Args:
	            filename: Anything ``np.load`` accepts. For ``.npz`` archives the
	                array stored under the key ``'data'`` is used.

	        Returns:
	            A float tensor. 2-D arrays get a leading axis of length 1; other
	            arrays have their last axis moved to the front, which requires
	            exactly three axes.
	        """
	        loaded = np.load(filename)
	        if isinstance(loaded, np.lib.npyio.NpzFile):
	            # Read the array, then close the archive so its handle is not leaked.
	            with loaded:
	                npy = loaded['data']
	        else:
	            npy = loaded

	        if len(npy.shape) == 2:
	            npy = npy[np.newaxis, ...]
	        else:
	            npy = np.transpose(npy, (2, 0, 1))
	        return torch.from_numpy(npy).float()

	    def __call__(self, filename1, filename2):
	        tensor1 = self.load_numpy(filename1)
	        # type=None: load anything that is not None.
	        tensor2 = check_input_type_perform_action(filename2, None, self.load_numpy)
	        return tensor1, tensor2