Spaces:

abhishek
/

first-order-motion-model

Build error

App Files Files Community

first-order-motion-model / augmentation.py

abhishek HF staff

first commit

9915c5d almost 3 years ago

raw

history blame contribute delete

12.5 kB

	"""
	Code from https://github.com/hassony2/torch_videovision
	"""

	import numbers

	import random
	import numpy as np
	import PIL

	from skimage.transform import resize, rotate
	from skimage.util import pad
	import torchvision

	import warnings

	from skimage import img_as_ubyte, img_as_float


	def crop_clip(clip, min_h, min_w, h, w):
	    """Crop every frame of a clip to the same rectangular window.

	    Args:
	        clip: Sequence of frames, either numpy.ndarray (indexed as
	            rows, columns, ...) or PIL.Image.Image. The type of the first
	            frame decides how the whole clip is handled.
	        min_h: Top row of the crop window.
	        min_w: Left column of the crop window.
	        h: Height of the crop window.
	        w: Width of the crop window.

	    Returns:
	        list: The cropped frames, in their original order.

	    Raises:
	        TypeError: If the first frame is neither a numpy.ndarray nor a
	            PIL.Image.Image.
	    """
	    first = clip[0]
	    if isinstance(first, np.ndarray):
	        # Only the two leading axes are sliced, so frames without a
	        # channel axis are cropped as well.
	        return [img[min_h:min_h + h, min_w:min_w + w] for img in clip]
	    if isinstance(first, PIL.Image.Image):
	        # PIL crop boxes are (left, upper, right, lower).
	        box = (min_w, min_h, min_w + w, min_h + h)
	        return [img.crop(box) for img in clip]
	    raise TypeError('Expected numpy.ndarray or PIL.Image '
	                    'but got list of {0}'.format(type(first)))


	def pad_clip(clip, h, w):
	    """Edge-pad a clip so each frame is at least ``h`` x ``w``.

	    Padding is split between the two sides of an axis; when the amount is
	    odd the extra row/column goes to the bottom/right. An axis that is
	    already large enough is left untouched.

	    Args:
	        clip: Sequence (or stacked array) of equally sized numpy.ndarray
	            frames; the size is read from ``clip[0].shape``.
	        h: Minimum height of the padded frames.
	        w: Minimum width of the padded frames.

	    Returns:
	        numpy.ndarray: The frames stacked along a new leading axis and
	        padded by repeating their edge values.
	    """
	    im_h, im_w = clip[0].shape[:2]
	    pad_h = (0, 0) if h < im_h else ((h - im_h) // 2, (h - im_h + 1) // 2)
	    pad_w = (0, 0) if w < im_w else ((w - im_w) // 2, (w - im_w + 1) // 2)

	    frames = np.asarray(clip)
	    # Never pad the frame axis or any trailing (e.g. channel) axes.
	    trailing = [(0, 0)] * (frames.ndim - 3)
	    # numpy.pad is called directly: skimage.util.pad was only a wrapper
	    # around it and is missing from recent scikit-image releases.
	    return np.pad(frames, [(0, 0), pad_h, pad_w] + trailing, mode='edge')


	def resize_clip(clip, size, interpolation='bilinear'):
	    """Resize every frame of a clip to a common size.

	    Args:
	        clip: Sequence of numpy.ndarray frames shaped (H, W, C) or of
	            PIL.Image.Image frames. The type of the first frame decides
	            how the whole clip is handled.
	        size: Either a number, in which case the shorter side is scaled to
	            it and the longer side follows proportionally (see
	            get_resize_sizes), or a pair whose two entries are swapped
	            before use. RandomResize in this file passes (width, height).
	        interpolation (str): 'bilinear' selects bilinear interpolation,
	            any other value selects nearest-neighbour.

	    Returns:
	        list: The resized frames. When ``size`` is a number that already
	        equals the shorter side, ``clip`` itself is returned unchanged.

	    Raises:
	        TypeError: If the first frame is neither a numpy.ndarray nor a
	            PIL.Image.Image.
	    """
	    first = clip[0]
	    scalar_size = isinstance(size, numbers.Number)
	    bilinear = interpolation == 'bilinear'

	    if isinstance(first, np.ndarray):
	        if scalar_size:
	            im_h, im_w, im_c = first.shape
	            # Shorter side already matches the requested size.
	            if min(im_h, im_w) == size:
	                return clip
	            # skimage's resize takes its output shape as (rows, cols),
	            # i.e. (height, width).
	            out_shape = get_resize_sizes(im_h, im_w, size)
	        else:
	            out_shape = size[1], size[0]
	        return [
	            resize(img, out_shape, order=1 if bilinear else 0,
	                   preserve_range=True, mode='constant', anti_aliasing=True)
	            for img in clip
	        ]

	    if isinstance(first, PIL.Image.Image):
	        if scalar_size:
	            im_w, im_h = first.size
	            # Shorter side already matches the requested size.
	            if min(im_h, im_w) == size:
	                return clip
	            new_h, new_w = get_resize_sizes(im_h, im_w, size)
	            # PIL's resize takes (width, height).
	            target = (new_w, new_h)
	        else:
	            # NOTE(review): the pair is swapped exactly as in the ndarray
	            # branch. For the (width, height) pair RandomResize passes,
	            # this hands PIL (height, width) - confirm the intended
	            # convention with callers before changing it.
	            target = size[1], size[0]
	        pil_inter = PIL.Image.BILINEAR if bilinear else PIL.Image.NEAREST
	        return [img.resize(target, pil_inter) for img in clip]

	    raise TypeError('Expected numpy.ndarray or PIL.Image '
	                    'but got list of {0}'.format(type(first)))


	def get_resize_sizes(im_h, im_w, size):
	    """Compute the frame size whose shorter side equals ``size``.

	    The longer side is scaled by the same factor and truncated with
	    ``int``. When both sides are equal the height is treated as the
	    shorter one.

	    Args:
	        im_h: Current height.
	        im_w: Current width.
	        size: Target length of the shorter side.

	    Returns:
	        tuple: (new_height, new_width).
	    """
	    if im_w >= im_h:
	        # Height is the shorter (or equal) side.
	        return size, int(size * im_w / im_h)
	    # Width is strictly the shorter side.
	    return int(size * im_h / im_w), size


	class RandomFlip(object):
	    """Randomly reverse a clip in time or mirror its frames left-to-right.

	    Each enabled flip fires with probability 0.5. A clip that is reversed
	    in time is returned straight away, so it is never mirrored as well.

	    Args:
	        time_flip (bool): Allow reversing the frame order.
	        horizontal_flip (bool): Allow mirroring every frame with
	            ``np.fliplr``.
	    """

	    def __init__(self, time_flip=False, horizontal_flip=False):
	        self.horizontal_flip = horizontal_flip
	        self.time_flip = time_flip

	    def __call__(self, clip):
	        """Return the clip, possibly time-reversed or mirrored."""
	        # A number is drawn for each decision even when that flip is
	        # disabled, so the random stream advances the same way either way.
	        reverse_draw = random.random()
	        if reverse_draw < 0.5 and self.time_flip:
	            return clip[::-1]

	        mirror_draw = random.random()
	        if mirror_draw < 0.5 and self.horizontal_flip:
	            return [np.fliplr(frame) for frame in clip]

	        return clip


	class RandomResize(object):
	    """Rescale every frame of a clip by one randomly drawn factor.

	    The factor is drawn uniformly from ``ratio`` and applied to both the
	    width and the height; the products are truncated to integers.

	    Args:
	        ratio (tuple): (low, high) bounds of the scaling factor.
	        interpolation (str): 'nearest' or 'bilinear', forwarded to
	            resize_clip. Defaults to 'nearest'.
	    """

	    def __init__(self, ratio=(3. / 4., 4. / 3.), interpolation='nearest'):
	        self.ratio = ratio
	        self.interpolation = interpolation

	    def __call__(self, clip):
	        """Resize the clip by a random factor.

	        Args:
	            clip: Sequence of numpy.ndarray frames (height and width on the
	                two leading axes) or of PIL.Image.Image frames.

	        Returns:
	            The frames as returned by resize_clip.

	        Raises:
	            TypeError: If the first frame is neither a numpy.ndarray nor a
	                PIL.Image.Image.
	        """
	        scaling_factor = random.uniform(self.ratio[0], self.ratio[1])

	        first = clip[0]
	        if isinstance(first, np.ndarray):
	            im_h, im_w = first.shape[:2]
	        elif isinstance(first, PIL.Image.Image):
	            im_w, im_h = first.size
	        else:
	            # Fail with the same error type as the other transforms
	            # instead of an unbound-variable error further down.
	            raise TypeError('Expected numpy.ndarray or PIL.Image '
	                            'but got list of {0}'.format(type(first)))

	        # resize_clip is given the target as (width, height).
	        new_size = (int(im_w * scaling_factor), int(im_h * scaling_factor))
	        return resize_clip(clip, new_size, interpolation=self.interpolation)


	class RandomCrop(object):
	    """Extract a random crop at the same location from every frame.

	    Frames smaller than the requested size are first edge-padded by
	    pad_clip, so the crop always has exactly the requested size.

	    Args:
	        size (sequence or int): Desired output size of the crop in format
	            (h, w). A single number produces a square crop.
	    """

	    def __init__(self, size):
	        if isinstance(size, numbers.Number):
	            size = (size, size)

	        self.size = size

	    def __call__(self, clip):
	        """Crop all frames of the clip with one shared random offset.

	        Args:
	            clip: Sequence of frames in format (h, w, c) as numpy.ndarray.
	                PIL.Image.Image frames pass the type check below, but
	                pad_clip reads ``clip[0].shape`` - TODO confirm whether
	                PIL input is meant to be supported here.

	        Returns:
	            list: The cropped frames as returned by crop_clip.

	        Raises:
	            TypeError: If the first frame is neither a numpy.ndarray nor a
	                PIL.Image.Image.
	        """
	        h, w = self.size
	        first = clip[0]
	        if (not isinstance(first, np.ndarray)
	                and not isinstance(first, PIL.Image.Image)):
	            raise TypeError('Expected numpy.ndarray or PIL.Image '
	                            'but got list of {0}'.format(type(first)))

	        # After padding, the clip is one stacked array whose axes 1 and 2
	        # are height and width, each at least as large as the crop.
	        clip = pad_clip(clip, h, w)
	        im_h, im_w = clip.shape[1:3]
	        # The horizontal offset depends on the widths and the vertical
	        # offset on the heights; an exact fit leaves no room to move.
	        x1 = 0 if w == im_w else random.randint(0, im_w - w)
	        y1 = 0 if h == im_h else random.randint(0, im_h - h)
	        return crop_clip(clip, y1, x1, h, w)


	class RandomRotation(object):
	    """Rotate an entire clip by one random angle within given bounds.

	    Args:
	        degrees (sequence or int): Range of degrees to select from.
	            If degrees is a number instead of a sequence like (min, max),
	            the range of degrees will be (-degrees, +degrees).

	    Raises:
	        ValueError: If ``degrees`` is a negative number or a sequence
	            whose length is not 2.
	    """

	    def __init__(self, degrees):
	        if isinstance(degrees, numbers.Number):
	            if degrees < 0:
	                raise ValueError('If degrees is a single number, '
	                                 'must be positive')
	            degrees = (-degrees, degrees)
	        elif len(degrees) != 2:
	            raise ValueError('If degrees is a sequence, '
	                             'it must be of len 2.')

	        self.degrees = degrees

	    def __call__(self, clip):
	        """Rotate every frame of the clip by the same random angle.

	        Args:
	            clip: Sequence of numpy.ndarray frames in format (h, w, c) or
	                of PIL.Image.Image frames.

	        Returns:
	            list: The rotated frames, of the same kind as the input frames.

	        Raises:
	            TypeError: If the first frame is neither a numpy.ndarray nor a
	                PIL.Image.Image.
	        """
	        # One angle is drawn per clip so all frames stay aligned.
	        angle = random.uniform(self.degrees[0], self.degrees[1])
	        first = clip[0]
	        if isinstance(first, np.ndarray):
	            return [rotate(image=img, angle=angle, preserve_range=True)
	                    for img in clip]
	        if isinstance(first, PIL.Image.Image):
	            return [img.rotate(angle) for img in clip]
	        raise TypeError('Expected numpy.ndarray or PIL.Image '
	                        'but got list of {0}'.format(type(first)))


	class ColorJitter(object):
	    """Randomly change the brightness, contrast, saturation and hue of a clip.

	    One set of factors is sampled per clip and applied to every frame, in
	    a randomly shuffled order.

	    Args:
	        brightness (float): How much to jitter brightness. brightness_factor
	            is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
	        contrast (float): How much to jitter contrast. contrast_factor
	            is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
	        saturation (float): How much to jitter saturation. saturation_factor
	            is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
	        hue (float): How much to jitter hue. hue_factor is chosen uniformly
	            from [-hue, hue]. Should be >=0 and <= 0.5.
	    """

	    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
	        self.brightness = brightness
	        self.contrast = contrast
	        self.saturation = saturation
	        self.hue = hue

	    def get_params(self, brightness, contrast, saturation, hue):
	        """Sample one factor per adjustment.

	        Args:
	            brightness, contrast, saturation, hue: Jitter strengths as
	                described on the class.

	        Returns:
	            tuple: (brightness_factor, contrast_factor, saturation_factor,
	            hue_factor). An entry is None when its strength is not
	            positive, meaning that adjustment is skipped.
	        """
	        if brightness > 0:
	            brightness_factor = random.uniform(
	                max(0, 1 - brightness), 1 + brightness)
	        else:
	            brightness_factor = None

	        if contrast > 0:
	            contrast_factor = random.uniform(
	                max(0, 1 - contrast), 1 + contrast)
	        else:
	            contrast_factor = None

	        if saturation > 0:
	            saturation_factor = random.uniform(
	                max(0, 1 - saturation), 1 + saturation)
	        else:
	            saturation_factor = None

	        if hue > 0:
	            hue_factor = random.uniform(-hue, hue)
	        else:
	            hue_factor = None
	        return brightness_factor, contrast_factor, saturation_factor, hue_factor

	    def _sample_transforms(self):
	        """Build the shuffled list of PIL-image adjustments for one clip."""
	        brightness, contrast, saturation, hue = self.get_params(
	            self.brightness, self.contrast, self.saturation, self.hue)
	        adjust = torchvision.transforms.functional

	        img_transforms = []
	        if brightness is not None:
	            img_transforms.append(
	                lambda img: adjust.adjust_brightness(img, brightness))
	        if saturation is not None:
	            img_transforms.append(
	                lambda img: adjust.adjust_saturation(img, saturation))
	        if hue is not None:
	            img_transforms.append(
	                lambda img: adjust.adjust_hue(img, hue))
	        if contrast is not None:
	            img_transforms.append(
	                lambda img: adjust.adjust_contrast(img, contrast))
	        random.shuffle(img_transforms)
	        return img_transforms

	    @staticmethod
	    def _apply_chain(img, funcs):
	        """Feed ``img`` through ``funcs`` in order and return the result."""
	        for func in funcs:
	            img = func(img)
	        return img

	    def __call__(self, clip):
	        """Apply the same random colour jitter to every frame.

	        Args:
	            clip (list): Frames as numpy.ndarray or PIL.Image.Image.

	        Returns:
	            list: The jittered frames. numpy input is passed through
	            img_as_ubyte and a PIL image for the adjustments, then
	            converted back with img_as_float and cast to float32; PIL
	            input yields PIL images.

	        Raises:
	            TypeError: If the first frame is neither a numpy.ndarray nor a
	                PIL.Image.Image.
	        """
	        first = clip[0]
	        if isinstance(first, np.ndarray):
	            # The adjustments work on PIL images, so wrap them in a
	            # round trip from array to PIL image and back.
	            pipeline = ([img_as_ubyte, torchvision.transforms.ToPILImage()]
	                        + self._sample_transforms()
	                        + [np.array, img_as_float])
	            with warnings.catch_warnings():
	                warnings.simplefilter("ignore")
	                return [self._apply_chain(img, pipeline).astype('float32')
	                        for img in clip]

	        if isinstance(first, PIL.Image.Image):
	            img_transforms = self._sample_transforms()
	            # Each adjustment receives the output of the previous one; with
	            # no adjustment enabled the frame is passed through unchanged.
	            return [self._apply_chain(img, img_transforms) for img in clip]

	        raise TypeError('Expected numpy.ndarray or PIL.Image '
	                        'but got list of {0}'.format(type(first)))


	class AllAugmentationTransform:
	    """Chain the augmentations of this module into a single callable.

	    Every ``*_param`` argument is a dict of keyword arguments for the
	    matching transform; passing None leaves that transform out. Enabled
	    transforms run in the fixed order flip, rotation, resize, crop,
	    jitter.
	    """

	    def __init__(self, resize_param=None, rotation_param=None, flip_param=None, crop_param=None, jitter_param=None):
	        # (kwargs, factory) pairs in application order. The factories are
	        # lazy, so a transform is only constructed when it is enabled.
	        stages = (
	            (flip_param, lambda kwargs: RandomFlip(**kwargs)),
	            (rotation_param, lambda kwargs: RandomRotation(**kwargs)),
	            (resize_param, lambda kwargs: RandomResize(**kwargs)),
	            (crop_param, lambda kwargs: RandomCrop(**kwargs)),
	            (jitter_param, lambda kwargs: ColorJitter(**kwargs)),
	        )
	        self.transforms = [
	            build(kwargs) for kwargs, build in stages if kwargs is not None
	        ]

	    def __call__(self, clip):
	        """Run the clip through every configured transform in order."""
	        result = clip
	        for stage in self.transforms:
	            result = stage(result)
	        return result