Spaces:

Andy1621
/

uniformerv2_demo

Running

App Files Files Community

uniformerv2_demo / transforms.py

SakuraD

init

469404a over 1 year ago

raw history blame contribute delete

No virus

14.4 kB

	import torchvision
	import random
	from PIL import Image, ImageOps
	import numpy as np
	import numbers
	import math
	import torch


	class GroupRandomCrop(object):
	def __init__(self, size):
	if isinstance(size, numbers.Number):
	self.size = (int(size), int(size))
	else:
	self.size = size

	def __call__(self, img_group):

	w, h = img_group[0].size
	th, tw = self.size

	out_images = list()

	x1 = random.randint(0, w - tw)
	y1 = random.randint(0, h - th)

	for img in img_group:
	assert(img.size[0] == w and img.size[1] == h)
	if w == tw and h == th:
	out_images.append(img)
	else:
	out_images.append(img.crop((x1, y1, x1 + tw, y1 + th)))

	return out_images


	class MultiGroupRandomCrop(object):
	def __init__(self, size, groups=1):
	if isinstance(size, numbers.Number):
	self.size = (int(size), int(size))
	else:
	self.size = size
	self.groups = groups

	def __call__(self, img_group):

	w, h = img_group[0].size
	th, tw = self.size

	out_images = list()

	for i in range(self.groups):
	x1 = random.randint(0, w - tw)
	y1 = random.randint(0, h - th)

	for img in img_group:
	assert(img.size[0] == w and img.size[1] == h)
	if w == tw and h == th:
	out_images.append(img)
	else:
	out_images.append(img.crop((x1, y1, x1 + tw, y1 + th)))

	return out_images


	class GroupCenterCrop(object):
	def __init__(self, size):
	self.worker = torchvision.transforms.CenterCrop(size)

	def __call__(self, img_group):
	return [self.worker(img) for img in img_group]


	class GroupRandomHorizontalFlip(object):
	"""Randomly horizontally flips the given PIL.Image with a probability of 0.5
	"""

	def __init__(self, is_flow=False):
	self.is_flow = is_flow

	def __call__(self, img_group, is_flow=False):
	v = random.random()
	if v < 0.5:
	ret = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in img_group]
	if self.is_flow:
	for i in range(0, len(ret), 2):
	# invert flow pixel values when flipping
	ret[i] = ImageOps.invert(ret[i])
	return ret
	else:
	return img_group


	class GroupNormalize(object):
	def __init__(self, mean, std):
	self.mean = mean
	self.std = std

	def __call__(self, tensor):
	rep_mean = self.mean * (tensor.size()[0] // len(self.mean))
	rep_std = self.std * (tensor.size()[0] // len(self.std))

	# TODO: make efficient
	for t, m, s in zip(tensor, rep_mean, rep_std):
	t.sub_(m).div_(s)

	return tensor


	class GroupScale(object):
	""" Rescales the input PIL.Image to the given 'size'.
	'size' will be the size of the smaller edge.
	For example, if height > width, then image will be
	rescaled to (size * height / width, size)
	size: size of the smaller edge
	interpolation: Default: PIL.Image.BILINEAR
	"""

	def __init__(self, size, interpolation=Image.BILINEAR):
	self.worker = torchvision.transforms.Resize(size, interpolation)

	def __call__(self, img_group):
	return [self.worker(img) for img in img_group]


	class GroupOverSample(object):
	def __init__(self, crop_size, scale_size=None, flip=True):
	self.crop_size = crop_size if not isinstance(
	crop_size, int) else (crop_size, crop_size)

	if scale_size is not None:
	self.scale_worker = GroupScale(scale_size)
	else:
	self.scale_worker = None
	self.flip = flip

	def __call__(self, img_group):

	if self.scale_worker is not None:
	img_group = self.scale_worker(img_group)

	image_w, image_h = img_group[0].size
	crop_w, crop_h = self.crop_size

	offsets = GroupMultiScaleCrop.fill_fix_offset(
	False, image_w, image_h, crop_w, crop_h)
	oversample_group = list()
	for o_w, o_h in offsets:
	normal_group = list()
	flip_group = list()
	for i, img in enumerate(img_group):
	crop = img.crop((o_w, o_h, o_w + crop_w, o_h + crop_h))
	normal_group.append(crop)
	flip_crop = crop.copy().transpose(Image.FLIP_LEFT_RIGHT)

	if img.mode == 'L' and i % 2 == 0:
	flip_group.append(ImageOps.invert(flip_crop))
	else:
	flip_group.append(flip_crop)

	oversample_group.extend(normal_group)
	if self.flip:
	oversample_group.extend(flip_group)
	return oversample_group


	class GroupFullResSample(object):
	def __init__(self, crop_size, scale_size=None, flip=True):
	self.crop_size = crop_size if not isinstance(
	crop_size, int) else (crop_size, crop_size)

	if scale_size is not None:
	self.scale_worker = GroupScale(scale_size)
	else:
	self.scale_worker = None
	self.flip = flip

	def __call__(self, img_group):

	if self.scale_worker is not None:
	img_group = self.scale_worker(img_group)

	image_w, image_h = img_group[0].size
	crop_w, crop_h = self.crop_size

	w_step = (image_w - crop_w) // 4
	h_step = (image_h - crop_h) // 4

	offsets = list()
	offsets.append((0 * w_step, 2 * h_step)) # left
	offsets.append((4 * w_step, 2 * h_step)) # right
	offsets.append((2 * w_step, 2 * h_step)) # center

	oversample_group = list()
	for o_w, o_h in offsets:
	normal_group = list()
	flip_group = list()
	for i, img in enumerate(img_group):
	crop = img.crop((o_w, o_h, o_w + crop_w, o_h + crop_h))
	normal_group.append(crop)
	if self.flip:
	flip_crop = crop.copy().transpose(Image.FLIP_LEFT_RIGHT)

	if img.mode == 'L' and i % 2 == 0:
	flip_group.append(ImageOps.invert(flip_crop))
	else:
	flip_group.append(flip_crop)

	oversample_group.extend(normal_group)
	oversample_group.extend(flip_group)
	return oversample_group


	class GroupMultiScaleCrop(object):

	def __init__(self, input_size, scales=None, max_distort=1,
	fix_crop=True, more_fix_crop=True):
	self.scales = scales if scales is not None else [1, .875, .75, .66]
	self.max_distort = max_distort
	self.fix_crop = fix_crop
	self.more_fix_crop = more_fix_crop
	self.input_size = input_size if not isinstance(input_size, int) else [
	input_size, input_size]
	self.interpolation = Image.BILINEAR

	def __call__(self, img_group):

	im_size = img_group[0].size

	crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size)
	crop_img_group = [
	img.crop(
	(offset_w,
	offset_h,
	offset_w +
	crop_w,
	offset_h +
	crop_h)) for img in img_group]
	ret_img_group = [img.resize((self.input_size[0], self.input_size[1]), self.interpolation)
	for img in crop_img_group]
	return ret_img_group

	def _sample_crop_size(self, im_size):
	image_w, image_h = im_size[0], im_size[1]

	# find a crop size
	base_size = min(image_w, image_h)
	crop_sizes = [int(base_size * x) for x in self.scales]
	crop_h = [
	self.input_size[1] if abs(
	x - self.input_size[1]) < 3 else x for x in crop_sizes]
	crop_w = [
	self.input_size[0] if abs(
	x - self.input_size[0]) < 3 else x for x in crop_sizes]

	pairs = []
	for i, h in enumerate(crop_h):
	for j, w in enumerate(crop_w):
	if abs(i - j) <= self.max_distort:
	pairs.append((w, h))

	crop_pair = random.choice(pairs)
	if not self.fix_crop:
	w_offset = random.randint(0, image_w - crop_pair[0])
	h_offset = random.randint(0, image_h - crop_pair[1])
	else:
	w_offset, h_offset = self._sample_fix_offset(
	image_w, image_h, crop_pair[0], crop_pair[1])

	return crop_pair[0], crop_pair[1], w_offset, h_offset

	def _sample_fix_offset(self, image_w, image_h, crop_w, crop_h):
	offsets = self.fill_fix_offset(
	self.more_fix_crop, image_w, image_h, crop_w, crop_h)
	return random.choice(offsets)

	@staticmethod
	def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h):
	w_step = (image_w - crop_w) // 4
	h_step = (image_h - crop_h) // 4

	ret = list()
	ret.append((0, 0)) # upper left
	ret.append((4 * w_step, 0)) # upper right
	ret.append((0, 4 * h_step)) # lower left
	ret.append((4 * w_step, 4 * h_step)) # lower right
	ret.append((2 * w_step, 2 * h_step)) # center

	if more_fix_crop:
	ret.append((0, 2 * h_step)) # center left
	ret.append((4 * w_step, 2 * h_step)) # center right
	ret.append((2 * w_step, 4 * h_step)) # lower center
	ret.append((2 * w_step, 0 * h_step)) # upper center

	ret.append((1 * w_step, 1 * h_step)) # upper left quarter
	ret.append((3 * w_step, 1 * h_step)) # upper right quarter
	ret.append((1 * w_step, 3 * h_step)) # lower left quarter
	ret.append((3 * w_step, 3 * h_step)) # lower righ quarter

	return ret


	class GroupRandomSizedCrop(object):
	"""Random crop the given PIL.Image to a random size of (0.08 to 1.0) of the original size
	and and a random aspect ratio of 3/4 to 4/3 of the original aspect ratio
	This is popularly used to train the Inception networks
	size: size of the smaller edge
	interpolation: Default: PIL.Image.BILINEAR
	"""

	def __init__(self, size, interpolation=Image.BILINEAR):
	self.size = size
	self.interpolation = interpolation

	def __call__(self, img_group):
	for attempt in range(10):
	area = img_group[0].size[0] * img_group[0].size[1]
	target_area = random.uniform(0.08, 1.0) * area
	aspect_ratio = random.uniform(3. / 4, 4. / 3)

	w = int(round(math.sqrt(target_area * aspect_ratio)))
	h = int(round(math.sqrt(target_area / aspect_ratio)))

	if random.random() < 0.5:
	w, h = h, w

	if w <= img_group[0].size[0] and h <= img_group[0].size[1]:
	x1 = random.randint(0, img_group[0].size[0] - w)
	y1 = random.randint(0, img_group[0].size[1] - h)
	found = True
	break
	else:
	found = False
	x1 = 0
	y1 = 0

	if found:
	out_group = list()
	for img in img_group:
	img = img.crop((x1, y1, x1 + w, y1 + h))
	assert(img.size == (w, h))
	out_group.append(
	img.resize(
	(self.size, self.size), self.interpolation))
	return out_group
	else:
	# Fallback
	scale = GroupScale(self.size, interpolation=self.interpolation)
	crop = GroupRandomCrop(self.size)
	return crop(scale(img_group))


	class ConvertDataFormat(object):
	def __init__(self, model_type):
	self.model_type = model_type

	def __call__(self, images):
	if self.model_type == '2D':
	return images
	tc, h, w = images.size()
	t = tc // 3
	images = images.view(t, 3, h, w)
	images = images.permute(1, 0, 2, 3)
	return images


	class Stack(object):

	def __init__(self, roll=False):
	self.roll = roll

	def __call__(self, img_group):
	if img_group[0].mode == 'L':
	return np.concatenate([np.expand_dims(x, 2)
	for x in img_group], axis=2)
	elif img_group[0].mode == 'RGB':
	if self.roll:
	return np.concatenate([np.array(x)[:, :, ::-1]
	for x in img_group], axis=2)
	else:
	#print(np.concatenate(img_group, axis=2).shape)
	# print(img_group[0].shape)
	return np.concatenate(img_group, axis=2)


	class ToTorchFormatTensor(object):
	""" Converts a PIL.Image (RGB) or numpy.ndarray (H x W x C) in the range [0, 255]
	to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] """

	def __init__(self, div=True):
	self.div = div

	def __call__(self, pic):
	if isinstance(pic, np.ndarray):
	# handle numpy array
	img = torch.from_numpy(pic).permute(2, 0, 1).contiguous()
	else:
	# handle PIL Image
	img = torch.ByteTensor(
	torch.ByteStorage.from_buffer(
	pic.tobytes()))
	img = img.view(pic.size[1], pic.size[0], len(pic.mode))
	# put it from HWC to CHW format
	# yikes, this transpose takes 80% of the loading time/CPU
	img = img.transpose(0, 1).transpose(0, 2).contiguous()
	return img.float().div(255) if self.div else img.float()


	class IdentityTransform(object):

	def __call__(self, data):
	return data


	if __name__ == "__main__":
	trans = torchvision.transforms.Compose([
	GroupScale(256),
	GroupRandomCrop(224),
	Stack(),
	ToTorchFormatTensor(),
	GroupNormalize(
	mean=[.485, .456, .406],
	std=[.229, .224, .225]
	)]
	)

	im = Image.open('../tensorflow-model-zoo.torch/lena_299.png')

	color_group = [im] * 3
	rst = trans(color_group)

	gray_group = [im.convert('L')] * 9
	gray_rst = trans(gray_group)

	trans2 = torchvision.transforms.Compose([
	GroupRandomSizedCrop(256),
	Stack(),
	ToTorchFormatTensor(),
	GroupNormalize(
	mean=[.485, .456, .406],
	std=[.229, .224, .225])
	])
	print(trans2(color_group))