import os

import numpy as np
import torch
import torch.nn.functional as F
import yaml
from PIL import Image

def pil_loader(path):
    # Open the file explicitly so the handle is closed promptly; convert to
    # RGB while it is still open.
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


def default_loader(path):
    return pil_loader(path)

def tensor_img_to_npimg(tensor_img):
    """
    Turn a tensor image with shape CxHxW into a numpy array image with shape HxWxC.
    :param tensor_img: a 3-D tensor with shape CxHxW
    :return: a numpy array image with shape HxWxC
    """
    if not (torch.is_tensor(tensor_img) and tensor_img.ndimension() == 3):
        raise NotImplementedError("Unsupported tensor image: only 3-D tensors with shape CxHxW are supported.")
    npimg = np.transpose(tensor_img.numpy(), (1, 2, 0))
    npimg = npimg.squeeze()
    assert isinstance(npimg, np.ndarray) and (npimg.ndim in {2, 3})
    return npimg
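

# A minimal usage sketch (the demo function below is not part of the original
# API): round-trips a CHW tensor and checks the resulting HWC layout.
def _demo_tensor_img_to_npimg():
    hwc = tensor_img_to_npimg(torch.rand(3, 4, 5))
    assert hwc.shape == (4, 5, 3)    # channels moved to the last axis
    gray = tensor_img_to_npimg(torch.rand(1, 4, 5))
    assert gray.shape == (4, 5)      # a single channel is squeezed away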

def normalize(x):
    # Map an image tensor from [0, 1] to [-1, 1] in place.
    return x.mul_(2).add_(-1)


def same_padding(images, ksizes, strides, rates):
    """Zero-pad `images` the way TensorFlow's 'SAME' padding does, so a
    (dilated) sliding window covers every input position."""
    assert len(images.size()) == 4
    batch_size, channel, rows, cols = images.size()
    out_rows = (rows + strides[0] - 1) // strides[0]
    out_cols = (cols + strides[1] - 1) // strides[1]
    # Effective kernel extent once dilation is applied.
    effective_k_row = (ksizes[0] - 1) * rates[0] + 1
    effective_k_col = (ksizes[1] - 1) * rates[1] + 1
    padding_rows = max(0, (out_rows - 1) * strides[0] + effective_k_row - rows)
    padding_cols = max(0, (out_cols - 1) * strides[1] + effective_k_col - cols)
    # Split the total padding as evenly as possible; the extra pixel (if any)
    # goes to the bottom/right, matching TensorFlow.
    padding_top = padding_rows // 2
    padding_left = padding_cols // 2
    padding_bottom = padding_rows - padding_top
    padding_right = padding_cols - padding_left
    paddings = (padding_left, padding_right, padding_top, padding_bottom)
    images = torch.nn.ZeroPad2d(paddings)(images)
    return images
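

# A small sanity sketch (not from the original repo): with stride 2 over a
# 7x7 input and a 3x3 kernel, 'SAME' padding must leave ceil(7 / 2) = 4
# window positions per axis, which needs a 9x9 padded input.
def _demo_same_padding():
    x = torch.zeros(1, 1, 7, 7)
    padded = same_padding(x, ksizes=[3, 3], strides=[2, 2], rates=[1, 1])
    assert padded.shape == (1, 1, 9, 9)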

def extract_image_patches(images, ksizes, strides, rates, padding='same'):
    """
    Extract patches from images and put them in the C output dimension.
    :param images: a 4-D tensor with shape [batch, channels, in_rows, in_cols]
    :param ksizes: [ksize_rows, ksize_cols], the size of the sliding window
        for each dimension of images
    :param strides: [stride_rows, stride_cols]
    :param rates: [dilation_rows, dilation_cols]
    :param padding: 'same' or 'valid'
    :return: a 3-D tensor with shape
        [batch, channels * ksize_rows * ksize_cols, n_patches]
    """
    assert len(images.size()) == 4
    if padding == 'same':
        images = same_padding(images, ksizes, strides, rates)
    elif padding != 'valid':
        raise NotImplementedError('Unsupported padding type: {}. '
                                  'Only "same" or "valid" are supported.'.format(padding))

    unfold = torch.nn.Unfold(kernel_size=ksizes,
                             dilation=rates,
                             padding=0,
                             stride=strides)
    patches = unfold(images)
    return patches
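

# A usage sketch: with 'same' padding and stride 1, every pixel anchors one
# patch, so the unfolded output has H * W columns of C * k * k values each.
def _demo_extract_image_patches():
    x = torch.randn(2, 3, 8, 8)
    patches = extract_image_patches(x, ksizes=[3, 3], strides=[1, 1],
                                    rates=[1, 1], padding='same')
    assert patches.shape == (2, 3 * 3 * 3, 8 * 8)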

def random_bbox(config, batch_size):
    """Generate random (top, left, height, width) bounding boxes.

    Args:
        config: dict with 'image_shape', 'mask_shape', 'margin' and
            'mask_batch_same' entries.
        batch_size: number of boxes to generate.

    Returns:
        torch.Tensor: int64 tensor of shape [batch_size, 4] holding one
        (top, left, height, width) box per sample.
    """
    img_height, img_width, _ = config['image_shape']
    h, w = config['mask_shape']
    margin_height, margin_width = config['margin']
    maxt = img_height - margin_height - h
    maxl = img_width - margin_width - w
    bbox_list = []
    if config['mask_batch_same']:
        # Share a single random box across the whole batch.
        t = np.random.randint(margin_height, maxt)
        l = np.random.randint(margin_width, maxl)
        bbox_list.append((t, l, h, w))
        bbox_list = bbox_list * batch_size
    else:
        for i in range(batch_size):
            t = np.random.randint(margin_height, maxt)
            l = np.random.randint(margin_width, maxl)
            bbox_list.append((t, l, h, w))

    return torch.tensor(bbox_list, dtype=torch.int64)


def test_random_bbox():
    config = {'image_shape': [256, 256, 3],
              'mask_shape': [128, 128],
              'margin': [0, 0],
              'mask_batch_same': True}
    bbox = random_bbox(config, batch_size=1)
    return bbox


def bbox2mask(bboxes, height, width, max_delta_h, max_delta_w):
    """Rasterize (t, l, h, w) boxes into a [batch, 1, height, width] float
    mask, randomly shrinking each box by up to max_delta // 2 on each side."""
    batch_size = bboxes.size(0)
    mask = torch.zeros((batch_size, 1, height, width), dtype=torch.float32)
    for i in range(batch_size):
        bbox = bboxes[i]
        delta_h = np.random.randint(max_delta_h // 2 + 1)
        delta_w = np.random.randint(max_delta_w // 2 + 1)
        mask[i, :, bbox[0] + delta_h:bbox[0] + bbox[2] - delta_h,
             bbox[1] + delta_w:bbox[1] + bbox[3] - delta_w] = 1.
    return mask


def test_bbox2mask():
    image_shape = [256, 256, 3]
    config = {'image_shape': image_shape,
              'mask_shape': [128, 128],
              'margin': [0, 0],
              'mask_batch_same': True}
    max_delta_shape = [32, 32]
    bbox = random_bbox(config, batch_size=1)
    mask = bbox2mask(bbox, image_shape[0], image_shape[1], max_delta_shape[0], max_delta_shape[1])
    return mask


def local_patch(x, bbox_list):
    """Crop the per-sample region given by each (t, l, h, w) box."""
    assert len(x.size()) == 4
    patches = []
    for i, bbox in enumerate(bbox_list):
        t, l, h, w = bbox
        patches.append(x[i, :, t:t + h, l:l + w])
    return torch.stack(patches, dim=0)
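

# A minimal sketch with illustrative box values (in training these come from
# random_bbox): local_patch stacks one crop per sample.
def _demo_local_patch():
    x = torch.randn(2, 3, 16, 16)
    bboxes = torch.tensor([[0, 0, 8, 8],
                           [4, 4, 8, 8]], dtype=torch.int64)
    crops = local_patch(x, bboxes)
    assert crops.shape == (2, 3, 8, 8)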

def mask_image(x, bboxes, config):
    height, width, _ = config['image_shape']
    max_delta_h, max_delta_w = config['max_delta_shape']
    mask = bbox2mask(bboxes, height, width, max_delta_h, max_delta_w)
    if x.is_cuda:
        mask = mask.cuda()

    if config['mask_type'] == 'hole':
        # Zero out the masked region.
        result = x * (1. - mask)
    elif config['mask_type'] == 'mosaic':
        # Replace the masked region with a nearest-neighbor mosaic:
        # downsample by the mosaic unit size, then upsample back.
        mosaic_unit_size = config['mosaic_unit_size']
        downsampled_image = F.interpolate(x, scale_factor=1. / mosaic_unit_size, mode='nearest')
        upsampled_image = F.interpolate(downsampled_image, size=(height, width), mode='nearest')
        result = upsampled_image * mask + x * (1. - mask)
    else:
        raise NotImplementedError('Not implemented mask type.')

    return result, mask
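

# A usage sketch with a hypothetical config dict; the keys mirror the ones
# read above and by random_bbox, the values are illustrative.
def _demo_mask_image():
    config = {'image_shape': [64, 64, 3],
              'mask_shape': [32, 32],
              'margin': [0, 0],
              'mask_batch_same': True,
              'max_delta_shape': [8, 8],
              'mask_type': 'hole'}
    x = torch.randn(2, 3, 64, 64)
    bboxes = random_bbox(config, batch_size=2)
    result, mask = mask_image(x, bboxes, config)
    assert result.shape == x.shape and mask.shape == (2, 1, 64, 64)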

def spatial_discounting_mask(config):
    """Generate spatial discounting mask constant.

    Spatial discounting mask is first introduced in publication:
    Generative Image Inpainting with Contextual Attention, Yu et al.

    Args:
        config: dict with 'spatial_discounting_gamma', 'mask_shape',
            'discounted_mask' and 'cuda' entries.

    Returns:
        torch.Tensor: spatial discounting mask of shape [1, 1, height, width]
    """
    gamma = config['spatial_discounting_gamma']
    height, width = config['mask_shape']
    shape = [1, 1, height, width]
    if config['discounted_mask']:
        mask_values = np.ones((height, width))
        for i in range(height):
            for j in range(width):
                # Weight decays as gamma ** (distance to the nearest border),
                # so pixels close to known content keep a higher weight.
                mask_values[i, j] = max(
                    gamma ** min(i, height - i),
                    gamma ** min(j, width - j))
        mask_values = np.expand_dims(mask_values, 0)
        mask_values = np.expand_dims(mask_values, 0)
    else:
        mask_values = np.ones(shape)
    spatial_discounting_mask_tensor = torch.tensor(mask_values, dtype=torch.float32)
    if config['cuda']:
        spatial_discounting_mask_tensor = spatial_discounting_mask_tensor.cuda()
    return spatial_discounting_mask_tensor
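

# A small sketch (hypothetical config values): border pixels keep weight
# gamma ** 0 == 1 and the weight decays toward the mask center.
def _demo_spatial_discounting_mask():
    config = {'spatial_discounting_gamma': 0.9,
              'mask_shape': [8, 8],
              'discounted_mask': True,
              'cuda': False}
    sd_mask = spatial_discounting_mask(config)
    assert sd_mask.shape == (1, 1, 8, 8)
    assert sd_mask[0, 0, 0, 0] == 1.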

def reduce_mean(x, axis=None, keepdim=False):
    # Reduce over all axes when none are given; iterate from the highest
    # axis down so earlier dims keep their indices. Note the explicit
    # `is None` check, so an empty axis list is not mistaken for "all axes".
    if axis is None:
        axis = range(len(x.shape))
    for i in sorted(axis, reverse=True):
        x = torch.mean(x, dim=i, keepdim=keepdim)
    return x


def reduce_std(x, axis=None, keepdim=False):
    if axis is None:
        axis = range(len(x.shape))
    for i in sorted(axis, reverse=True):
        x = torch.std(x, dim=i, keepdim=keepdim)
    return x


def reduce_sum(x, axis=None, keepdim=False):
    if axis is None:
        axis = range(len(x.shape))
    for i in sorted(axis, reverse=True):
        x = torch.sum(x, dim=i, keepdim=keepdim)
    return x
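

# A minimal sketch: the reduce_* helpers mimic TF-style multi-axis reductions
# by folding a torch reduction over the listed axes (all axes when axis is None).
def _demo_reduce_ops():
    x = torch.arange(24, dtype=torch.float32).reshape(2, 3, 4)
    assert reduce_sum(x).item() == x.sum().item()
    assert reduce_mean(x, axis=[1, 2]).shape == (2,)
    assert reduce_sum(x, axis=[0], keepdim=True).shape == (1, 3, 4)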

def flow_to_image(flow):
    """Convert a flow map to a Middlebury color-coded image.
    Part of the code is adapted from FlowNet.
    """
    out = []
    maxu = -999.
    maxv = -999.
    minu = 999.
    minv = 999.
    maxrad = -1
    for i in range(flow.shape[0]):
        u = flow[i, :, :, 0]
        v = flow[i, :, :, 1]
        # Zero out pixels whose flow is unknown (sentinel magnitude > 1e7).
        idxunknown = (abs(u) > 1e7) | (abs(v) > 1e7)
        u[idxunknown] = 0
        v[idxunknown] = 0
        maxu = max(maxu, np.max(u))
        minu = min(minu, np.min(u))
        maxv = max(maxv, np.max(v))
        minv = min(minv, np.min(v))
        rad = np.sqrt(u ** 2 + v ** 2)
        maxrad = max(maxrad, np.max(rad))
        # Normalize by the largest magnitude seen so far so colors saturate
        # at maxrad.
        u = u / (maxrad + np.finfo(float).eps)
        v = v / (maxrad + np.finfo(float).eps)
        img = compute_color(u, v)
        out.append(img)
    return np.float32(np.uint8(out))


def pt_flow_to_image(flow):
    """Convert a flow map to a Middlebury color-coded image (PyTorch version,
    NCHW input). Part of the code is adapted from FlowNet.
    """
    out = []
    maxu = torch.tensor(-999)
    maxv = torch.tensor(-999)
    minu = torch.tensor(999)
    minv = torch.tensor(999)
    maxrad = torch.tensor(-1)
    if torch.cuda.is_available():
        maxu = maxu.cuda()
        maxv = maxv.cuda()
        minu = minu.cuda()
        minv = minv.cuda()
        maxrad = maxrad.cuda()
    for i in range(flow.shape[0]):
        u = flow[i, 0, :, :]
        v = flow[i, 1, :, :]
        # Zero out pixels whose flow is unknown (sentinel magnitude > 1e7).
        idxunknown = (torch.abs(u) > 1e7) | (torch.abs(v) > 1e7)
        u[idxunknown] = 0
        v[idxunknown] = 0
        maxu = torch.max(maxu, torch.max(u))
        minu = torch.min(minu, torch.min(u))
        maxv = torch.max(maxv, torch.max(v))
        minv = torch.min(minv, torch.min(v))
        # The int64 cast keeps rad's dtype compatible with the int64 maxrad
        # accumulator above, at the cost of truncating sub-pixel magnitudes.
        rad = torch.sqrt((u ** 2 + v ** 2).float()).to(torch.int64)
        maxrad = torch.max(maxrad, torch.max(rad))
        u = u / (maxrad + torch.finfo(torch.float32).eps)
        v = v / (maxrad + torch.finfo(torch.float32).eps)

        img = pt_compute_color(u, v)
        out.append(img)

    return torch.stack(out, dim=0)
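

# A usage sketch with synthetic flow: flow_to_image expects NHWC numpy input,
# pt_flow_to_image expects NCHW tensors (moved to the GPU when CUDA is
# available, since pt_compute_color builds its buffers there).
def _demo_flow_visualization():
    np_flow = np.random.uniform(-5., 5., (1, 16, 16, 2)).astype(np.float32)
    assert flow_to_image(np_flow).shape == (1, 16, 16, 3)
    pt_flow = torch.from_numpy(np_flow).permute(0, 3, 1, 2).contiguous()
    if torch.cuda.is_available():
        pt_flow = pt_flow.cuda()
    assert pt_flow_to_image(pt_flow).shape == (1, 3, 16, 16)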

def highlight_flow(flow):
    """Convert flow into middlebury color code image."""
    out = []
    s = flow.shape
    for i in range(flow.shape[0]):
        img = np.ones((s[1], s[2], 3)) * 144.
        u = flow[i, :, :, 0]
        v = flow[i, :, :, 1]
        for h in range(s[1]):
            for w in range(s[2]):
                # Mark the pixel each flow vector points at.
                ui = int(u[h, w])
                vi = int(v[h, w])
                img[ui, vi, :] = 255.
        out.append(img)
    return np.float32(np.uint8(out))


def pt_highlight_flow(flow):
    """Convert flow into middlebury color code image (tensor input;
    mirrors highlight_flow)."""
    out = []
    s = flow.shape
    for i in range(flow.shape[0]):
        img = np.ones((s[1], s[2], 3)) * 144.
        u = flow[i, :, :, 0]
        v = flow[i, :, :, 1]
        for h in range(s[1]):
            for w in range(s[2]):
                ui = int(u[h, w])
                vi = int(v[h, w])
                img[ui, vi, :] = 255.
        out.append(img)
    return np.float32(np.uint8(out))


def compute_color(u, v):
    h, w = u.shape
    img = np.zeros([h, w, 3])
    # Treat NaN flow as zero and remember where it was.
    nanIdx = np.isnan(u) | np.isnan(v)
    u[nanIdx] = 0
    v[nanIdx] = 0

    colorwheel = make_color_wheel()
    ncols = np.size(colorwheel, 0)
    rad = np.sqrt(u ** 2 + v ** 2)
    # Map flow angle to a fractional position on the color wheel.
    a = np.arctan2(-v, -u) / np.pi
    fk = (a + 1) / 2 * (ncols - 1) + 1
    k0 = np.floor(fk).astype(int)
    k1 = k0 + 1
    k1[k1 == ncols + 1] = 1
    f = fk - k0
    for i in range(np.size(colorwheel, 1)):
        tmp = colorwheel[:, i]
        col0 = tmp[k0 - 1] / 255
        col1 = tmp[k1 - 1] / 255
        # Interpolate between the two nearest wheel entries.
        col = (1 - f) * col0 + f * col1
        idx = rad <= 1
        # Increase saturation with radius inside the unit circle...
        col[idx] = 1 - rad[idx] * (1 - col[idx])
        notidx = np.logical_not(idx)
        # ...and dim everything outside it.
        col[notidx] *= 0.75
        img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx)))
    return img

def pt_compute_color(u, v):
    h, w = u.shape
    img = torch.zeros([3, h, w])
    if torch.cuda.is_available():
        img = img.cuda()
    # Treat NaN flow as zero and remember where it was.
    nanIdx = torch.isnan(u) | torch.isnan(v)
    u[nanIdx] = 0.
    v[nanIdx] = 0.

    colorwheel = pt_make_color_wheel()
    if torch.cuda.is_available():
        colorwheel = colorwheel.cuda()
    ncols = colorwheel.size()[0]
    rad = torch.sqrt((u ** 2 + v ** 2).to(torch.float32))
    # Map flow angle to a fractional position on the color wheel.
    a = torch.atan2(-v.to(torch.float32), -u.to(torch.float32)) / np.pi
    fk = (a + 1) / 2 * (ncols - 1) + 1
    k0 = torch.floor(fk).to(torch.int64)
    k1 = k0 + 1
    k1[k1 == ncols + 1] = 1
    f = fk - k0.to(torch.float32)
    for i in range(colorwheel.size()[1]):
        tmp = colorwheel[:, i]
        col0 = tmp[k0 - 1]
        col1 = tmp[k1 - 1]
        # Interpolate between the two nearest wheel entries.
        col = (1 - f) * col0 + f * col1
        idx = rad <= 1. / 255.
        col[idx] = 1 - rad[idx] * (1 - col[idx])
        notidx = ~idx
        col[notidx] *= 0.75
        img[i, :, :] = col * (~nanIdx).to(torch.float32)
    return img

def make_color_wheel():
    """Build the 55-entry Middlebury color wheel with values in [0, 255]."""
    RY, YG, GC, CB, BM, MR = (15, 6, 4, 11, 13, 6)
    ncols = RY + YG + GC + CB + BM + MR
    colorwheel = np.zeros([ncols, 3])
    col = 0

    # red -> yellow
    colorwheel[0:RY, 0] = 255
    colorwheel[0:RY, 1] = np.floor(255 * np.arange(0, RY) / RY)
    col += RY

    # yellow -> green
    colorwheel[col:col + YG, 0] = 255 - np.floor(255 * np.arange(0, YG) / YG)
    colorwheel[col:col + YG, 1] = 255
    col += YG

    # green -> cyan
    colorwheel[col:col + GC, 1] = 255
    colorwheel[col:col + GC, 2] = np.floor(255 * np.arange(0, GC) / GC)
    col += GC

    # cyan -> blue
    colorwheel[col:col + CB, 1] = 255 - np.floor(255 * np.arange(0, CB) / CB)
    colorwheel[col:col + CB, 2] = 255
    col += CB

    # blue -> magenta
    colorwheel[col:col + BM, 2] = 255
    colorwheel[col:col + BM, 0] = np.floor(255 * np.arange(0, BM) / BM)
    col += BM

    # magenta -> red
    colorwheel[col:col + MR, 2] = 255 - np.floor(255 * np.arange(0, MR) / MR)
    colorwheel[col:col + MR, 0] = 255
    return colorwheel

def pt_make_color_wheel():
    """Build the same 55-entry color wheel as make_color_wheel, but as a
    torch tensor with values in [0, 1]."""
    RY, YG, GC, CB, BM, MR = (15, 6, 4, 11, 13, 6)
    ncols = RY + YG + GC + CB + BM + MR
    colorwheel = torch.zeros([ncols, 3])
    col = 0

    # red -> yellow
    colorwheel[0:RY, 0] = 1.
    colorwheel[0:RY, 1] = torch.arange(0, RY, dtype=torch.float32) / RY
    col += RY

    # yellow -> green
    colorwheel[col:col + YG, 0] = 1. - (torch.arange(0, YG, dtype=torch.float32) / YG)
    colorwheel[col:col + YG, 1] = 1.
    col += YG

    # green -> cyan
    colorwheel[col:col + GC, 1] = 1.
    colorwheel[col:col + GC, 2] = torch.arange(0, GC, dtype=torch.float32) / GC
    col += GC

    # cyan -> blue
    colorwheel[col:col + CB, 1] = 1. - (torch.arange(0, CB, dtype=torch.float32) / CB)
    colorwheel[col:col + CB, 2] = 1.
    col += CB

    # blue -> magenta
    colorwheel[col:col + BM, 2] = 1.
    colorwheel[col:col + BM, 0] = torch.arange(0, BM, dtype=torch.float32) / BM
    col += BM

    # magenta -> red
    colorwheel[col:col + MR, 2] = 1. - (torch.arange(0, MR, dtype=torch.float32) / MR)
    colorwheel[col:col + MR, 0] = 1.
    return colorwheel
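

# A sanity sketch: both wheels describe the same 55 hues; the numpy wheel is
# scaled to [0, 255] (with per-entry flooring), the torch wheel to [0, 1].
def _demo_color_wheels():
    np_wheel = make_color_wheel()
    pt_wheel = pt_make_color_wheel()
    assert np_wheel.shape == (55, 3) and tuple(pt_wheel.shape) == (55, 3)
    # Flooring in the numpy version shifts entries by less than 1/255.
    assert np.allclose(np_wheel / 255., pt_wheel.numpy(), atol=1. / 255.)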

def is_image_file(filename):
    IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']
    filename_lower = filename.lower()
    return any(filename_lower.endswith(extension) for extension in IMG_EXTENSIONS)

def deprocess(img):
    # Inverse of normalize: map an image tensor from [-1, 1] back to [0, 1] in place.
    img = img.add_(1).div_(2)
    return img

def get_config(config):
    with open(config, 'r') as stream:
        return yaml.load(stream, Loader=yaml.Loader)

def get_model_list(dirname, key, iteration=0):
    if not os.path.exists(dirname):
        return None
    gen_models = [os.path.join(dirname, f) for f in os.listdir(dirname) if
                  os.path.isfile(os.path.join(dirname, f)) and key in f and ".pt" in f]
    if not gen_models:
        return None
    gen_models.sort()
    if iteration == 0:
        # With no iteration given, return the latest checkpoint; names sort
        # lexicographically because iterations are zero-padded to 8 digits.
        return gen_models[-1]
    for model_name in gen_models:
        if '{:0>8d}'.format(iteration) in model_name:
            return model_name
    raise ValueError('No model found for iteration {}'.format(iteration))
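

# A usage sketch with hypothetical file names (the directory and checkpoints
# below are created just for the demo): checkpoints are matched on the key
# substring and on the zero-padded iteration embedded in the filename.
def _demo_get_model_list(tmp_dir='/tmp/checkpoint_demo'):
    os.makedirs(tmp_dir, exist_ok=True)
    for it in (1000, 2000):
        open(os.path.join(tmp_dir, 'gen_{:0>8d}.pt'.format(it)), 'w').close()
    assert get_model_list(tmp_dir, 'gen').endswith('gen_00002000.pt')
    assert get_model_list(tmp_dir, 'gen', iteration=1000).endswith('gen_00001000.pt')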

if __name__ == '__main__':
    test_random_bbox()
    mask = test_bbox2mask()
    print(mask.shape)
    import matplotlib.pyplot as plt

    # mask has shape [batch, 1, H, W]; show the first sample as a 2-D image.
    plt.imshow(mask[0, 0], cmap='gray')
    plt.show()