Spaces:

xiexh20
/

HDM-interaction-recon

Sleeping

App Files Files Community

HDM-interaction-recon / dataset /img_utils.py

xiexh20

add hdm demo v1

2fd6166 4 months ago

raw history blame

No virus

4.83 kB

	"""
	common functions for image operations
	"""

	import cv2
	import numpy as np


	def crop(img, center, crop_size):
	    """
	    Crop a region around the given center, padding with zeros outside the image.

	    :param img: np.ndarray image of shape (H, W) or (H, W, C)
	    :param center: (x, y) crop center in pixels; np array, tuple or list
	    :param crop_size: np array or a float, size of the resulting crop
	    :return: the crop around the center, zero-padded where it leaves the image
	    :raises NotImplementedError: if img has more than 3 dimensions
	    """
	    assert isinstance(img, np.ndarray)
	    h, w = img.shape[:2]
	    # accept plain tuples/lists as well as arrays for the center
	    center = np.asarray(center)
	    topleft = np.round(center - crop_size / 2).astype(int)
	    bottom_right = np.round(center + crop_size / 2).astype(int)

	    # Clamp the requested window to the image.
	    # NOTE: the upper bound is clamped to (w - 1, h - 1) and used as an
	    # exclusive slice end, so the last image column/row is never part of
	    # the crop. The padding below uses the same convention, which keeps the
	    # output size consistent; the behavior is kept for compatibility.
	    x1 = max(0, topleft[0])
	    y1 = max(0, topleft[1])
	    x2 = min(w - 1, bottom_right[0])
	    y2 = min(h - 1, bottom_right[1])
	    cropped = img[y1:y2, x1:x2]

	    # amount of zero padding needed on each side of the clamped window
	    p1 = max(0, -topleft[0])  # padding in x, left side
	    p2 = max(0, -topleft[1])  # padding in y, top side
	    p3 = max(0, bottom_right[0] - w + 1)  # padding in x, right side
	    p4 = max(0, bottom_right[1] - h + 1)  # padding in y, bottom side

	    dim = len(img.shape)
	    if dim == 3:
	        # never pad the channel axis
	        padded = np.pad(cropped, [[p2, p4], [p1, p3], [0, 0]])
	    elif dim == 2:
	        padded = np.pad(cropped, [[p2, p4], [p1, p3]])
	    else:
	        # NotImplemented is a comparison sentinel, not an exception class
	        raise NotImplementedError(
	            "crop only supports 2D or 3D images, got {} dimensions".format(dim))
	    return padded


	def resize(img, img_size, mode=cv2.INTER_LINEAR):
	    """
	    Resize an image to the given size; the aspect ratio must stay the same.

	    :param img: image array whose first two dimensions are (height, width)
	    :param img_size: (width, height) of the target image size
	    :param mode: interpolation flag passed on to cv2.resize
	    :return: the resized image
	    """
	    src_h, src_w = img.shape[:2]
	    dst_w, dst_h = img_size[0], img_size[1]
	    src_aspect = 1.0 * src_w / src_h
	    dst_aspect = 1.0 * dst_w / dst_h
	    # refuse to distort the image: source and target aspect ratio must agree exactly
	    assert src_aspect == dst_aspect, \
	        "image aspect ration not matching, given image: {}, net input: {}".format(img.shape, img_size)
	    return cv2.resize(img, img_size, interpolation=mode)


	def masks2bbox(masks, threshold=127):
	    """
	    Compute the bounding box enclosing the foreground of all given masks.

	    :param masks: sequence of 2D mask arrays (assumed to share one shape)
	    :param threshold: pixels strictly greater than this value are foreground
	    :return: (bmin, bmax), np arrays with the (x, y) minimum and maximum
	        foreground pixel coordinates, both inclusive
	    :raises ValueError: if no pixel of any mask exceeds the threshold
	    """
	    # union of the thresholded masks
	    mask_comb = np.zeros_like(masks[0], dtype=bool)
	    for m in masks:
	        mask_comb = mask_comb | (m > threshold)

	    # np.where returns (row indices, column indices), i.e. (y, x)
	    yid, xid = np.where(mask_comb)
	    if xid.size == 0:
	        raise ValueError(
	            "cannot compute a bounding box: no mask pixel is above threshold {}".format(threshold))
	    bmin = np.array([xid.min(), yid.min()])
	    bmax = np.array([xid.max(), yid.max()])
	    return bmin, bmax


	def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5):
	    """
	    Solve for a translation that projects a gaussian at the origin into the crop.

	    The translation t = (tx, ty, tz) is chosen such that, with the selected
	    pinhole intrinsics,
	      - the origin (0, 0, 0) + t projects to the crop center (x0, y0), and
	      - the edge point (std_coverage, 0, 0) + t projects to the right edge
	        of the crop, (x0 + crop_size / 2, y0).

	    Parameters
	    ----------
	    crop_center: (x, y) of the crop center
	    crop_size: float, the size of the square crop
	    is_behave: if True use the predefined kinect intrinsics, otherwise the
	        intercap camera intrinsics
	    std_coverage: which edge point should be projected back to the edge of the 2d crop

	    Returns
	    -------
	    the estimated translation, np array of shape (3,)

	    Raises
	    ------
	    np.linalg.LinAlgError
	        if the linear system is singular (e.g. crop_size is 0, or the crop
	        center y coincides exactly with the principal point cy)
	    """
	    x0, y0 = crop_center
	    # image location the edge point has to project to: right edge of the crop
	    x1, y1 = x0 + crop_size / 2, y0

	    if is_behave:
	        # predefined kinect intrinsics
	        fx = 979.7844
	        fy = 979.840
	        cx = 1018.952
	        cy = 779.486
	    else:
	        # intercap camera
	        fx, fy = 918.457763671875, 918.4373779296875
	        cx, cy = 956.9661865234375, 555.944580078125

	    # A point (X, Y, Z) + t projects to pixel (u, v) iff
	    #   fx * (X + tx) + (cx - u) * (Z + tz) = 0
	    #   fy * (Y + ty) + (cy - v) * (Z + tz) = 0
	    # Unknowns are (tx, ty, tz, z0); only the translation is returned.
	    # Earlier 6-unknown formulations with a free depth per point were
	    # rank-deficient (rank 5), hence this reduced 4x4 system.
	    # first two equations: origin (0, 0, z0) is projected to the crop center
	    # last two equations: edge point (std_coverage, 0, 0) is projected to the edge of crop
	    # NOTE: row 3 previously used `fx - x1` instead of `cx - x1`, which moved
	    # the projected edge point by (cx - fx) pixels; results differ from the
	    # old values, so re-validate anything calibrated on them.
	    A = np.array([
	        [fx, 0, cx - x0, cx - x0],
	        [0, fy, cy - y0, cy - y0],
	        [fx, 0, cx - x1, 0],
	        [0, fy, cy - y1, 0],
	    ])
	    b = np.array([0, 0, -std_coverage * fx, 0])
	    # solve A x = b directly instead of forming the explicit inverse
	    x = np.linalg.solve(A, b)

	    return x[:3]