"""
Useful geometric operations, e.g. perspective projection and a differentiable
Rodrigues formula.
Parts of the code are taken from https://github.com/MandyMo/pytorch_HMR
"""
import numpy as np
import torch
def perspective_to_weak_perspective_torch(
perspective_camera,
focal_length,
img_res,
):
    # Convert a perspective camera translation [tx, ty, tz] to a
    # weak-perspective camera [s, tx, ty] given the bounding box size.
    # The inverse of this conversion recovers the translation used in a
    # full perspective projection.
    # if isinstance(focal_length, torch.Tensor):
    #     focal_length = focal_length[:, 0]
tx = perspective_camera[:, 0]
ty = perspective_camera[:, 1]
tz = perspective_camera[:, 2]
weak_perspective_camera = torch.stack(
[2 * focal_length / (img_res * tz + 1e-9), tx, ty],
dim=-1,
)
return weak_perspective_camera
def convert_perspective_to_weak_perspective(
perspective_camera,
focal_length,
img_res,
):
    # Convert a perspective camera translation [tx, ty, tz] to a
    # weak-perspective camera [s, tx, ty] given the bounding box size.
    # The inverse of this conversion recovers the translation used in a
    # full perspective projection.
    # if isinstance(focal_length, torch.Tensor):
    #     focal_length = focal_length[:, 0]
weak_perspective_camera = torch.stack(
[
2 * focal_length / (img_res * perspective_camera[:, 2] + 1e-9),
perspective_camera[:, 0],
perspective_camera[:, 1],
],
dim=-1,
)
return weak_perspective_camera
def convert_weak_perspective_to_perspective(
weak_perspective_camera, focal_length, img_res
):
# Convert Weak Perspective Camera [s, tx, ty] to camera translation [tx, ty, tz]
# in 3D given the bounding box size
# This camera translation can be used in a full perspective projection
# if isinstance(focal_length, torch.Tensor):
# focal_length = focal_length[:, 0]
perspective_camera = torch.stack(
[
weak_perspective_camera[:, 1],
weak_perspective_camera[:, 2],
2 * focal_length / (img_res * weak_perspective_camera[:, 0] + 1e-9),
],
dim=-1,
)
return perspective_camera
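# The converters above all encode the same weak-perspective relation between
# the scale s and the depth tz:
#     s = 2 * focal_length / (img_res * tz)   <=>   tz = 2 * focal_length / (img_res * s)
# i.e. an object at depth tz, viewed with the given focal length in a crop of
# img_res pixels, appears at scale s. The 1e-9 terms only guard against
# division by zero.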
def get_default_cam_t(f, img_res):
cam = torch.tensor([[5.0, 0.0, 0.0]])
return convert_weak_perspective_to_perspective(cam, f, img_res)
def estimate_translation_np(S, joints_2d, joints_conf, focal_length, img_size):
"""Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
Input:
S: (25, 3) 3D joint locations
joints: (25, 3) 2D joint locations and confidence
Returns:
(3,) camera translation vector
"""
num_joints = S.shape[0]
# focal length
f = np.array([focal_length[0], focal_length[1]])
    # optical center (cx, cy), assuming img_size is given as (height, width)
    center = np.array([img_size[1] / 2.0, img_size[0] / 2.0])
# transformations
Z = np.reshape(np.tile(S[:, 2], (2, 1)).T, -1)
XY = np.reshape(S[:, 0:2], -1)
O = np.tile(center, num_joints)
F = np.tile(f, num_joints)
weight2 = np.reshape(np.tile(np.sqrt(joints_conf), (2, 1)).T, -1)
# least squares
Q = np.array(
[
F * np.tile(np.array([1, 0]), num_joints),
F * np.tile(np.array([0, 1]), num_joints),
O - np.reshape(joints_2d, -1),
]
).T
c = (np.reshape(joints_2d, -1) - O) * Z - F * XY
# weighted least squares
W = np.diagflat(weight2)
Q = np.dot(W, Q)
c = np.dot(W, c)
# square matrix
A = np.dot(Q.T, Q)
b = np.dot(Q.T, c)
# solution
trans = np.linalg.solve(A, b)
return trans
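# Derivation of the linear system solved above: with the rotation fixed to
# identity, the perspective projection of joint (X, Y, Z) under translation
# (tx, ty, tz) is
#     u = fx * (X + tx) / (Z + tz) + cx,    v = fy * (Y + ty) / (Z + tz) + cy.
# Multiplying through by (Z + tz) gives, per joint, two equations linear in
# the unknowns (tx, ty, tz):
#     fx * tx + (cx - u) * tz = (u - cx) * Z - fx * X
#     fy * ty + (cy - v) * tz = (v - cy) * Z - fy * Y
# Stacking these rows yields Q @ t = c, which is solved in weighted least
# squares with the per-joint confidences as weights.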
def estimate_translation(
S,
joints_2d,
focal_length,
img_size,
use_all_joints=False,
rotation=None,
pad_2d=False,
):
"""Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
Input:
S: (B, 49, 3) 3D joint locations
joints: (B, 49, 3) 2D joint locations and confidence
Returns:
(B, 3) camera translation vectors
"""
if pad_2d:
batch, num_pts = joints_2d.shape[:2]
joints_2d_pad = torch.ones((batch, num_pts, 3))
joints_2d_pad[:, :, :2] = joints_2d
joints_2d_pad = joints_2d_pad.to(joints_2d.device)
joints_2d = joints_2d_pad
device = S.device
if rotation is not None:
S = torch.einsum("bij,bkj->bki", rotation, S)
    # Use all joints, or only joints 25:49 (the ground-truth joints)
    if use_all_joints:
        S = S.cpu().numpy()
        joints_2d = joints_2d.cpu().numpy()
    else:
        S = S[:, 25:, :].cpu().numpy()
        joints_2d = joints_2d[:, 25:, :].cpu().numpy()
joints_conf = joints_2d[:, :, -1]
joints_2d = joints_2d[:, :, :-1]
trans = np.zeros((S.shape[0], 3), dtype=np.float32)
# Find the translation for each example in the batch
for i in range(S.shape[0]):
S_i = S[i]
joints_i = joints_2d[i]
conf_i = joints_conf[i]
trans[i] = estimate_translation_np(
S_i, joints_i, conf_i, focal_length=focal_length, img_size=img_size
)
return torch.from_numpy(trans).to(device)
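# Minimal usage sketch (illustrative; the focal length and image size values
# below are arbitrary assumptions, not values from this project):
#
#     S = torch.rand(2, 49, 3)          # batch of 3D joints
#     joints_2d = torch.rand(2, 49, 3)  # 2D joints + confidence
#     cam_t = estimate_translation(
#         S, joints_2d, focal_length=(5000.0, 5000.0), img_size=(224.0, 224.0)
#     )
#     # cam_t: (2, 3) per-sample camera translations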
def estimate_translation_cam(
S, joints_2d, focal_length, img_size, use_all_joints=False, rotation=None
):
"""Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
Input:
S: (B, 49, 3) 3D joint locations
joints: (B, 49, 3) 2D joint locations and confidence
Returns:
(B, 3) camera translation vectors
"""
    def estimate_translation_np(S, joints_2d, joints_conf, focal_length, img_size):
        """Find the camera translation that brings the 3D joints S closest to
        their corresponding 2D projections joints_2d.
        Note: this nested helper shadows the module-level function of the same
        name and differs from it only in the index order of the optical center.
        Input:
            S: (25, 3) 3D joint locations
            joints_2d: (25, 2) 2D joint locations
            joints_conf: (25,) per-joint confidences
        Returns:
            (3,) camera translation vector
        """
num_joints = S.shape[0]
# focal length
f = np.array([focal_length[0], focal_length[1]])
        # optical center (cx, cy); note the index order differs from the
        # module-level helper above, implying img_size = (width, height) here
        center = np.array([img_size[0] / 2.0, img_size[1] / 2.0])
# transformations
Z = np.reshape(np.tile(S[:, 2], (2, 1)).T, -1)
XY = np.reshape(S[:, 0:2], -1)
O = np.tile(center, num_joints)
F = np.tile(f, num_joints)
weight2 = np.reshape(np.tile(np.sqrt(joints_conf), (2, 1)).T, -1)
# least squares
Q = np.array(
[
F * np.tile(np.array([1, 0]), num_joints),
F * np.tile(np.array([0, 1]), num_joints),
O - np.reshape(joints_2d, -1),
]
).T
c = (np.reshape(joints_2d, -1) - O) * Z - F * XY
# weighted least squares
W = np.diagflat(weight2)
Q = np.dot(W, Q)
c = np.dot(W, c)
# square matrix
A = np.dot(Q.T, Q)
b = np.dot(Q.T, c)
# solution
trans = np.linalg.solve(A, b)
return trans
device = S.device
if rotation is not None:
S = torch.einsum("bij,bkj->bki", rotation, S)
    # Use all joints, or only joints 25:49 (the ground-truth joints)
    if use_all_joints:
        S = S.cpu().numpy()
        joints_2d = joints_2d.cpu().numpy()
    else:
        S = S[:, 25:, :].cpu().numpy()
        joints_2d = joints_2d[:, 25:, :].cpu().numpy()
joints_conf = joints_2d[:, :, -1]
joints_2d = joints_2d[:, :, :-1]
trans = np.zeros((S.shape[0], 3), dtype=np.float32)
# Find the translation for each example in the batch
for i in range(S.shape[0]):
S_i = S[i]
joints_i = joints_2d[i]
conf_i = joints_conf[i]
trans[i] = estimate_translation_np(
S_i, joints_i, conf_i, focal_length=focal_length, img_size=img_size
)
return torch.from_numpy(trans).to(device)
def get_coord_maps(size=56):
xx_ones = torch.ones([1, size], dtype=torch.int32)
xx_ones = xx_ones.unsqueeze(-1)
xx_range = torch.arange(size, dtype=torch.int32).unsqueeze(0)
xx_range = xx_range.unsqueeze(1)
xx_channel = torch.matmul(xx_ones, xx_range)
xx_channel = xx_channel.unsqueeze(-1)
yy_ones = torch.ones([1, size], dtype=torch.int32)
yy_ones = yy_ones.unsqueeze(1)
yy_range = torch.arange(size, dtype=torch.int32).unsqueeze(0)
yy_range = yy_range.unsqueeze(-1)
yy_channel = torch.matmul(yy_range, yy_ones)
yy_channel = yy_channel.unsqueeze(-1)
xx_channel = xx_channel.permute(0, 3, 1, 2)
yy_channel = yy_channel.permute(0, 3, 1, 2)
xx_channel = xx_channel.float() / (size - 1)
yy_channel = yy_channel.float() / (size - 1)
xx_channel = xx_channel * 2 - 1
yy_channel = yy_channel * 2 - 1
out = torch.cat([xx_channel, yy_channel], dim=1)
return out
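# get_coord_maps builds CoordConv-style coordinate channels: a (1, 2, size,
# size) tensor whose first channel varies along the width axis and whose
# second varies along the height axis, both normalized to [-1, 1]. A minimal
# check (illustrative, not part of the original source):
#
#     maps = get_coord_maps(size=4)
#     assert maps.shape == (1, 2, 4, 4)
#     features = torch.cat([torch.rand(1, 64, 4, 4), maps], dim=1)  # append to a feature map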
def look_at(eye, at=np.array([0, 0, 0]), up=np.array([0, 0, 1]), eps=1e-5):
at = at.astype(float).reshape(1, 3)
up = up.astype(float).reshape(1, 3)
eye = eye.reshape(-1, 3)
up = up.repeat(eye.shape[0] // up.shape[0], axis=0)
eps = np.array([eps]).reshape(1, 1).repeat(up.shape[0], axis=0)
    z_axis = eye - at
    # normalize each axis per row, with eps guarding against zero-length vectors
    # (the original used np.max over the whole batch, which only behaves
    # correctly for a single eye position)
    z_axis /= np.maximum(np.linalg.norm(z_axis, axis=1, keepdims=True), eps)
    x_axis = np.cross(up, z_axis)
    x_axis /= np.maximum(np.linalg.norm(x_axis, axis=1, keepdims=True), eps)
    y_axis = np.cross(z_axis, x_axis)
    y_axis /= np.maximum(np.linalg.norm(y_axis, axis=1, keepdims=True), eps)
r_mat = np.concatenate(
(x_axis.reshape(-1, 3, 1), y_axis.reshape(-1, 3, 1), z_axis.reshape(-1, 3, 1)),
axis=2,
)
return r_mat
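# Note on conventions: look_at returns world-from-camera rotation matrices
# whose columns are the camera x/y/z axes; since z_axis = eye - at, the camera
# looks down its local -z axis, as in the OpenGL convention.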
def to_sphere(u, v):
theta = 2 * np.pi * u
phi = np.arccos(1 - 2 * v)
cx = np.sin(phi) * np.cos(theta)
cy = np.sin(phi) * np.sin(theta)
cz = np.cos(phi)
s = np.stack([cx, cy, cz])
return s
def sample_on_sphere(range_u=(0, 1), range_v=(0, 1)):
u = np.random.uniform(*range_u)
v = np.random.uniform(*range_v)
return to_sphere(u, v)
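# Why this sampling is uniform: with v ~ U(0, 1), cos(phi) = 1 - 2v is uniform
# on [-1, 1], and a uniform cos(phi) combined with a uniform azimuth
# theta = 2 * pi * u distributes points uniformly by area on the unit sphere.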
def sample_pose_on_sphere(range_v=(0, 1), range_u=(0, 1), radius=1, up=[0, 1, 0]):
# sample location on unit sphere
loc = sample_on_sphere(range_u, range_v)
# sample radius if necessary
if isinstance(radius, tuple):
radius = np.random.uniform(*radius)
loc = loc * radius
R = look_at(loc, up=np.array(up))[0]
RT = np.concatenate([R, loc.reshape(3, 1)], axis=1)
RT = torch.Tensor(RT.astype(np.float32))
return RT
def rectify_pose(camera_r, body_aa, rotate_x=False):
    """Compose a batch of axis-angle body orientations with a camera rotation."""
    body_r = batch_rodrigues(body_aa).reshape(-1, 3, 3)
    if rotate_x:
        # 180-degree rotation around the x axis
        rot_x = torch.tensor([[[1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, -1.0]]])
        body_r = body_r @ rot_x
    final_r = camera_r @ body_r
    body_aa = batch_rot2aa(final_r)
    return body_aa
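# `batch_rodrigues` (axis-angle -> rotation matrices) and `batch_rot2aa`
# (rotation matrices -> axis-angle) are used above but defined elsewhere in
# this project (the module header credits pytorch_HMR). The stand-ins below
# are only a minimal sketch of the assumed behavior and signatures; they are
# not the project's implementations and skip the angle ~ 0 / pi edge cases.
def batch_rodrigues_sketch(axis_angle, eps=1e-8):
    # angle = norm of each (N, 3) axis-angle vector, axis = its direction
    angle = torch.norm(axis_angle + eps, dim=1, keepdim=True)
    axis = axis_angle / angle
    cos = torch.cos(angle).unsqueeze(-1)
    sin = torch.sin(angle).unsqueeze(-1)
    rx, ry, rz = torch.split(axis, 1, dim=1)
    zeros = torch.zeros_like(rx)
    # K: skew-symmetric cross-product matrix of the rotation axis
    K = torch.cat(
        [zeros, -rz, ry, rz, zeros, -rx, -ry, rx, zeros], dim=1
    ).view(-1, 3, 3)
    eye = torch.eye(3, device=axis_angle.device).unsqueeze(0)
    # Rodrigues formula: R = I + sin(a) * K + (1 - cos(a)) * K @ K
    return eye + sin * K + (1.0 - cos) * torch.bmm(K, K)

def batch_rot2aa_sketch(R, eps=1e-6):
    # rotation angle from the trace, clamped for numerical safety
    cos = torch.clamp(
        (R[:, 0, 0] + R[:, 1, 1] + R[:, 2, 2] - 1.0) * 0.5, -1.0 + eps, 1.0 - eps
    )
    angle = torch.acos(cos)
    # rotation axis from the skew-symmetric part of R
    axis = torch.stack(
        [R[:, 2, 1] - R[:, 1, 2], R[:, 0, 2] - R[:, 2, 0], R[:, 1, 0] - R[:, 0, 1]],
        dim=1,
    ) / (2.0 * torch.sin(angle).unsqueeze(-1))
    return axis * angle.unsqueeze(-1)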
def estimate_translation_k_np(S, joints_2d, joints_conf, K):
"""Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
Input:
S: (25, 3) 3D joint locations
joints: (25, 3) 2D joint locations and confidence
Returns:
(3,) camera translation vector
"""
num_joints = S.shape[0]
# focal length
focal = np.array([K[0, 0], K[1, 1]])
# optical center
center = np.array([K[0, 2], K[1, 2]])
# transformations
Z = np.reshape(np.tile(S[:, 2], (2, 1)).T, -1)
XY = np.reshape(S[:, 0:2], -1)
O = np.tile(center, num_joints)
F = np.tile(focal, num_joints)
weight2 = np.reshape(np.tile(np.sqrt(joints_conf), (2, 1)).T, -1)
# least squares
Q = np.array(
[
F * np.tile(np.array([1, 0]), num_joints),
F * np.tile(np.array([0, 1]), num_joints),
O - np.reshape(joints_2d, -1),
]
).T
c = (np.reshape(joints_2d, -1) - O) * Z - F * XY
# weighted least squares
W = np.diagflat(weight2)
Q = np.dot(W, Q)
c = np.dot(W, c)
# square matrix
A = np.dot(Q.T, Q)
b = np.dot(Q.T, c)
# solution
trans = np.linalg.solve(A, b)
return trans
def estimate_translation_k(
S,
joints_2d,
K,
use_all_joints=False,
rotation=None,
pad_2d=False,
):
"""Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
Input:
S: (B, 49, 3) 3D joint locations
joints: (B, 49, 3) 2D joint locations and confidence
Returns:
(B, 3) camera translation vectors
"""
if pad_2d:
batch, num_pts = joints_2d.shape[:2]
joints_2d_pad = torch.ones((batch, num_pts, 3))
joints_2d_pad[:, :, :2] = joints_2d
joints_2d_pad = joints_2d_pad.to(joints_2d.device)
joints_2d = joints_2d_pad
device = S.device
if rotation is not None:
S = torch.einsum("bij,bkj->bki", rotation, S)
    # Use all joints, or only joints 25:49 (the ground-truth joints)
    if use_all_joints:
        S = S.cpu().numpy()
        joints_2d = joints_2d.cpu().numpy()
    else:
        S = S[:, 25:, :].cpu().numpy()
        joints_2d = joints_2d[:, 25:, :].cpu().numpy()
joints_conf = joints_2d[:, :, -1]
joints_2d = joints_2d[:, :, :-1]
trans = np.zeros((S.shape[0], 3), dtype=np.float32)
# Find the translation for each example in the batch
for i in range(S.shape[0]):
S_i = S[i]
joints_i = joints_2d[i]
conf_i = joints_conf[i]
K_i = K[i]
trans[i] = estimate_translation_k_np(S_i, joints_i, conf_i, K_i)
return torch.from_numpy(trans).to(device)
def weak_perspective_to_perspective_torch(
weak_perspective_camera, focal_length, img_res, min_s
):
# Convert Weak Perspective Camera [s, tx, ty] to camera translation [tx, ty, tz]
# in 3D given the bounding box size
# This camera translation can be used in a full perspective projection
    s = weak_perspective_camera[:, 0]
    # clamp the scale from below so that tz = 2f / (img_res * s) stays bounded
    s = torch.clamp(s, min_s)
tx = weak_perspective_camera[:, 1]
ty = weak_perspective_camera[:, 2]
perspective_camera = torch.stack(
[
tx,
ty,
2 * focal_length / (img_res * s + 1e-9),
],
dim=-1,
)
return perspective_camera
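# A small self-check of the weak-perspective round trip (illustrative only;
# the numbers below are arbitrary assumptions, not from the original source):
if __name__ == "__main__":
    cam_persp = torch.tensor([[0.1, -0.2, 10.0]])  # [tx, ty, tz]
    weak = convert_perspective_to_weak_perspective(
        cam_persp, focal_length=5000.0, img_res=224
    )
    recovered = convert_weak_perspective_to_perspective(
        weak, focal_length=5000.0, img_res=224
    )
    # the round trip should reproduce the input translation
    assert torch.allclose(cam_persp, recovered, atol=1e-4)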