# InstantSplat/utils/pose_utils.py
import math
import numpy as np
import torch
import torch.nn.functional as F
from typing import Tuple
from utils.stepfun import sample_np, sample
import scipy.interpolate  # splprep/splev are used below; a bare "import scipy" may not expose the submodule
def quad2rotation(q):
"""
Convert quaternion to rotation in batch. Since all operation in pytorch, support gradient passing.
Args:
quad (tensor, batch_size*4): quaternion.
Returns:
rot_mat (tensor, batch_size*3*3): rotation.
"""
if not isinstance(q, torch.Tensor):
q = torch.tensor(q).cuda()
norm = torch.sqrt(
q[:, 0] * q[:, 0] + q[:, 1] * q[:, 1] + q[:, 2] * q[:, 2] + q[:, 3] * q[:, 3]
)
q = q / norm[:, None]
rot = torch.zeros((q.size(0), 3, 3)).to(q)
r = q[:, 0]
x = q[:, 1]
y = q[:, 2]
z = q[:, 3]
rot[:, 0, 0] = 1 - 2 * (y * y + z * z)
rot[:, 0, 1] = 2 * (x * y - r * z)
rot[:, 0, 2] = 2 * (x * z + r * y)
rot[:, 1, 0] = 2 * (x * y + r * z)
rot[:, 1, 1] = 1 - 2 * (x * x + z * z)
rot[:, 1, 2] = 2 * (y * z - r * x)
rot[:, 2, 0] = 2 * (x * z - r * y)
rot[:, 2, 1] = 2 * (y * z + r * x)
rot[:, 2, 2] = 1 - 2 * (x * x + y * y)
return rot
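# Usage sketch (added for illustration): quaternions are w-first, so the
# identity quaternion [1, 0, 0, 0] maps to the 3x3 identity. The .cuda()
# fallback above only triggers for non-tensor inputs, so CPU tensors work too.
#   q = torch.tensor([[1.0, 0.0, 0.0, 0.0]])
#   R = quad2rotation(q)  # shape (1, 3, 3), equal to torch.eye(3)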
def get_camera_from_tensor(inputs):
"""
Convert quaternion and translation to transformation matrix.
"""
if not isinstance(inputs, torch.Tensor):
inputs = torch.tensor(inputs).cuda()
N = len(inputs.shape)
if N == 1:
inputs = inputs.unsqueeze(0)
    quad, T = inputs[:, :4], inputs[:, 4:]
    w2c = torch.eye(4).to(inputs).float()
    w2c[:3, :3] = quad2rotation(quad).squeeze(0)  # (1, 3, 3) -> (3, 3)
    w2c[:3, 3] = T.squeeze(0)
    return w2c
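# Usage sketch (added for illustration): a 7-vector [qw, qx, qy, qz, tx, ty,
# tz] becomes a single 4x4 matrix.
#   vec = torch.tensor([1.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0])
#   w2c = get_camera_from_tensor(vec)  # eye(4) with translation (0.5, 0, 0)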
def quadmultiply(q1, q2):
"""
Multiply two quaternions together using quaternion arithmetic
"""
# Extract scalar and vector parts of the quaternions
w1, x1, y1, z1 = q1.unbind(dim=-1)
w2, x2, y2, z2 = q2.unbind(dim=-1)
# Calculate the quaternion product
result_quaternion = torch.stack(
[
w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2,
w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2,
],
dim=-1,
)
return result_quaternion
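# Sanity-check sketch (added for illustration): under the w-first Hamilton
# product implemented above, i * j = k.
#   qi = torch.tensor([0.0, 1.0, 0.0, 0.0])
#   qj = torch.tensor([0.0, 0.0, 1.0, 0.0])
#   quadmultiply(qi, qj)  # -> tensor([0., 0., 0., 1.]), i.e. k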
def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor:
"""
Returns torch.sqrt(torch.max(0, x))
but with a zero subgradient where x is 0.
Source: https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html#matrix_to_quaternion
"""
ret = torch.zeros_like(x)
positive_mask = x > 0
ret[positive_mask] = torch.sqrt(x[positive_mask])
return ret
def rotation2quad(matrix: torch.Tensor) -> torch.Tensor:
"""
Convert rotations given as rotation matrices to quaternions.
Args:
matrix: Rotation matrices as tensor of shape (..., 3, 3).
Returns:
quaternions with real part first, as tensor of shape (..., 4).
Source: https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html#matrix_to_quaternion
"""
    if not isinstance(matrix, torch.Tensor):
        matrix = torch.tensor(matrix).cuda()
    if matrix.size(-1) != 3 or matrix.size(-2) != 3:
        raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.")
batch_dim = matrix.shape[:-2]
m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind(
matrix.reshape(batch_dim + (9,)), dim=-1
)
q_abs = _sqrt_positive_part(
torch.stack(
[
1.0 + m00 + m11 + m22,
1.0 + m00 - m11 - m22,
1.0 - m00 + m11 - m22,
1.0 - m00 - m11 + m22,
],
dim=-1,
)
)
# we produce the desired quaternion multiplied by each of r, i, j, k
quat_by_rijk = torch.stack(
[
torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1),
torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1),
torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1),
torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1),
],
dim=-2,
)
# We floor here at 0.1 but the exact level is not important; if q_abs is small,
# the candidate won't be picked.
flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device)
quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr))
# if not for numerical problems, quat_candidates[i] should be same (up to a sign),
# forall i; we pick the best-conditioned one (with the largest denominator)
return quat_candidates[
F.one_hot(q_abs.argmax(dim=-1), num_classes=4) > 0.5, :
].reshape(batch_dim + (4,))
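# Round-trip sketch (added for illustration): rotation2quad and quad2rotation
# invert each other up to quaternion sign.
#   R = torch.eye(3).unsqueeze(0)        # (1, 3, 3)
#   q = rotation2quad(R)                 # ~[1, 0, 0, 0], w-first
#   torch.allclose(quad2rotation(q), R)  # True up to float error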
def get_tensor_from_camera(RT, Tquad=False):
"""
Convert transformation matrix to quaternion and translation.
"""
if not isinstance(RT, torch.Tensor):
RT = torch.tensor(RT).cuda()
rot = RT[:3, :3].unsqueeze(0).detach()
quat = rotation2quad(rot).squeeze()
tran = RT[:3, 3].detach()
return torch.cat([quat, tran])
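# Usage sketch (added for illustration): get_tensor_from_camera inverts
# get_camera_from_tensor for a single pose, up to quaternion sign.
#   w2c = torch.eye(4)
#   vec = get_tensor_from_camera(w2c)   # (7,) = quat (w-first) + translation
#   torch.allclose(get_camera_from_tensor(vec), w2c)  # True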
def normalize(x):
return x / np.linalg.norm(x)
def viewmatrix(lookdir, up, position, subtract_position=False):
"""Construct lookat view matrix."""
vec2 = normalize((lookdir - position) if subtract_position else lookdir)
vec0 = normalize(np.cross(up, vec2))
vec1 = normalize(np.cross(vec2, vec0))
m = np.stack([vec0, vec1, vec2, position], axis=1)
return m
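# Usage sketch (added for illustration): the columns of the returned 3x4
# matrix are (right, up, forward, position) in world space.
#   m = viewmatrix(np.array([0.0, 0.0, 1.0]),  # look along +z
#                  np.array([0.0, 1.0, 0.0]),  # up hint
#                  np.zeros(3))                # camera at the origin
#   # m == [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]]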
def poses_avg(poses):
"""New pose using average position, z-axis, and up vector of input poses."""
position = poses[:, :3, 3].mean(0)
z_axis = poses[:, :3, 2].mean(0)
up = poses[:, :3, 1].mean(0)
cam2world = viewmatrix(z_axis, up, position)
return cam2world
def focus_point_fn(poses):
"""Calculate nearest point to all focal axes in poses."""
directions, origins = poses[:, :3, 2:3], poses[:, :3, 3:4]
m = np.eye(3) - directions * np.transpose(directions, [0, 2, 1])
mt_m = np.transpose(m, [0, 2, 1]) @ m
focus_pt = np.linalg.inv(mt_m.mean(0)) @ (mt_m @ origins).mean(0)[:, 0]
return focus_pt
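# Derivation note (added): for a camera with unit focal axis d, the matrix
# (I - d d^T) projects a point onto the plane orthogonal to d, so the squared
# distance from point p to that camera's axis through origin o is
# |(I - d d^T)(p - o)|^2. Summing over all cameras and setting the gradient
# to zero gives exactly the normal equations solved in focus_point_fn above.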
def pad_poses(p):
"""Pad [..., 3, 4] pose matrices with a homogeneous bottom row [0,0,0,1]."""
bottom = np.broadcast_to([0, 0, 0, 1.], p[..., :1, :4].shape)
return np.concatenate([p[..., :3, :4], bottom], axis=-2)
def unpad_poses(p):
"""Remove the homogeneous bottom row from [..., 4, 4] pose matrices."""
return p[..., :3, :4]
def transform_poses_pca(poses):
"""Transforms poses so principal components lie on XYZ axes.
Args:
poses: a (N, 3, 4) array containing the cameras' camera to world transforms.
Returns:
A tuple (poses, transform), with the transformed poses and the applied
camera_to_world transforms.
"""
t = poses[:, :3, 3]
t_mean = t.mean(axis=0)
t = t - t_mean
eigval, eigvec = np.linalg.eig(t.T @ t)
# Sort eigenvectors in order of largest to smallest eigenvalue.
inds = np.argsort(eigval)[::-1]
eigvec = eigvec[:, inds]
rot = eigvec.T
if np.linalg.det(rot) < 0:
rot = np.diag(np.array([1, 1, -1])) @ rot
transform = np.concatenate([rot, rot @ -t_mean[:, None]], -1)
poses_recentered = unpad_poses(transform @ pad_poses(poses))
transform = np.concatenate([transform, np.eye(4)[3:]], axis=0)
# Flip coordinate system if z component of y-axis is negative
if poses_recentered.mean(axis=0)[2, 1] < 0:
poses_recentered = np.diag(np.array([1, -1, -1])) @ poses_recentered
transform = np.diag(np.array([1, -1, -1, 1])) @ transform
    # Just make sure it's in the [-1, 1]^3 cube.
scale_factor = 1. / np.max(np.abs(poses_recentered[:, :3, 3]))
poses_recentered[:, :3, 3] *= scale_factor
transform = np.diag(np.array([scale_factor] * 3 + [1])) @ transform
return poses_recentered, transform
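# Usage sketch (added, with hypothetical data): recenter a scattered set of
# camera-to-world poses.
#   poses = np.tile(np.eye(4)[:3], (5, 1, 1))        # (5, 3, 4)
#   poses[:, :3, 3] = np.random.randn(5, 3)          # scatter the positions
#   poses_pca, transform = transform_poses_pca(poses)
#   # transform (4x4) maps the original world frame into the recentered one;
#   # note it also carries the uniform scale applied to the translations.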
def recenter_poses(poses: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Recenter poses around the origin."""
cam2world = poses_avg(poses)
transform = np.linalg.inv(pad_poses(cam2world))
poses = transform @ pad_poses(poses)
return unpad_poses(poses), transform
def generate_ellipse_path(views, n_frames=600, const_speed=True, z_variation=0., z_phase=0.):
poses = []
for view in views:
tmp_view = np.eye(4)
tmp_view[:3] = np.concatenate([view.R.T, view.T[:, None]], 1)
tmp_view = np.linalg.inv(tmp_view)
tmp_view[:, 1:3] *= -1
poses.append(tmp_view)
poses = np.stack(poses, 0)
poses, transform = transform_poses_pca(poses)
# Calculate the focal point for the path (cameras point toward this).
center = focus_point_fn(poses)
# Path height sits at z=0 (in middle of zero-mean capture pattern).
    offset = np.array([center[0], center[1], 0])
# Calculate scaling for ellipse axes based on input camera positions.
sc = np.percentile(np.abs(poses[:, :3, 3] - offset), 90, axis=0)
# Use ellipse that is symmetric about the focal point in xy.
low = -sc + offset
high = sc + offset
    # Optional height variation (need not be symmetric).
z_low = np.percentile((poses[:, :3, 3]), 10, axis=0)
z_high = np.percentile((poses[:, :3, 3]), 90, axis=0)
def get_positions(theta):
# Interpolate between bounds with trig functions to get ellipse in x-y.
# Optionally also interpolate in z to change camera height along path.
return np.stack([
(low[0] + (high - low)[0] * (np.cos(theta) * .5 + .5)),
(low[1] + (high - low)[1] * (np.sin(theta) * .5 + .5)),
z_variation * (z_low[2] + (z_high - z_low)[2] *
(np.cos(theta + 2 * np.pi * z_phase) * .5 + .5)),
], -1)
theta = np.linspace(0, 2. * np.pi, n_frames + 1, endpoint=True)
positions = get_positions(theta)
if const_speed:
# Resample theta angles so that the velocity is closer to constant.
lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
theta = sample_np(None, theta, np.log(lengths), n_frames + 1)
positions = get_positions(theta)
# Throw away duplicated last position.
positions = positions[:-1]
# Set path's up vector to axis closest to average of input pose up vectors.
avg_up = poses[:, :3, 1].mean(0)
avg_up = avg_up / np.linalg.norm(avg_up)
ind_up = np.argmax(np.abs(avg_up))
up = np.eye(3)[ind_up] * np.sign(avg_up[ind_up])
# up = normalize(poses[:, :3, 1].sum(0))
render_poses = []
for p in positions:
render_pose = np.eye(4)
render_pose[:3] = viewmatrix(p - center, up, p)
render_pose = np.linalg.inv(transform) @ render_pose
render_pose[:3, 1:3] *= -1
render_poses.append(np.linalg.inv(render_pose))
return render_poses
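# Usage note (added): `views` is assumed to be a list of camera objects
# exposing .R (3x3) and .T (3,) in the 3D Gaussian Splatting convention; the
# function returns a list of 4x4 world-to-camera matrices along the ellipse.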
def generate_spiral_path(poses_arr,
n_frames: int = 180,
n_rots: int = 2,
zrate: float = .5) -> np.ndarray:
"""Calculates a forward facing spiral path for rendering."""
poses = poses_arr[:, :-2].reshape([-1, 3, 5])
bounds = poses_arr[:, -2:]
fix_rotation = np.array([
[0, -1, 0, 0],
[1, 0, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1],
], dtype=np.float32)
poses = poses[:, :3, :4] @ fix_rotation
scale = 1. / (bounds.min() * .75)
poses[:, :3, 3] *= scale
bounds *= scale
poses, transform = recenter_poses(poses)
close_depth, inf_depth = bounds.min() * .9, bounds.max() * 5.
dt = .75
focal = 1 / (((1 - dt) / close_depth + dt / inf_depth))
# Get radii for spiral path using 90th percentile of camera positions.
positions = poses[:, :3, 3]
radii = np.percentile(np.abs(positions), 90, 0)
radii = np.concatenate([radii, [1.]])
# Generate poses for spiral path.
render_poses = []
cam2world = poses_avg(poses)
up = poses[:, :3, 1].mean(0)
for theta in np.linspace(0., 2. * np.pi * n_rots, n_frames, endpoint=False):
t = radii * [np.cos(theta), -np.sin(theta), -np.sin(theta * zrate), 1.]
position = cam2world @ t
lookat = cam2world @ [0, 0, -focal, 1.]
z_axis = position - lookat
render_pose = np.eye(4)
render_pose[:3] = viewmatrix(z_axis, up, position)
render_pose = np.linalg.inv(transform) @ render_pose
render_pose[:3, 1:3] *= -1
render_pose[:3, 3] /= scale
render_poses.append(np.linalg.inv(render_pose))
render_poses = np.stack(render_poses, axis=0)
return render_poses
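# Usage note (added): poses_arr is assumed to follow the LLFF poses_bounds
# layout, i.e. shape (N, 17) with a flattened 3x5 pose (rotation, translation,
# [height, width, focal]) plus near/far depth bounds per image.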
def generate_interpolated_path(
views,
n_interp,
spline_degree = 5,
smoothness = 0.03,
rot_weight = 0.1,
lock_up = False,
fixed_up_vector = None,
lookahead_i = None,
frames_per_colmap = None,
const_speed = False,
n_buffer = None,
periodic = False,
n_interp_as_total = False,
):
"""Creates a smooth spline path between input keyframe camera poses.
Spline is calculated with poses in format (position, lookat-point, up-point).
Args:
poses: (n, 3, 4) array of input pose keyframes.
n_interp: returned path will have n_interp * (n - 1) total poses.
spline_degree: polynomial degree of B-spline.
smoothness: parameter for spline smoothing, 0 forces exact interpolation.
rot_weight: relative weighting of rotation/translation in spline solve.
lock_up: if True, forced to use given Up and allow Lookat to vary.
fixed_up_vector: replace the interpolated `up` with a fixed vector.
lookahead_i: force the look direction to look at the pose `i` frames ahead.
frames_per_colmap: conversion factor for the desired average velocity.
const_speed: renormalize spline to have constant delta between each pose.
n_buffer: Number of buffer frames to insert at the start and end of the
path. Helps keep the ends of a spline path straight.
periodic: make the spline path periodic (perfect loop).
n_interp_as_total: use n_interp as total number of poses in path rather than
the number of poses to interpolate between each input.
Returns:
Array of new camera poses with shape (n_interp * (n - 1), 3, 4), or
(n_interp, 3, 4) if n_interp_as_total is set.
"""
poses = []
for view in views:
tmp_view = np.eye(4)
tmp_view[:3] = np.concatenate([view.R.T, view.T[:, None]], 1)
tmp_view = np.linalg.inv(tmp_view)
tmp_view[:, 1:3] *= -1
poses.append(tmp_view)
poses = np.stack(poses, 0)
def poses_to_points(poses, dist):
"""Converts from pose matrices to (position, lookat, up) format."""
pos = poses[:, :3, -1]
lookat = poses[:, :3, -1] - dist * poses[:, :3, 2]
up = poses[:, :3, -1] + dist * poses[:, :3, 1]
return np.stack([pos, lookat, up], 1)
def points_to_poses(points):
"""Converts from (position, lookat, up) format to pose matrices."""
poses = []
for i in range(len(points)):
pos, lookat_point, up_point = points[i]
if lookahead_i is not None:
if i + lookahead_i < len(points):
lookat = pos - points[i + lookahead_i][0]
else:
lookat = pos - lookat_point
up = (up_point - pos) if fixed_up_vector is None else fixed_up_vector
poses.append(viewmatrix(lookat, up, pos))
return np.array(poses)
def insert_buffer_poses(poses, n_buffer):
"""Insert extra poses at the start and end of the path."""
def average_distance(points):
distances = np.linalg.norm(points[1:] - points[0:-1], axis=-1)
return np.mean(distances)
def shift(pose, dz):
result = np.copy(pose)
z = result[:3, 2]
z /= np.linalg.norm(z)
# Move along forward-backward axis. -z is forward.
result[:3, 3] += z * dz
return result
dz = average_distance(poses[:, :3, 3])
prefix = np.stack([shift(poses[0], (i + 1) * dz) for i in range(n_buffer)])
prefix = prefix[::-1] # reverse order
suffix = np.stack(
[shift(poses[-1], -(i + 1) * dz) for i in range(n_buffer)]
)
result = np.concatenate([prefix, poses, suffix])
return result
def remove_buffer_poses(poses, u, n_frames, u_keyframes, n_buffer):
u_keyframes = u_keyframes[n_buffer:-n_buffer]
mask = (u >= u_keyframes[0]) & (u <= u_keyframes[-1])
poses = poses[mask]
u = u[mask]
n_frames = len(poses)
return poses, u, n_frames, u_keyframes
def interp(points, u, k, s):
"""Runs multidimensional B-spline interpolation on the input points."""
sh = points.shape
pts = np.reshape(points, (sh[0], -1))
k = min(k, sh[0] - 1)
tck, u_keyframes = scipy.interpolate.splprep(pts.T, k=k, s=s, per=periodic)
new_points = np.array(scipy.interpolate.splev(u, tck))
new_points = np.reshape(new_points.T, (len(u), sh[1], sh[2]))
return new_points, u_keyframes
if n_buffer is not None:
poses = insert_buffer_poses(poses, n_buffer)
points = poses_to_points(poses, dist=rot_weight)
if n_interp_as_total:
n_frames = n_interp + 1 # Add extra since final pose is discarded.
else:
n_frames = n_interp * (points.shape[0] - 1)
u = np.linspace(0, 1, n_frames, endpoint=True)
new_points, u_keyframes = interp(points, u=u, k=spline_degree, s=smoothness)
poses = points_to_poses(new_points)
if n_buffer is not None:
poses, u, n_frames, u_keyframes = remove_buffer_poses(
poses, u, n_frames, u_keyframes, n_buffer
)
# poses, transform = transform_poses_pca(poses)
if frames_per_colmap is not None:
# Recalculate the number of frames to achieve desired average velocity.
positions = poses[:, :3, -1]
lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
total_length_colmap = lengths.sum()
print('old n_frames:', n_frames)
print('total_length_colmap:', total_length_colmap)
n_frames = int(total_length_colmap * frames_per_colmap)
print('new n_frames:', n_frames)
u = np.linspace(
np.min(u_keyframes), np.max(u_keyframes), n_frames, endpoint=True
)
new_points, _ = interp(points, u=u, k=spline_degree, s=smoothness)
poses = points_to_poses(new_points)
if const_speed:
# Resample timesteps so that the velocity is nearly constant.
positions = poses[:, :3, -1]
lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
u = sample(None, u, np.log(lengths), n_frames + 1)
new_points, _ = interp(points, u=u, k=spline_degree, s=smoothness)
poses = points_to_poses(new_points)
# return poses[:-1], u[:-1], u_keyframes
return poses[:-1]
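# Usage sketch (added, with hypothetical `views`): interpolate 30 frames
# between each consecutive pair of keyframe cameras.
#   render_poses = generate_interpolated_path(views, n_interp=30)
#   # shape (30 * (len(views) - 1) - 1, 3, 4); the final pose is dropped by
#   # the `poses[:-1]` return above.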