import math import numpy as np import torch import torch.nn.functional as F from typing import Tuple from utils.stepfun import sample_np, sample import scipy def quad2rotation(q): """ Convert quaternion to rotation in batch. Since all operation in pytorch, support gradient passing. Args: quad (tensor, batch_size*4): quaternion. Returns: rot_mat (tensor, batch_size*3*3): rotation. """ # bs = quad.shape[0] # qr, qi, qj, qk = quad[:, 0], quad[:, 1], quad[:, 2], quad[:, 3] # two_s = 2.0 / (quad * quad).sum(-1) # rot_mat = torch.zeros(bs, 3, 3).to(quad.get_device()) # rot_mat[:, 0, 0] = 1 - two_s * (qj**2 + qk**2) # rot_mat[:, 0, 1] = two_s * (qi * qj - qk * qr) # rot_mat[:, 0, 2] = two_s * (qi * qk + qj * qr) # rot_mat[:, 1, 0] = two_s * (qi * qj + qk * qr) # rot_mat[:, 1, 1] = 1 - two_s * (qi**2 + qk**2) # rot_mat[:, 1, 2] = two_s * (qj * qk - qi * qr) # rot_mat[:, 2, 0] = two_s * (qi * qk - qj * qr) # rot_mat[:, 2, 1] = two_s * (qj * qk + qi * qr) # rot_mat[:, 2, 2] = 1 - two_s * (qi**2 + qj**2) # return rot_mat if not isinstance(q, torch.Tensor): q = torch.tensor(q).cuda() norm = torch.sqrt( q[:, 0] * q[:, 0] + q[:, 1] * q[:, 1] + q[:, 2] * q[:, 2] + q[:, 3] * q[:, 3] ) q = q / norm[:, None] rot = torch.zeros((q.size(0), 3, 3)).to(q) r = q[:, 0] x = q[:, 1] y = q[:, 2] z = q[:, 3] rot[:, 0, 0] = 1 - 2 * (y * y + z * z) rot[:, 0, 1] = 2 * (x * y - r * z) rot[:, 0, 2] = 2 * (x * z + r * y) rot[:, 1, 0] = 2 * (x * y + r * z) rot[:, 1, 1] = 1 - 2 * (x * x + z * z) rot[:, 1, 2] = 2 * (y * z - r * x) rot[:, 2, 0] = 2 * (x * z - r * y) rot[:, 2, 1] = 2 * (y * z + r * x) rot[:, 2, 2] = 1 - 2 * (x * x + y * y) return rot def get_camera_from_tensor(inputs): """ Convert quaternion and translation to transformation matrix. """ if not isinstance(inputs, torch.Tensor): inputs = torch.tensor(inputs).cuda() N = len(inputs.shape) if N == 1: inputs = inputs.unsqueeze(0) # quad, T = inputs[:, :4], inputs[:, 4:] # # normalize quad # quad = F.normalize(quad) # R = quad2rotation(quad) # RT = torch.cat([R, T[:, :, None]], 2) # # Add homogenous row # homogenous_row = torch.tensor([0, 0, 0, 1]).cuda() # RT = torch.cat([RT, homogenous_row[None, None, :].repeat(N, 1, 1)], 1) # if N == 1: # RT = RT[0] # return RT quad, T = inputs[:, :4], inputs[:, 4:] w2c = torch.eye(4).to(inputs).float() w2c[:3, :3] = quad2rotation(quad) w2c[:3, 3] = T return w2c def quadmultiply(q1, q2): """ Multiply two quaternions together using quaternion arithmetic """ # Extract scalar and vector parts of the quaternions w1, x1, y1, z1 = q1.unbind(dim=-1) w2, x2, y2, z2 = q2.unbind(dim=-1) # Calculate the quaternion product result_quaternion = torch.stack( [ w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2, w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2, w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2, w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2, ], dim=-1, ) return result_quaternion def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor: """ Returns torch.sqrt(torch.max(0, x)) but with a zero subgradient where x is 0. Source: https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html#matrix_to_quaternion """ ret = torch.zeros_like(x) positive_mask = x > 0 ret[positive_mask] = torch.sqrt(x[positive_mask]) return ret def rotation2quad(matrix: torch.Tensor) -> torch.Tensor: """ Convert rotations given as rotation matrices to quaternions. Args: matrix: Rotation matrices as tensor of shape (..., 3, 3). Returns: quaternions with real part first, as tensor of shape (..., 4). Source: https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html#matrix_to_quaternion """ if matrix.size(-1) != 3 or matrix.size(-2) != 3: raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") if not isinstance(matrix, torch.Tensor): matrix = torch.tensor(matrix).cuda() batch_dim = matrix.shape[:-2] m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind( matrix.reshape(batch_dim + (9,)), dim=-1 ) q_abs = _sqrt_positive_part( torch.stack( [ 1.0 + m00 + m11 + m22, 1.0 + m00 - m11 - m22, 1.0 - m00 + m11 - m22, 1.0 - m00 - m11 + m22, ], dim=-1, ) ) # we produce the desired quaternion multiplied by each of r, i, j, k quat_by_rijk = torch.stack( [ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and # `int`. torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1), # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and # `int`. torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1), # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and # `int`. torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1), # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and # `int`. torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1), ], dim=-2, ) # We floor here at 0.1 but the exact level is not important; if q_abs is small, # the candidate won't be picked. flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device) quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr)) # if not for numerical problems, quat_candidates[i] should be same (up to a sign), # forall i; we pick the best-conditioned one (with the largest denominator) return quat_candidates[ F.one_hot(q_abs.argmax(dim=-1), num_classes=4) > 0.5, : ].reshape(batch_dim + (4,)) def get_tensor_from_camera(RT, Tquad=False): """ Convert transformation matrix to quaternion and translation. """ # gpu_id = -1 # if type(RT) == torch.Tensor: # if RT.get_device() != -1: # gpu_id = RT.get_device() # RT = RT.detach().cpu() # RT = RT.numpy() # from mathutils import Matrix # # R, T = RT[:3, :3], RT[:3, 3] # rot = Matrix(R) # quad = rot.to_quaternion() # if Tquad: # tensor = np.concatenate([T, quad], 0) # else: # tensor = np.concatenate([quad, T], 0) # tensor = torch.from_numpy(tensor).float() # if gpu_id != -1: # tensor = tensor.to(gpu_id) # return tensor if not isinstance(RT, torch.Tensor): RT = torch.tensor(RT).cuda() rot = RT[:3, :3].unsqueeze(0).detach() quat = rotation2quad(rot).squeeze() tran = RT[:3, 3].detach() return torch.cat([quat, tran]) def normalize(x): return x / np.linalg.norm(x) def viewmatrix(lookdir, up, position, subtract_position=False): """Construct lookat view matrix.""" vec2 = normalize((lookdir - position) if subtract_position else lookdir) vec0 = normalize(np.cross(up, vec2)) vec1 = normalize(np.cross(vec2, vec0)) m = np.stack([vec0, vec1, vec2, position], axis=1) return m def poses_avg(poses): """New pose using average position, z-axis, and up vector of input poses.""" position = poses[:, :3, 3].mean(0) z_axis = poses[:, :3, 2].mean(0) up = poses[:, :3, 1].mean(0) cam2world = viewmatrix(z_axis, up, position) return cam2world def focus_point_fn(poses): """Calculate nearest point to all focal axes in poses.""" directions, origins = poses[:, :3, 2:3], poses[:, :3, 3:4] m = np.eye(3) - directions * np.transpose(directions, [0, 2, 1]) mt_m = np.transpose(m, [0, 2, 1]) @ m focus_pt = np.linalg.inv(mt_m.mean(0)) @ (mt_m @ origins).mean(0)[:, 0] return focus_pt def pad_poses(p): """Pad [..., 3, 4] pose matrices with a homogeneous bottom row [0,0,0,1].""" bottom = np.broadcast_to([0, 0, 0, 1.], p[..., :1, :4].shape) return np.concatenate([p[..., :3, :4], bottom], axis=-2) def unpad_poses(p): """Remove the homogeneous bottom row from [..., 4, 4] pose matrices.""" return p[..., :3, :4] def transform_poses_pca(poses): """Transforms poses so principal components lie on XYZ axes. Args: poses: a (N, 3, 4) array containing the cameras' camera to world transforms. Returns: A tuple (poses, transform), with the transformed poses and the applied camera_to_world transforms. """ t = poses[:, :3, 3] t_mean = t.mean(axis=0) t = t - t_mean eigval, eigvec = np.linalg.eig(t.T @ t) # Sort eigenvectors in order of largest to smallest eigenvalue. inds = np.argsort(eigval)[::-1] eigvec = eigvec[:, inds] rot = eigvec.T if np.linalg.det(rot) < 0: rot = np.diag(np.array([1, 1, -1])) @ rot transform = np.concatenate([rot, rot @ -t_mean[:, None]], -1) poses_recentered = unpad_poses(transform @ pad_poses(poses)) transform = np.concatenate([transform, np.eye(4)[3:]], axis=0) # Flip coordinate system if z component of y-axis is negative if poses_recentered.mean(axis=0)[2, 1] < 0: poses_recentered = np.diag(np.array([1, -1, -1])) @ poses_recentered transform = np.diag(np.array([1, -1, -1, 1])) @ transform # Just make sure it's it in the [-1, 1]^3 cube scale_factor = 1. / np.max(np.abs(poses_recentered[:, :3, 3])) poses_recentered[:, :3, 3] *= scale_factor transform = np.diag(np.array([scale_factor] * 3 + [1])) @ transform return poses_recentered, transform def recenter_poses(poses: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """Recenter poses around the origin.""" cam2world = poses_avg(poses) transform = np.linalg.inv(pad_poses(cam2world)) poses = transform @ pad_poses(poses) return unpad_poses(poses), transform def generate_ellipse_path(views, n_frames=600, const_speed=True, z_variation=0., z_phase=0.): poses = [] for view in views: tmp_view = np.eye(4) tmp_view[:3] = np.concatenate([view.R.T, view.T[:, None]], 1) tmp_view = np.linalg.inv(tmp_view) tmp_view[:, 1:3] *= -1 poses.append(tmp_view) poses = np.stack(poses, 0) poses, transform = transform_poses_pca(poses) # Calculate the focal point for the path (cameras point toward this). center = focus_point_fn(poses) # Path height sits at z=0 (in middle of zero-mean capture pattern). offset = np.array([center[0] , center[1], 0 ]) # Calculate scaling for ellipse axes based on input camera positions. sc = np.percentile(np.abs(poses[:, :3, 3] - offset), 90, axis=0) # Use ellipse that is symmetric about the focal point in xy. low = -sc + offset high = sc + offset # Optional height variation need not be symmetric z_low = np.percentile((poses[:, :3, 3]), 10, axis=0) z_high = np.percentile((poses[:, :3, 3]), 90, axis=0) def get_positions(theta): # Interpolate between bounds with trig functions to get ellipse in x-y. # Optionally also interpolate in z to change camera height along path. return np.stack([ (low[0] + (high - low)[0] * (np.cos(theta) * .5 + .5)), (low[1] + (high - low)[1] * (np.sin(theta) * .5 + .5)), z_variation * (z_low[2] + (z_high - z_low)[2] * (np.cos(theta + 2 * np.pi * z_phase) * .5 + .5)), ], -1) theta = np.linspace(0, 2. * np.pi, n_frames + 1, endpoint=True) positions = get_positions(theta) if const_speed: # Resample theta angles so that the velocity is closer to constant. lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1) theta = sample_np(None, theta, np.log(lengths), n_frames + 1) positions = get_positions(theta) # Throw away duplicated last position. positions = positions[:-1] # Set path's up vector to axis closest to average of input pose up vectors. avg_up = poses[:, :3, 1].mean(0) avg_up = avg_up / np.linalg.norm(avg_up) ind_up = np.argmax(np.abs(avg_up)) up = np.eye(3)[ind_up] * np.sign(avg_up[ind_up]) # up = normalize(poses[:, :3, 1].sum(0)) render_poses = [] for p in positions: render_pose = np.eye(4) render_pose[:3] = viewmatrix(p - center, up, p) render_pose = np.linalg.inv(transform) @ render_pose render_pose[:3, 1:3] *= -1 render_poses.append(np.linalg.inv(render_pose)) return render_poses def generate_spiral_path(poses_arr, n_frames: int = 180, n_rots: int = 2, zrate: float = .5) -> np.ndarray: """Calculates a forward facing spiral path for rendering.""" poses = poses_arr[:, :-2].reshape([-1, 3, 5]) bounds = poses_arr[:, -2:] fix_rotation = np.array([ [0, -1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], ], dtype=np.float32) poses = poses[:, :3, :4] @ fix_rotation scale = 1. / (bounds.min() * .75) poses[:, :3, 3] *= scale bounds *= scale poses, transform = recenter_poses(poses) close_depth, inf_depth = bounds.min() * .9, bounds.max() * 5. dt = .75 focal = 1 / (((1 - dt) / close_depth + dt / inf_depth)) # Get radii for spiral path using 90th percentile of camera positions. positions = poses[:, :3, 3] radii = np.percentile(np.abs(positions), 90, 0) radii = np.concatenate([radii, [1.]]) # Generate poses for spiral path. render_poses = [] cam2world = poses_avg(poses) up = poses[:, :3, 1].mean(0) for theta in np.linspace(0., 2. * np.pi * n_rots, n_frames, endpoint=False): t = radii * [np.cos(theta), -np.sin(theta), -np.sin(theta * zrate), 1.] position = cam2world @ t lookat = cam2world @ [0, 0, -focal, 1.] z_axis = position - lookat render_pose = np.eye(4) render_pose[:3] = viewmatrix(z_axis, up, position) render_pose = np.linalg.inv(transform) @ render_pose render_pose[:3, 1:3] *= -1 render_pose[:3, 3] /= scale render_poses.append(np.linalg.inv(render_pose)) render_poses = np.stack(render_poses, axis=0) return render_poses def generate_interpolated_path( views, n_interp, spline_degree = 5, smoothness = 0.03, rot_weight = 0.1, lock_up = False, fixed_up_vector = None, lookahead_i = None, frames_per_colmap = None, const_speed = False, n_buffer = None, periodic = False, n_interp_as_total = False, ): """Creates a smooth spline path between input keyframe camera poses. Spline is calculated with poses in format (position, lookat-point, up-point). Args: poses: (n, 3, 4) array of input pose keyframes. n_interp: returned path will have n_interp * (n - 1) total poses. spline_degree: polynomial degree of B-spline. smoothness: parameter for spline smoothing, 0 forces exact interpolation. rot_weight: relative weighting of rotation/translation in spline solve. lock_up: if True, forced to use given Up and allow Lookat to vary. fixed_up_vector: replace the interpolated `up` with a fixed vector. lookahead_i: force the look direction to look at the pose `i` frames ahead. frames_per_colmap: conversion factor for the desired average velocity. const_speed: renormalize spline to have constant delta between each pose. n_buffer: Number of buffer frames to insert at the start and end of the path. Helps keep the ends of a spline path straight. periodic: make the spline path periodic (perfect loop). n_interp_as_total: use n_interp as total number of poses in path rather than the number of poses to interpolate between each input. Returns: Array of new camera poses with shape (n_interp * (n - 1), 3, 4), or (n_interp, 3, 4) if n_interp_as_total is set. """ poses = [] for view in views: tmp_view = np.eye(4) tmp_view[:3] = np.concatenate([view.R.T, view.T[:, None]], 1) tmp_view = np.linalg.inv(tmp_view) tmp_view[:, 1:3] *= -1 poses.append(tmp_view) poses = np.stack(poses, 0) def poses_to_points(poses, dist): """Converts from pose matrices to (position, lookat, up) format.""" pos = poses[:, :3, -1] lookat = poses[:, :3, -1] - dist * poses[:, :3, 2] up = poses[:, :3, -1] + dist * poses[:, :3, 1] return np.stack([pos, lookat, up], 1) def points_to_poses(points): """Converts from (position, lookat, up) format to pose matrices.""" poses = [] for i in range(len(points)): pos, lookat_point, up_point = points[i] if lookahead_i is not None: if i + lookahead_i < len(points): lookat = pos - points[i + lookahead_i][0] else: lookat = pos - lookat_point up = (up_point - pos) if fixed_up_vector is None else fixed_up_vector poses.append(viewmatrix(lookat, up, pos)) return np.array(poses) def insert_buffer_poses(poses, n_buffer): """Insert extra poses at the start and end of the path.""" def average_distance(points): distances = np.linalg.norm(points[1:] - points[0:-1], axis=-1) return np.mean(distances) def shift(pose, dz): result = np.copy(pose) z = result[:3, 2] z /= np.linalg.norm(z) # Move along forward-backward axis. -z is forward. result[:3, 3] += z * dz return result dz = average_distance(poses[:, :3, 3]) prefix = np.stack([shift(poses[0], (i + 1) * dz) for i in range(n_buffer)]) prefix = prefix[::-1] # reverse order suffix = np.stack( [shift(poses[-1], -(i + 1) * dz) for i in range(n_buffer)] ) result = np.concatenate([prefix, poses, suffix]) return result def remove_buffer_poses(poses, u, n_frames, u_keyframes, n_buffer): u_keyframes = u_keyframes[n_buffer:-n_buffer] mask = (u >= u_keyframes[0]) & (u <= u_keyframes[-1]) poses = poses[mask] u = u[mask] n_frames = len(poses) return poses, u, n_frames, u_keyframes def interp(points, u, k, s): """Runs multidimensional B-spline interpolation on the input points.""" sh = points.shape pts = np.reshape(points, (sh[0], -1)) k = min(k, sh[0] - 1) tck, u_keyframes = scipy.interpolate.splprep(pts.T, k=k, s=s, per=periodic) new_points = np.array(scipy.interpolate.splev(u, tck)) new_points = np.reshape(new_points.T, (len(u), sh[1], sh[2])) return new_points, u_keyframes if n_buffer is not None: poses = insert_buffer_poses(poses, n_buffer) points = poses_to_points(poses, dist=rot_weight) if n_interp_as_total: n_frames = n_interp + 1 # Add extra since final pose is discarded. else: n_frames = n_interp * (points.shape[0] - 1) u = np.linspace(0, 1, n_frames, endpoint=True) new_points, u_keyframes = interp(points, u=u, k=spline_degree, s=smoothness) poses = points_to_poses(new_points) if n_buffer is not None: poses, u, n_frames, u_keyframes = remove_buffer_poses( poses, u, n_frames, u_keyframes, n_buffer ) # poses, transform = transform_poses_pca(poses) if frames_per_colmap is not None: # Recalculate the number of frames to achieve desired average velocity. positions = poses[:, :3, -1] lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1) total_length_colmap = lengths.sum() print('old n_frames:', n_frames) print('total_length_colmap:', total_length_colmap) n_frames = int(total_length_colmap * frames_per_colmap) print('new n_frames:', n_frames) u = np.linspace( np.min(u_keyframes), np.max(u_keyframes), n_frames, endpoint=True ) new_points, _ = interp(points, u=u, k=spline_degree, s=smoothness) poses = points_to_poses(new_points) if const_speed: # Resample timesteps so that the velocity is nearly constant. positions = poses[:, :3, -1] lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1) u = sample(None, u, np.log(lengths), n_frames + 1) new_points, _ = interp(points, u=u, k=spline_degree, s=smoothness) poses = points_to_poses(new_points) # return poses[:-1], u[:-1], u_keyframes return poses[:-1]