|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import math |
|
import torch |
|
|
|
""" |
|
R: (N, 3, 3) |
|
T: (N, 3) |
|
E: (N, 4, 4) |
|
vector: (N, 3) |
|
""" |
|
|
|
|
|
def compose_extrinsic_R_T(R: torch.Tensor, T: torch.Tensor):
    """
    Compose the standard form extrinsic matrix from R and T.
    Batched I/O: (N, 3, 3) and (N, 3) -> (N, 4, 4).
    """
    # Append T as the last column, then pad with the homogeneous bottom row.
    RT = torch.cat((R, T[..., None]), dim=-1)
    bottom = torch.tensor([[[0, 0, 0, 1]]], dtype=RT.dtype, device=RT.device).repeat(RT.shape[0], 1, 1)
    return torch.cat([RT, bottom], dim=1)
|
|
|
|
|
def compose_extrinsic_RT(RT: torch.Tensor):
    """
    Compose the standard form extrinsic matrix from RT.
    Batched I/O: (N, 3, 4) -> (N, 4, 4).
    """
    batch = RT.shape[0]
    # Constant homogeneous row [0, 0, 0, 1]; expand is a view, cat copies it.
    bottom = torch.tensor([0, 0, 0, 1], dtype=RT.dtype, device=RT.device)
    bottom = bottom.view(1, 1, 4).expand(batch, 1, 4)
    return torch.cat((RT, bottom), dim=1)
|
|
|
|
|
def decompose_extrinsic_R_T(E: torch.Tensor):
    """
    Decompose the standard extrinsic matrix into R and T.
    Batched I/O: (N, 4, 4) -> (N, 3, 3), (N, 3).
    """
    upper = E[:, :3, :]
    return upper[..., :3], upper[..., 3]
|
|
|
|
|
def decompose_extrinsic_RT(E: torch.Tensor):
    """
    Decompose the standard extrinsic matrix into RT.
    Batched I/O: (N, 4, 4) -> (N, 3, 4) view of the top three rows.
    """
    return E.narrow(1, 0, 3)
|
|
|
|
|
def camera_normalization_objaverse(normed_dist_to_center, poses: torch.Tensor, ret_transform: bool = False):
    """
    Normalize a batch of camera poses so that the pivotal (first) camera is
    mapped onto a canonical extrinsic at `dist_to_center` from the origin;
    the same rigid transform is applied to every pose in the batch.

    normed_dist_to_center: 'auto' to keep the pivotal camera's current
        distance to the origin, or a float giving the target distance.
    poses: (N, 3, 4) batched [R|T] extrinsics; poses[0] is the pivotal camera.
    ret_transform: if True, additionally return the (4, 4) normalization matrix.
    return: (N, 3, 4) normalized poses[, (4, 4) transform].
    """
    assert normed_dist_to_center is not None
    device, dtype = poses.device, poses.dtype
    # Homogeneous bottom row [0, 0, 0, 1], shaped (1, 1, 4).
    bottom = torch.tensor([[[0, 0, 0, 1]]], dtype=dtype, device=device)

    pivotal_pose = torch.cat([poses[:1], bottom], dim=1)
    dist_to_center = pivotal_pose[:, :3, 3].norm(dim=-1, keepdim=True).item() \
        if normed_dist_to_center == 'auto' else normed_dist_to_center

    # FIX: build on poses' device/dtype. The original hard-coded CPU float32,
    # which makes torch.bmm fail whenever poses live on another device
    # (e.g. CUDA) or use a different float dtype.
    canonical_camera_extrinsics = torch.tensor([[
        [1, 0, 0, 0],
        [0, 0, -1, -dist_to_center],
        [0, 1, 0, 0],
        [0, 0, 0, 1],
    ]], dtype=dtype, device=device)
    pivotal_pose_inv = torch.inverse(pivotal_pose)
    camera_norm_matrix = torch.bmm(canonical_camera_extrinsics, pivotal_pose_inv)

    # Apply the normalization to every pose in homogeneous coordinates.
    homogeneous_poses = torch.cat([poses, bottom.repeat(poses.shape[0], 1, 1)], dim=1)
    normed = torch.bmm(camera_norm_matrix.repeat(poses.shape[0], 1, 1), homogeneous_poses)
    normed = normed[:, :3, :]

    if ret_transform:
        return normed, camera_norm_matrix.squeeze(dim=0)
    return normed
|
|
|
|
|
def get_normalized_camera_intrinsics(intrinsics: torch.Tensor):
    """
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    Return batched fx, fy, cx, cy normalized by image width/height.
    """
    width = intrinsics[:, 2, 0]
    height = intrinsics[:, 2, 1]
    fx = intrinsics[:, 0, 0] / width
    fy = intrinsics[:, 0, 1] / height
    cx = intrinsics[:, 1, 0] / width
    cy = intrinsics[:, 1, 1] / height
    return fx, fy, cx, cy
|
|
|
|
|
def build_camera_principle(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    Flatten each camera into a 16-dim vector: 12 extrinsic entries followed by
    normalized fx, fy, cx, cy.
    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    return: (N, 16)
    """
    # Normalize focal length and principal point by (width, height) in one shot.
    image_size = intrinsics[:, 2]              # (N, 2) = [width, height]
    focal = intrinsics[:, 0] / image_size      # (N, 2) = [fx/w, fy/h]
    principal = intrinsics[:, 1] / image_size  # (N, 2) = [cx/w, cy/h]
    return torch.cat([RT.reshape(-1, 12), focal, principal], dim=-1)
|
|
|
|
|
def build_camera_standard(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    Flatten each camera into a 25-dim vector: the 16 entries of the homogeneous
    extrinsic matrix followed by the 9 entries of the normalized 3x3 intrinsic
    matrix.
    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    return: (N, 25)
    """
    batch = RT.shape[0]
    # Homogeneous extrinsics: append the [0, 0, 0, 1] row to each pose.
    bottom = torch.tensor([[[0, 0, 0, 1]]], dtype=RT.dtype, device=RT.device).repeat(batch, 1, 1)
    E = torch.cat([RT, bottom], dim=1)
    # Normalized pinhole intrinsic matrix [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
    width, height = intrinsics[:, 2, 0], intrinsics[:, 2, 1]
    fx, fy = intrinsics[:, 0, 0] / width, intrinsics[:, 0, 1] / height
    cx, cy = intrinsics[:, 1, 0] / width, intrinsics[:, 1, 1] / height
    zeros = torch.zeros_like(fx)
    K = torch.stack([
        torch.stack([fx, zeros, cx], dim=-1),
        torch.stack([zeros, fy, cy], dim=-1),
        torch.tensor([[0, 0, 1]], dtype=torch.float32, device=RT.device).repeat(batch, 1),
    ], dim=1)
    return torch.cat([E.reshape(-1, 16), K.reshape(-1, 9)], dim=-1)
|
|
|
|
|
def center_looking_at_camera_pose(
    camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None,
    device: torch.device = torch.device('cpu'),
):
    """
    Build camera extrinsics for cameras at `camera_position` looking at
    `look_at` with the given world-up direction.

    camera_position: (M, 3)
    look_at: (3,), defaults to the origin.
    up_world: (3,), defaults to +Z.
    device: device used for the default look_at/up_world tensors.
    return: (M, 3, 4) with columns [x_axis, y_axis, z_axis, camera_position].
    """

    if look_at is None:
        look_at = torch.tensor([0, 0, 0], dtype=torch.float32, device=device)
    if up_world is None:
        up_world = torch.tensor([0, 0, 1], dtype=torch.float32, device=device)
    # Broadcast the single target/up vector over the batch.
    look_at = look_at.unsqueeze(0).repeat(camera_position.shape[0], 1)
    up_world = up_world.unsqueeze(0).repeat(camera_position.shape[0], 1)

    # z points from the target towards the camera.
    z_axis = camera_position - look_at
    z_axis = z_axis / z_axis.norm(dim=-1, keepdim=True)
    # FIX: pass dim=-1 explicitly. Bare torch.cross uses the FIRST dimension
    # of size 3, so with exactly 3 cameras (M == 3) it silently crossed along
    # the batch axis and produced wrong poses; the inferred-dim behavior is
    # also deprecated in PyTorch.
    x_axis = torch.cross(up_world, z_axis, dim=-1)
    x_axis = x_axis / x_axis.norm(dim=-1, keepdim=True)
    y_axis = torch.cross(z_axis, x_axis, dim=-1)
    y_axis = y_axis / y_axis.norm(dim=-1, keepdim=True)
    extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
    return extrinsics
|
|
|
|
|
def surrounding_views_linspace(n_views: int, radius: float = 2.0, height: float = 0.8, device: torch.device = torch.device('cpu')):
    """
    Place cameras evenly on a horizontal circle at the given height, all
    looking at the origin.

    n_views: number of surrounding views
    radius: camera dist to center
    height: height of the camera
    return: (M, 3, 4)

    NOTE(review): the linspace endpoints -pi/2 and 3*pi/2 are the same angle
    on the circle, so the first and last views coincide — confirm intended.
    """
    assert n_views > 0
    assert radius > 0

    angles = torch.linspace(-torch.pi / 2, 3 * torch.pi / 2, n_views, device=device)
    # Radius of the circle projected onto the z = height plane.
    ring_radius = math.sqrt(radius ** 2 - height ** 2)
    xs = ring_radius * torch.cos(angles)
    ys = ring_radius * torch.sin(angles)
    zs = torch.full((n_views,), height, device=device)

    positions = torch.stack([xs, ys, zs], dim=1)
    return center_looking_at_camera_pose(positions, device=device)
|
|
|
|
|
def create_intrinsics(
    f: float,
    c: float = None, cx: float = None, cy: float = None,
    w: float = 1., h: float = 1.,
    dtype: torch.dtype = torch.float32,
    device: torch.device = torch.device('cpu'),
):
    """
    Build a normalized intrinsics tensor [[fx, fy], [cx, cy], [w, h]] with the
    focal length and principal point divided by (w, h).

    f: focal length, used for both fx and fy.
    c: shared principal point (mutually exclusive with cx/cy).
    cx, cy: per-axis principal point (required when c is not given).
    w, h: image width and height used for normalization.
    return: (3, 2)
    """
    if c is not None:
        assert cx is None and cy is None, "c and cx/cy cannot be used together"
        cx = cy = c
    else:
        assert cx is not None and cy is not None, "cx/cy must be provided when c is not provided"
    # Normalize in-place in the literal: fx/w, fy/h, cx/w, cy/h, size -> 1.
    return torch.tensor([
        [f / w, f / h],
        [cx / w, cy / h],
        [1., 1.],
    ], dtype=dtype, device=device)
|
|