| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | from typing import Tuple, Literal |
| | import torch |
| | import torch.nn.functional as F |
| | import math |
| | import numpy as np |
| | from scipy.spatial.transform import Rotation |
| |
|
| |
|
| | def align_cameras_to_axes( |
| | R: torch.Tensor, |
| | T: torch.Tensor, |
| | target_convention: Literal["opengl", "opencv"] = None, |
| | ): |
| | """align the averaged axes of cameras with the world axes. |
| | |
| | Args: |
| | R: rotation matrix (N, 3, 3) |
| | T: translation vector (N, 3) |
| | """ |
| | |
| | |
| | |
| | bases_c2w = gram_schmidt_orthogonalization(R.mean(0)) |
| | if target_convention == "opengl": |
| | bases_c2w[:, [1, 2]] *= -1 |
| | elif target_convention == "opencv": |
| | pass |
| | bases_w2c = bases_c2w.t() |
| |
|
| | |
| | R = bases_w2c[None, ...] @ R |
| | T = bases_w2c[None, ...] @ T |
| | return R, T |
| |
|
| |
|
| | def convert_camera_convention(camera_convention_conversion: str, R: torch.Tensor, K: torch.Tensor, H: int, W: int): |
| | if camera_convention_conversion is not None: |
| | if camera_convention_conversion == "opencv->opengl": |
| | R[:, :3, [1, 2]] *= -1 |
| | |
| | K[..., 1, 2] = H - K[..., 1, 2] |
| | elif camera_convention_conversion == "opencv->pytorch3d": |
| | R[:, :3, [0, 1]] *= -1 |
| | |
| | K[..., 0, 2] = W - K[..., 0, 2] |
| | K[..., 1, 2] = H - K[..., 1, 2] |
| | elif camera_convention_conversion == "opengl->pytorch3d": |
| | R[:, :3, [0, 2]] *= -1 |
| | |
| | K[..., 0, 2] = W - K[..., 0, 2] |
| | else: |
| | raise ValueError( |
| | f"Unknown camera coordinate conversion: {camera_convention_conversion}." |
| | ) |
| | return R, K |
| |
|
| |
|
| | def gram_schmidt_orthogonalization(M: torch.tensor): |
| | """conducting Gram-Schmidt process to transform column vectors into orthogonal bases |
| | |
| | Args: |
| | M: An matrix (num_rows, num_cols) |
| | Return: |
| | M: An matrix with orthonormal column vectors (num_rows, num_cols) |
| | """ |
| | num_rows, num_cols = M.shape |
| | for c in range(1, num_cols): |
| | M[:, [c - 1, c]] = F.normalize(M[:, [c - 1, c]], p=2, dim=0) |
| | M[:, [c]] -= M[:, :c] @ (M[:, :c].T @ M[:, [c]]) |
| |
|
| | M[:, -1] = F.normalize(M[:, -1], p=2, dim=0) |
| | return M |
| |
|
| |
|
| | def projection_from_intrinsics(K: np.ndarray, image_size: Tuple[int], near: float=0.01, far:float=10, flip_y: bool=False, z_sign=-1): |
| | """ |
| | Transform points from camera space (x: right, y: up, z: out) to clip space (x: right, y: down, z: in) |
| | Args: |
| | K: Intrinsic matrix, (N, 3, 3) |
| | K = [[ |
| | [fx, 0, cx], |
| | [0, fy, cy], |
| | [0, 0, 1], |
| | ] |
| | ] |
| | image_size: (height, width) |
| | Output: |
| | proj = [[ |
| | [2*fx/w, 0.0, (w - 2*cx)/w, 0.0 ], |
| | [0.0, 2*fy/h, (h - 2*cy)/h, 0.0 ], |
| | [0.0, 0.0, z_sign*(far+near) / (far-near), -2*far*near / (far-near)], |
| | [0.0, 0.0, z_sign, 0.0 ] |
| | ] |
| | ] |
| | """ |
| |
|
| | B = K.shape[0] |
| | h, w = image_size |
| |
|
| | if K.shape[-2:] == (3, 3): |
| | fx = K[..., 0, 0] |
| | fy = K[..., 1, 1] |
| | cx = K[..., 0, 2] |
| | cy = K[..., 1, 2] |
| | elif K.shape[-1] == 4: |
| | |
| | fx = K[..., [0]] |
| | fy = K[..., [1]] |
| | cx = K[..., [2]] |
| | cy = K[..., [3]] |
| | else: |
| | raise ValueError(f"Expected K to be (N, 3, 3) or (N, 4) but got: {K.shape}") |
| |
|
| | proj = np.zeros([B, 4, 4]) |
| | proj[:, 0, 0] = fx * 2 / w |
| | proj[:, 1, 1] = fy * 2 / h |
| | proj[:, 0, 2] = (w - 2 * cx) / w |
| | proj[:, 1, 2] = (h - 2 * cy) / h |
| | proj[:, 2, 2] = z_sign * (far+near) / (far-near) |
| | proj[:, 2, 3] = -2*far*near / (far-near) |
| | proj[:, 3, 2] = z_sign |
| |
|
| | if flip_y: |
| | proj[:, 1, 1] *= -1 |
| | return proj |
| |
|
| |
|
| | class OrbitCamera: |
| | def __init__(self, W, H, r=2, fovy=60, znear=1e-8, zfar=10, convention: Literal["opengl", "opencv"]="opengl"): |
| | self.image_width = W |
| | self.image_height = H |
| | self.radius_default = r |
| | self.fovy_default = fovy |
| | self.znear = znear |
| | self.zfar = zfar |
| | self.convention = convention |
| |
|
| | self.up = np.array([0, 1, 0], dtype=np.float32) |
| | self.reset() |
| | |
| | def reset(self): |
| | """ The internal state of the camera is based on the OpenGL convention, but |
| | properties are converted to the target convention when queried. |
| | """ |
| | self.rot = Rotation.from_matrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) |
| | self.look_at = np.array([0, 0, 0], dtype=np.float32) |
| | self.radius = self.radius_default |
| | self.fovy = self.fovy_default |
| | if self.convention == "opencv": |
| | self.z_sign = 1 |
| | self.y_sign = 1 |
| | elif self.convention == "opengl": |
| | self.z_sign = -1 |
| | self.y_sign = -1 |
| | else: |
| | raise ValueError(f"Unknown convention: {self.convention}") |
| |
|
| | @property |
| | def fovx(self): |
| | return self.fovy / self.image_height * self.image_width |
| | |
| | @property |
| | def intrinsics(self): |
| | focal = self.image_height / (2 * np.tan(np.radians(self.fovy) / 2)) |
| | return np.array([focal, focal, self.image_width // 2, self.image_height // 2]) |
| | |
| | @property |
| | def projection_matrix(self): |
| | return projection_from_intrinsics(self.intrinsics[None], (self.image_height, self.image_width), self.znear, self.zfar, z_sign=self.z_sign)[0] |
| | |
| | @property |
| | def world_view_transform(self): |
| | return np.linalg.inv(self.pose) |
| |
|
| | @property |
| | def full_proj_transform(self): |
| | return self.projection_matrix @ self.world_view_transform |
| |
|
| | @property |
| | def pose(self): |
| | |
| | pose = np.eye(4, dtype=np.float32) |
| | pose[2, 3] += self.radius |
| |
|
| | |
| | rot = np.eye(4, dtype=np.float32) |
| | rot[:3, :3] = self.rot.as_matrix() |
| | pose = rot @ pose |
| |
|
| | |
| | pose[:3, 3] -= self.look_at |
| |
|
| | if self.convention == "opencv": |
| | pose[:, [1, 2]] *= -1 |
| | elif self.convention == "opengl": |
| | pass |
| | else: |
| | raise ValueError(f"Unknown convention: {self.convention}") |
| | return pose |
| |
|
| | def orbit(self, dx, dy): |
| | |
| | side = self.rot.as_matrix()[:3, 0] |
| | rotvec_x = self.up * np.radians(-0.3 * dx) |
| | rotvec_y = side * np.radians(-0.3 * dy) |
| | self.rot = Rotation.from_rotvec(rotvec_x) * Rotation.from_rotvec(rotvec_y) * self.rot |
| |
|
| | def scale(self, delta): |
| | self.radius *= 1.1 ** (-delta) |
| |
|
| | def pan(self, dx, dy, dz=0): |
| | |
| | d = np.array([dx, -dy, dz]) |
| | self.look_at += 2 * self.rot.as_matrix()[:3, :3] @ d * self.radius / self.image_height * math.tan(np.radians(self.fovy) / 2) |
| |
|