| import numpy as np |
|
|
|
|
| def similarity_from_cameras(c2w, strict_scaling=False, center_method="focus"): |
| """ |
| reference: nerf-factory |
| Get a similarity transform to normalize dataset |
| from c2w (OpenCV convention) cameras |
| :param c2w: (N, 4) |
| :return T (4,4) , scale (float) |
| """ |
| t = c2w[:, :3, 3] |
| R = c2w[:, :3, :3] |
|
|
| |
| |
| ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1) |
| world_up = np.mean(ups, axis=0) |
| world_up /= np.linalg.norm(world_up) |
|
|
| up_camspace = np.array([0.0, -1.0, 0.0]) |
| c = (up_camspace * world_up).sum() |
| cross = np.cross(world_up, up_camspace) |
| skew = np.array( |
| [ |
| [0.0, -cross[2], cross[1]], |
| [cross[2], 0.0, -cross[0]], |
| [-cross[1], cross[0], 0.0], |
| ] |
| ) |
| if c > -1: |
| R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c) |
| else: |
| |
| |
| R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) |
|
|
| |
| R = R_align @ R |
| fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1) |
| t = (R_align @ t[..., None])[..., 0] |
|
|
| |
| if center_method == "focus": |
| |
| nearest = t + (fwds * -t).sum(-1)[:, None] * fwds |
| translate = -np.median(nearest, axis=0) |
| elif center_method == "poses": |
| |
| translate = -np.median(t, axis=0) |
| else: |
| raise ValueError(f"Unknown center_method {center_method}") |
|
|
| transform = np.eye(4) |
| transform[:3, 3] = translate |
| transform[:3, :3] = R_align |
|
|
| |
| scale_fn = np.max if strict_scaling else np.median |
| scale = 1.0 / scale_fn(np.linalg.norm(t + translate, axis=-1)) |
| transform[:3, :] *= scale |
|
|
| return transform |
|
|
|
|
| def align_principal_axes(point_cloud): |
| |
| centroid = np.median(point_cloud, axis=0) |
|
|
| |
| translated_point_cloud = point_cloud - centroid |
|
|
| |
| covariance_matrix = np.cov(translated_point_cloud, rowvar=False) |
|
|
| |
| eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix) |
|
|
| |
| |
| sort_indices = eigenvalues.argsort()[::-1] |
| eigenvectors = eigenvectors[:, sort_indices] |
|
|
| |
| |
| if np.linalg.det(eigenvectors) < 0: |
| eigenvectors[:, 0] *= -1 |
|
|
| |
| rotation_matrix = eigenvectors.T |
|
|
| |
| transform = np.eye(4) |
| transform[:3, :3] = rotation_matrix |
| transform[:3, 3] = -rotation_matrix @ centroid |
|
|
| return transform |
|
|
|
|
| def transform_points(matrix, points): |
| """Transform points using an SE(3) matrix. |
| |
| Args: |
| matrix: 4x4 SE(3) matrix |
| points: Nx3 array of points |
| |
| Returns: |
| Nx3 array of transformed points |
| """ |
| assert matrix.shape == (4, 4) |
| assert len(points.shape) == 2 and points.shape[1] == 3 |
| return points @ matrix[:3, :3].T + matrix[:3, 3] |
|
|
|
|
| def transform_cameras(matrix, camtoworlds): |
| """Transform cameras using an SE(3) matrix. |
| |
| Args: |
| matrix: 4x4 SE(3) matrix |
| camtoworlds: Nx4x4 array of camera-to-world matrices |
| |
| Returns: |
| Nx4x4 array of transformed camera-to-world matrices |
| """ |
| assert matrix.shape == (4, 4) |
| assert len(camtoworlds.shape) == 3 and camtoworlds.shape[1:] == (4, 4) |
| camtoworlds = np.einsum("nij, ki -> nkj", camtoworlds, matrix) |
| scaling = np.linalg.norm(camtoworlds[:, 0, :3], axis=1) |
| camtoworlds[:, :3, :3] = camtoworlds[:, :3, :3] / scaling[:, None, None] |
| return camtoworlds |
|
|
|
|
| def normalize(camtoworlds, points=None): |
| T1 = similarity_from_cameras(camtoworlds) |
| camtoworlds = transform_cameras(T1, camtoworlds) |
| if points is not None: |
| points = transform_points(T1, points) |
| T2 = align_principal_axes(points) |
| camtoworlds = transform_cameras(T2, camtoworlds) |
| points = transform_points(T2, points) |
| return camtoworlds, points, T2 @ T1 |
| else: |
| return camtoworlds, T1 |
|
|