import sys
import os
import numpy as np
import open3d as o3d
import torch
from mmengine import Config
from pyvirtualdisplay import Display
from tqdm import tqdm

sys.path.append("Metric3D")


def display_wrapper(func):
    def inner(*args, **kwargs):
        with Display(visible=False, size=(1920, 1080)):
            return func(*args, **kwargs)

    return inner
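

# Hedged usage sketch: `display_wrapper` is defined but not applied above. It
# appears intended for functions that need an X display while rendering
# headlessly; `_check_offscreen_rendering` below is a hypothetical example
# (mirroring the __main__ smoke test at the bottom), not part of the original
# pipeline, and is never called.
@display_wrapper
def _check_offscreen_rendering() -> None:
    # Constructing an OffscreenRenderer verifies that headless GL works.
    o3d.visualization.rendering.OffscreenRenderer(512, 320)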


def relative_pose(rt: np.ndarray, mode: str, ref_index: int = 0) -> np.ndarray:
    if mode == "left":
        rt = np.linalg.inv(rt[ref_index]) @ rt
    elif mode == "right":
        rt = rt @ np.linalg.inv(rt[ref_index])
    return rt
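

# Minimal sketch (illustrative poses, not from the project): with mode="left",
# the trajectory is re-expressed relative to the reference pose, so the
# reference frame becomes the identity. Never called.
def _example_relative_pose() -> None:
    poses = np.stack([np.eye(4)] * 3)
    poses[:, 2, 3] = [1.0, 2.0, 3.0]        # per-frame translation along z
    rel = relative_pose(poses, mode="left")  # re-anchor so frame 0 is identity
    assert np.allclose(rel[0], np.eye(4))
    assert np.allclose(rel[1][2, 3], 1.0)    # z-offset relative to frame 0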


def project_point_cloud(
    frame: np.ndarray,
    depth: np.ndarray,
    intrinsics: list[float],
    remove_outliers: bool = True,
    voxel_size: float | None = None,
) -> o3d.geometry.PointCloud:
    from mono.utils.unproj_pcd import reconstruct_pcd

    points = reconstruct_pcd(depth, *intrinsics).reshape(-1, 3)
    colors = frame.reshape(-1, 3) / 255
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points.astype(np.double))
    pcd.colors = o3d.utility.Vector3dVector(colors.astype(np.double))
    if remove_outliers:
        _, ind = pcd.remove_statistical_outlier(nb_neighbors=12, std_ratio=3.0)
        pcd = pcd.select_by_index(ind)
    if voxel_size is not None:
        pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
    return pcd
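

# Hedged usage sketch with synthetic inputs: a flat gray frame and a constant
# 1 m depth map. The intrinsics values are placeholders, not the project's.
# Requires the Metric3D repo on sys.path (appended above); never called.
def _example_project_point_cloud() -> o3d.geometry.PointCloud:
    frame = np.full((240, 320, 3), 128, dtype=np.uint8)
    depth = np.ones((240, 320), dtype=np.float32)
    intrinsics = [300.0, 300.0, 160.0, 120.0]  # fx, fy, cx, cy
    return project_point_cloud(frame, depth, intrinsics, voxel_size=0.05)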


def create_camera_frustum(
    frame: np.ndarray,
    intrinsic: o3d.camera.PinholeCameraIntrinsic,
    c2w: np.ndarray,
    frustum_scale: float = 0.5,
):
    W, H = intrinsic.width, intrinsic.height
    fx, fy = intrinsic.get_focal_length()
    cx, cy = intrinsic.get_principal_point()
    z = frustum_scale
    x = (W - cx) * z / fx
    y = (H - cy) * z / fy
    points = [[0, 0, 0], [-x, -y, z], [x, -y, z], [x, y, z], [-x, y, z]]
    lines = [[0, 1], [0, 2], [0, 3], [0, 4], [1, 2], [2, 3], [3, 4], [4, 1]]
    line_set = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(points),
        lines=o3d.utility.Vector2iVector(lines),
    )
    line_set.paint_uniform_color([0.8, 0.2, 0.2])
    line_set.transform(c2w)
    vertices = [points[i] for i in [1, 2, 3, 4]]
    triangles = [[0, 1, 2], [0, 2, 3]]
    img_plane = o3d.geometry.TriangleMesh(
        vertices=o3d.utility.Vector3dVector(vertices),
        triangles=o3d.utility.Vector3iVector(triangles),
    )
    img_plane.triangle_uvs = o3d.utility.Vector2dVector(
        np.array([[0, 1], [1, 1], [1, 0], [0, 1], [1, 0], [0, 0]])
    )
    img_plane.transform(c2w)
    material = o3d.visualization.rendering.MaterialRecord()
    material.shader = "defaultUnlit"
    material.albedo_img = o3d.geometry.Image(frame)
    return line_set, img_plane, material
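

# Hedged usage sketch: build the frustum wireframe and textured image plane for
# a dummy frame placed at the world origin. Image size and intrinsics are
# placeholders; never called.
def _example_create_camera_frustum():
    frame = np.zeros((240, 320, 3), dtype=np.uint8)
    K = o3d.camera.PinholeCameraIntrinsic(320, 240, 300.0, 300.0, 160.0, 120.0)
    return create_camera_frustum(frame, K, np.eye(4), frustum_scale=0.25)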


class Previewer:
    def __init__(self, model_path: str = "pretrained/metric_depth_vit_large_800k.pth"):
        self.model_path = model_path
        self.depth_predictor = None

    def init_depth_predictor(self):
        from mono.model.monodepth_model import get_configured_monodepth_model
        from mono.utils.running import load_ckpt

        self.config = Config.fromfile(
            "Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py"
        )
        model = get_configured_monodepth_model(self.config)
        model = torch.nn.DataParallel(model).cuda().eval().requires_grad_(False)
        model, _, _, _ = load_ckpt(self.model_path, model, strict_match=False)
        self.depth_predictor = model

    def estimate_depths(
        self, frames: np.ndarray, intrinsics: list[float]
    ) -> np.ndarray:
        """
        :param frames: `np.ndarray` of shape (B, H, W, C) and range (0, 255)
        :param intrinsics: list of [fx, fy, cx, cy]
        :return depths: `np.ndarray` of shape (B, H, W) and range (0, 300)
        """
        from mono.utils.do_test import transform_test_data_scalecano

        if self.depth_predictor is None:
            self.init_depth_predictor()
        B, H, W, C = frames.shape
        rgb_inputs, pads = [], []
        for frame in frames:
            rgb_input, _, pad, label_scale_factor = transform_test_data_scalecano(
                frame, intrinsics, self.config.data_basic
            )
            rgb_inputs.append(rgb_input)
            pads.append(pad)
        with torch.inference_mode(), torch.autocast("cuda"):  # b c h w
            depths, _, _ = self.depth_predictor.module.inference(
                {"input": torch.stack(rgb_inputs).cuda(), "pad_info": pads}
            )
        _, _, h, w = depths.shape
        # The pad and scale factor from the last loop iteration are reused here,
        # which assumes every frame in the batch has the same shape.
        depths = depths[..., pad[0] : h - pad[1], pad[2] : w - pad[3]]
        depths = depths * self.config.data_basic.depth_range[-1] / label_scale_factor
        depths = torch.nn.functional.interpolate(depths, (H, W), mode="bilinear")
        return depths.clamp(0, 300).squeeze(1).cpu().numpy()

    def render_previews(
        self,
        frame: np.ndarray,
        depth: np.ndarray,
        intrinsics: list[float],
        w2cs: np.ndarray,
    ):
        """
        :param frame: `np.ndarray` of shape (H, W, C) and range (0, 255)
        :param depth: `np.ndarray` of shape (H, W) and range (0, 300)
        :param intrinsics: list of [fx, fy, cx, cy]
        :param w2cs: `np.ndarray` of shape (B, 4, 4)
        :return previews: `np.ndarray` of shape (B, H, W, C) and range (0, 255)
        """
        H, W, _ = frame.shape
        K = o3d.camera.PinholeCameraIntrinsic(W, H, *intrinsics)
        pcd = project_point_cloud(frame, depth, intrinsics)
        mat = o3d.visualization.rendering.MaterialRecord()
        mat.shader = "defaultUnlit"
        mat.point_size = 2
        renderer = o3d.visualization.rendering.OffscreenRenderer(W, H)
        renderer.scene.set_background(np.array([1.0, 1.0, 1.0, 1.0]))
        renderer.scene.view.set_post_processing(False)
        renderer.scene.clear_geometry()
        renderer.scene.add_geometry("point cloud", pcd, mat)
        previews = []
        for w2c in tqdm(relative_pose(w2cs, mode="left")):
            renderer.setup_camera(K, w2c)
            previews.append(renderer.render_to_image())
        return np.stack(previews)

    def render_4d_scene(
        self,
        frames: np.ndarray,
        depths: np.ndarray,
        intrinsics: list[float],
        w2cs: np.ndarray,
    ):
        """
        :param frames: `np.ndarray` of shape (B, H, W, C) and range (0, 255)
        :param depths: `np.ndarray` of shape (B, H, W) and range (0, 300)
        :param intrinsics: list of [fx, fy, cx, cy]
        :param w2cs: `np.ndarray` of shape (B, 4, 4)
        :return renderings: `np.ndarray` of shape (B, H, W, C) and range (0, 255)
        """
        F, H, W, _ = frames.shape
        K = o3d.camera.PinholeCameraIntrinsic(W, H, *intrinsics)
        renderer = o3d.visualization.rendering.OffscreenRenderer(W, H)
        renderer.scene.set_background(np.array([1.0, 1.0, 1.0, 1.0]))
        renderer.scene.view.set_post_processing(False)
        c2w_0 = np.linalg.inv(w2cs[0])
        eye_pos_world = (c2w_0 @ np.array([0.3, -0.5, -0.5, 1]))[:3]
        center_pos_world = (c2w_0 @ np.array([0, 0, 2, 1]))[:3]
        up_vector_world = np.array([0, -1, 0])
        renderer.scene.camera.look_at(center_pos_world, eye_pos_world, up_vector_world)
        point_material = o3d.visualization.rendering.MaterialRecord()
        point_material.shader = "defaultUnlit"
        point_material.point_size = 2
        line_material = o3d.visualization.rendering.MaterialRecord()
        line_material.shader = "unlitLine"
        line_material.line_width = 3
        renderings = []
        for frame, depth, w2c in tqdm(zip(frames, depths, w2cs), total=F):
            c2w = np.linalg.inv(w2c)
            pcd = project_point_cloud(frame, depth, intrinsics)
            pcd.transform(c2w)
            wire_frame, frustum, frustum_material = create_camera_frustum(frame, K, c2w)
            renderer.scene.clear_geometry()
            renderer.scene.add_geometry("point cloud", pcd, point_material)
            renderer.scene.add_geometry("wire frame", wire_frame, line_material)
            renderer.scene.add_geometry("frustum", frustum, frustum_material)
            renderings.append(renderer.render_to_image())
        return np.stack(renderings)
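

# Hedged end-to-end sketch of the Previewer pipeline, assuming a CUDA device,
# the Metric3D checkpoint at the default `model_path`, and a working virtual
# display. The frames, intrinsics, and camera poses below are placeholders;
# never called.
def _example_previewer_pipeline() -> np.ndarray:
    previewer = Previewer()
    frames = np.zeros((4, 240, 320, 3), dtype=np.uint8)
    intrinsics = [300.0, 300.0, 160.0, 120.0]  # fx, fy, cx, cy
    w2cs = np.stack([np.eye(4)] * 4)
    depths = previewer.estimate_depths(frames, intrinsics)
    with Display(visible=False, size=(320, 240)):
        return previewer.render_4d_scene(frames, depths, intrinsics, w2cs)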


if __name__ == "__main__":
    # Smoke test: verify that offscreen rendering works inside a virtual display.
    with Display(visible=False, size=(512, 320)):
        o3d.visualization.rendering.OffscreenRenderer(512, 320)