Spaces:

Stable-X
/

StableRecon

Sleeping

App Files Files Community

Stable-X committed on Oct 19, 2024

Commit

82b898c

1 Parent(s): d7542a1

feat: Add pose_utils to solve camera and depth

Browse files

Files changed (2) hide show

demo.py +10 -2
pose_utils.py +126 -0

demo.py CHANGED Viewed

@@ -15,6 +15,7 @@ from spann3r.datasets import *
 from torch.utils.data import DataLoader
 from spann3r.tools.eval_recon import accuracy, completion
 from spann3r.tools.vis import render_frames, find_render_cam, vis_pred_and_imgs
 from backend_utils import improved_multiway_registration, pts2normal, point2mesh, combine_and_clean_point_clouds
 def get_args_parser():
@@ -63,7 +64,7 @@ def main(args):
     model.load_state_dict(torch.load(args.ckpt_path)['model'])
     model.eval()
-    if args.demo_path.endswith('.mp4') or args.demo_path.endswith('.avi') or args.demo_path.endswith('.MOV'):
         args.demo_path = extract_frames(args.demo_path)
         args.kf_every = 1
@@ -139,10 +140,15 @@ def main(args):
         conf_sig = (conf - 1) / conf
         pts_gt = view['pts3d'].cpu().numpy()[0]
         images_all.append((image[None, ...] + 1.0)/2.0)
         pts_all.append(pts[None, ...])
         pts_normal_all.append(pts_normal[None, ...])
         pts_gt_all.append(pts_gt[None, ...])
         masks_all.append(mask[None, ...])
         conf_sig_all.append(conf_sig[None, ...])
@@ -163,7 +169,9 @@ def main(args):
         pcd.normals = o3d.utility.Vector3dVector(pts_normal_all[j][mask])
         pcds.append(pcd)
-    pcd_combined = combine_and_clean_point_clouds(pcds, voxel_size=args.voxel_size * 0.1)
     mesh_recon = point2mesh(pcd_combined)

 from torch.utils.data import DataLoader
 from spann3r.tools.eval_recon import accuracy, completion
 from spann3r.tools.vis import render_frames, find_render_cam, vis_pred_and_imgs
+from pose_utils import solve_cemara
 from backend_utils import improved_multiway_registration, pts2normal, point2mesh, combine_and_clean_point_clouds
 def get_args_parser():
     model.load_state_dict(torch.load(args.ckpt_path)['model'])
     model.eval()
+    if args.demo_path.endswith('.mp4') or args.demo_path.endswith('.avi') or args.demo_path.endswith('.webm'):
         args.demo_path = extract_frames(args.demo_path)
         args.kf_every = 1
         conf_sig = (conf - 1) / conf
         pts_gt = view['pts3d'].cpu().numpy()[0]
+        camera, last_focal, depth_map = solve_cemara(torch.tensor(pts), torch.tensor(conf_sig) > args.conf_thresh,
+                                          args.device, focal=last_focal)
+        pts_scale = depth_map / last_focal
         images_all.append((image[None, ...] + 1.0)/2.0)
         pts_all.append(pts[None, ...])
         pts_normal_all.append(pts_normal[None, ...])
         pts_gt_all.append(pts_gt[None, ...])
+        pts_scale_all.append(pts_scale[None, ...])
         masks_all.append(mask[None, ...])
         conf_sig_all.append(conf_sig[None, ...])
         pcd.normals = o3d.utility.Vector3dVector(pts_normal_all[j][mask])
         pcds.append(pcd)
+    print("Performing global registration...")
+    pcd_combined, _, _ = improved_multiway_registration(pcds, voxel_size=0.001)
+    # pcd_combined = combine_and_clean_point_clouds(transformed_pcds, voxel_size=args.voxel_size * 0.1)
     mesh_recon = point2mesh(pcd_combined)

pose_utils.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import numpy as np
+import torch
+import cv2
+import open3d as o3d
+from dust3r.post_process import estimate_focal_knowing_depth
+from dust3r.utils.geometry import inv
def estimate_focal(pts3d_i, pp=None):
    """Estimate a single focal length from a per-pixel 3D point map.

    Args:
        pts3d_i: torch tensor of shape (H, W, 3) holding one 3D point per pixel.
        pp: optional principal point (x, y). May be a tensor or any
            array-like (tuple, list, numpy array); when omitted the image
            centre (W/2, H/2) is used.

    Returns:
        The estimated focal length as a Python float.
    """
    if pp is None:
        H, W, THREE = pts3d_i.shape
        assert THREE == 3
        pp = torch.tensor((W/2, H/2), device=pts3d_i.device)
    elif not isinstance(pp, torch.Tensor):
        # Callers may hand over a plain tuple/array (fast_pnp accepts those
        # too); `.unsqueeze` below needs a tensor living next to the points.
        pp = torch.as_tensor(pp, dtype=torch.get_default_dtype(), device=pts3d_i.device)
    # The dust3r helper is called with a leading batch dimension of 1 on both inputs.
    focal = estimate_focal_knowing_depth(pts3d_i.unsqueeze(0), pp.unsqueeze(0), focal_mode='weiszfeld').ravel()
    return float(focal)
def pixel_grid(H, W):
    """Return an (H, W, 2) float32 array whose entry [y, x] is the pixel coordinate (x, y)."""
    cols, rows = np.meshgrid(np.arange(W), np.arange(H))
    return np.stack((cols, rows), axis=-1).astype(np.float32)
def sRT_to_4x4(scale, R, T, device):
    """Pack a scale, a 3x3 rotation and a translation into a 4x4 homogeneous matrix.

    The upper-left 3x3 block is ``R * scale``; the last column holds the
    flattened translation, which is stored unscaled. The result is created
    on ``device`` starting from an identity matrix.
    """
    transform = torch.eye(4, device=device)
    # Translation goes in as-is: only the rotation block carries the scale.
    transform[:3, 3] = T.reshape(-1)
    transform[:3, :3] = R * scale
    return transform
def to_numpy(tensor):
    """Convert a torch tensor to a numpy array on the CPU; return anything else unchanged."""
    if not isinstance(tensor, torch.Tensor):
        return tensor
    return tensor.cpu().numpy()
def _rodrigues_to_matrix(rvec):
    """Convert a 3-element axis-angle (Rodrigues) vector into a 3x3 rotation matrix."""
    rvec = np.asarray(rvec, dtype=np.float64).reshape(3)
    theta = np.linalg.norm(rvec)
    if theta < 1e-12:
        # Zero rotation: the axis is undefined, the matrix is the identity.
        return np.eye(3)
    kx, ky, kz = rvec / theta
    K = np.array([[0.0, -kz, ky],
                  [kz, 0.0, -kx],
                  [-ky, kx, 0.0]])
    return np.eye(3) + np.sin(theta) * K + (1.0 - np.cos(theta)) * (K @ K)


def calculate_depth_map(pts3d, R, T):
    """
    Calculate ray depths directly using camera center and 3D points.

    The pose is world-to-camera (x_cam = R @ x_world + T), so the camera
    centre in world coordinates is C = -R^T @ T. Using -T alone is only
    correct when R is the identity.

    Args:
    pts3d (np.array): 3D points in world coordinates, shape (H, W, 3)
    R (np.array): Rotation matrix, shape (3, 3). A 3-element Rodrigues
        rotation vector (such as the rvec returned by cv2.solvePnPRansac)
        is also accepted and converted.
    T (np.array): Translation vector, shape (3, 1)
    Returns:
    np.array: Depth map of shape (H, W) holding the Euclidean distance from
        the camera centre to each 3D point.
    """
    R = np.asarray(R)
    if R.shape != (3, 3):
        R = _rodrigues_to_matrix(R)
    # Camera center in world coordinates
    C = -(R.T @ np.asarray(T).reshape(3))
    # Calculate ray vectors
    ray_vectors = pts3d - C
    # Calculate ray depths
    depth_map = np.linalg.norm(ray_vectors, axis=2)
    return depth_map
def fast_pnp(pts3d, focal, msk, device, pp=None, niter_PnP=10):
    """Recover a camera pose (and optionally the focal) with RANSAC-PnP.

    Every masked pixel contributes one 3D-2D correspondence: its 3D point
    from ``pts3d`` and its own pixel coordinate. When ``focal`` is None,
    21 geometrically spaced candidates between S/2 and 3*S (S = max(W, H))
    are tried and the one with the most RANSAC inliers wins.

    Args:
        pts3d: (H, W, 3) point map, torch tensor or numpy array.
        focal: known focal length, or None to search over candidates.
        msk: (H, W) boolean mask selecting the pixels to use.
        device: torch device for the returned tensors.
        pp: optional principal point (x, y); defaults to the image centre.
        niter_PnP: RANSAC iteration count passed to cv2.solvePnPRansac.

    Returns:
        None when fewer than 4 pixels are masked or no candidate succeeds,
        otherwise a tuple ``(best_focal, cam_to_world, depth_map)`` where
        ``cam_to_world`` is a 4x4 tensor and ``depth_map`` an (H, W) tensor
        on ``device``.
    """
    # extract camera poses and focals with RANSAC-PnP
    if msk.sum() < 4:
        return None  # we need at least 4 points for PnP
    pts3d, msk = map(to_numpy, (pts3d, msk))

    H, W, THREE = pts3d.shape
    assert THREE == 3
    pixels = pixel_grid(H, W)

    if focal is None:
        S = max(W, H)
        tentative_focals = np.geomspace(S/2, S*3, 21)
    else:
        tentative_focals = [focal]

    pp = (W/2, H/2) if pp is None else to_numpy(pp)

    # The correspondences do not depend on the focal candidate: select once.
    object_points = pts3d[msk]
    image_points = pixels[msk]

    best_score, best_rvec, best_tvec, best_focal = 0, None, None, None
    for candidate in tentative_focals:
        K = np.float32([(candidate, 0, pp[0]), (0, candidate, pp[1]), (0, 0, 1)])
        success, rvec, tvec, inliers = cv2.solvePnPRansac(object_points, image_points, K, None,
                                                          iterationsCount=niter_PnP, reprojectionError=5, flags=cv2.SOLVEPNP_SQPNP)
        if not success or inliers is None:
            continue
        score = len(inliers)
        if score > best_score:
            best_score, best_rvec, best_tvec, best_focal = score, rvec, tvec, candidate

    if not best_score:
        return None

    R = cv2.Rodrigues(best_rvec)[0]  # world to cam
    # Depth is computed once, for the winning pose only, and receives the
    # rotation as a 3x3 matrix as calculate_depth_map documents.
    depth_map = calculate_depth_map(pts3d, R, best_tvec)

    R, T = map(torch.from_numpy, (R, best_tvec))
    depth_map = torch.from_numpy(depth_map).to(device)
    # `inv` comes from dust3r; presumably a 4x4 matrix inverse — verify there.
    cam_to_world = inv(sRT_to_4x4(1, R, T, device))  # cam to world
    return best_focal, cam_to_world, depth_map
def solve_cemara(pts3d, msk, device, focal=None, pp=None):
    """Solve intrinsics, extrinsics and a depth map for one point map.

    Args:
        pts3d: (H, W, 3) point map.
        msk: (H, W) boolean mask of pixels to use for PnP.
        device: torch device forwarded to fast_pnp.
        focal: known focal length; estimated with estimate_focal when None.
        pp: optional principal point (x, y); the image centre is used for
            the intrinsics when None.

    Returns:
        ``(camera_parameters, best_focal, depth_map)`` where
        ``camera_parameters`` is an open3d PinholeCameraParameters whose
        extrinsic is the inverse of the camera-to-world matrix from
        fast_pnp. If PnP fails, ``(None, focal, None)`` is returned.
    """
    # Fall back to an estimated focal when the caller does not supply one.
    focal = estimate_focal(pts3d, pp) if focal is None else focal

    # Camera pose via PnP
    pnp_result = fast_pnp(pts3d, focal, msk, device, pp)
    if pnp_result is None:
        return None, focal, None
    solved_focal, cam2world, depth_map = pnp_result

    height, width, _ = pts3d.shape
    cx, cy = (width/2, height/2) if pp is None else (pp[0], pp[1])

    # Same focal on both axes.
    pinhole = o3d.camera.PinholeCameraIntrinsic()
    pinhole.set_intrinsics(width, height, solved_focal, solved_focal, cx, cy)

    camera_parameters = o3d.camera.PinholeCameraParameters()
    camera_parameters.intrinsic = pinhole
    # The extrinsic stored here is world-to-camera, so cam-to-world is inverted back.
    camera_parameters.extrinsic = torch.inverse(cam2world).cpu().numpy()
    return camera_parameters, solved_focal, depth_map