Ruicheng committed on
Commit c9d074f
1 Parent(s): 40ec4f9

update edge removal

app.py CHANGED
@@ -60,7 +60,8 @@ def run(image: np.ndarray, remove_edge: bool = True, max_size: int = 800):
     points, depth, mask = output['points'], output['depth'], output['mask']
 
     if remove_edge:
-        mask = mask & ~utils3d.numpy.depth_edge(depth, mask=mask, rtol=0.02)
+        normals, normals_mask = utils3d.numpy.points_to_normals(points, mask=mask)
+        mask = mask & ~(utils3d.numpy.depth_edge(depth, rtol=0.03, mask=mask) & utils3d.numpy.normals_edge(normals, tol=5, mask=normals_mask))
 
     faces, vertices, vertex_colors, vertex_uvs = utils3d.numpy.image_mesh(
         points,
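
Note: a minimal sketch of what the new edge removal does, using only the calls visible in the diff above (the 0.03 / 5-degree thresholds are the values chosen by this commit; `points`, `depth`, `mask` are assumed to come from a MoGe inference call):

    import utils3d

    # Normals estimated from the point map; normals_mask marks pixels where a
    # normal could be computed from valid neighbors.
    normals, normals_mask = utils3d.numpy.points_to_normals(points, mask=mask)

    # A pixel is now dropped only if it is BOTH a depth discontinuity (relative
    # depth difference > 3%) AND a normal discontinuity (> 5 degrees). The old
    # depth-only test (rtol=0.02) also removed slanted but continuous surfaces.
    edges = (utils3d.numpy.depth_edge(depth, rtol=0.03, mask=mask)
             & utils3d.numpy.normals_edge(normals, tol=5, mask=normals_mask))
    mask = mask & ~edges
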
moge/model/moge_model.py CHANGED
@@ -15,7 +15,7 @@ import torch.version
 import utils3d
 from huggingface_hub import hf_hub_download
 
-from ..utils.geometry_torch import image_plane_uv, point_map_to_depth, gaussian_blur_2d
+from ..utils.geometry_torch import normalized_view_plane_uv, recover_focal_shift, gaussian_blur_2d
 from .utils import wrap_dinov2_attention_with_sdpa, wrap_module_with_gradient_checkpointing, unwrap_module_with_gradient_checkpointing
 from ..utils.tools import timeit
 
@@ -121,7 +121,7 @@ class Head(nn.Module):
         # (patch_h, patch_w) -> (patch_h * 2, patch_w * 2) -> (patch_h * 4, patch_w * 4) -> (patch_h * 8, patch_w * 8)
         for i, block in enumerate(self.upsample_blocks):
             # UV coordinates is for awareness of image aspect ratio
-            uv = image_plane_uv(width=x.shape[-1], height=x.shape[-2], aspect_ratio=img_w / img_h, dtype=x.dtype, device=x.device)
+            uv = normalized_view_plane_uv(width=x.shape[-1], height=x.shape[-2], aspect_ratio=img_w / img_h, dtype=x.dtype, device=x.device)
             uv = uv.permute(2, 0, 1).unsqueeze(0).expand(x.shape[0], -1, -1, -1)
             x = torch.cat([x, uv], dim=1)
             for layer in block:
@@ -129,7 +129,7 @@ class Head(nn.Module):
 
         # (patch_h * 8, patch_w * 8) -> (img_h, img_w)
         x = F.interpolate(x, (img_h, img_w), mode="bilinear", align_corners=False)
-        uv = image_plane_uv(width=x.shape[-1], height=x.shape[-2], aspect_ratio=img_w / img_h, dtype=x.dtype, device=x.device)
+        uv = normalized_view_plane_uv(width=x.shape[-1], height=x.shape[-2], aspect_ratio=img_w / img_h, dtype=x.dtype, device=x.device)
         uv = uv.permute(2, 0, 1).unsqueeze(0).expand(x.shape[0], -1, -1, -1)
         x = torch.cat([x, uv], dim=1)
 
@@ -301,6 +301,7 @@ class MoGeModel(nn.Module):
         force_projection: bool = True,
         resolution_level: int = 9,
         apply_mask: bool = True,
+        fov_x: Union[Number, torch.Tensor] = None
     ) -> Dict[str, torch.Tensor]:
         """
         User-friendly inference function
@@ -308,7 +309,9 @@ class MoGeModel(nn.Module):
         ### Parameters
         - `image`: input image tensor of shape (B, 3, H, W) or (3, H, W)
         - `resolution_level`: the resolution level to use for the output point map in 0-9. Default: 9 (highest)
-        - `interpolation_mode`: interpolation mode for the output points map. Default: 'bilinear'.
+        - `force_projection`: if True, the output point map will be computed using the actual depth map. Default: True
+        - `apply_mask`: if True, the output point map will be masked using the predicted mask. Default: True
+        - `fov_x`: the horizontal camera FoV in degrees. If None, it will be inferred from the predicted point map. Default: None
 
         ### Returns
 
@@ -325,6 +328,7 @@ class MoGeModel(nn.Module):
 
         original_height, original_width = image.shape[-2:]
         area = original_height * original_width
+        aspect_ratio = original_width / original_height
 
         min_area, max_area = self.trained_area_range
         expected_area = min_area + (max_area - min_area) * (resolution_level / 9)
@@ -336,15 +340,24 @@ class MoGeModel(nn.Module):
         output = self.forward(image)
         points, mask = output['points'], output.get('mask', None)
 
-        # Get camera-origin-centered point map
-        depth, fov_x, fov_y, z_shift = point_map_to_depth(points, None if mask is None else mask > 0.5)
-        intrinsics = utils3d.torch.intrinsics_from_fov_xy(fov_x, fov_y)
+        # Get camera-space point map. (Focal here is the focal length relative to half the image diagonal)
+        if fov_x is None:
+            focal, shift = recover_focal_shift(points, None if mask is None else mask > 0.5)
+        else:
+            focal = aspect_ratio / (1 + aspect_ratio ** 2) ** 0.5 / torch.tan(torch.deg2rad(torch.as_tensor(fov_x, device=points.device, dtype=points.dtype) / 2))
+            if focal.ndim == 0:
+                focal = focal[None].expand(points.shape[0])
+            _, shift = recover_focal_shift(points, None if mask is None else mask > 0.5, focal=focal)
+        fx = focal / 2 * (1 + aspect_ratio ** 2) ** 0.5 / aspect_ratio
+        fy = focal / 2 * (1 + aspect_ratio ** 2) ** 0.5
+        intrinsics = utils3d.torch.intrinsics_from_focal_center(fx, fy, 0.5, 0.5)
+        depth = points[..., 2] + shift[..., None, None]
 
-        # If projection constraint is forces, recompute the point map using the actual depth map
+        # If projection constraint is forced, recompute the point map using the actual depth map
         if force_projection:
             points = utils3d.torch.unproject_cv(utils3d.torch.image_uv(width=expected_width, height=expected_height, dtype=points.dtype, device=points.device), depth, extrinsics=None, intrinsics=intrinsics[..., None, :, :])
         else:
-            points = points + torch.stack([torch.zeros_like(z_shift), torch.zeros_like(z_shift), z_shift], dim=-1)[..., None, None, :]
+            points = points + torch.stack([torch.zeros_like(shift), torch.zeros_like(shift), shift], dim=-1)[..., None, None, :]
 
         # Resize the output to the original resolution
         if expected_area != area:
@@ -373,4 +386,4 @@ class MoGeModel(nn.Module):
         if self.output_mask:
             return_dict['mask'] = mask > 0.5
 
-        return return_dict
+        return return_dict
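
Note: a self-contained, hedged check of the focal conventions in the `fov_x` branch above (focal is measured relative to half the image diagonal; `fx`/`fy` are normalized by image width/height; the 4:3 aspect ratio and 60-degree FoV are made-up numbers for illustration):

    import math

    aspect_ratio = 4 / 3
    fov_x = math.radians(60)

    # width / diagonal = ar / sqrt(1 + ar^2), and tan(fov_x / 2) = (width / 2) / f_pixels,
    # so the diagonal-normalized focal used by this commit is:
    focal = aspect_ratio / (1 + aspect_ratio ** 2) ** 0.5 / math.tan(fov_x / 2)

    # Normalized intrinsics entries (focal lengths divided by width and height):
    fx = focal / 2 * (1 + aspect_ratio ** 2) ** 0.5 / aspect_ratio
    fy = focal / 2 * (1 + aspect_ratio ** 2) ** 0.5

    # Round trip: fx reduces to 1 / (2 * tan(fov_x / 2)), so the horizontal FoV is recovered.
    assert abs(2 * math.atan(0.5 / fx) - fov_x) < 1e-9
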
moge/utils/geometry_numpy.py CHANGED
@@ -23,7 +23,7 @@ def harmonic_mean_numpy(x: np.ndarray, w: np.ndarray = None, axis: Union[int, Tu
     return 1 / (weighted_mean_numpy(1 / (x + eps), w, axis=axis, keepdims=keepdims, eps=eps) + eps)
 
 
-def image_plane_uv_numpy(width: int, height: int, aspect_ratio: float = None, dtype: np.dtype = np.float32) -> np.ndarray:
+def normalized_view_plane_uv_numpy(width: int, height: int, aspect_ratio: float = None, dtype: np.dtype = np.float32) -> np.ndarray:
     "UV with left-top corner as (-width / diagonal, -height / diagonal) and right-bottom corner as (width / diagonal, height / diagonal)"
     if aspect_ratio is None:
         aspect_ratio = width / height
@@ -52,7 +52,27 @@ def intrinsics_to_fov_numpy(intrinsics: np.ndarray) -> Tuple[np.ndarray, np.ndar
     return fov_x, fov_y
 
 
-def solve_optimal_shift_focal(uv: np.ndarray, xyz: np.ndarray, ransac_iters: int = None, ransac_hypothetical_size: float = 0.1, ransac_threshold: float = 0.1):
+def point_map_to_depth_legacy_numpy(points: np.ndarray):
+    height, width = points.shape[-3:-1]
+    diagonal = (height ** 2 + width ** 2) ** 0.5
+    uv = normalized_view_plane_uv_numpy(width, height, dtype=points.dtype)  # (H, W, 2)
+    _, uv = np.broadcast_arrays(points[..., :2], uv)
+
+    # Solve least squares problem
+    b = (uv * points[..., 2:]).reshape(*points.shape[:-3], -1)  # (..., H * W * 2)
+    A = np.stack([points[..., :2], -uv], axis=-1).reshape(*points.shape[:-3], -1, 2)  # (..., H * W * 2, 2)
+
+    M = A.swapaxes(-2, -1) @ A
+    solution = (np.linalg.inv(M + 1e-6 * np.eye(2)) @ (A.swapaxes(-2, -1) @ b[..., None])).squeeze(-1)
+    focal, shift = solution
+
+    depth = points[..., 2] + shift[..., None, None]
+    fov_x = np.arctan(width / diagonal / focal) * 2
+    fov_y = np.arctan(height / diagonal / focal) * 2
+    return depth, fov_x, fov_y, shift
+
+
+def solve_optimal_focal_shift(uv: np.ndarray, xyz: np.ndarray):
     "Solve `min |focal * xy / (z + shift) - uv|` with respect to shift and focal"
     from scipy.optimize import least_squares
     uv, xy, z = uv.reshape(-1, 2), xyz[..., :2].reshape(-1, 2), xyz[..., 2].reshape(-1)
@@ -63,44 +83,39 @@ def solve_optimal_shift_focal(uv: np.ndarray, xyz: np.ndarray, ransac_iters: int
         err = (f * xy_proj - uv).ravel()
         return err
 
-    initial_shift = 0 #-z.min(keepdims=True) + 1.0
-
-    if ransac_iters is None:
-        solution = least_squares(partial(fn, uv, xy, z), x0=initial_shift, ftol=1e-3, method='lm')
-        optim_shift = solution['x'].squeeze().astype(np.float32)
-    else:
-        best_err, best_shift = np.inf, None
-        for _ in range(ransac_iters):
-            maybe_inliers = np.random.choice(len(z), size=int(ransac_hypothetical_size * len(z)), replace=False)
-            solution = least_squares(partial(fn, uv[maybe_inliers], xy[maybe_inliers], z[maybe_inliers]), x0=initial_shift, ftol=1e-3, method='lm')
-            maybe_shift = solution['x'].squeeze().astype(np.float32)
-            confirmed_inliers = np.linalg.norm(fn(uv, xy, z, maybe_shift).reshape(-1, 2), axis=-1) < ransac_threshold
-            if confirmed_inliers.sum() > 10:
-                solution = least_squares(partial(fn, uv[confirmed_inliers], xy[confirmed_inliers], z[confirmed_inliers]), x0=maybe_shift, ftol=1e-3, method='lm')
-                better_shift = solution['x'].squeeze().astype(np.float32)
-            else:
-                better_shift = maybe_shift
-            err = np.linalg.norm(fn(uv, xy, z, better_shift).reshape(-1, 2), axis=-1).clip(max=ransac_threshold).mean()
-            if err < best_err:
-                best_err, best_shift = err, better_shift
-                initial_shift = best_shift
-
-        optim_shift = best_shift
+    solution = least_squares(partial(fn, uv, xy, z), x0=0, ftol=1e-3, method='lm')
+    optim_shift = solution['x'].squeeze().astype(np.float32)
 
     xy_proj = xy / (z + optim_shift)[:, None]
-    optim_focal = (xy_proj * uv).sum() / (xy_proj * xy_proj).sum()
+    optim_focal = (xy_proj * uv).sum() / np.square(xy_proj).sum()
 
     return optim_shift, optim_focal
 
 
-def point_map_to_depth_numpy(points: np.ndarray, mask: np.ndarray = None, downsample_size: Tuple[int, int] = (64, 64)):
+def solve_optimal_shift(uv: np.ndarray, xyz: np.ndarray, focal: float):
+    "Solve `min |focal * xy / (z + shift) - uv|` with respect to shift"
+    from scipy.optimize import least_squares
+    uv, xy, z = uv.reshape(-1, 2), xyz[..., :2].reshape(-1, 2), xyz[..., 2].reshape(-1)
+
+    def fn(uv: np.ndarray, xy: np.ndarray, z: np.ndarray, shift: np.ndarray):
+        xy_proj = xy / (z + shift)[:, None]
+        err = (focal * xy_proj - uv).ravel()
+        return err
+
+    solution = least_squares(partial(fn, uv, xy, z), x0=0, ftol=1e-3, method='lm')
+    optim_shift = solution['x'].squeeze().astype(np.float32)
+
+    return optim_shift
+
+
+def recover_focal_shift_numpy(points: np.ndarray, mask: np.ndarray = None, focal: float = None, downsample_size: Tuple[int, int] = (64, 64)):
     import cv2
     assert points.shape[-1] == 3, "Points should (H, W, 3)"
 
     height, width = points.shape[-3], points.shape[-2]
     diagonal = (height ** 2 + width ** 2) ** 0.5
 
-    uv = image_plane_uv_numpy(width=width, height=height)
+    uv = normalized_view_plane_uv_numpy(width=width, height=height)
 
     if mask is None:
         points_lr = cv2.resize(points, downsample_size, interpolation=cv2.INTER_LINEAR).reshape(-1, 3)
@@ -112,13 +127,12 @@ def point_map_to_depth_numpy(points: np.ndarray, mask: np.ndarray = None, downsa
     if points_lr.size == 0:
         return np.zeros((height, width)), 0, 0, 0
 
-    optim_shift, optim_focal = solve_optimal_shift_focal(uv_lr, points_lr, ransac_iters=None)
+    if focal is None:
+        focal, shift = solve_optimal_focal_shift(uv_lr, points_lr)
+    else:
+        shift = solve_optimal_shift(uv_lr, points_lr, focal)
 
-    fov_x = 2 * np.arctan(width / diagonal / optim_focal)
-    fov_y = 2 * np.arctan(height / diagonal / optim_focal)
-
-    depth = points[:, :, 2] + optim_shift
-    return depth, fov_x, fov_y, optim_shift
+    return focal, shift
 
 
 def mask_aware_nearest_resize_numpy(mask: np.ndarray, target_width: int, target_height: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
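
Note: a hedged synthetic sanity check for the new numpy solvers (assuming the module path `moge/utils/geometry_numpy.py` as in this diff): a point map constructed to project exactly under a known focal/shift should be recovered by `solve_optimal_focal_shift`.

    import numpy as np
    from moge.utils.geometry_numpy import normalized_view_plane_uv_numpy, solve_optimal_focal_shift

    rng = np.random.default_rng(0)
    height, width = 48, 64
    true_focal, true_shift = 1.2, 0.3

    # Build a point map satisfying focal * xy / (z + shift) == uv exactly.
    uv = normalized_view_plane_uv_numpy(width=width, height=height)      # (H, W, 2)
    depth = rng.uniform(1.0, 3.0, size=(height, width))
    points = np.concatenate([uv * depth[..., None] / true_focal,
                             (depth - true_shift)[..., None]], axis=-1)  # (H, W, 3)

    shift, focal = solve_optimal_focal_shift(uv, points)
    assert abs(shift - true_shift) < 1e-2 and abs(focal - true_focal) < 1e-2
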
moge/utils/geometry_torch.py CHANGED
@@ -10,7 +10,7 @@ import torch.types
 import utils3d
 
 from .tools import timeit
-from .geometry_numpy import solve_optimal_shift_focal
+from .geometry_numpy import solve_optimal_focal_shift, solve_optimal_shift
 
 
 def weighted_mean(x: torch.Tensor, w: torch.Tensor = None, dim: Union[int, torch.Size] = None, keepdim: bool = False, eps: float = 1e-7) -> torch.Tensor:
@@ -37,7 +37,7 @@ def geometric_mean(x: torch.Tensor, w: torch.Tensor = None, dim: Union[int, torc
     return weighted_mean(x.add(eps).log(), w, dim=dim, keepdim=keepdim, eps=eps).exp()
 
 
-def image_plane_uv(width: int, height: int, aspect_ratio: float = None, dtype: torch.dtype = None, device: torch.device = None) -> torch.Tensor:
+def normalized_view_plane_uv(width: int, height: int, aspect_ratio: float = None, dtype: torch.dtype = None, device: torch.device = None) -> torch.Tensor:
     "UV with left-top corner as (-width / diagonal, -height / diagonal) and right-bottom corner as (width / diagonal, height / diagonal)"
     if aspect_ratio is None:
         aspect_ratio = width / height
@@ -61,23 +61,6 @@ def gaussian_blur_2d(input: torch.Tensor, kernel_size: int, sigma: float) -> tor
     return input
 
 
-def split_batch_fwd(fn: Callable, chunk_size: int, *args, **kwargs):
-    batch_size = next(x for x in (*args, *kwargs.values()) if isinstance(x, torch.Tensor)).shape[0]
-    n_chunks = batch_size // chunk_size + (batch_size % chunk_size > 0)
-    splited_args = tuple(arg.split(chunk_size, dim=0) if isinstance(arg, torch.Tensor) else [arg] * n_chunks for arg in args)
-    splited_kwargs = {k: [v.split(chunk_size, dim=0) if isinstance(v, torch.Tensor) else [v] * n_chunks] for k, v in kwargs.items()}
-    results = []
-    for i in range(n_chunks):
-        chunk_args = tuple(arg[i] for arg in splited_args)
-        chunk_kwargs = {k: v[i] for k, v in splited_kwargs.items()}
-        results.append(fn(*chunk_args, **chunk_kwargs))
-
-    if isinstance(results[0], tuple):
-        return tuple(torch.cat(r, dim=0) for r in zip(*results))
-    else:
-        return torch.cat(results, dim=0)
-
-
 def focal_to_fov(focal: torch.Tensor):
     return 2 * torch.atan(0.5 / focal)
 
@@ -104,7 +87,7 @@ def intrinsics_to_fov(intrinsics: torch.Tensor):
 def point_map_to_depth_legacy(points: torch.Tensor):
     height, width = points.shape[-3:-1]
     diagonal = (height ** 2 + width ** 2) ** 0.5
-    uv = image_plane_uv(width, height, dtype=points.dtype, device=points.device)  # (H, W, 2)
+    uv = normalized_view_plane_uv(width, height, dtype=points.dtype, device=points.device)  # (H, W, 2)
 
     # Solve least squares problem
     b = (uv * points[..., 2:]).flatten(-3, -1)  # (..., H * W * 2)
@@ -120,7 +103,13 @@ def point_map_to_depth_legacy(points: torch.Tensor):
     return depth, fov_x, fov_y, shift
 
 
-def point_map_to_depth(points: torch.Tensor, mask: torch.Tensor = None, downsample_size: Tuple[int, int] = (64, 64)):
+def view_plane_uv_to_focal(uv: torch.Tensor):
+    normed_uv = normalized_view_plane_uv(width=uv.shape[-2], height=uv.shape[-3], device=uv.device, dtype=uv.dtype)
+    focal = (uv * normed_uv).sum() / uv.square().sum().add(1e-12)
+    return focal
+
+
+def recover_focal_shift(points: torch.Tensor, mask: torch.Tensor = None, focal: torch.Tensor = None, downsample_size: Tuple[int, int] = (64, 64)):
     """
     Recover the depth map and FoV from a point map with unknown z shift and focal.
 
@@ -131,13 +120,13 @@ def point_map_to_depth(points: torch.Tensor, mask: torch.Tensor = None, downsamp
 
     ### Parameters:
    - `points: torch.Tensor` of shape (..., H, W, 3)
+    - `mask: torch.Tensor` of shape (..., H, W). Optional.
+    - `focal: torch.Tensor` of shape (...). Optional.
     - `downsample_size: Tuple[int, int]` in (height, width), the size of the downsampled map. Downsampling produces approximate solution and is efficient for large maps.
 
     ### Returns:
-    - `depth: torch.Tensor` of shape (..., H, W)
-    - `fov_x: torch.Tensor` of shape (...)
-    - `fov_y: torch.Tensor` of shape (...)
-    - `shift: torch.Tensor` of shape (...), the z shift, making `depth = points[..., 2] + shift`
+    - `focal`: torch.Tensor of shape (...) the estimated focal length, relative to the half diagonal of the map
+    - `shift`: torch.Tensor of shape (...) Z-axis shift to translate the point map to camera space
     """
     shape = points.shape
     height, width = points.shape[-3], points.shape[-2]
@@ -145,7 +134,8 @@ def point_map_to_depth(points: torch.Tensor, mask: torch.Tensor = None, downsamp
 
     points = points.reshape(-1, *shape[-3:])
     mask = None if mask is None else mask.reshape(-1, *shape[-3:-1])
-    uv = image_plane_uv(width, height, dtype=points.dtype, device=points.device)  # (H, W, 2)
+    focal = focal.reshape(-1) if focal is not None else None
+    uv = normalized_view_plane_uv(width, height, dtype=points.dtype, device=points.device)  # (H, W, 2)
 
     points_lr = F.interpolate(points.permute(0, 3, 1, 2), downsample_size, mode='nearest').permute(0, 2, 3, 1)
     uv_lr = F.interpolate(uv.unsqueeze(0).permute(0, 3, 1, 2), downsample_size, mode='nearest').squeeze(0).permute(1, 2, 0)
@@ -153,26 +143,26 @@ def point_map_to_depth(points: torch.Tensor, mask: torch.Tensor = None, downsamp
 
     uv_lr_np = uv_lr.cpu().numpy()
     points_lr_np = points_lr.detach().cpu().numpy()
+    focal_np = focal.cpu().numpy() if focal is not None else None
     mask_lr_np = None if mask is None else mask_lr.cpu().numpy()
     optim_shift, optim_focal = [], []
     for i in range(points.shape[0]):
         points_lr_i_np = points_lr_np[i] if mask is None else points_lr_np[i][mask_lr_np[i]]
         uv_lr_i_np = uv_lr_np if mask is None else uv_lr_np[mask_lr_np[i]]
-        optim_shift_i, optim_focal_i = solve_optimal_shift_focal(uv_lr_i_np, points_lr_i_np, ransac_iters=None)
+        if focal is None:
+            optim_shift_i, optim_focal_i = solve_optimal_focal_shift(uv_lr_i_np, points_lr_i_np)
+            optim_focal.append(float(optim_focal_i))
+        else:
+            optim_shift_i = solve_optimal_shift(uv_lr_i_np, points_lr_i_np, focal_np[i])
         optim_shift.append(float(optim_shift_i))
-        optim_focal.append(float(optim_focal_i))
-    optim_shift = torch.tensor(optim_shift, device=points.device, dtype=points.dtype)
-    optim_focal = torch.tensor(optim_focal, device=points.device, dtype=points.dtype)
+    optim_shift = torch.tensor(optim_shift, device=points.device, dtype=points.dtype).reshape(shape[:-3])
 
-    fov_x = 2 * torch.atan(width / diagonal / optim_focal)
-    fov_y = 2 * torch.atan(height / diagonal / optim_focal)
-
-    depth = (points[..., 2] + optim_shift[:, None, None]).reshape(shape[:-1])
-    fov_x = fov_x.reshape(shape[:-3])
-    fov_y = fov_y.reshape(shape[:-3])
-    optim_shift = optim_shift.reshape(shape[:-3])
+    if focal is None:
+        optim_focal = torch.tensor(optim_focal, device=points.device, dtype=points.dtype).reshape(shape[:-3])
+    else:
+        optim_focal = focal.reshape(shape[:-3])
 
-    return depth, fov_x, fov_y, optim_shift
+    return optim_focal, optim_shift
 
 
 def mask_aware_nearest_resize(mask: torch.BoolTensor, target_width: int, target_height: int) -> Tuple[torch.LongTensor, torch.LongTensor, torch.BoolTensor]:
@@ -227,5 +217,3 @@ def mask_aware_nearest_resize(mask: torch.BoolTensor, target_width: int, target_
     batch_indices = [torch.arange(n, device=device).reshape([1] * i + [n] + [1] * (mask.dim() - i - 1)) for i, n in enumerate(mask.shape[:-2])]
 
     return (*batch_indices, nearest_i, nearest_j), target_mask
-
-
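
Note: the torch-side counterpart, sketched under the same assumptions (exact synthetic data; `recover_focal_shift` returns `(focal, shift)` with focal relative to half the diagonal):

    import torch
    from moge.utils.geometry_torch import normalized_view_plane_uv, recover_focal_shift

    H, W = 60, 80
    true_focal, true_shift = 1.1, 0.25

    uv = normalized_view_plane_uv(W, H)                                  # (H, W, 2)
    depth = torch.rand(1, H, W) * 2 + 1
    points = torch.cat([uv * depth[..., None] / true_focal,
                        (depth - true_shift)[..., None]], dim=-1)        # (1, H, W, 3)

    focal, shift = recover_focal_shift(points)                           # each of shape (1,)
    assert abs(focal.item() - true_focal) < 1e-2 and abs(shift.item() - true_shift) < 1e-2

    # With a known focal, only the shift is re-estimated:
    _, shift_only = recover_focal_shift(points, focal=focal)
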
 
 
moge/utils/io.py CHANGED
@@ -345,3 +345,47 @@ def read_rgbxyz(file: Union[IO, str, Path]) -> Tuple[np.ndarray, np.ndarray, np.
     mask = np.ones(image.shape[:2], dtype=bool)
 
     return image, points, mask
+
+
+def save_glb(
+    save_path: Union[str, os.PathLike],
+    vertices: np.ndarray,
+    faces: np.ndarray,
+    vertex_uvs: np.ndarray,
+    texture: np.ndarray,
+):
+    import trimesh
+    import trimesh.visual
+    from PIL import Image
+
+    trimesh.Trimesh(
+        vertices=vertices,
+        faces=faces,
+        visual=trimesh.visual.texture.TextureVisuals(
+            uv=vertex_uvs,
+            material=trimesh.visual.material.PBRMaterial(
+                baseColorTexture=Image.fromarray(texture),
+                metallicFactor=0.5,
+                roughnessFactor=1.0
+            )
+        ),
+        process=False
+    ).export(save_path)
+
+
+def save_ply(
+    save_path: Union[str, os.PathLike],
+    vertices: np.ndarray,
+    faces: np.ndarray,
+    vertex_colors: np.ndarray,
+):
+    import trimesh
+    import trimesh.visual
+    from PIL import Image
+
+    trimesh.Trimesh(
+        vertices=vertices,
+        faces=faces,
+        vertex_colors=vertex_colors,
+        process=False
+    ).export(save_path)
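
Note: the two new writers are thin trimesh wrappers; a minimal usage sketch (the single-quad arrays below are hypothetical; in the demo such arrays come from `utils3d.numpy.image_mesh`):

    import numpy as np
    from moge.utils.io import save_glb, save_ply

    vertices = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]], dtype=np.float32)
    faces = np.array([[0, 1, 2], [0, 2, 3]], dtype=np.int32)
    vertex_uvs = np.array([[0, 0], [1, 0], [1, 1], [0, 1]], dtype=np.float32)
    vertex_colors = np.full((4, 3), 200, dtype=np.uint8)
    texture = np.full((8, 8, 3), 128, dtype=np.uint8)   # dummy RGB texture

    save_glb('mesh.glb', vertices, faces, vertex_uvs, texture)   # textured GLB
    save_ply('mesh.ply', vertices, faces, vertex_colors)         # per-vertex-color PLY
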
utils3d/README.md ADDED
@@ -0,0 +1,3 @@
+# utils3d
+
+This is a collection of utility functions for 3D computer vision tasks copied from https://github.com/EasternJournalist/utils3d.
utils3d/io/__init__.py CHANGED
@@ -1,4 +1,3 @@
-from .wavefront_obj import *
+from .obj import *
 from .colmap import *
 from .ply import *
-from .glb import *
utils3d/io/colmap.py CHANGED
@@ -33,7 +33,7 @@ def write_extrinsics_as_colmap(file: Union[str, Path], extrinsics: np.ndarray, i
     with open(file, 'w') as fp:
         print("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME", file=fp)
         for i, (quat, t, name, camera_id) in enumerate(zip(quats.tolist(), trans.tolist(), image_names, camera_ids)):
-            # Colmap has wxyz order while scipy.spatial.transform.Rotation has xyzw order. Haha, wcnm.
+            # Colmap has wxyz order while scipy.spatial.transform.Rotation has xyzw order.
             qx, qy, qz, qw = quat
             tx, ty, tz = t
             print(f'{i + 1} {qw:f} {qx:f} {qy:f} {qz:f} {tx:f} {ty:f} {tz:f} {camera_id:d} {name}', file=fp)
utils3d/io/obj.py ADDED
@@ -0,0 +1,146 @@
+from io import TextIOWrapper
+from typing import Dict, Any, Union, Iterable
+import numpy as np
+from pathlib import Path
+
+__all__ = [
+    'read_obj',
+    'write_obj',
+    'simple_write_obj'
+]
+
+def read_obj(
+    file: Union[str, Path, TextIOWrapper],
+    encoding: Union[str, None] = None,
+    ignore_unknown: bool = False
+):
+    """
+    Read a wavefront .obj file, without preprocessing.
+
+    Why bother having this read_obj() when we already have libraries like `trimesh`?
+    This function reads the raw format from the .obj file and keeps the order of vertices and faces,
+    while trimesh applies modifications like merging/splitting vertices, which can break the order of vertices and faces.
+    Those libraries are commonly aimed at geometry processing and rendering, supporting various formats.
+    If you want mesh geometry processing, you may turn to `trimesh` for more features.
+
+    ### Parameters
+    `file` (str, Path, TextIOWrapper): filepath or file object
+    `encoding` (str, optional): text encoding used to open the file
+
+    ### Returns
+    obj (dict): A dict containing .obj components
+    {
+        'mtllib': [],
+        'v': [[0.1, 0.2, 1.0], [1.2, 0.0, 0.0], ...],
+        'vt': [[0.5, 0.5], ...],
+        'vn': [[0., 0.7, 0.7], [0., -0.7, 0.7], ...],
+        'f': [[0, 1, 2], [2, 3, 4], ...],
+        'usemtl': [{'name': 'mtl1', 'f': 7}]
+    }
+    """
+    if hasattr(file, 'read'):
+        lines = file.read().splitlines()
+    else:
+        with open(file, 'r', encoding=encoding) as fp:
+            lines = fp.read().splitlines()
+    mtllib = []
+    v, vt, vn, vp = [], [], [], []  # Vertex coordinates, vertex texture coordinates, vertex normals, vertex parameters
+    f, ft, fn = [], [], []          # Face indices, face texture indices, face normal indices
+    o = []
+    s = []
+    usemtl = []
+
+    def pad(l: list, n: Any):
+        return l + [n] * (3 - len(l))
+
+    for i, line in enumerate(lines):
+        sq = line.strip().split()
+        if len(sq) == 0:
+            continue
+        if sq[0] == 'v':
+            assert 4 <= len(sq) <= 5, f'Invalid format of line {i}: {line}'
+            v.append([float(e) for e in sq[1:]][:3])
+        elif sq[0] == 'vt':
+            assert 3 <= len(sq) <= 4, f'Invalid format of line {i}: {line}'
+            vt.append([float(e) for e in sq[1:]][:2])
+        elif sq[0] == 'vn':
+            assert len(sq) == 4, f'Invalid format of line {i}: {line}'
+            vn.append([float(e) for e in sq[1:]])
+        elif sq[0] == 'vp':
+            assert 2 <= len(sq) <= 4, f'Invalid format of line {i}: {line}'
+            vp.append(pad([float(e) for e in sq[1:]], 0))
+        elif sq[0] == 'f':
+            spliting = [pad([int(j) - 1 for j in e.split('/')], -1) for e in sq[1:]]
+            f.append([e[0] for e in spliting])
+            ft.append([e[1] for e in spliting])
+            fn.append([e[2] for e in spliting])
+        elif sq[0] == 'usemtl':
+            assert len(sq) == 2
+            usemtl.append((sq[1], len(f)))
+        elif sq[0] == 'o':
+            assert len(sq) == 2
+            o.append((sq[1], len(f)))
+        elif sq[0] == 's':
+            s.append((sq[1], len(f)))
+        elif sq[0] == 'mtllib':
+            assert len(sq) == 2
+            mtllib.append(sq[1])
+        elif sq[0][0] == '#':
+            continue
+        else:
+            if not ignore_unknown:
+                raise Exception(f'Unknown keyword {sq[0]}')
+
+    min_poly_vertices = min(len(f) for f in f)
+    max_poly_vertices = max(len(f) for f in f)
+
+    return {
+        'mtllib': mtllib,
+        'v': np.array(v, dtype=np.float32),
+        'vt': np.array(vt, dtype=np.float32),
+        'vn': np.array(vn, dtype=np.float32),
+        'vp': np.array(vp, dtype=np.float32),
+        'f': np.array(f, dtype=np.int32) if min_poly_vertices == max_poly_vertices else f,
+        'ft': np.array(ft, dtype=np.int32) if min_poly_vertices == max_poly_vertices else ft,
+        'fn': np.array(fn, dtype=np.int32) if min_poly_vertices == max_poly_vertices else fn,
+        'o': o,
+        's': s,
+        'usemtl': usemtl,
+    }
+
+
+def write_obj(
+    file: Union[str, Path],
+    obj: Dict[str, Any],
+    encoding: Union[str, None] = None
+):
+    with open(file, 'w', encoding=encoding) as fp:
+        for k in ['v', 'vt', 'vn', 'vp']:
+            if k not in obj:
+                continue
+            for v in obj[k]:
+                print(k, *map(float, v), file=fp)
+        for f in obj['f']:
+            # Each face element is either a bare vertex index or a (v, vt, vn) tuple joined with '/'
+            print('f', *('/'.join(map(str, i)) if isinstance(i, Iterable) else i for i in f), file=fp)
+
+
+def simple_write_obj(
+    file: Union[str, Path],
+    vertices: np.ndarray,
+    faces: np.ndarray,
+    encoding: Union[str, None] = None
+):
+    """
+    Write a wavefront .obj file, without preprocessing.
+
+    Args:
+        vertices (np.ndarray): [N, 3]
+        faces (np.ndarray): [T, 3]
+        file (Any): filepath
+        encoding (str, optional):
+    """
+    with open(file, 'w', encoding=encoding) as fp:
+        for v in vertices:
+            print('v', *map(float, v), file=fp)
+        for f in faces:
+            print('f', *map(int, f + 1), file=fp)
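
Note: a hedged round-trip sketch for the new reader/writer (`read_obj` returns 0-based `f` indices; `simple_write_obj` writes the 1-based indices the .obj format expects):

    import numpy as np
    from utils3d.io.obj import read_obj, simple_write_obj

    vertices = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]], dtype=np.float32)
    faces = np.array([[0, 1, 2]], dtype=np.int32)

    simple_write_obj('tri.obj', vertices, faces)
    obj = read_obj('tri.obj')
    assert np.allclose(obj['v'], vertices) and (obj['f'] == faces).all()
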
utils3d/numpy/__init__.py CHANGED
@@ -37,6 +37,7 @@ __modules_all__ = {
     'max_pool_2d',
     'max_pool_nd',
     'depth_edge',
+    'normals_edge',
     'depth_aliasing',
     'interpolate',
     'image_scrcoord',
@@ -45,10 +46,11 @@ __modules_all__ = {
     'image_pixel',
     'image_mesh',
     'image_mesh_from_depth',
-    'depth_to_normal',
-    'point_to_normal',
+    'depth_to_normals',
+    'points_to_normals',
     'chessboard',
     'cube',
+    'icosahedron',
     'square',
     'camera_frustum',
     ],
utils3d/numpy/rasterization.py CHANGED
@@ -460,10 +460,8 @@ def test():
         faces,
         attr,
         512, 512,
-        view=view,
-        projection=perspective,
+        transform=perspective @ view,
         cull_backface=True,
-        ssaa=1,
         return_depth=True,
     )
     import cv2
utils3d/numpy/transforms.py CHANGED
@@ -474,7 +474,7 @@ def uv_to_pixel(
     Returns:
         (np.ndarray): [..., 2] pixel coordinates defined in uv space, the range is (0, 1)
     """
-    pixel = uv * np.stack([width, height], axis=-1) - 0.5
+    pixel = uv * np.stack([width, height], axis=-1).astype(uv.dtype) - 0.5
     return pixel
 
 
@@ -645,7 +645,7 @@ def unproject_gl(
 @batched(2,1,2,2)
 def unproject_cv(
     uv_coord: np.ndarray,
-    depth: np.ndarray,
+    depth: np.ndarray = None,
     extrinsics: np.ndarray = None,
     intrinsics: np.ndarray = None
 ) -> np.ndarray:
@@ -665,7 +665,8 @@ def unproject_cv(
     assert intrinsics is not None, "intrinsics matrix is required"
     points = np.concatenate([uv_coord, np.ones_like(uv_coord[..., :1])], axis=-1)
     points = points @ np.linalg.inv(intrinsics).swapaxes(-1, -2)
-    points = points * depth[..., None]
+    if depth is not None:
+        points = points * depth[..., None]
     if extrinsics is not None:
         points = np.concatenate([points, np.ones_like(points[..., :1])], axis=-1)
         points = (points @ np.linalg.inv(extrinsics).swapaxes(-1, -2))[..., :3]
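
Note: with `depth` now optional, `unproject_cv` returns the un-scaled back-projection K^-1 @ [u, v, 1] of each pixel (a ray through the pixel); multiplying by depth gives the usual unprojection. A hedged sketch (the 2x4 pixel grid and normalized intrinsics are made up; it assumes the `batched` decorator passes `None` through, as it already does for `extrinsics`):

    import numpy as np
    import utils3d

    intrinsics = np.array([[1.0, 0.0, 0.5],
                           [0.0, 1.0, 0.5],
                           [0.0, 0.0, 1.0]])
    u, v = np.meshgrid(np.linspace(0.125, 0.875, 4), np.linspace(0.25, 0.75, 2))
    uv = np.stack([u, v], axis=-1)        # (2, 4, 2)
    depth = np.full((2, 4), 2.0)

    rays = utils3d.numpy.unproject_cv(uv, intrinsics=intrinsics)           # rays with z == 1
    points = utils3d.numpy.unproject_cv(uv, depth, intrinsics=intrinsics)  # scaled by depth
    assert np.allclose(points, rays * depth[..., None])
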
utils3d/numpy/utils.py CHANGED
@@ -1,6 +1,8 @@
 import numpy as np
 from typing import *
 from numbers import Number
+import warnings
+import functools
 
 from ._helpers import batched
 from . import transforms
@@ -14,6 +16,7 @@ __all__ = [
     'max_pool_2d',
     'max_pool_nd',
     'depth_edge',
+    'normals_edge',
     'depth_aliasing',
     'interpolate',
     'image_scrcoord',
@@ -22,16 +25,29 @@ __all__ = [
     'image_pixel',
     'image_mesh',
     'image_mesh_from_depth',
-    'depth_to_normal',
-    'point_to_normal',
+    'depth_to_normals',
+    'points_to_normals',
     'chessboard',
     'cube',
+    'icosahedron',
     'square',
     'camera_frustum',
     'to4x4'
 ]
 
 
+def no_runtime_warnings(fn):
+    """
+    Disable runtime warnings in numpy.
+    """
+    @functools.wraps(fn)
+    def wrapper(*args, **kwargs):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            return fn(*args, **kwargs)
+    return wrapper
+
+
 def sliding_window_1d(x: np.ndarray, window_size: int, stride: int, axis: int = -1):
     """
     Return a view of the input array with a sliding window of the given kernel size and stride.
@@ -97,9 +113,10 @@ def max_pool_2d(x: np.ndarray, kernel_size: Union[int, Tuple[int, int]], stride:
     return max_pool_nd(x, kernel_size, stride, padding, axis)
 
 
+@no_runtime_warnings
 def depth_edge(depth: np.ndarray, atol: float = None, rtol: float = None, kernel_size: int = 3, mask: np.ndarray = None) -> np.ndarray:
     """
-    Compute the edge mask of a depth map. The edge is defined as the pixels whose neighbors have a large difference in depth.
+    Compute the edge mask from depth map. The edge is defined as the pixels whose neighbors have large difference in depth.
 
     Args:
         depth (np.ndarray): shape (..., height, width), linear depth map
@@ -117,11 +134,15 @@ def depth_edge(depth: np.ndarray, atol: float = None, rtol: float = None, kernel
     edge = np.zeros_like(depth, dtype=bool)
     if atol is not None:
         edge |= diff > atol
-    if rtol is not None:
-        edge |= diff / depth > rtol
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", category=RuntimeWarning)
+        if rtol is not None:
+            edge |= diff / depth > rtol
     return edge
 
 
+@no_runtime_warnings
 def depth_aliasing(depth: np.ndarray, atol: float = None, rtol: float = None, kernel_size: int = 3, mask: np.ndarray = None) -> np.ndarray:
     """
     Compute the map that indicates the aliasing of a depth map. The aliasing is defined as the pixels which are neither close to the maximum nor the minimum of their neighbors.
@@ -148,7 +169,46 @@ def depth_aliasing(depth: np.ndarray, atol: float = None, rtol: float = None, ke
         edge |= diff / depth > rtol
     return edge
 
-def point_to_normal(point: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
+
+@no_runtime_warnings
+def normals_edge(normals: np.ndarray, tol: float, kernel_size: int = 3, mask: np.ndarray = None) -> np.ndarray:
+    """
+    Compute the edge mask from normal map.
+
+    Args:
+        normal (np.ndarray): shape (..., height, width, 3), normal map
+        tol (float): tolerance in degrees
+
+    Returns:
+        edge (np.ndarray): shape (..., height, width) of dtype bool
+    """
+    assert normals.ndim >= 3 and normals.shape[-1] == 3, "normal should be of shape (..., height, width, 3)"
+    normals = normals / (np.linalg.norm(normals, axis=-1, keepdims=True) + 1e-12)
+
+    padding = kernel_size // 2
+    normals_window = sliding_window_2d(
+        np.pad(normals, (*([(0, 0)] * (normals.ndim - 3)), (padding, padding), (padding, padding), (0, 0)), mode='edge'),
+        window_size=kernel_size,
+        stride=1,
+        axis=(-3, -2)
+    )
+    if mask is None:
+        angle_diff = np.acos((normals[..., None, None] * normals_window).sum(axis=-3)).max(axis=(-2, -1))
+    else:
+        mask_window = sliding_window_2d(
+            np.pad(mask, (*([(0, 0)] * (mask.ndim - 3)), (padding, padding), (padding, padding)), mode='edge'),
+            window_size=kernel_size,
+            stride=1,
+            axis=(-3, -2)
+        )
+        angle_diff = np.where(mask_window, np.acos((normals[..., None, None] * normals_window).sum(axis=-3)), 0).max(axis=(-2, -1))
+
+    angle_diff = max_pool_2d(angle_diff, kernel_size, stride=1, padding=kernel_size // 2)
+    edge = angle_diff > np.deg2rad(tol)
+    return edge
+
+
+@no_runtime_warnings
+def points_to_normals(point: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
     """
     Calculate normal map from point map. Value range is [-1, 1]. Normal direction in OpenGL identity camera's coordinate system.
 
@@ -189,12 +249,14 @@ def points_to_normals(point: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
     normal = normal / (np.linalg.norm(normal, axis=-1, keepdims=True) + 1e-12)
 
     if has_mask:
-        return normal, valid.any(axis=0)
+        normal_mask = valid.any(axis=0)
+        normal = np.where(normal_mask[..., None], normal, 0)
+        return normal, normal_mask
     else:
         return normal
 
 
-def depth_to_normal(depth: np.ndarray, intrinsics: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
+def depth_to_normals(depth: np.ndarray, intrinsics: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
     """
     Calculate normal map from depth map. Value range is [-1, 1]. Normal direction in OpenGL identity camera's coordinate system.
 
@@ -213,7 +275,7 @@ def depth_to_normals(depth: np.ndarray, intrinsics: np.ndarray, mask: np.ndarray
     uv = image_uv(width=width, height=height, dtype=np.float32)
     pts = transforms.unproject_cv(uv, depth, intrinsics=intrinsics, extrinsics=None)
 
-    return point_to_normal(pts, mask)
+    return points_to_normals(pts, mask)
 
 def interpolate(bary: np.ndarray, tri_id: np.ndarray, attr: np.ndarray, faces: np.ndarray) -> np.ndarray:
     """Interpolate with given barycentric coordinates and triangle indices
@@ -560,3 +622,18 @@ def camera_frustum(extrinsics: np.ndarray, intrinsics: np.ndarray, depth: float
     ], dtype=np.int32)
     return vertices, edges, faces
 
+
+def icosahedron():
+    A = (1 + 5 ** 0.5) / 2
+    vertices = np.array([
+        [0, 1, A], [0, -1, A], [0, 1, -A], [0, -1, -A],
+        [1, A, 0], [-1, A, 0], [1, -A, 0], [-1, -A, 0],
+        [A, 0, 1], [A, 0, -1], [-A, 0, 1], [-A, 0, -1]
+    ], dtype=np.float32)
+    faces = np.array([
+        [0, 1, 8], [0, 8, 4], [0, 4, 5], [0, 5, 10], [0, 10, 1],
+        [3, 2, 9], [3, 9, 6], [3, 6, 7], [3, 7, 11], [3, 11, 2],
+        [1, 6, 8], [8, 9, 4], [4, 2, 5], [5, 11, 10], [10, 7, 1],
+        [2, 4, 9], [9, 8, 6], [6, 1, 7], [7, 10, 11], [11, 5, 2]
+    ], dtype=np.int32)
+    return vertices, faces
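
Note: a hedged end-to-end sketch of the renamed normal utilities, which is what app.py relies on (it assumes a NumPy version where `np.acos` exists, i.e. NumPy >= 2, since `normals_edge` uses that spelling):

    import numpy as np
    import utils3d

    # A flat plane: every recoverable normal is identical, so there are no normal edges.
    x, y = np.meshgrid(np.arange(8, dtype=np.float32), np.arange(6, dtype=np.float32))
    points = np.stack([x, y, np.ones_like(x)], axis=-1)    # (6, 8, 3)
    mask = np.ones((6, 8), dtype=bool)

    # With a mask, points_to_normals now zeroes invalid normals and also returns
    # the validity mask (the old point_to_normal returned the mask un-applied).
    normals, normals_mask = utils3d.numpy.points_to_normals(points, mask=mask)
    edges = utils3d.numpy.normals_edge(normals, tol=5, mask=normals_mask)
    assert not edges[normals_mask].any()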