YoonaAI committed
Commit 208e8a7 · 1 Parent(s): 7853fb6

Upload 6 files

lib/common/render.py ADDED
@@ -0,0 +1,392 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4
+ # holder of all proprietary rights on this computer program.
5
+ # You can only use this computer program if you have closed
6
+ # a license agreement with MPG or you get the right to use the computer
7
+ # program from someone who is authorized to grant you that right.
8
+ # Any use of the computer program without a valid license is prohibited and
9
+ # liable to prosecution.
10
+ #
11
+ # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12
+ # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13
+ # for Intelligent Systems. All rights reserved.
14
+ #
15
+ # Contact: ps-license@tuebingen.mpg.de
16
+
17
+ from pytorch3d.renderer import (
18
+ BlendParams,
19
+ blending,
20
+ look_at_view_transform,
21
+ FoVOrthographicCameras,
22
+ PointLights,
23
+ RasterizationSettings,
24
+ PointsRasterizationSettings,
25
+ PointsRenderer,
26
+ AlphaCompositor,
27
+ PointsRasterizer,
28
+ MeshRenderer,
29
+ MeshRasterizer,
30
+ SoftPhongShader,
31
+ SoftSilhouetteShader,
32
+ TexturesVertex,
33
+ )
34
+ from pytorch3d.renderer.mesh import TexturesVertex
35
+ from pytorch3d.structures import Meshes
36
+
37
+ import os, subprocess
38
+
39
+ from lib.dataset.mesh_util import SMPLX, get_visibility
40
+ import lib.common.render_utils as util
41
+ import torch
42
+ import numpy as np
43
+ from PIL import Image
44
+ from tqdm import tqdm
45
+ import cv2
46
+ import math
47
+ from termcolor import colored
48
+
49
+
50
+ def image2vid(images, vid_path):
51
+
52
+ w, h = images[0].size
53
+ videodims = (w, h)
54
+ fourcc = cv2.VideoWriter_fourcc(*'XVID')
55
+ video = cv2.VideoWriter(vid_path, fourcc, 30, videodims)
56
+ for image in images:
57
+ video.write(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR))
58
+ video.release()
59
+
60
+
61
+ def query_color(verts, faces, image, device):
62
+ """query colors from points and image
63
+
64
+ Args:
65
+ verts ([N, 3]): query vertices
66
+ faces ([M, 3]): [query faces]
67
+ image ([B, 3, H, W]): [full image]
68
+
69
+ Returns:
70
+ colors ([N, 3]): per-vertex colors in [0, 255]
71
+ """
72
+
73
+ verts = verts.float().to(device)
74
+ faces = faces.long().to(device)
75
+
76
+ (xy, z) = verts.split([2, 1], dim=1)
77
+ visibility = get_visibility(xy, z, faces[:, [0, 2, 1]]).flatten()
78
+ uv = xy.unsqueeze(0).unsqueeze(2) # [1, N, 1, 2]
79
+ uv = uv * torch.tensor([1.0, -1.0]).type_as(uv)
80
+ colors = (torch.nn.functional.grid_sample(image, uv, align_corners=True)[
81
+ 0, :, :, 0].permute(1, 0) + 1.0) * 0.5 * 255.0
82
+ colors[visibility == 0.0] = ((Meshes(verts.unsqueeze(0), faces.unsqueeze(
83
+ 0)).verts_normals_padded().squeeze(0) + 1.0) * 0.5 * 255.0)[visibility == 0.0]
84
+
85
+ return colors.detach().cpu()
86
+
87
+
88
+ class cleanShader(torch.nn.Module):
89
+ def __init__(self, device="cpu", cameras=None, blend_params=None):
90
+ super().__init__()
91
+ self.cameras = cameras
92
+ self.blend_params = blend_params if blend_params is not None else BlendParams()
93
+
94
+ def forward(self, fragments, meshes, **kwargs):
95
+ cameras = kwargs.get("cameras", self.cameras)
96
+ if cameras is None:
97
+ msg = "Cameras must be specified either at initialization \
98
+ or in the forward pass of cleanShader"
99
+
100
+ raise ValueError(msg)
101
+
102
+ # get renderer output
103
+ blend_params = kwargs.get("blend_params", self.blend_params)
104
+ texels = meshes.sample_textures(fragments)
105
+ images = blending.softmax_rgb_blend(
106
+ texels, fragments, blend_params, znear=-256, zfar=256
107
+ )
108
+
109
+ return images
110
+
111
+
112
+ class Render:
113
+ def __init__(self, size=512, device=torch.device("cuda:0")):
114
+ self.device = device
115
+ self.mesh_y_center = 100.0
116
+ self.dis = 100.0
117
+ self.scale = 1.0
118
+ self.size = size
119
+ self.cam_pos = [(0, 100, 100)]
120
+
121
+ self.mesh = None
122
+ self.deform_mesh = None
123
+ self.pcd = None
124
+ self.renderer = None
125
+ self.meshRas = None
126
+ self.type = None
127
+ self.knn = None
128
+ self.knn_inverse = None
129
+
130
+ self.smpl_seg = None
131
+ self.smpl_cmap = None
132
+
133
+ self.smplx = SMPLX()
134
+
135
+ self.uv_rasterizer = util.Pytorch3dRasterizer(self.size)
136
+
137
+ def get_camera(self, cam_id):
138
+
139
+ R, T = look_at_view_transform(
140
+ eye=[self.cam_pos[cam_id]],
141
+ at=((0, self.mesh_y_center, 0),),
142
+ up=((0, 1, 0),),
143
+ )
144
+
145
+ camera = FoVOrthographicCameras(
146
+ device=self.device,
147
+ R=R,
148
+ T=T,
149
+ znear=100.0,
150
+ zfar=-100.0,
151
+ max_y=100.0,
152
+ min_y=-100.0,
153
+ max_x=100.0,
154
+ min_x=-100.0,
155
+ scale_xyz=(self.scale * np.ones(3),),
156
+ )
157
+
158
+ return camera
159
+
160
+ def init_renderer(self, camera, type="clean_mesh", bg="gray"):
161
+
162
+ if "mesh" in type:
163
+
164
+ # rasterizer
165
+ self.raster_settings_mesh = RasterizationSettings(
166
+ image_size=self.size,
167
+ blur_radius=np.log(1.0 / 1e-4) * 1e-7,
168
+ faces_per_pixel=30,
169
+ )
170
+ self.meshRas = MeshRasterizer(
171
+ cameras=camera, raster_settings=self.raster_settings_mesh
172
+ )
173
+
174
+ if bg == "black":
175
+ blendparam = BlendParams(1e-4, 1e-4, (0.0, 0.0, 0.0))
176
+ elif bg == "white":
177
+ blendparam = BlendParams(1e-4, 1e-8, (1.0, 1.0, 1.0))
178
+ elif bg == "gray":
179
+ blendparam = BlendParams(1e-4, 1e-8, (0.5, 0.5, 0.5))
180
+
181
+ if type == "ori_mesh":
182
+
183
+ lights = PointLights(
184
+ device=self.device,
185
+ ambient_color=((0.8, 0.8, 0.8),),
186
+ diffuse_color=((0.2, 0.2, 0.2),),
187
+ specular_color=((0.0, 0.0, 0.0),),
188
+ location=[[0.0, 200.0, 0.0]],
189
+ )
190
+
191
+ self.renderer = MeshRenderer(
192
+ rasterizer=self.meshRas,
193
+ shader=SoftPhongShader(
194
+ device=self.device,
195
+ cameras=camera,
196
+ lights=lights,
197
+ blend_params=blendparam,
198
+ ),
199
+ )
200
+
201
+ if type == "silhouette":
202
+ self.raster_settings_silhouette = RasterizationSettings(
203
+ image_size=self.size,
204
+ blur_radius=np.log(1.0 / 1e-4 - 1.0) * 5e-5,
205
+ faces_per_pixel=50,
206
+ cull_backfaces=True,
207
+ )
208
+
209
+ self.silhouetteRas = MeshRasterizer(
210
+ cameras=camera, raster_settings=self.raster_settings_silhouette
211
+ )
212
+ self.renderer = MeshRenderer(
213
+ rasterizer=self.silhouetteRas, shader=SoftSilhouetteShader()
214
+ )
215
+
216
+ if type == "pointcloud":
217
+ self.raster_settings_pcd = PointsRasterizationSettings(
218
+ image_size=self.size, radius=0.006, points_per_pixel=10
219
+ )
220
+
221
+ self.pcdRas = PointsRasterizer(
222
+ cameras=camera, raster_settings=self.raster_settings_pcd
223
+ )
224
+ self.renderer = PointsRenderer(
225
+ rasterizer=self.pcdRas,
226
+ compositor=AlphaCompositor(background_color=(0, 0, 0)),
227
+ )
228
+
229
+ if type == "clean_mesh":
230
+
231
+ self.renderer = MeshRenderer(
232
+ rasterizer=self.meshRas,
233
+ shader=cleanShader(
234
+ device=self.device, cameras=camera, blend_params=blendparam
235
+ ),
236
+ )
237
+
238
+ def VF2Mesh(self, verts, faces):
239
+
240
+ if not torch.is_tensor(verts):
241
+ verts = torch.tensor(verts)
242
+ if not torch.is_tensor(faces):
243
+ faces = torch.tensor(faces)
244
+
245
+ if verts.ndimension() == 2:
246
+ verts = verts.unsqueeze(0).float()
247
+ if faces.ndimension() == 2:
248
+ faces = faces.unsqueeze(0).long()
249
+
250
+ verts = verts.to(self.device)
251
+ faces = faces.to(self.device)
252
+
253
+ mesh = Meshes(verts, faces).to(self.device)
254
+
255
+ mesh.textures = TexturesVertex(
256
+ verts_features=(mesh.verts_normals_padded() + 1.0) * 0.5
257
+ )
258
+
259
+ return mesh
260
+
261
+ def load_meshes(self, verts, faces):
262
+ """load mesh into the pytorch3d renderer
263
+
264
+ Args:
265
+ verts ([N,3]): verts
266
+ faces ([N,3]): faces
268
+ """
269
+
270
+ # camera setting
271
+ self.scale = 100.0
272
+ self.mesh_y_center = 0.0
273
+
274
+ self.cam_pos = [
275
+ (0, self.mesh_y_center, 100.0),
276
+ (100.0, self.mesh_y_center, 0),
277
+ (0, self.mesh_y_center, -100.0),
278
+ (-100.0, self.mesh_y_center, 0),
279
+ ]
280
+
281
+ self.type = "color"
282
+
283
+ if isinstance(verts, list):
284
+ self.meshes = []
285
+ for V, F in zip(verts, faces):
286
+ self.meshes.append(self.VF2Mesh(V, F))
287
+ else:
288
+ self.meshes = [self.VF2Mesh(verts, faces)]
289
+
290
+ def get_depth_map(self, cam_ids=[0, 2]):
291
+
292
+ depth_maps = []
293
+ for cam_id in cam_ids:
294
+ self.init_renderer(self.get_camera(cam_id), "clean_mesh", "gray")
295
+ fragments = self.meshRas(self.meshes[0])
296
+ depth_map = fragments.zbuf[..., 0].squeeze(0)
297
+ if cam_id == 2:
298
+ depth_map = torch.fliplr(depth_map)
299
+ depth_maps.append(depth_map)
300
+
301
+ return depth_maps
302
+
303
+ def get_rgb_image(self, cam_ids=[0, 2]):
304
+
305
+ images = []
306
+ for cam_id in range(len(self.cam_pos)):
307
+ if cam_id in cam_ids:
308
+ self.init_renderer(self.get_camera(
309
+ cam_id), "clean_mesh", "gray")
310
+ if len(cam_ids) == 4:
311
+ rendered_img = (
312
+ self.renderer(self.meshes[0])[
313
+ 0:1, :, :, :3].permute(0, 3, 1, 2)
314
+ - 0.5
315
+ ) * 2.0
316
+ else:
317
+ rendered_img = (
318
+ self.renderer(self.meshes[0])[
319
+ 0:1, :, :, :3].permute(0, 3, 1, 2)
320
+ - 0.5
321
+ ) * 2.0
322
+ if cam_id == 2 and len(cam_ids) == 2:
323
+ rendered_img = torch.flip(rendered_img, dims=[3])
324
+ images.append(rendered_img)
325
+
326
+ return images
327
+
328
+ def get_rendered_video(self, images, save_path):
329
+
330
+ tmp_path = save_path.replace('cloth', 'tmp')
331
+
332
+ self.cam_pos = []
333
+ for angle in range(0, 360, 3):
334
+ self.cam_pos.append(
335
+ (
336
+ 100.0 * math.cos(np.pi / 180 * angle),
337
+ self.mesh_y_center,
338
+ 100.0 * math.sin(np.pi / 180 * angle),
339
+ )
340
+ )
341
+
342
+ old_shape = np.array(images[0].shape[:2])
343
+ new_shape = np.around(
344
+ (self.size / old_shape[0]) * old_shape).astype(int)
345
+
346
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
347
+ video = cv2.VideoWriter(
348
+ tmp_path, fourcc, 30, (self.size * len(self.meshes) +
349
+ new_shape[1] * len(images), self.size)
350
+ )
351
+
352
+ pbar = tqdm(range(len(self.cam_pos)))
353
+ pbar.set_description(colored(f"exporting video {os.path.basename(save_path)}...", "blue"))
354
+ for cam_id in pbar:
355
+ self.init_renderer(self.get_camera(cam_id), "clean_mesh", "gray")
356
+
357
+ img_lst = [
358
+ np.array(Image.fromarray(img).resize(new_shape[::-1])).astype(np.uint8)[
359
+ :, :, [2, 1, 0]
360
+ ]
361
+ for img in images
362
+ ]
363
+
364
+ for mesh in self.meshes:
365
+ rendered_img = (
366
+ (self.renderer(mesh)[0, :, :, :3] * 255.0)
367
+ .detach()
368
+ .cpu()
369
+ .numpy()
370
+ .astype(np.uint8)
371
+ )
372
+
373
+ img_lst.append(rendered_img)
374
+ final_img = np.concatenate(img_lst, axis=1)
375
+ video.write(final_img)
376
+
377
+ video.release()
378
+
379
+ os.system(f'ffmpeg -y -loglevel quiet -stats -i {tmp_path} -c:v libx264 {save_path}')
380
+
381
+ def get_silhouette_image(self, cam_ids=[0, 2]):
382
+
383
+ images = []
384
+ for cam_id in range(len(self.cam_pos)):
385
+ if cam_id in cam_ids:
386
+ self.init_renderer(self.get_camera(cam_id), "silhouette")
387
+ rendered_img = self.renderer(self.meshes[0])[0:1, :, :, 3]
388
+ if cam_id == 2 and len(cam_ids) == 2:
389
+ rendered_img = torch.flip(rendered_img, dims=[2])
390
+ images.append(rendered_img)
391
+
392
+ return images
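
Note (not part of the commit): a minimal usage sketch of the `Render` class above. It assumes a triangle mesh is available as NumPy arrays, that a CUDA device and the pytorch3d dependencies are installed, and the file names and shapes below are hypothetical.

    import numpy as np
    import torch
    from lib.common.render import Render

    # Hypothetical inputs: a triangle mesh as NumPy arrays, roughly unit-scaled.
    verts = np.load("mesh_verts.npy")    # (N, 3) float
    faces = np.load("mesh_faces.npy")    # (M, 3) int

    render = Render(size=512, device=torch.device("cuda:0"))
    render.load_meshes(verts, faces)     # sets up 4 orthographic cameras around the mesh

    # Front/back renderings (cam_id 0 = front, 2 = back).
    rgb_front, rgb_back = render.get_rgb_image(cam_ids=[0, 2])          # each [1, 3, H, W] in [-1, 1]
    depth_front, depth_back = render.get_depth_map(cam_ids=[0, 2])      # each [H, W] z-buffer
    sil_front, sil_back = render.get_silhouette_image(cam_ids=[0, 2])   # each [1, H, W] alpha

The flips inside `get_rgb_image` / `get_depth_map` / `get_silhouette_image` mirror the back view so that it is pixel-aligned with the front view.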
lib/common/render_utils.py ADDED
@@ -0,0 +1,221 @@
1
+
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
5
+ # holder of all proprietary rights on this computer program.
6
+ # You can only use this computer program if you have closed
7
+ # a license agreement with MPG or you get the right to use the computer
8
+ # program from someone who is authorized to grant you that right.
9
+ # Any use of the computer program without a valid license is prohibited and
10
+ # liable to prosecution.
11
+ #
12
+ # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
13
+ # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
14
+ # for Intelligent Systems. All rights reserved.
15
+ #
16
+ # Contact: ps-license@tuebingen.mpg.de
17
+
18
+ import torch
19
+ from torch import nn
20
+ import trimesh
21
+ import math
22
+ from typing import NewType
23
+ from pytorch3d.structures import Meshes
24
+ from pytorch3d.renderer.mesh import rasterize_meshes
25
+
26
+ Tensor = NewType('Tensor', torch.Tensor)
27
+
28
+
29
+ def solid_angles(points: Tensor,
30
+ triangles: Tensor,
31
+ thresh: float = 1e-8) -> Tensor:
32
+ ''' Compute solid angle between the input points and triangles
33
+ Follows the method described in:
34
+ The Solid Angle of a Plane Triangle
35
+ A. VAN OOSTEROM AND J. STRACKEE
36
+ IEEE TRANSACTIONS ON BIOMEDICAL ENGINEERING,
37
+ VOL. BME-30, NO. 2, FEBRUARY 1983
38
+ Parameters
39
+ -----------
40
+ points: BxQx3
41
+ Tensor of input query points
42
+ triangles: BxFx3x3
43
+ Target triangles
44
+ thresh: float
45
+ float threshold
46
+ Returns
47
+ -------
48
+ solid_angles: BxQxF
49
+ A tensor containing the solid angle between all query points
50
+ and input triangles
51
+ '''
52
+ # Center the triangles on the query points. Size should be BxQxFx3x3
53
+ centered_tris = triangles[:, None] - points[:, :, None, None]
54
+
55
+ # BxQxFx3
56
+ norms = torch.norm(centered_tris, dim=-1)
57
+
58
+ # Should be BxQxFx3
59
+ cross_prod = torch.cross(centered_tris[:, :, :, 1],
60
+ centered_tris[:, :, :, 2],
61
+ dim=-1)
62
+ # Should be BxQxF
63
+ numerator = (centered_tris[:, :, :, 0] * cross_prod).sum(dim=-1)
64
+ del cross_prod
65
+
66
+ dot01 = (centered_tris[:, :, :, 0] * centered_tris[:, :, :, 1]).sum(dim=-1)
67
+ dot12 = (centered_tris[:, :, :, 1] * centered_tris[:, :, :, 2]).sum(dim=-1)
68
+ dot02 = (centered_tris[:, :, :, 0] * centered_tris[:, :, :, 2]).sum(dim=-1)
69
+ del centered_tris
70
+
71
+ denominator = (norms.prod(dim=-1) + dot01 * norms[:, :, :, 2] +
72
+ dot02 * norms[:, :, :, 1] + dot12 * norms[:, :, :, 0])
73
+ del dot01, dot12, dot02, norms
74
+
75
+ # Should be BxQxF
76
+ solid_angle = torch.atan2(numerator, denominator)
77
+ del numerator, denominator
78
+
79
+ torch.cuda.empty_cache()
80
+
81
+ return 2 * solid_angle
82
+
83
+
84
+ def winding_numbers(points: Tensor,
85
+ triangles: Tensor,
86
+ thresh: float = 1e-8) -> Tensor:
87
+ ''' Compute generalized winding numbers for inside/outside queries.
+ References:
+ "Robust Inside-Outside Segmentation using Generalized Winding Numbers",
+ Alec Jacobson, Ladislav Kavan, Olga Sorkine-Hornung, SIGGRAPH 2013.
+ "Fast Winding Numbers for Soups and Clouds",
+ Gavin Barill, Neil G. Dickson, Ryan Schmidt, David I.W. Levin, Alec Jacobson, SIGGRAPH 2018.
98
+ Parameters
99
+ -----------
100
+ points: BxQx3
101
+ Tensor of input query points
102
+ triangles: BxFx3x3
103
+ Target triangles
104
+ thresh: float
105
+ float threshold
106
+ Returns
107
+ -------
108
+ winding_numbers: BxQ
109
+ A tensor containing the Generalized winding numbers
110
+ '''
111
+ # The generalized winding number is the sum of solid angles of the point
112
+ # with respect to all triangles.
113
+ return 1 / (4 * math.pi) * solid_angles(points, triangles,
114
+ thresh=thresh).sum(dim=-1)
115
+
116
+
117
+ def batch_contains(verts, faces, points):
118
+
119
+ B = verts.shape[0]
120
+ N = points.shape[1]
121
+
122
+ verts = verts.detach().cpu()
123
+ faces = faces.detach().cpu()
124
+ points = points.detach().cpu()
125
+ contains = torch.zeros(B, N)
126
+
127
+ for i in range(B):
128
+ contains[i] = torch.as_tensor(
129
+ trimesh.Trimesh(verts[i], faces[i]).contains(points[i]))
130
+
131
+ return 2.0 * (contains - 0.5)
132
+
133
+
134
+ def dict2obj(d):
135
+ # if isinstance(d, list):
136
+ # d = [dict2obj(x) for x in d]
137
+ if not isinstance(d, dict):
138
+ return d
139
+
140
+ class C(object):
141
+ pass
142
+
143
+ o = C()
144
+ for k in d:
145
+ o.__dict__[k] = dict2obj(d[k])
146
+ return o
147
+
148
+
149
+ def face_vertices(vertices, faces):
150
+ """
151
+ :param vertices: [batch size, number of vertices, 3]
152
+ :param faces: [batch size, number of faces, 3]
153
+ :return: [batch size, number of faces, 3, 3]
154
+ """
155
+
156
+ bs, nv = vertices.shape[:2]
157
+ bs, nf = faces.shape[:2]
158
+ device = vertices.device
159
+ faces = faces + (torch.arange(bs, dtype=torch.int32).to(device) *
160
+ nv)[:, None, None]
161
+ vertices = vertices.reshape((bs * nv, vertices.shape[-1]))
162
+
163
+ return vertices[faces.long()]
164
+
165
+
166
+ class Pytorch3dRasterizer(nn.Module):
167
+ """ Borrowed from https://github.com/facebookresearch/pytorch3d
168
+ Notice:
169
+ x,y,z are in image space, normalized
170
+ can only render square images for now
171
+ """
172
+
173
+ def __init__(self, image_size=224):
174
+ """
175
+ use fixed raster_settings for rendering faces
176
+ """
177
+ super().__init__()
178
+ raster_settings = {
179
+ 'image_size': image_size,
180
+ 'blur_radius': 0.0,
181
+ 'faces_per_pixel': 1,
182
+ 'bin_size': None,
183
+ 'max_faces_per_bin': None,
184
+ 'perspective_correct': True,
185
+ 'cull_backfaces': True,
186
+ }
187
+ raster_settings = dict2obj(raster_settings)
188
+ self.raster_settings = raster_settings
189
+
190
+ def forward(self, vertices, faces, attributes=None):
191
+ fixed_vertices = vertices.clone()
192
+ fixed_vertices[..., :2] = -fixed_vertices[..., :2]
193
+ meshes_screen = Meshes(verts=fixed_vertices.float(),
194
+ faces=faces.long())
195
+ raster_settings = self.raster_settings
196
+ pix_to_face, zbuf, bary_coords, dists = rasterize_meshes(
197
+ meshes_screen,
198
+ image_size=raster_settings.image_size,
199
+ blur_radius=raster_settings.blur_radius,
200
+ faces_per_pixel=raster_settings.faces_per_pixel,
201
+ bin_size=raster_settings.bin_size,
202
+ max_faces_per_bin=raster_settings.max_faces_per_bin,
203
+ perspective_correct=raster_settings.perspective_correct,
204
+ )
205
+ vismask = (pix_to_face > -1).float()
206
+ D = attributes.shape[-1]
207
+ attributes = attributes.clone()
208
+ attributes = attributes.view(attributes.shape[0] * attributes.shape[1],
209
+ 3, attributes.shape[-1])
210
+ N, H, W, K, _ = bary_coords.shape
211
+ mask = pix_to_face == -1
212
+ pix_to_face = pix_to_face.clone()
213
+ pix_to_face[mask] = 0
214
+ idx = pix_to_face.view(N * H * W * K, 1, 1).expand(N * H * W * K, 3, D)
215
+ pixel_face_vals = attributes.gather(0, idx).view(N, H, W, K, 3, D)
216
+ pixel_vals = (bary_coords[..., None] * pixel_face_vals).sum(dim=-2)
217
+ pixel_vals[mask] = 0 # Replace masked values in output.
218
+ pixel_vals = pixel_vals[:, :, :, 0].permute(0, 3, 1, 2)
219
+ pixel_vals = torch.cat(
220
+ [pixel_vals, vismask[:, :, :, 0][:, None, :, :]], dim=1)
221
+ return pixel_vals
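
Note (not part of the commit): a short sketch of how `face_vertices` and `winding_numbers` fit together for inside/outside queries. The tensors below are random placeholders used only to illustrate shapes; the "~1 inside / ~0 outside" reading only holds for a genuinely watertight mesh.

    import torch
    from lib.common.render_utils import face_vertices, winding_numbers

    verts = torch.rand(1, 1000, 3)                 # [B, N, 3] mesh vertices
    faces = torch.randint(0, 1000, (1, 2000, 3))   # [B, F, 3] triangle indices
    points = torch.rand(1, 128, 3)                 # [B, Q, 3] query points

    triangles = face_vertices(verts, faces)        # [B, F, 3, 3] per-face vertex triplets
    wn = winding_numbers(points, triangles)        # [B, Q] generalized winding numbers
    inside = wn > 0.5                              # ~1 inside, ~0 outside for a watertight mesh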
lib/common/seg3d_lossless.py ADDED
@@ -0,0 +1,604 @@
1
+
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
5
+ # holder of all proprietary rights on this computer program.
6
+ # You can only use this computer program if you have closed
7
+ # a license agreement with MPG or you get the right to use the computer
8
+ # program from someone who is authorized to grant you that right.
9
+ # Any use of the computer program without a valid license is prohibited and
10
+ # liable to prosecution.
11
+ #
12
+ # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
13
+ # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
14
+ # for Intelligent Systems. All rights reserved.
15
+ #
16
+ # Contact: ps-license@tuebingen.mpg.de
17
+
18
+
19
+ from .seg3d_utils import (
20
+ create_grid3D,
21
+ plot_mask3D,
22
+ SmoothConv3D,
23
+ )
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+ import numpy as np
28
+ import torch.nn.functional as F
29
+ import mcubes
30
+ from kaolin.ops.conversions import voxelgrids_to_trianglemeshes
31
+ import logging
32
+
33
+ logging.getLogger("lightning").setLevel(logging.ERROR)
34
+
35
+
36
+ class Seg3dLossless(nn.Module):
37
+ def __init__(self,
38
+ query_func,
39
+ b_min,
40
+ b_max,
41
+ resolutions,
42
+ channels=1,
43
+ balance_value=0.5,
44
+ align_corners=False,
45
+ visualize=False,
46
+ debug=False,
47
+ use_cuda_impl=False,
48
+ faster=False,
49
+ use_shadow=False,
50
+ **kwargs):
51
+ """
52
+ align_corners: same with how you process gt. (grid_sample / interpolate)
53
+ """
54
+ super().__init__()
55
+ self.query_func = query_func
56
+ self.register_buffer(
57
+ 'b_min',
58
+ torch.tensor(b_min).float().unsqueeze(1)) # [bz, 1, 3]
59
+ self.register_buffer(
60
+ 'b_max',
61
+ torch.tensor(b_max).float().unsqueeze(1)) # [bz, 1, 3]
62
+
63
+ # ti.init(arch=ti.cuda)
64
+ # self.mciso_taichi = MCISO(dim=3, N=resolutions[-1]-1)
65
+
66
+ if type(resolutions[0]) is int:
67
+ resolutions = torch.tensor([(res, res, res)
68
+ for res in resolutions])
69
+ else:
70
+ resolutions = torch.tensor(resolutions)
71
+ self.register_buffer('resolutions', resolutions)
72
+ self.batchsize = self.b_min.size(0)
73
+ assert self.batchsize == 1
74
+ self.balance_value = balance_value
75
+ self.channels = channels
76
+ assert self.channels == 1
77
+ self.align_corners = align_corners
78
+ self.visualize = visualize
79
+ self.debug = debug
80
+ self.use_cuda_impl = use_cuda_impl
81
+ self.faster = faster
82
+ self.use_shadow = use_shadow
83
+
84
+ for resolution in resolutions:
85
+ assert resolution[0] % 2 == 1 and resolution[1] % 2 == 1, \
86
+ f"resolution {resolution} need to be odd becuase of align_corner."
87
+
88
+ # init first resolution
89
+ init_coords = create_grid3D(0,
90
+ resolutions[-1] - 1,
91
+ steps=resolutions[0]) # [N, 3]
92
+ init_coords = init_coords.unsqueeze(0).repeat(self.batchsize, 1,
93
+ 1) # [bz, N, 3]
94
+ self.register_buffer('init_coords', init_coords)
95
+
96
+ # some useful tensors
97
+ calculated = torch.zeros(
98
+ (self.resolutions[-1][2], self.resolutions[-1][1],
99
+ self.resolutions[-1][0]),
100
+ dtype=torch.bool)
101
+ self.register_buffer('calculated', calculated)
102
+
103
+ gird8_offsets = torch.stack(
104
+ torch.meshgrid([
105
+ torch.tensor([-1, 0, 1]),
106
+ torch.tensor([-1, 0, 1]),
107
+ torch.tensor([-1, 0, 1])
108
+ ])).int().view(3, -1).t() # [27, 3]
109
+ self.register_buffer('gird8_offsets', gird8_offsets)
110
+
111
+ # smooth convs
112
+ self.smooth_conv3x3 = SmoothConv3D(in_channels=1,
113
+ out_channels=1,
114
+ kernel_size=3)
115
+ self.smooth_conv5x5 = SmoothConv3D(in_channels=1,
116
+ out_channels=1,
117
+ kernel_size=5)
118
+ self.smooth_conv7x7 = SmoothConv3D(in_channels=1,
119
+ out_channels=1,
120
+ kernel_size=7)
121
+ self.smooth_conv9x9 = SmoothConv3D(in_channels=1,
122
+ out_channels=1,
123
+ kernel_size=9)
124
+
125
+ def batch_eval(self, coords, **kwargs):
126
+ """
127
+ coords: in the coordinates of last resolution
128
+ **kwargs: for query_func
129
+ """
130
+ coords = coords.detach()
131
+ # normalize coords to fit in [b_min, b_max]
132
+ if self.align_corners:
133
+ coords2D = coords.float() / (self.resolutions[-1] - 1)
134
+ else:
135
+ step = 1.0 / self.resolutions[-1].float()
136
+ coords2D = coords.float() / self.resolutions[-1] + step / 2
137
+ coords2D = coords2D * (self.b_max - self.b_min) + self.b_min
138
+ # query function
139
+ occupancys = self.query_func(**kwargs, points=coords2D)
140
+ if type(occupancys) is list:
141
+ occupancys = torch.stack(occupancys) # [bz, C, N]
142
+ assert len(occupancys.size()) == 3, \
143
+ "query_func should return a occupancy with shape of [bz, C, N]"
144
+ return occupancys
145
+
146
+ def forward(self, **kwargs):
147
+ if self.faster:
148
+ return self._forward_faster(**kwargs)
149
+ else:
150
+ return self._forward(**kwargs)
151
+
152
+ def _forward_faster(self, **kwargs):
153
+ """
154
+ In faster mode, we make following changes to exchange accuracy for speed:
155
+ 1. no conflict checking: 4.88 fps -> 6.56 fps
156
+ 2. smooth_conv9x9 ~ smooth_conv3x3 for different resolution
157
+ 3. last step no examine
158
+ """
159
+ final_W = self.resolutions[-1][0]
160
+ final_H = self.resolutions[-1][1]
161
+ final_D = self.resolutions[-1][2]
162
+
163
+ for resolution in self.resolutions:
164
+ W, H, D = resolution
165
+ stride = (self.resolutions[-1] - 1) / (resolution - 1)
166
+
167
+ # first step
168
+ if torch.equal(resolution, self.resolutions[0]):
169
+ coords = self.init_coords.clone() # torch.long
170
+ occupancys = self.batch_eval(coords, **kwargs)
171
+ occupancys = occupancys.view(self.batchsize, self.channels, D,
172
+ H, W)
173
+ if (occupancys > 0.5).sum() == 0:
174
+ # return F.interpolate(
175
+ # occupancys, size=(final_D, final_H, final_W),
176
+ # mode="linear", align_corners=True)
177
+ return None
178
+
179
+ if self.visualize:
180
+ self.plot(occupancys, coords, final_D, final_H, final_W)
181
+
182
+ with torch.no_grad():
183
+ coords_accum = coords / stride
184
+
185
+ # last step
186
+ elif torch.equal(resolution, self.resolutions[-1]):
187
+
188
+ with torch.no_grad():
189
+ # here true is correct!
190
+ valid = F.interpolate(
191
+ (occupancys > self.balance_value).float(),
192
+ size=(D, H, W),
193
+ mode="trilinear",
194
+ align_corners=True)
195
+
196
+ # here true is correct!
197
+ occupancys = F.interpolate(occupancys.float(),
198
+ size=(D, H, W),
199
+ mode="trilinear",
200
+ align_corners=True)
201
+
202
+ # is_boundary = (valid > 0.0) & (valid < 1.0)
203
+ is_boundary = valid == 0.5
204
+
205
+ # next steps
206
+ else:
207
+ coords_accum *= 2
208
+
209
+ with torch.no_grad():
210
+ # here true is correct!
211
+ valid = F.interpolate(
212
+ (occupancys > self.balance_value).float(),
213
+ size=(D, H, W),
214
+ mode="trilinear",
215
+ align_corners=True)
216
+
217
+ # here true is correct!
218
+ occupancys = F.interpolate(occupancys.float(),
219
+ size=(D, H, W),
220
+ mode="trilinear",
221
+ align_corners=True)
222
+
223
+ is_boundary = (valid > 0.0) & (valid < 1.0)
224
+
225
+ with torch.no_grad():
226
+ if torch.equal(resolution, self.resolutions[1]):
227
+ is_boundary = (self.smooth_conv9x9(is_boundary.float())
228
+ > 0)[0, 0]
229
+ elif torch.equal(resolution, self.resolutions[2]):
230
+ is_boundary = (self.smooth_conv7x7(is_boundary.float())
231
+ > 0)[0, 0]
232
+ else:
233
+ is_boundary = (self.smooth_conv3x3(is_boundary.float())
234
+ > 0)[0, 0]
235
+
236
+ coords_accum = coords_accum.long()
237
+ is_boundary[coords_accum[0, :, 2], coords_accum[0, :, 1],
238
+ coords_accum[0, :, 0]] = False
239
+ point_coords = is_boundary.permute(
240
+ 2, 1, 0).nonzero(as_tuple=False).unsqueeze(0)
241
+ point_indices = (point_coords[:, :, 2] * H * W +
242
+ point_coords[:, :, 1] * W +
243
+ point_coords[:, :, 0])
244
+
245
+ R, C, D, H, W = occupancys.shape
246
+
247
+ # inferred value
248
+ coords = point_coords * stride
249
+
250
+ if coords.size(1) == 0:
251
+ continue
252
+ occupancys_topk = self.batch_eval(coords, **kwargs)
253
+
254
+ # put mask point predictions to the right places on the upsampled grid.
255
+ R, C, D, H, W = occupancys.shape
256
+ point_indices = point_indices.unsqueeze(1).expand(-1, C, -1)
257
+ occupancys = (occupancys.reshape(R, C, D * H * W).scatter_(
258
+ 2, point_indices, occupancys_topk).view(R, C, D, H, W))
259
+
260
+ with torch.no_grad():
261
+ voxels = coords / stride
262
+ coords_accum = torch.cat([voxels, coords_accum],
263
+ dim=1).unique(dim=1)
264
+
265
+ return occupancys[0, 0]
266
+
267
+ def _forward(self, **kwargs):
268
+ """
269
+ output occupancy field would be:
270
+ (bz, C, res, res)
271
+ """
272
+ final_W = self.resolutions[-1][0]
273
+ final_H = self.resolutions[-1][1]
274
+ final_D = self.resolutions[-1][2]
275
+
276
+ calculated = self.calculated.clone()
277
+
278
+ for resolution in self.resolutions:
279
+ W, H, D = resolution
280
+ stride = (self.resolutions[-1] - 1) / (resolution - 1)
281
+
282
+ if self.visualize:
283
+ this_stage_coords = []
284
+
285
+ # first step
286
+ if torch.equal(resolution, self.resolutions[0]):
287
+ coords = self.init_coords.clone() # torch.long
288
+ occupancys = self.batch_eval(coords, **kwargs)
289
+ occupancys = occupancys.view(self.batchsize, self.channels, D,
290
+ H, W)
291
+
292
+ if self.visualize:
293
+ self.plot(occupancys, coords, final_D, final_H, final_W)
294
+
295
+ with torch.no_grad():
296
+ coords_accum = coords / stride
297
+ calculated[coords[0, :, 2], coords[0, :, 1],
298
+ coords[0, :, 0]] = True
299
+
300
+ # next steps
301
+ else:
302
+ coords_accum *= 2
303
+
304
+ with torch.no_grad():
305
+ # here true is correct!
306
+ valid = F.interpolate(
307
+ (occupancys > self.balance_value).float(),
308
+ size=(D, H, W),
309
+ mode="trilinear",
310
+ align_corners=True)
311
+
312
+ # here true is correct!
313
+ occupancys = F.interpolate(occupancys.float(),
314
+ size=(D, H, W),
315
+ mode="trilinear",
316
+ align_corners=True)
317
+
318
+ is_boundary = (valid > 0.0) & (valid < 1.0)
319
+
320
+ with torch.no_grad():
321
+ # TODO
322
+ if self.use_shadow and torch.equal(resolution,
323
+ self.resolutions[-1]):
324
+ # larger z means smaller depth here
325
+ depth_res = resolution[2].item()
326
+ depth_index = torch.linspace(0,
327
+ depth_res - 1,
328
+ steps=depth_res).type_as(
329
+ occupancys.device)
330
+ depth_index_max = torch.max(
331
+ (occupancys > self.balance_value) *
332
+ (depth_index + 1),
333
+ dim=-1,
334
+ keepdim=True)[0] - 1
335
+ shadow = depth_index < depth_index_max
336
+ is_boundary[shadow] = False
337
+ is_boundary = is_boundary[0, 0]
338
+ else:
339
+ is_boundary = (self.smooth_conv3x3(is_boundary.float())
340
+ > 0)[0, 0]
341
+ # is_boundary = is_boundary[0, 0]
342
+
343
+ is_boundary[coords_accum[0, :, 2], coords_accum[0, :, 1],
344
+ coords_accum[0, :, 0]] = False
345
+ point_coords = is_boundary.permute(
346
+ 2, 1, 0).nonzero(as_tuple=False).unsqueeze(0)
347
+ point_indices = (point_coords[:, :, 2] * H * W +
348
+ point_coords[:, :, 1] * W +
349
+ point_coords[:, :, 0])
350
+
351
+ R, C, D, H, W = occupancys.shape
352
+ # interpolated value
353
+ occupancys_interp = torch.gather(
354
+ occupancys.reshape(R, C, D * H * W), 2,
355
+ point_indices.unsqueeze(1))
356
+
357
+ # inferred value
358
+ coords = point_coords * stride
359
+
360
+ if coords.size(1) == 0:
361
+ continue
362
+ occupancys_topk = self.batch_eval(coords, **kwargs)
363
+ if self.visualize:
364
+ this_stage_coords.append(coords)
365
+
366
+ # put mask point predictions to the right places on the upsampled grid.
367
+ R, C, D, H, W = occupancys.shape
368
+ point_indices = point_indices.unsqueeze(1).expand(-1, C, -1)
369
+ occupancys = (occupancys.reshape(R, C, D * H * W).scatter_(
370
+ 2, point_indices, occupancys_topk).view(R, C, D, H, W))
371
+
372
+ with torch.no_grad():
373
+ # conflicts
374
+ conflicts = ((occupancys_interp - self.balance_value) *
375
+ (occupancys_topk - self.balance_value) < 0)[0,
376
+ 0]
377
+
378
+ if self.visualize:
379
+ self.plot(occupancys, coords, final_D, final_H,
380
+ final_W)
381
+
382
+ voxels = coords / stride
383
+ coords_accum = torch.cat([voxels, coords_accum],
384
+ dim=1).unique(dim=1)
385
+ calculated[coords[0, :, 2], coords[0, :, 1],
386
+ coords[0, :, 0]] = True
387
+
388
+ while conflicts.sum() > 0:
389
+ if self.use_shadow and torch.equal(resolution,
390
+ self.resolutions[-1]):
391
+ break
392
+
393
+ with torch.no_grad():
394
+ conflicts_coords = coords[0, conflicts, :]
395
+
396
+ if self.debug:
397
+ self.plot(occupancys,
398
+ conflicts_coords.unsqueeze(0),
399
+ final_D,
400
+ final_H,
401
+ final_W,
402
+ title='conflicts')
403
+
404
+ conflicts_boundary = (conflicts_coords.int() +
405
+ self.gird8_offsets.unsqueeze(1) *
406
+ stride.int()).reshape(
407
+ -1, 3).long().unique(dim=0)
408
+ conflicts_boundary[:, 0] = (
409
+ conflicts_boundary[:, 0].clamp(
410
+ 0,
411
+ calculated.size(2) - 1))
412
+ conflicts_boundary[:, 1] = (
413
+ conflicts_boundary[:, 1].clamp(
414
+ 0,
415
+ calculated.size(1) - 1))
416
+ conflicts_boundary[:, 2] = (
417
+ conflicts_boundary[:, 2].clamp(
418
+ 0,
419
+ calculated.size(0) - 1))
420
+
421
+ coords = conflicts_boundary[calculated[
422
+ conflicts_boundary[:, 2], conflicts_boundary[:, 1],
423
+ conflicts_boundary[:, 0]] == False]
424
+
425
+ if self.debug:
426
+ self.plot(occupancys,
427
+ coords.unsqueeze(0),
428
+ final_D,
429
+ final_H,
430
+ final_W,
431
+ title='coords')
432
+
433
+ coords = coords.unsqueeze(0)
434
+ point_coords = coords / stride
435
+ point_indices = (point_coords[:, :, 2] * H * W +
436
+ point_coords[:, :, 1] * W +
437
+ point_coords[:, :, 0])
438
+
439
+ R, C, D, H, W = occupancys.shape
440
+ # interpolated value
441
+ occupancys_interp = torch.gather(
442
+ occupancys.reshape(R, C, D * H * W), 2,
443
+ point_indices.unsqueeze(1))
444
+
445
+ # inferred value
446
+ coords = point_coords * stride
447
+
448
+ if coords.size(1) == 0:
449
+ break
450
+ occupancys_topk = self.batch_eval(coords, **kwargs)
451
+ if self.visualize:
452
+ this_stage_coords.append(coords)
453
+
454
+ with torch.no_grad():
455
+ # conflicts
456
+ conflicts = ((occupancys_interp - self.balance_value) *
457
+ (occupancys_topk - self.balance_value) <
458
+ 0)[0, 0]
459
+
460
+ # put mask point predictions to the right places on the upsampled grid.
461
+ point_indices = point_indices.unsqueeze(1).expand(
462
+ -1, C, -1)
463
+ occupancys = (occupancys.reshape(R, C, D * H * W).scatter_(
464
+ 2, point_indices, occupancys_topk).view(R, C, D, H, W))
465
+
466
+ with torch.no_grad():
467
+ voxels = coords / stride
468
+ coords_accum = torch.cat([voxels, coords_accum],
469
+ dim=1).unique(dim=1)
470
+ calculated[coords[0, :, 2], coords[0, :, 1],
471
+ coords[0, :, 0]] = True
472
+
473
+ if self.visualize:
474
+ this_stage_coords = torch.cat(this_stage_coords, dim=1)
475
+ self.plot(occupancys, this_stage_coords, final_D, final_H,
476
+ final_W)
477
+
478
+ return occupancys[0, 0]
479
+
480
+ def plot(self,
481
+ occupancys,
482
+ coords,
483
+ final_D,
484
+ final_H,
485
+ final_W,
486
+ title='',
487
+ **kwargs):
488
+ final = F.interpolate(occupancys.float(),
489
+ size=(final_D, final_H, final_W),
490
+ mode="trilinear",
491
+ align_corners=True) # here true is correct!
492
+ x = coords[0, :, 0].to("cpu")
493
+ y = coords[0, :, 1].to("cpu")
494
+ z = coords[0, :, 2].to("cpu")
495
+
496
+ plot_mask3D(final[0, 0].to("cpu"), title, (x, y, z), **kwargs)
497
+
498
+ def find_vertices(self, sdf, direction="front"):
499
+ '''
500
+ - direction: "front" | "back" | "left" | "right"
501
+ '''
502
+ resolution = sdf.size(2)
503
+ if direction == "front":
504
+ pass
505
+ elif direction == "left":
506
+ sdf = sdf.permute(2, 1, 0)
507
+ elif direction == "back":
508
+ inv_idx = torch.arange(sdf.size(2) - 1, -1, -1).long()
509
+ sdf = sdf[inv_idx, :, :]
510
+ elif direction == "right":
511
+ inv_idx = torch.arange(sdf.size(2) - 1, -1, -1).long()
512
+ sdf = sdf[:, :, inv_idx]
513
+ sdf = sdf.permute(2, 1, 0)
514
+
515
+ inv_idx = torch.arange(sdf.size(2) - 1, -1, -1).long()
516
+ sdf = sdf[inv_idx, :, :]
517
+ sdf_all = sdf.permute(2, 1, 0)
518
+
519
+ # shadow
520
+ grad_v = (sdf_all > 0.5) * torch.linspace(
521
+ resolution, 1, steps=resolution).to(sdf.device)
522
+ grad_c = torch.ones_like(sdf_all) * torch.linspace(
523
+ 0, resolution - 1, steps=resolution).to(sdf.device)
524
+ max_v, max_c = grad_v.max(dim=2)
525
+ shadow = grad_c > max_c.view(resolution, resolution, 1)
526
+ keep = (sdf_all > 0.5) & (~shadow)
527
+
528
+ p1 = keep.nonzero(as_tuple=False).t() # [3, N]
529
+ p2 = p1.clone() # z
530
+ p2[2, :] = (p2[2, :] - 2).clamp(0, resolution)
531
+ p3 = p1.clone() # y
532
+ p3[1, :] = (p3[1, :] - 2).clamp(0, resolution)
533
+ p4 = p1.clone() # x
534
+ p4[0, :] = (p4[0, :] - 2).clamp(0, resolution)
535
+
536
+ v1 = sdf_all[p1[0, :], p1[1, :], p1[2, :]]
537
+ v2 = sdf_all[p2[0, :], p2[1, :], p2[2, :]]
538
+ v3 = sdf_all[p3[0, :], p3[1, :], p3[2, :]]
539
+ v4 = sdf_all[p4[0, :], p4[1, :], p4[2, :]]
540
+
541
+ X = p1[0, :].long() # [N,]
542
+ Y = p1[1, :].long() # [N,]
543
+ Z = p2[2, :].float() * (0.5 - v1) / (v2 - v1) + \
544
+ p1[2, :].float() * (v2 - 0.5) / (v2 - v1) # [N,]
545
+ Z = Z.clamp(0, resolution)
546
+
547
+ # normal
548
+ norm_z = v2 - v1
549
+ norm_y = v3 - v1
550
+ norm_x = v4 - v1
551
+ # print (v2.min(dim=0)[0], v2.max(dim=0)[0], v3.min(dim=0)[0], v3.max(dim=0)[0])
552
+
553
+ norm = torch.stack([norm_x, norm_y, norm_z], dim=1)
554
+ norm = norm / torch.norm(norm, p=2, dim=1, keepdim=True)
555
+
556
+ return X, Y, Z, norm
557
+
558
+ def render_normal(self, resolution, X, Y, Z, norm):
559
+ image = torch.ones((1, 3, resolution, resolution),
560
+ dtype=torch.float32).to(norm.device)
561
+ color = (norm + 1) / 2.0
562
+ color = color.clamp(0, 1)
563
+ image[0, :, Y, X] = color.t()
564
+ return image
565
+
566
+ def display(self, sdf):
567
+
568
+ # render
569
+ X, Y, Z, norm = self.find_vertices(sdf, direction="front")
570
+ image1 = self.render_normal(self.resolutions[-1, -1], X, Y, Z, norm)
571
+ X, Y, Z, norm = self.find_vertices(sdf, direction="left")
572
+ image2 = self.render_normal(self.resolutions[-1, -1], X, Y, Z, norm)
573
+ X, Y, Z, norm = self.find_vertices(sdf, direction="right")
574
+ image3 = self.render_normal(self.resolutions[-1, -1], X, Y, Z, norm)
575
+ X, Y, Z, norm = self.find_vertices(sdf, direction="back")
576
+ image4 = self.render_normal(self.resolutions[-1, -1], X, Y, Z, norm)
577
+
578
+ image = torch.cat([image1, image2, image3, image4], axis=3)
579
+ image = image.detach().cpu().numpy()[0].transpose(1, 2, 0) * 255.0
580
+
581
+ return np.uint8(image)
582
+
583
+ def export_mesh(self, occupancys):
584
+
585
+ final = occupancys[1:, 1:, 1:].contiguous()
586
+
587
+ if final.shape[0] > 256:
588
+ # for voxelgrid larger than 256^3, the required GPU memory will be > 9GB
589
+ # thus we use CPU marching_cube to avoid "CUDA out of memory"
590
+ occu_arr = final.detach().cpu().numpy() # non-smooth surface
591
+ # occu_arr = mcubes.smooth(final.detach().cpu().numpy()) # smooth surface
592
+ vertices, triangles = mcubes.marching_cubes(
593
+ occu_arr, self.balance_value)
594
+ verts = torch.as_tensor(vertices[:, [2, 1, 0]])
595
+ faces = torch.as_tensor(triangles.astype(
596
+ np.int64), dtype=torch.long)[:, [0, 2, 1]]
597
+ else:
598
+ torch.cuda.empty_cache()
599
+ vertices, triangles = voxelgrids_to_trianglemeshes(
600
+ final.unsqueeze(0))
601
+ verts = vertices[0][:, [2, 1, 0]].cpu()
602
+ faces = triangles[0][:, [0, 2, 1]].cpu()
603
+
604
+ return verts, faces
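
Note (not part of the commit): a rough sketch of driving `Seg3dLossless` with a toy occupancy oracle (a sphere standing in for an implicit network). It assumes a CUDA device and the kaolin dependency imported above are available; all shapes and bounds below are hypothetical.

    import torch
    from lib.common.seg3d_lossless import Seg3dLossless

    device = torch.device("cuda:0")

    def query_func(points, **kwargs):
        # points: [1, N, 3] world coordinates inside [b_min, b_max]
        occ = (points.norm(dim=-1, keepdim=True) < 0.5).float()   # occupied inside a sphere
        return occ.permute(0, 2, 1)                                # [bz, C, N] as required

    engine = Seg3dLossless(
        query_func=query_func,
        b_min=[[-1.0, -1.0, -1.0]],
        b_max=[[1.0, 1.0, 1.0]],
        resolutions=[17, 33, 65, 129, 257],   # each dimension must be odd (see the assert above)
        align_corners=True,
        faster=True,
    ).to(device)

    occ_grid = engine()                            # final-resolution occupancy volume, or None if empty
    verts, faces = engine.export_mesh(occ_grid)    # marching cubes -> mesh tensors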
lib/common/seg3d_utils.py ADDED
@@ -0,0 +1,392 @@
1
+
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
5
+ # holder of all proprietary rights on this computer program.
6
+ # You can only use this computer program if you have closed
7
+ # a license agreement with MPG or you get the right to use the computer
8
+ # program from someone who is authorized to grant you that right.
9
+ # Any use of the computer program without a valid license is prohibited and
10
+ # liable to prosecution.
11
+ #
12
+ # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
13
+ # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
14
+ # for Intelligent Systems. All rights reserved.
15
+ #
16
+ # Contact: ps-license@tuebingen.mpg.de
17
+
18
+ import torch
19
+ import torch.nn as nn
20
+ import torch.nn.functional as F
21
+ import matplotlib.pyplot as plt
22
+
23
+
24
+ def plot_mask2D(mask,
25
+ title="",
26
+ point_coords=None,
27
+ figsize=10,
28
+ point_marker_size=5):
29
+ '''
30
+ Simple plotting tool to show intermediate mask predictions and points
31
+ where PointRend is applied.
32
+
33
+ Args:
34
+ mask (Tensor): mask prediction of shape HxW
35
+ title (str): title for the plot
36
+ point_coords ((Tensor, Tensor)): x and y point coordinates
37
+ figsize (int): size of the figure to plot
38
+ point_marker_size (int): marker size for points
39
+ '''
40
+
41
+ H, W = mask.shape
42
+ plt.figure(figsize=(figsize, figsize))
43
+ if title:
44
+ title += ", "
45
+ plt.title("{}resolution {}x{}".format(title, H, W), fontsize=30)
46
+ plt.ylabel(H, fontsize=30)
47
+ plt.xlabel(W, fontsize=30)
48
+ plt.xticks([], [])
49
+ plt.yticks([], [])
50
+ plt.imshow(mask.detach(),
51
+ interpolation="nearest",
52
+ cmap=plt.get_cmap('gray'))
53
+ if point_coords is not None:
54
+ plt.scatter(x=point_coords[0],
55
+ y=point_coords[1],
56
+ color="red",
57
+ s=point_marker_size,
58
+ clip_on=True)
59
+ plt.xlim(-0.5, W - 0.5)
60
+ plt.ylim(H - 0.5, -0.5)
61
+ plt.show()
62
+
63
+
64
+ def plot_mask3D(mask=None,
65
+ title="",
66
+ point_coords=None,
67
+ figsize=1500,
68
+ point_marker_size=8,
69
+ interactive=True):
70
+ '''
71
+ Simple plotting tool to show intermediate mask predictions and points
72
+ where PointRend is applied.
73
+
74
+ Args:
75
+ mask (Tensor): mask prediction of shape DxHxW
76
+ title (str): title for the plot
77
+ point_coords ((Tensor, Tensor, Tensor)): x and y and z point coordinates
78
+ figsize (int): size of the figure to plot
79
+ point_marker_size (int): marker size for points
80
+ '''
81
+ import trimesh
82
+ import vtkplotter
83
+ from skimage import measure
84
+
85
+ vp = vtkplotter.Plotter(title=title, size=(figsize, figsize))
86
+ vis_list = []
87
+
88
+ if mask is not None:
89
+ mask = mask.detach().to("cpu").numpy()
90
+ mask = mask.transpose(2, 1, 0)
91
+
92
+ # marching cube to find surface
93
+ verts, faces, normals, values = measure.marching_cubes(
94
+ mask, 0.5, gradient_direction='ascent')
95
+
96
+ # create a mesh
97
+ mesh = trimesh.Trimesh(verts, faces)
98
+ mesh.visual.face_colors = [200, 200, 250, 100]
99
+ vis_list.append(mesh)
100
+
101
+ if point_coords is not None:
102
+ point_coords = torch.stack(point_coords, 1).to("cpu").numpy()
103
+
104
+ # import numpy as np
105
+ # select_x = np.logical_and(point_coords[:, 0] >= 16, point_coords[:, 0] <= 112)
106
+ # select_y = np.logical_and(point_coords[:, 1] >= 48, point_coords[:, 1] <= 272)
107
+ # select_z = np.logical_and(point_coords[:, 2] >= 16, point_coords[:, 2] <= 112)
108
+ # select = np.logical_and(np.logical_and(select_x, select_y), select_z)
109
+ # point_coords = point_coords[select, :]
110
+
111
+ pc = vtkplotter.Points(point_coords, r=point_marker_size, c='red')
112
+ vis_list.append(pc)
113
+
114
+ vp.show(*vis_list,
115
+ bg="white",
116
+ axes=1,
117
+ interactive=interactive,
118
+ azimuth=30,
119
+ elevation=30)
120
+
121
+
122
+ def create_grid3D(min, max, steps):
123
+ if type(min) is int:
124
+ min = (min, min, min) # (x, y, z)
125
+ if type(max) is int:
126
+ max = (max, max, max) # (x, y, z)
127
+ if type(steps) is int:
128
+ steps = (steps, steps, steps) # (x, y, z)
129
+ arrangeX = torch.linspace(min[0], max[0], steps[0]).long()
130
+ arrangeY = torch.linspace(min[1], max[1], steps[1]).long()
131
+ arrangeZ = torch.linspace(min[2], max[2], steps[2]).long()
132
+ gridD, girdH, gridW = torch.meshgrid([arrangeZ, arrangeY, arrangeX])
133
+ coords = torch.stack([gridW, girdH,
134
+ gridD]) # [3, steps[0], steps[1], steps[2]]
135
+ coords = coords.view(3, -1).t() # [N, 3]
136
+ return coords
137
+
138
+
139
+ def create_grid2D(min, max, steps):
140
+ if type(min) is int:
141
+ min = (min, min) # (x, y)
142
+ if type(max) is int:
143
+ max = (max, max) # (x, y)
144
+ if type(steps) is int:
145
+ steps = (steps, steps) # (x, y)
146
+ arrangeX = torch.linspace(min[0], max[0], steps[0]).long()
147
+ arrangeY = torch.linspace(min[1], max[1], steps[1]).long()
148
+ girdH, gridW = torch.meshgrid([arrangeY, arrangeX])
149
+ coords = torch.stack([gridW, girdH]) # [2, steps[0], steps[1]]
150
+ coords = coords.view(2, -1).t() # [N, 2]
151
+ return coords
152
+
153
+
154
+ class SmoothConv2D(nn.Module):
155
+ def __init__(self, in_channels, out_channels, kernel_size=3):
156
+ super().__init__()
157
+ assert kernel_size % 2 == 1, "kernel_size for smooth_conv must be odd: {3, 5, ...}"
158
+ self.padding = (kernel_size - 1) // 2
159
+
160
+ weight = torch.ones(
161
+ (in_channels, out_channels, kernel_size, kernel_size),
162
+ dtype=torch.float32) / (kernel_size**2)
163
+ self.register_buffer('weight', weight)
164
+
165
+ def forward(self, input):
166
+ return F.conv2d(input, self.weight, padding=self.padding)
167
+
168
+
169
+ class SmoothConv3D(nn.Module):
170
+ def __init__(self, in_channels, out_channels, kernel_size=3):
171
+ super().__init__()
172
+ assert kernel_size % 2 == 1, "kernel_size for smooth_conv must be odd: {3, 5, ...}"
173
+ self.padding = (kernel_size - 1) // 2
174
+
175
+ weight = torch.ones(
176
+ (in_channels, out_channels, kernel_size, kernel_size, kernel_size),
177
+ dtype=torch.float32) / (kernel_size**3)
178
+ self.register_buffer('weight', weight)
179
+
180
+ def forward(self, input):
181
+ return F.conv3d(input, self.weight, padding=self.padding)
182
+
183
+
184
+ def build_smooth_conv3D(in_channels=1,
185
+ out_channels=1,
186
+ kernel_size=3,
187
+ padding=1):
188
+ smooth_conv = torch.nn.Conv3d(in_channels=in_channels,
189
+ out_channels=out_channels,
190
+ kernel_size=kernel_size,
191
+ padding=padding)
192
+ smooth_conv.weight.data = torch.ones(
193
+ (in_channels, out_channels, kernel_size, kernel_size, kernel_size),
194
+ dtype=torch.float32) / (kernel_size**3)
195
+ smooth_conv.bias.data = torch.zeros(out_channels)
196
+ return smooth_conv
197
+
198
+
199
+ def build_smooth_conv2D(in_channels=1,
200
+ out_channels=1,
201
+ kernel_size=3,
202
+ padding=1):
203
+ smooth_conv = torch.nn.Conv2d(in_channels=in_channels,
204
+ out_channels=out_channels,
205
+ kernel_size=kernel_size,
206
+ padding=padding)
207
+ smooth_conv.weight.data = torch.ones(
208
+ (in_channels, out_channels, kernel_size, kernel_size),
209
+ dtype=torch.float32) / (kernel_size**2)
210
+ smooth_conv.bias.data = torch.zeros(out_channels)
211
+ return smooth_conv
212
+
213
+
214
+ def get_uncertain_point_coords_on_grid3D(uncertainty_map, num_points,
215
+ **kwargs):
216
+ """
217
+ Find `num_points` most uncertain points from `uncertainty_map` grid.
218
+ Args:
219
+ uncertainty_map (Tensor): A tensor of shape (N, 1, D, H, W) that contains uncertainty
220
+ values for a set of points on a regular D x H x W grid.
221
+ num_points (int): The number of points P to select.
222
+ Returns:
223
+ point_indices (Tensor): A tensor of shape (N, P) that contains indices from
224
+ [0, H x W x D) of the most uncertain points.
225
+ point_coords (Tensor): A tensor of shape (N, P, 3) that contains [0, 1] x [0, 1] normalized
226
+ coordinates of the most uncertain points from the H x W x D grid.
227
+ """
228
+ R, _, D, H, W = uncertainty_map.shape
229
+ # h_step = 1.0 / float(H)
230
+ # w_step = 1.0 / float(W)
231
+ # d_step = 1.0 / float(D)
232
+
233
+ num_points = min(D * H * W, num_points)
234
+ point_scores, point_indices = torch.topk(uncertainty_map.view(
235
+ R, D * H * W),
236
+ k=num_points,
237
+ dim=1)
238
+ point_coords = torch.zeros(R,
239
+ num_points,
240
+ 3,
241
+ dtype=torch.float,
242
+ device=uncertainty_map.device)
243
+ # point_coords[:, :, 0] = h_step / 2.0 + (point_indices // (W * D)).to(torch.float) * h_step
244
+ # point_coords[:, :, 1] = w_step / 2.0 + (point_indices % (W * D) // D).to(torch.float) * w_step
245
+ # point_coords[:, :, 2] = d_step / 2.0 + (point_indices % D).to(torch.float) * d_step
246
+ point_coords[:, :, 0] = (point_indices % W).to(torch.float) # x
247
+ point_coords[:, :, 1] = (point_indices % (H * W) // W).to(torch.float) # y
248
+ point_coords[:, :, 2] = (point_indices // (H * W)).to(torch.float) # z
249
+ print(f"resolution {D} x {H} x {W}", point_scores.min(),
250
+ point_scores.max())
251
+ return point_indices, point_coords
252
+
253
+
254
+ def get_uncertain_point_coords_on_grid3D_faster(uncertainty_map, num_points,
255
+ clip_min):
256
+ """
257
+ Find `num_points` most uncertain points from `uncertainty_map` grid.
258
+ Args:
259
+ uncertainty_map (Tensor): A tensor of shape (N, 1, D, H, W) that contains uncertainty
260
+ values for a set of points on a regular D x H x W grid.
261
+ num_points (int): The number of points P to select.
262
+ Returns:
263
+ point_indices (Tensor): A tensor of shape (N, P) that contains indices from
264
+ [0, H x W x D) of the most uncertain points.
265
+ point_coords (Tensor): A tensor of shape (N, P, 3) that contains [0, 1] x [0, 1] normalized
266
+ coordinates of the most uncertain points from the H x W x D grid.
267
+ """
268
+ R, _, D, H, W = uncertainty_map.shape
269
+ # h_step = 1.0 / float(H)
270
+ # w_step = 1.0 / float(W)
271
+ # d_step = 1.0 / float(D)
272
+
273
+ assert R == 1, "batchsize > 1 is not implemented!"
274
+ uncertainty_map = uncertainty_map.view(D * H * W)
275
+ indices = (uncertainty_map >= clip_min).nonzero().squeeze(1)
276
+ num_points = min(num_points, indices.size(0))
277
+ point_scores, point_indices = torch.topk(uncertainty_map[indices],
278
+ k=num_points,
279
+ dim=0)
280
+ point_indices = indices[point_indices].unsqueeze(0)
281
+
282
+ point_coords = torch.zeros(R,
283
+ num_points,
284
+ 3,
285
+ dtype=torch.float,
286
+ device=uncertainty_map.device)
287
+ # point_coords[:, :, 0] = h_step / 2.0 + (point_indices // (W * D)).to(torch.float) * h_step
288
+ # point_coords[:, :, 1] = w_step / 2.0 + (point_indices % (W * D) // D).to(torch.float) * w_step
289
+ # point_coords[:, :, 2] = d_step / 2.0 + (point_indices % D).to(torch.float) * d_step
290
+ point_coords[:, :, 0] = (point_indices % W).to(torch.float) # x
291
+ point_coords[:, :, 1] = (point_indices % (H * W) // W).to(torch.float) # y
292
+ point_coords[:, :, 2] = (point_indices // (H * W)).to(torch.float) # z
293
+ # print (f"resolution {D} x {H} x {W}", point_scores.min(), point_scores.max())
294
+ return point_indices, point_coords
295
+
296
+
297
+ def get_uncertain_point_coords_on_grid2D(uncertainty_map, num_points,
298
+ **kwargs):
299
+ """
300
+ Find `num_points` most uncertain points from `uncertainty_map` grid.
301
+ Args:
302
+ uncertainty_map (Tensor): A tensor of shape (N, 1, H, W) that contains uncertainty
303
+ values for a set of points on a regular H x W grid.
304
+ num_points (int): The number of points P to select.
305
+ Returns:
306
+ point_indices (Tensor): A tensor of shape (N, P) that contains indices from
307
+ [0, H x W) of the most uncertain points.
308
+ point_coords (Tensor): A tensor of shape (N, P, 2) that contains [0, 1] x [0, 1] normalized
309
+ coordinates of the most uncertain points from the H x W grid.
310
+ """
311
+ R, _, H, W = uncertainty_map.shape
312
+ # h_step = 1.0 / float(H)
313
+ # w_step = 1.0 / float(W)
314
+
315
+ num_points = min(H * W, num_points)
316
+ point_scores, point_indices = torch.topk(uncertainty_map.view(R, H * W),
317
+ k=num_points,
318
+ dim=1)
319
+ point_coords = torch.zeros(R,
320
+ num_points,
321
+ 2,
322
+ dtype=torch.long,
323
+ device=uncertainty_map.device)
324
+ # point_coords[:, :, 0] = w_step / 2.0 + (point_indices % W).to(torch.float) * w_step
325
+ # point_coords[:, :, 1] = h_step / 2.0 + (point_indices // W).to(torch.float) * h_step
326
+ point_coords[:, :, 0] = (point_indices % W).to(torch.long)
327
+ point_coords[:, :, 1] = (point_indices // W).to(torch.long)
328
+ # print (point_scores.min(), point_scores.max())
329
+ return point_indices, point_coords
330
+
331
+
332
+ def get_uncertain_point_coords_on_grid2D_faster(uncertainty_map, num_points,
333
+ clip_min):
334
+ """
335
+ Find `num_points` most uncertain points from `uncertainty_map` grid.
336
+ Args:
337
+ uncertainty_map (Tensor): A tensor of shape (N, 1, H, W) that contains uncertainty
338
+ values for a set of points on a regular H x W grid.
339
+ num_points (int): The number of points P to select.
340
+ Returns:
341
+ point_indices (Tensor): A tensor of shape (N, P) that contains indices from
342
+ [0, H x W) of the most uncertain points.
343
+ point_coords (Tensor): A tensor of shape (N, P, 2) that contains [0, 1] x [0, 1] normalized
344
+ coordinates of the most uncertain points from the H x W grid.
345
+ """
346
+ R, _, H, W = uncertainty_map.shape
347
+ # h_step = 1.0 / float(H)
348
+ # w_step = 1.0 / float(W)
349
+
350
+ assert R == 1, "batchsize > 1 is not implemented!"
351
+ uncertainty_map = uncertainty_map.view(H * W)
352
+ indices = (uncertainty_map >= clip_min).nonzero().squeeze(1)
353
+ num_points = min(num_points, indices.size(0))
354
+ point_scores, point_indices = torch.topk(uncertainty_map[indices],
355
+ k=num_points,
356
+ dim=0)
357
+ point_indices = indices[point_indices].unsqueeze(0)
358
+
359
+ point_coords = torch.zeros(R,
360
+ num_points,
361
+ 2,
362
+ dtype=torch.long,
363
+ device=uncertainty_map.device)
364
+ # point_coords[:, :, 0] = w_step / 2.0 + (point_indices % W).to(torch.float) * w_step
365
+ # point_coords[:, :, 1] = h_step / 2.0 + (point_indices // W).to(torch.float) * h_step
366
+ point_coords[:, :, 0] = (point_indices % W).to(torch.long)
367
+ point_coords[:, :, 1] = (point_indices // W).to(torch.long)
368
+ # print (point_scores.min(), point_scores.max())
369
+ return point_indices, point_coords
370
+
371
+
372
+ def calculate_uncertainty(logits, classes=None, balance_value=0.5):
373
+ """
374
+ We estimate uncertainty as the L1 distance between balance_value and the prediction in 'logits' for the
375
+ foreground class in `classes`.
376
+ Args:
377
+ logits (Tensor): A tensor of shape (R, C, ...) or (R, 1, ...) for class-specific or
378
+ class-agnostic, where R is the total number of predicted masks in all images and C is
379
+ the number of foreground classes. The values are logits.
380
+ classes (list): A list of length R that contains either the predicted or the ground truth class
381
+ for each predicted mask.
382
+ Returns:
383
+ scores (Tensor): A tensor of shape (R, 1, ...) that contains uncertainty scores with
384
+ the most uncertain locations having the highest uncertainty score.
385
+ """
386
+ if logits.shape[1] == 1:
387
+ gt_class_logits = logits
388
+ else:
389
+ gt_class_logits = logits[
390
+ torch.arange(logits.shape[0], device=logits.device),
391
+ classes].unsqueeze(1)
392
+ return -torch.abs(gt_class_logits - balance_value)
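A hedged sketch of exercising `calculate_uncertainty` on class-agnostic logits (shapes invented for illustration): the score peaks wherever the prediction is closest to `balance_value`.

    import torch
    logits = torch.rand(2, 1, 8, 8)         # (R, 1, ...) class-agnostic predictions
    scores = calculate_uncertainty(logits)  # equals -|logits - 0.5| by default
    assert scores.shape == logits.shape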
lib/common/smpl_vert_segmentation.json ADDED
The diff for this file is too large to render. See raw diff
 
lib/common/train_util.py ADDED
@@ -0,0 +1,599 @@
1
+
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
5
+ # holder of all proprietary rights on this computer program.
6
+ # You can only use this computer program if you have closed
7
+ # a license agreement with MPG or you get the right to use the computer
8
+ # program from someone who is authorized to grant you that right.
9
+ # Any use of the computer program without a valid license is prohibited and
10
+ # liable to prosecution.
11
+ #
12
+ # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
13
+ # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
14
+ # for Intelligent Systems. All rights reserved.
15
+ #
16
+ # Contact: ps-license@tuebingen.mpg.de
17
+
18
+ import yaml
19
+ import os.path as osp
20
+ import torch
21
+ import numpy as np
22
+ import torch.nn.functional as F
23
+ from ..dataset.mesh_util import *
24
+ from ..net.geometry import orthogonal
25
+ from pytorch3d.renderer.mesh import rasterize_meshes
26
+ from .render_utils import Pytorch3dRasterizer
27
+ from pytorch3d.structures import Meshes
28
+ import cv2
29
+ from PIL import Image
30
+ from tqdm import tqdm
31
+ import os
32
+ from termcolor import colored
33
+
34
+
35
+
36
+
37
+ def reshape_sample_tensor(sample_tensor, num_views):
38
+ if num_views == 1:
39
+ return sample_tensor
40
+ # Need to repeat sample_tensor along the batch dim num_views times
41
+ sample_tensor = sample_tensor.unsqueeze(dim=1)
42
+ sample_tensor = sample_tensor.repeat(1, num_views, 1, 1)
43
+ sample_tensor = sample_tensor.view(
44
+ sample_tensor.shape[0] * sample_tensor.shape[1],
45
+ sample_tensor.shape[2], sample_tensor.shape[3])
46
+ return sample_tensor
47
+
48
+
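A small sketch (toy shapes) of what `reshape_sample_tensor` does for a multi-view setup: each batch element's query points are repeated once per view along the batch dimension.

    import torch
    pts = torch.rand(2, 3, 5000)                    # (batch, 3, num_points)
    pts3 = reshape_sample_tensor(pts, num_views=3)  # -> (batch * 3, 3, num_points)
    assert pts3.shape == (6, 3, 5000)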
49
+ def gen_mesh_eval(opt, net, cuda, data, resolution=None):
50
+ resolution = opt.resolution if resolution is None else resolution
51
+ image_tensor = data['img'].to(device=cuda)
52
+ calib_tensor = data['calib'].to(device=cuda)
53
+
54
+ net.filter(image_tensor)
55
+
56
+ b_min = data['b_min']
57
+ b_max = data['b_max']
58
+ try:
59
+ verts, faces, _, _ = reconstruction_faster(net,
60
+ cuda,
61
+ calib_tensor,
62
+ resolution,
63
+ b_min,
64
+ b_max,
65
+ use_octree=False)
66
+
67
+ except Exception as e:
68
+ print(e)
69
+ print('Cannot run marching cubes at this time.')
70
+ verts, faces = None, None
71
+ return verts, faces
72
+
73
+
74
+ def gen_mesh(opt, net, cuda, data, save_path, resolution=None):
75
+ resolution = opt.resolution if resolution is None else resolution
76
+ image_tensor = data['img'].to(device=cuda)
77
+ calib_tensor = data['calib'].to(device=cuda)
78
+
79
+ net.filter(image_tensor)
80
+
81
+ b_min = data['b_min']
82
+ b_max = data['b_max']
83
+ try:
84
+ save_img_path = save_path[:-4] + '.png'
85
+ save_img_list = []
86
+ for v in range(image_tensor.shape[0]):
87
+ save_img = (np.transpose(image_tensor[v].detach().cpu().numpy(),
88
+ (1, 2, 0)) * 0.5 +
89
+ 0.5)[:, :, ::-1] * 255.0
90
+ save_img_list.append(save_img)
91
+ save_img = np.concatenate(save_img_list, axis=1)
92
+ Image.fromarray(np.uint8(save_img[:, :, ::-1])).save(save_img_path)
93
+
94
+ verts, faces, _, _ = reconstruction_faster(net, cuda, calib_tensor,
95
+ resolution, b_min, b_max)
96
+ verts_tensor = torch.from_numpy(
97
+ verts.T).unsqueeze(0).to(device=cuda).float()
98
+ xyz_tensor = net.projection(verts_tensor, calib_tensor[:1])
99
+ uv = xyz_tensor[:, :2, :]
100
+ color = net.index(image_tensor[:1], uv).detach().cpu().numpy()[0].T
101
+ color = color * 0.5 + 0.5
102
+ save_obj_mesh_with_color(save_path, verts, faces, color)
103
+ except Exception as e:
104
+ print(e)
105
+ print('Cannot run marching cubes at this time.')
106
+ verts, faces, color = None, None, None
107
+ return verts, faces, color
108
+
109
+
110
+ def gen_mesh_color(opt, netG, netC, cuda, data, save_path, use_octree=True):
111
+ image_tensor = data['img'].to(device=cuda)
112
+ calib_tensor = data['calib'].to(device=cuda)
113
+
114
+ netG.filter(image_tensor)
115
+ netC.filter(image_tensor)
116
+ netC.attach(netG.get_im_feat())
117
+
118
+ b_min = data['b_min']
119
+ b_max = data['b_max']
120
+ try:
121
+ save_img_path = save_path[:-4] + '.png'
122
+ save_img_list = []
123
+ for v in range(image_tensor.shape[0]):
124
+ save_img = (np.transpose(image_tensor[v].detach().cpu().numpy(),
125
+ (1, 2, 0)) * 0.5 +
126
+ 0.5)[:, :, ::-1] * 255.0
127
+ save_img_list.append(save_img)
128
+ save_img = np.concatenate(save_img_list, axis=1)
129
+ Image.fromarray(np.uint8(save_img[:, :, ::-1])).save(save_img_path)
130
+
131
+ verts, faces, _, _ = reconstruction_faster(netG,
132
+ cuda,
133
+ calib_tensor,
134
+ opt.resolution,
135
+ b_min,
136
+ b_max,
137
+ use_octree=use_octree)
138
+
139
+ # Now Getting colors
140
+ verts_tensor = torch.from_numpy(
141
+ verts.T).unsqueeze(0).to(device=cuda).float()
142
+ verts_tensor = reshape_sample_tensor(verts_tensor, opt.num_views)
143
+ color = np.zeros(verts.shape)
144
+ interval = 10000
145
+ for i in range(len(color) // interval):
146
+ left = i * interval
147
+ right = i * interval + interval
148
+ if i == len(color) // interval - 1:
149
+ right = -1
150
+ netC.query(verts_tensor[:, :, left:right], calib_tensor)
151
+ rgb = netC.get_preds()[0].detach().cpu().numpy() * 0.5 + 0.5
152
+ color[left:right] = rgb.T
153
+
154
+ save_obj_mesh_with_color(save_path, verts, faces, color)
155
+ except Exception as e:
156
+ print(e)
157
+ print('Cannot run marching cubes at this time.')
158
+ verts, faces, color = None, None, None
159
+ return verts, faces, color
160
+
161
+
162
+ def adjust_learning_rate(optimizer, epoch, lr, schedule, gamma):
163
+ """Sets the learning rate to the initial LR decayed by schedule"""
164
+ if epoch in schedule:
165
+ lr *= gamma
166
+ for param_group in optimizer.param_groups:
167
+ param_group['lr'] = lr
168
+ return lr
169
+
170
+
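For illustration only, a step-decay loop using `adjust_learning_rate` with made-up epochs and gamma:

    import torch
    optimizer = torch.optim.Adam([torch.zeros(1, requires_grad=True)], lr=1e-3)
    lr = 1e-3
    for epoch in range(100):
        lr = adjust_learning_rate(optimizer, epoch, lr, schedule=[40, 60], gamma=0.1)
        # lr drops to 1e-4 at epoch 40 and to 1e-5 at epoch 60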
171
+ def compute_acc(pred, gt, thresh=0.5):
172
+ '''
173
+ return:
174
+ IOU, precision, and recall
175
+ '''
176
+ with torch.no_grad():
177
+ vol_pred = pred > thresh
178
+ vol_gt = gt > thresh
179
+
180
+ union = vol_pred | vol_gt
181
+ inter = vol_pred & vol_gt
182
+
183
+ true_pos = inter.sum().float()
184
+
185
+ union = union.sum().float()
186
+ if union == 0:
187
+ union = 1
188
+ vol_pred = vol_pred.sum().float()
189
+ if vol_pred == 0:
190
+ vol_pred = 1
191
+ vol_gt = vol_gt.sum().float()
192
+ if vol_gt == 0:
193
+ vol_gt = 1
194
+ return true_pos / union, true_pos / vol_pred, true_pos / vol_gt
195
+
196
+
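A quick, illustrative call of `compute_acc` on random occupancy values (shapes are arbitrary):

    import torch
    pred = torch.rand(1, 1, 8000)                # predicted occupancy in [0, 1]
    gt = (torch.rand(1, 1, 8000) > 0.5).float()  # binary ground-truth occupancy
    iou, precision, recall = compute_acc(pred, gt, thresh=0.5)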
197
+ # def calc_metrics(opt, net, cuda, dataset, num_tests,
198
+ # resolution=128, sampled_points=1000, use_kaolin=True):
199
+ # if num_tests > len(dataset):
200
+ # num_tests = len(dataset)
201
+ # with torch.no_grad():
202
+ # chamfer_arr, p2s_arr = [], []
203
+ # for idx in tqdm(range(num_tests)):
204
+ # data = dataset[idx * len(dataset) // num_tests]
205
+
206
+ # verts, faces = gen_mesh_eval(opt, net, cuda, data, resolution)
207
+ # if verts is None:
208
+ # continue
209
+
210
+ # mesh_gt = trimesh.load(data['mesh_path'])
211
+ # mesh_gt = mesh_gt.split(only_watertight=False)
212
+ # comp_num = [mesh.vertices.shape[0] for mesh in mesh_gt]
213
+ # mesh_gt = mesh_gt[comp_num.index(max(comp_num))]
214
+
215
+ # mesh_pred = trimesh.Trimesh(verts, faces)
216
+
217
+ # gt_surface_pts, _ = trimesh.sample.sample_surface_even(
218
+ # mesh_gt, sampled_points)
219
+ # pred_surface_pts, _ = trimesh.sample.sample_surface_even(
220
+ # mesh_pred, sampled_points)
221
+
222
+ # if use_kaolin and has_kaolin:
223
+ # kal_mesh_gt = kal.rep.TriangleMesh.from_tensors(
224
+ # torch.tensor(mesh_gt.vertices).float().to(device=cuda),
225
+ # torch.tensor(mesh_gt.faces).long().to(device=cuda))
226
+ # kal_mesh_pred = kal.rep.TriangleMesh.from_tensors(
227
+ # torch.tensor(mesh_pred.vertices).float().to(device=cuda),
228
+ # torch.tensor(mesh_pred.faces).long().to(device=cuda))
229
+
230
+ # kal_distance_0 = kal.metrics.mesh.point_to_surface(
231
+ # torch.tensor(pred_surface_pts).float().to(device=cuda), kal_mesh_gt)
232
+ # kal_distance_1 = kal.metrics.mesh.point_to_surface(
233
+ # torch.tensor(gt_surface_pts).float().to(device=cuda), kal_mesh_pred)
234
+
235
+ # dist_gt_pred = torch.sqrt(kal_distance_0).cpu().numpy()
236
+ # dist_pred_gt = torch.sqrt(kal_distance_1).cpu().numpy()
237
+ # else:
238
+ # try:
239
+ # _, dist_pred_gt, _ = trimesh.proximity.closest_point(mesh_pred, gt_surface_pts)
240
+ # _, dist_gt_pred, _ = trimesh.proximity.closest_point(mesh_gt, pred_surface_pts)
241
+ # except Exception as e:
242
+ # print (e)
243
+ # continue
244
+
245
+ # chamfer_dist = 0.5 * (dist_pred_gt.mean() + dist_gt_pred.mean())
246
+ # p2s_dist = dist_pred_gt.mean()
247
+
248
+ # chamfer_arr.append(chamfer_dist)
249
+ # p2s_arr.append(p2s_dist)
250
+
251
+ # return np.average(chamfer_arr), np.average(p2s_arr)
252
+
253
+
254
+ def calc_error(opt, net, cuda, dataset, num_tests):
255
+ if num_tests > len(dataset):
256
+ num_tests = len(dataset)
257
+ with torch.no_grad():
258
+ error_arr, IOU_arr, prec_arr, recall_arr = [], [], [], []
259
+ for idx in tqdm(range(num_tests)):
260
+ data = dataset[idx * len(dataset) // num_tests]
261
+ # retrieve the data
262
+ image_tensor = data['img'].to(device=cuda)
263
+ calib_tensor = data['calib'].to(device=cuda)
264
+ sample_tensor = data['samples'].to(device=cuda).unsqueeze(0)
265
+ if opt.num_views > 1:
266
+ sample_tensor = reshape_sample_tensor(sample_tensor,
267
+ opt.num_views)
268
+ label_tensor = data['labels'].to(device=cuda).unsqueeze(0)
269
+
270
+ res, error = net.forward(image_tensor,
271
+ sample_tensor,
272
+ calib_tensor,
273
+ labels=label_tensor)
274
+
275
+ IOU, prec, recall = compute_acc(res, label_tensor)
276
+
277
+ # print(
278
+ # '{0}/{1} | Error: {2:06f} IOU: {3:06f} prec: {4:06f} recall: {5:06f}'
279
+ # .format(idx, num_tests, error.item(), IOU.item(), prec.item(), recall.item()))
280
+ error_arr.append(error.item())
281
+ IOU_arr.append(IOU.item())
282
+ prec_arr.append(prec.item())
283
+ recall_arr.append(recall.item())
284
+
285
+ return np.average(error_arr), np.average(IOU_arr), np.average(
286
+ prec_arr), np.average(recall_arr)
287
+
288
+
289
+ def calc_error_color(opt, netG, netC, cuda, dataset, num_tests):
290
+ if num_tests > len(dataset):
291
+ num_tests = len(dataset)
292
+ with torch.no_grad():
293
+ error_color_arr = []
294
+
295
+ for idx in tqdm(range(num_tests)):
296
+ data = dataset[idx * len(dataset) // num_tests]
297
+ # retrieve the data
298
+ image_tensor = data['img'].to(device=cuda)
299
+ calib_tensor = data['calib'].to(device=cuda)
300
+ color_sample_tensor = data['color_samples'].to(
301
+ device=cuda).unsqueeze(0)
302
+
303
+ if opt.num_views > 1:
304
+ color_sample_tensor = reshape_sample_tensor(
305
+ color_sample_tensor, opt.num_views)
306
+
307
+ rgb_tensor = data['rgbs'].to(device=cuda).unsqueeze(0)
308
+
309
+ netG.filter(image_tensor)
310
+ _, errorC = netC.forward(image_tensor,
311
+ netG.get_im_feat(),
312
+ color_sample_tensor,
313
+ calib_tensor,
314
+ labels=rgb_tensor)
315
+
316
+ # print('{0}/{1} | Error inout: {2:06f} | Error color: {3:06f}'
317
+ # .format(idx, num_tests, errorG.item(), errorC.item()))
318
+ error_color_arr.append(errorC.item())
319
+
320
+ return np.average(error_color_arr)
321
+
322
+
323
+ # PyTorch Lightning training-related functions
324
+
325
+
326
+ def query_func(opt, netG, features, points, proj_matrix=None):
327
+ '''
328
+ - points: size of (bz, N, 3)
329
+ - proj_matrix: size of (bz, 4, 4)
330
+ return: size of (bz, 1, N)
331
+ '''
332
+ assert len(points) == 1
333
+ samples = points.repeat(opt.num_views, 1, 1)
334
+ samples = samples.permute(0, 2, 1) # [bz, 3, N]
335
+
336
+ # view specific query
337
+ if proj_matrix is not None:
338
+ samples = orthogonal(samples, proj_matrix)
339
+
340
+ calib_tensor = torch.stack([torch.eye(4).float()], dim=0).type_as(samples)
341
+
342
+ preds = netG.query(features=features,
343
+ points=samples,
344
+ calibs=calib_tensor,
345
+ regressor=netG.if_regressor)
346
+
347
+ if type(preds) is list:
348
+ preds = preds[0]
349
+
350
+ return preds
351
+
352
+
353
+ def isin(ar1, ar2):
354
+ return (ar1[..., None] == ar2).any(-1)
355
+
356
+
357
+ def in1d(ar1, ar2):
358
+ mask = ar2.new_zeros((max(ar1.max(), ar2.max()) + 1, ), dtype=torch.bool)
359
+ mask[ar2.unique()] = True
360
+ return mask[ar1]
361
+
362
+
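The two helpers above are torch stand-ins for NumPy's `isin` / `in1d`; a small sanity check with toy tensors:

    import torch
    faces = torch.tensor([0, 2, 5, 7])
    visible = torch.tensor([2, 7, 9])
    print(isin(faces, visible))  # tensor([False,  True, False,  True])
    print(in1d(faces, visible))  # same membership test via a boolean lookup table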
363
+ def get_visibility(xy, z, faces):
364
+ """get the visibility of vertices
365
+
366
+ Args:
367
+ xy (torch.tensor): [N,2]
368
+ z (torch.tensor): [N,1]
369
+ faces (torch.tensor): [N,3]
370
+ size (int): resolution of rendered image
371
+ """
372
+
373
+ xyz = torch.cat((xy, -z), dim=1)
374
+ xyz = (xyz + 1.0) / 2.0
375
+ faces = faces.long()
376
+
377
+ rasterizer = Pytorch3dRasterizer(image_size=2**12)
378
+ meshes_screen = Meshes(verts=xyz[None, ...], faces=faces[None, ...])
379
+ raster_settings = rasterizer.raster_settings
380
+
381
+ pix_to_face, zbuf, bary_coords, dists = rasterize_meshes(
382
+ meshes_screen,
383
+ image_size=raster_settings.image_size,
384
+ blur_radius=raster_settings.blur_radius,
385
+ faces_per_pixel=raster_settings.faces_per_pixel,
386
+ bin_size=raster_settings.bin_size,
387
+ max_faces_per_bin=raster_settings.max_faces_per_bin,
388
+ perspective_correct=raster_settings.perspective_correct,
389
+ cull_backfaces=raster_settings.cull_backfaces,
390
+ )
391
+
392
+ vis_vertices_id = torch.unique(faces[torch.unique(pix_to_face), :])
393
+ vis_mask = torch.zeros(size=(z.shape[0], 1))
394
+ vis_mask[vis_vertices_id] = 1.0
395
+
396
+ # print("------------------------\n")
397
+ # print(f"keep points : {vis_mask.sum()/len(vis_mask)}")
398
+
399
+ return vis_mask
400
+
401
+
402
+ def batch_mean(res, key):
403
+ # recursive mean for multilevel dicts
404
+ return torch.stack([
405
+ x[key] if isinstance(x, dict) else batch_mean(x, key) for x in res
406
+ ]).mean()
407
+
408
+
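A hedged illustration of `batch_mean` on a nested list of step outputs (values invented): it recurses into sub-lists and stacks the per-entry means.

    import torch
    res = [{"loss": torch.tensor(0.4)},
           [{"loss": torch.tensor(0.2)}, {"loss": torch.tensor(0.6)}]]
    print(batch_mean(res, "loss"))  # tensor(0.4000)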
409
+ def tf_log_convert(log_dict):
410
+ new_log_dict = log_dict.copy()
411
+ for k, v in log_dict.items():
412
+ new_log_dict[k.replace("_", "/")] = v
413
+ del new_log_dict[k]
414
+
415
+ return new_log_dict
416
+
417
+
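For reference, a toy call of `tf_log_convert` (keys invented); underscores become slashes so TensorBoard groups the scalars:

    print(tf_log_convert({"train_loss": 0.3, "val_iou": 0.8}))
    # {'train/loss': 0.3, 'val/iou': 0.8}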
418
+ def bar_log_convert(log_dict, name=None, rot=None):
419
+ from decimal import Decimal
420
+
421
+ new_log_dict = {}
422
+
423
+ if name is not None:
424
+ new_log_dict['name'] = name[0]
425
+ if rot is not None:
426
+ new_log_dict['rot'] = rot[0]
427
+
428
+ for k, v in log_dict.items():
429
+ color = "yellow"
430
+ if 'loss' in k:
431
+ color = "red"
432
+ k = k.replace("loss", "L")
433
+ elif 'acc' in k:
434
+ color = "green"
435
+ k = k.replace("acc", "A")
436
+ elif 'iou' in k:
437
+ color = "green"
438
+ k = k.replace("iou", "I")
439
+ elif 'prec' in k:
440
+ color = "green"
441
+ k = k.replace("prec", "P")
442
+ elif 'recall' in k:
443
+ color = "green"
444
+ k = k.replace("recall", "R")
445
+
446
+ if 'lr' not in k:
447
+ new_log_dict[colored(k.split("_")[1],
448
+ color)] = colored(f"{v:.3f}", color)
449
+ else:
450
+ new_log_dict[colored(k.split("_")[1],
451
+ color)] = colored(f"{Decimal(str(v)):.1E}",
452
+ color)
453
+
454
+ if 'loss' in new_log_dict.keys():
455
+ del new_log_dict['loss']
456
+
457
+ return new_log_dict
458
+
459
+
460
+ def accumulate(outputs, rot_num, split):
461
+
462
+ hparam_log_dict = {}
463
+
464
+ metrics = outputs[0].keys()
465
+ datasets = split.keys()
466
+
467
+ for dataset in datasets:
468
+ for metric in metrics:
469
+ keyword = f"hparam/{dataset}-{metric}"
470
+ if keyword not in hparam_log_dict.keys():
471
+ hparam_log_dict[keyword] = 0
472
+ for idx in range(split[dataset][0] * rot_num,
473
+ split[dataset][1] * rot_num):
474
+ hparam_log_dict[keyword] += outputs[idx][metric]
475
+ hparam_log_dict[keyword] /= (split[dataset][1] -
476
+ split[dataset][0]) * rot_num
477
+
478
+ print(colored(hparam_log_dict, "green"))
479
+
480
+ return hparam_log_dict
481
+
482
+
483
+ def calc_error_N(outputs, targets):
484
+ """calculate the error of normal (IGR)
485
+
486
+ Args:
487
+ outputs (torch.tensor): [B, 3, N]
488
+ target (torch.tensor): [B, N, 3]
489
+
490
+ # manifold loss and grad_loss in IGR paper
491
+ grad_loss = ((nonmnfld_grad.norm(2, dim=-1) - 1) ** 2).mean()
492
+ normals_loss = ((mnfld_grad - normals).abs()).norm(2, dim=1).mean()
493
+
494
+ Returns:
495
+ torch.tensor: error of valid normals on the surface
496
+ """
497
+ # outputs = torch.tanh(-outputs.permute(0,2,1).reshape(-1,3))
498
+ outputs = -outputs.permute(0, 2, 1).reshape(-1, 1)
499
+ targets = targets.reshape(-1, 3)[:, 2:3]
500
+ with_normals = targets.sum(dim=1).abs() > 0.0
501
+
502
+ # eikonal loss
503
+ grad_loss = ((outputs[with_normals].norm(2, dim=-1) - 1)**2).mean()
504
+ # normals loss
505
+ normal_loss = (outputs - targets)[with_normals].abs().norm(2, dim=1).mean()
506
+
507
+ return grad_loss * 0.0 + normal_loss
508
+
509
+
510
+ def calc_knn_acc(preds, carn_verts, labels, pick_num):
511
+ """calculate knn accuracy
512
+
513
+ Args:
514
+ preds (torch.tensor): [B, 3, N]
515
+ carn_verts (torch.tensor): [SMPLX_V_num, 3]
516
+ labels (torch.tensor): [B, N_knn, N]
517
+ """
518
+ N_knn_full = labels.shape[1]
519
+ preds = preds.permute(0, 2, 1).reshape(-1, 3)
520
+ labels = labels.permute(0, 2, 1).reshape(-1, N_knn_full) # [BxN, num_knn]
521
+ labels = labels[:, :pick_num]
522
+
523
+ dist = torch.cdist(preds, carn_verts, p=2) # [BxN, SMPL_V_num]
524
+ knn = dist.topk(k=pick_num, dim=1, largest=False)[1] # [BxN, num_knn]
525
+ cat_mat = torch.sort(torch.cat((knn, labels), dim=1))[0]
526
+ bool_col = torch.zeros_like(cat_mat)[:, 0]
527
+ for i in range(pick_num * 2 - 1):
528
+ bool_col += cat_mat[:, i] == cat_mat[:, i + 1]
529
+ acc = (bool_col > 0).sum() / len(bool_col)
530
+
531
+ return acc
532
+
533
+
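A hedged sketch of calling `calc_knn_acc` with toy sizes (8 ground-truth neighbours per point, checking the top 4 predicted ones):

    import torch
    preds = torch.rand(1, 3, 100)                # predicted 3D points, (B, 3, N)
    carn_verts = torch.rand(500, 3)              # canonical vertices, (V, 3)
    labels = torch.randint(0, 500, (1, 8, 100))  # ground-truth knn indices, (B, N_knn, N)
    acc = calc_knn_acc(preds, carn_verts, labels, pick_num=4)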
534
+ def calc_acc_seg(output, target, num_multiseg):
535
+ from pytorch_lightning.metrics import Accuracy
536
+ return Accuracy()(output.reshape(-1, num_multiseg).cpu(),
537
+ target.flatten().cpu())
538
+
539
+
540
+ def add_watermark(imgs, titles):
541
+
542
+ # Write some Text
543
+
544
+ font = cv2.FONT_HERSHEY_SIMPLEX
545
+ bottomLeftCornerOfText = (350, 50)
546
+ bottomRightCornerOfText = (800, 50)
547
+ fontScale = 1
548
+ fontColor = (1.0, 1.0, 1.0)
549
+ lineType = 2
550
+
551
+ for i in range(len(imgs)):
552
+
553
+ title = titles[i + 1]
554
+ cv2.putText(imgs[i], title, bottomLeftCornerOfText, font, fontScale,
555
+ fontColor, lineType)
556
+
557
+ if i == 0:
558
+ cv2.putText(imgs[i], str(titles[i][0]), bottomRightCornerOfText,
559
+ font, fontScale, fontColor, lineType)
560
+
561
+ result = np.concatenate(imgs, axis=0).transpose(2, 0, 1)
562
+
563
+ return result
564
+
565
+
566
+ def make_test_gif(img_dir):
567
+
568
+ if img_dir is not None and len(os.listdir(img_dir)) > 0:
569
+ for dataset in os.listdir(img_dir):
570
+ for subject in sorted(os.listdir(osp.join(img_dir, dataset))):
571
+ img_lst = []
572
+ im1 = None
573
+ for file in sorted(
574
+ os.listdir(osp.join(img_dir, dataset, subject))):
575
+ if file[-3:] not in ['obj', 'gif']:
576
+ img_path = os.path.join(img_dir, dataset, subject,
577
+ file)
578
+ if im1 is None:
579
+ im1 = Image.open(img_path)
580
+ else:
581
+ img_lst.append(Image.open(img_path))
582
+
583
+ print(os.path.join(img_dir, dataset, subject, "out.gif"))
584
+ im1.save(os.path.join(img_dir, dataset, subject, "out.gif"),
585
+ save_all=True,
586
+ append_images=img_lst,
587
+ duration=500,
588
+ loop=0)
589
+
590
+
591
+ def export_cfg(logger, cfg):
592
+
593
+ cfg_export_file = osp.join(logger.save_dir, logger.name,
594
+ f"version_{logger.version}", "cfg.yaml")
595
+
596
+ if not osp.exists(cfg_export_file):
597
+ os.makedirs(osp.dirname(cfg_export_file), exist_ok=True)
598
+ with open(cfg_export_file, "w+") as file:
599
+ _ = yaml.dump(cfg, file)