""" a simple wrapper for pytorch3d rendering Cite: BEHAVE: Dataset and Method for Tracking Human Object Interaction """ import numpy as np import torch from copy import deepcopy # Data structures and functions for rendering from pytorch3d.renderer import ( PointLights, RasterizationSettings, MeshRenderer, MeshRasterizer, SoftPhongShader, TexturesVertex, PerspectiveCameras, PointsRasterizer, AlphaCompositor, PointsRasterizationSettings, ) from pytorch3d.structures import Meshes, join_meshes_as_scene, Pointclouds SMPL_OBJ_COLOR_LIST = [ [0.65098039, 0.74117647, 0.85882353], # SMPL [251 / 255.0, 128 / 255.0, 114 / 255.0], # object ] class MeshRendererWrapper: "a simple wrapper for the pytorch3d mesh renderer" def __init__(self, image_size=1200, faces_per_pixel=1, device='cuda:0', blur_radius=0, lights=None, materials=None, max_faces_per_bin=50000): self.image_size = image_size self.faces_per_pixel=faces_per_pixel self.max_faces_per_bin=max_faces_per_bin # prevent overflow, see https://github.com/facebookresearch/pytorch3d/issues/348 self.blur_radius = blur_radius self.device = device self.lights=lights if lights is not None else PointLights( ((0.5, 0.5, 0.5),), ((0.5, 0.5, 0.5),), ((0.05, 0.05, 0.05),), ((0, -2, 0),), device ) self.materials = materials self.renderer = self.setup_renderer() def setup_renderer(self): # for sillhouette rendering sigma = 1e-4 raster_settings = RasterizationSettings( image_size=self.image_size, blur_radius=self.blur_radius, # blur_radius=np.log(1. / 1e-4 - 1.) * sigma, # this will create large sphere for each face faces_per_pixel=self.faces_per_pixel, clip_barycentric_coords=False, max_faces_per_bin=self.max_faces_per_bin ) shader = SoftPhongShader( device=self.device, lights=self.lights, materials=self.materials) renderer = MeshRenderer( rasterizer=MeshRasterizer( raster_settings=raster_settings), shader=shader ) return renderer def render(self, meshes, cameras, ret_mask=False, mode='rgb'): assert len(meshes.faces_list()) == 1, 'currently only support batch size =1 rendering!' images = self.renderer(meshes, cameras=cameras) # print(images.shape) if ret_mask or mode=='mask': mask = images[0, ..., 3].cpu().detach().numpy() return images[0, ..., :3].cpu().detach().numpy(), mask > 0 return images[0, ..., :3].cpu().detach().numpy() def get_kinect_camera(device='cuda:0', kid=1): R, T = torch.eye(3), torch.zeros(3) R[0, 0] = R[1, 1] = -1 # pytorch3d y-axis up, need to rotate to kinect coordinate R = R.unsqueeze(0) T = T.unsqueeze(0) assert kid in [0, 1, 2, 3], f'invalid kinect index {kid}!' 


class PcloudRenderer:
    "a simple wrapper for pytorch3d point cloud renderer"
    def __init__(self, image_size=1024, radius=0.005, points_per_pixel=10,
                 device='cuda:0', bin_size=128, batch_size=1, ret_depth=False):
        camera_centers = []
        focal_lengths = []
        for i in range(batch_size):
            camera_centers.append(torch.Tensor([image_size / 2., image_size / 2.]).to(device))
            focal_lengths.append(torch.Tensor([image_size / 2., image_size / 2.]).to(device))
        self.image_size = image_size
        self.device = device
        self.camera_center = torch.stack(camera_centers)
        self.focal_length = torch.stack(focal_lengths)
        self.ret_depth = ret_depth  # return depth map or not
        self.renderer = self.setup_renderer(radius, points_per_pixel, bin_size)

    def render(self, pc, cameras, mode='image'):
        # TODO: support batch rendering
        """
        render the point cloud; a foreground mask can be derived from the zbuf
        image: (H, W, 3)
        mask: (H, W), True for pixels covered by at least one point
        """
        images, fragments = self.renderer(pc, cameras=cameras)
        if mode == 'image':
            if images.shape[0] == 1:
                img = images[0, ..., :3].cpu().numpy().copy()
            else:
                img = images[..., :3].cpu().numpy().copy()
            return img
        elif mode == 'mask':
            zbuf = torch.mean(fragments.zbuf, -1)  # (B, H, W)
            masks = zbuf >= 0  # background pixels have zbuf == -1
            if images.shape[0] == 1:
                img = images[0, ..., :3].cpu().numpy()
                masks = masks[0].cpu().numpy().astype(bool)
            else:
                img = images[..., :3].cpu().numpy()
                masks = masks.cpu().numpy().astype(bool)
            return img, masks

    def get_xy_ndc(self):
        """
        return (1, H, W, 2); each pixel is the x, y coordinate in NDC space
        """
        py, px = torch.meshgrid(torch.linspace(0, self.image_size - 1, self.image_size),
                                torch.linspace(0, self.image_size - 1, self.image_size))
        x_ndc = 1 - 2 * px / (self.image_size - 1)
        y_ndc = 1 - 2 * py / (self.image_size - 1)
        xy_ndc = torch.stack([x_ndc, y_ndc], dim=-1).to(self.device)
        return xy_ndc.unsqueeze(0)

    def setup_renderer(self, radius, points_per_pixel, bin_size):
        raster_settings = PointsRasterizationSettings(
            image_size=self.image_size,
            radius=radius,
            points_per_pixel=points_per_pixel,
            bin_size=bin_size,
            max_points_per_bin=500000
        )
        # Create a points renderer by compositing points using an alpha compositor
        # (nearer points are weighted more heavily).
        rasterizer = PointsRasterizer(raster_settings=raster_settings)
        renderer = PointsRendererWithFragments(
            rasterizer=rasterizer,
            compositor=AlphaCompositor()
        )
        return renderer
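

# A minimal usage sketch (not part of the original BEHAVE code): render a colored
# point cloud with PcloudRenderer and a Kinect camera. The point cloud file is
# hypothetical; any Pointclouds with per-point RGB features works.
def example_render_pclouds(pc_file='debug/person.ply', device='cuda:0'):
    from psbody.mesh import Mesh
    m = Mesh()
    m.load_from_file(pc_file)  # hypothetical input path
    pc = Pointclouds([torch.from_numpy(m.v).float().to(device)],
                     features=[torch.from_numpy(m.vc).float().to(device)])
    renderer = PcloudRenderer(image_size=1024, radius=0.005, device=device)
    camera = get_kinect_camera(device, kid=1)
    rgb, mask = renderer.render(pc, camera, mode='mask')  # (H, W, 3) image plus a boolean mask
    return rgb, mask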


class PointsRendererWithFragments(torch.nn.Module):
    """same as the pytorch3d points renderer, but also returns the rasterization fragments"""
    def __init__(self, rasterizer, compositor):
        super().__init__()
        self.rasterizer = rasterizer
        self.compositor = compositor

    def forward(self, point_clouds, **kwargs):
        fragments = self.rasterizer(point_clouds, **kwargs)

        # Construct weights based on the distance of a point to the true point.
        # However, this could be done differently: e.g. predicted as opposed
        # to a function of the weights.
        r = self.rasterizer.raster_settings.radius
        dists2 = fragments.dists.permute(0, 3, 1, 2)
        weights = 1 - dists2 / (r * r)
        images = self.compositor(
            fragments.idx.long().permute(0, 3, 1, 2),
            weights,
            point_clouds.features_packed().permute(1, 0),
            **kwargs,
        )

        # permute so image channels come at the end
        images = images.permute(0, 2, 3, 1)
        return images, fragments


class DepthRasterizer(torch.nn.Module):
    """
    simply rasterize a mesh or point cloud to a depth image
    """
    def __init__(self, image_size,
                 dtype='pc',
                 radius=0.005,
                 points_per_pixel=1,
                 bin_size=128,
                 blur_radius=0,
                 max_faces_per_bin=50000,
                 faces_per_pixel=1):
        """
        image_size: (height, width)
        """
        super(DepthRasterizer, self).__init__()
        if dtype == 'pc':
            raster_settings = PointsRasterizationSettings(
                image_size=image_size,
                radius=radius,
                points_per_pixel=points_per_pixel,
                bin_size=bin_size
            )
            self.rasterizer = PointsRasterizer(raster_settings=raster_settings)
        elif dtype == 'mesh':
            raster_settings = RasterizationSettings(
                image_size=image_size,
                blur_radius=blur_radius,
                faces_per_pixel=faces_per_pixel,
                clip_barycentric_coords=False,
                max_faces_per_bin=max_faces_per_bin
            )
            self.rasterizer = MeshRasterizer(raster_settings=raster_settings)
        else:
            raise NotImplementedError(f'unknown dtype {dtype}!')

    def forward(self, data, to_np=True, **kwargs):
        fragments = self.rasterizer(data, **kwargs)
        if to_np:
            zbuf = fragments.zbuf  # (B, H, W, points_per_pixel)
            return zbuf[0, ..., 0].cpu().numpy()
        return fragments.zbuf


def test_depth_rasterizer():
    from psbody.mesh import Mesh
    import cv2
    m = Mesh()
    m.load_from_file("/BS/xxie-4/work/kindata/Sep29_shuo_chairwood_hand/t0003.000/person/person.ply")
    device = 'cuda:0'
    pc = Pointclouds([torch.from_numpy(m.v).float().to(device)],
                     features=[torch.from_numpy(m.vc).float().to(device)])
    # several points per pixel so the std statistic below is meaningful
    rasterizer = DepthRasterizer(image_size=(480, 640), points_per_pixel=10)
    camera = get_kinect_camera(device)
    depth = rasterizer(pc, to_np=False, cameras=camera)  # keep as tensor to inspect the zbuf
    std = torch.std(depth, -1)
    print('max std', torch.max(std))  # maximum std is up to 1.7m, too much!
    print('min std', torch.min(std))
    print(depth.shape)
    dmap = depth[0, ..., 0].cpu().numpy()
    dmap[dmap < 0] = 0
    cv2.imwrite('debug/depth.png', (dmap * 1000).astype(np.uint16))


def test_mesh_rasterizer():
    from psbody.mesh import Mesh
    import cv2
    m = Mesh()
    m.load_from_file("/BS/xxie-4/work/kindata/Sep29_shuo_chairwood_hand/t0003.000/person/fit02/person_fit.ply")
    device = 'cuda:0'
    mesh = Meshes([torch.from_numpy(m.v).float().to(device)],
                  [torch.from_numpy(m.f.astype(int)).to(device)])
    rasterizer = DepthRasterizer(image_size=(480, 640), dtype='mesh')
    camera = get_kinect_camera(device)
    depth = rasterizer(mesh, to_np=False, cameras=camera)
    print(depth.shape)
    dmap = depth[0, ..., 0].cpu().numpy()
    dmap[dmap < 0] = 0
    cv2.imwrite('debug/depth_mesh.png', (dmap * 1000).astype(np.uint16))


if __name__ == '__main__':
    # test_depth_rasterizer()
    test_mesh_rasterizer()