Spaces:
Sleeping
Sleeping
""" | |
a simple wrapper for pytorch3d rendering | |
Cite: BEHAVE: Dataset and Method for Tracking Human Object Interaction | |
""" | |
import numpy as np | |
import torch | |
from copy import deepcopy | |
# Data structures and functions for rendering | |
from pytorch3d.renderer import ( | |
PointLights, | |
RasterizationSettings, | |
MeshRenderer, | |
MeshRasterizer, | |
SoftPhongShader, | |
TexturesVertex, | |
PerspectiveCameras, | |
PointsRasterizer, | |
AlphaCompositor, | |
PointsRasterizationSettings, | |
) | |
from pytorch3d.structures import Meshes, join_meshes_as_scene, Pointclouds | |
SMPL_OBJ_COLOR_LIST = [ | |
[0.65098039, 0.74117647, 0.85882353], # SMPL | |
[251 / 255.0, 128 / 255.0, 114 / 255.0], # object | |
] | |
class MeshRendererWrapper: | |
"a simple wrapper for the pytorch3d mesh renderer" | |
def __init__(self, image_size=1200, | |
faces_per_pixel=1, | |
device='cuda:0', | |
blur_radius=0, lights=None, | |
materials=None, max_faces_per_bin=50000): | |
self.image_size = image_size | |
self.faces_per_pixel=faces_per_pixel | |
self.max_faces_per_bin=max_faces_per_bin # prevent overflow, see https://github.com/facebookresearch/pytorch3d/issues/348 | |
self.blur_radius = blur_radius | |
self.device = device | |
self.lights=lights if lights is not None else PointLights( | |
((0.5, 0.5, 0.5),), ((0.5, 0.5, 0.5),), ((0.05, 0.05, 0.05),), ((0, -2, 0),), device | |
) | |
self.materials = materials | |
self.renderer = self.setup_renderer() | |
def setup_renderer(self): | |
# for sillhouette rendering | |
sigma = 1e-4 | |
raster_settings = RasterizationSettings( | |
image_size=self.image_size, | |
blur_radius=self.blur_radius, | |
# blur_radius=np.log(1. / 1e-4 - 1.) * sigma, # this will create large sphere for each face | |
faces_per_pixel=self.faces_per_pixel, | |
clip_barycentric_coords=False, | |
max_faces_per_bin=self.max_faces_per_bin | |
) | |
shader = SoftPhongShader( | |
device=self.device, | |
lights=self.lights, | |
materials=self.materials) | |
renderer = MeshRenderer( | |
rasterizer=MeshRasterizer( | |
raster_settings=raster_settings), | |
shader=shader | |
) | |
return renderer | |
def render(self, meshes, cameras, ret_mask=False, mode='rgb'): | |
assert len(meshes.faces_list()) == 1, 'currently only support batch size =1 rendering!' | |
images = self.renderer(meshes, cameras=cameras) | |
# print(images.shape) | |
if ret_mask or mode=='mask': | |
mask = images[0, ..., 3].cpu().detach().numpy() | |
return images[0, ..., :3].cpu().detach().numpy(), mask > 0 | |
return images[0, ..., :3].cpu().detach().numpy() | |
def get_kinect_camera(device='cuda:0', kid=1): | |
R, T = torch.eye(3), torch.zeros(3) | |
R[0, 0] = R[1, 1] = -1 # pytorch3d y-axis up, need to rotate to kinect coordinate | |
R = R.unsqueeze(0) | |
T = T.unsqueeze(0) | |
assert kid in [0, 1, 2, 3], f'invalid kinect index {kid}!' | |
if kid == 0: | |
fx, fy = 976.212, 976.047 | |
cx, cy = 1017.958, 787.313 | |
elif kid == 1: | |
fx, fy = 979.784, 979.840 # for original kinect coordinate system | |
cx, cy = 1018.952, 779.486 | |
elif kid == 2: | |
fx, fy = 974.899, 974.337 | |
cx, cy = 1018.747, 786.176 | |
else: | |
fx, fy = 972.873, 972.790 | |
cx, cy = 1022.0565, 770.397 | |
color_w, color_h = 2048, 1536 # kinect color image size | |
cam_center = torch.tensor((cx, cy), dtype=torch.float32).unsqueeze(0) | |
focal_length = torch.tensor((fx, fy), dtype=torch.float32).unsqueeze(0) | |
cam = PerspectiveCameras(focal_length=focal_length, principal_point=cam_center, | |
image_size=((color_w, color_h),), | |
device=device, | |
R=R, T=T) | |
return cam | |
class PcloudRenderer: | |
"a simple wrapper for pytorch3d point cloud renderer" | |
def __init__(self, image_size=1024, radius=0.005, points_per_pixel=10, | |
device='cuda:0', bin_size=128, batch_size=1, ret_depth=False): | |
camera_centers = [] | |
focal_lengths = [] | |
for i in range(batch_size): | |
camera_centers.append(torch.Tensor([image_size / 2., image_size / 2.]).to(device)) | |
focal_lengths.append(torch.Tensor([image_size / 2., image_size / 2.]).to(device)) | |
self.image_size = image_size | |
self.device = device | |
self.camera_center = torch.stack(camera_centers) | |
self.focal_length = torch.stack(focal_lengths) | |
self.ret_depth = ret_depth # return depth map or not | |
self.renderer = self.setup_renderer(radius, points_per_pixel, bin_size) | |
def render(self, pc, cameras, mode='image'): | |
# TODO: support batch rendering | |
""" | |
render the point cloud, compute the world coordinate of each pixel based on zbuf | |
image: (H, W, 3) | |
xyz_world: (H, W, 3), the third dimension is the xyz coordinate in world space | |
""" | |
# assert cameras.R.shape[0]==1, "batch rendering is not supported for now!" | |
images, fragments = self.renderer(pc, cameras=cameras) | |
if mode=='image': | |
if images.shape[0] == 1: | |
img = images[0, ..., :3].cpu().numpy().copy() | |
else: | |
img = images[..., :3].cpu().numpy().copy() | |
return img | |
elif mode=='mask': | |
zbuf = torch.mean(fragments.zbuf, -1) # (B, H, W) | |
masks = zbuf >= 0 | |
if images.shape[0] == 1: | |
img = images[0, ..., :3].cpu().numpy() | |
masks = masks[0].cpu().numpy().astype(bool) | |
else: | |
img = images[..., :3].cpu().numpy() | |
masks = masks.cpu().numpy().astype(bool) | |
return img, masks | |
def get_xy_ndc(self): | |
""" | |
return (H, W, 2), each pixel is the x,y coordinate in NDC space | |
""" | |
py, px = torch.meshgrid(torch.linspace(0, self.image_size-1, self.image_size), | |
torch.linspace(0, self.image_size-1, self.image_size)) | |
x_ndc = 1 - 2*px/(self.image_size - 1) | |
y_ndc = 1 - 2*py/(self.image_size - 1) | |
xy_ndc = torch.stack([x_ndc, y_ndc], axis=-1).to(self.device) | |
return xy_ndc.squeeze(0).unsqueeze(0) | |
def setup_renderer(self, radius, points_per_pixel, bin_size): | |
raster_settings = PointsRasterizationSettings( | |
image_size=self.image_size, | |
# radius=0.003, | |
radius=radius, | |
points_per_pixel=points_per_pixel, | |
bin_size=bin_size, | |
max_points_per_bin=500000 | |
) | |
# Create a points renderer by compositing points using an alpha compositor (nearer points | |
# are weighted more heavily). See [1] for an explanation. | |
rasterizer = PointsRasterizer(raster_settings=raster_settings) | |
renderer = PointsRendererWithFragments( | |
rasterizer=rasterizer, | |
compositor=AlphaCompositor() | |
) | |
return renderer | |
class PointsRendererWithFragments(torch.nn.Module): | |
def __init__(self, rasterizer, compositor): | |
super().__init__() | |
self.rasterizer = rasterizer | |
self.compositor = compositor | |
def forward(self, point_clouds, **kwargs) -> (torch.Tensor, torch.Tensor): | |
fragments = self.rasterizer(point_clouds, **kwargs) | |
# Construct weights based on the distance of a point to the true point. | |
# However, this could be done differently: e.g. predicted as opposed | |
# to a function of the weights. | |
r = self.rasterizer.raster_settings.radius | |
dists2 = fragments.dists.permute(0, 3, 1, 2) | |
weights = 1 - dists2 / (r * r) | |
images = self.compositor( | |
fragments.idx.long().permute(0, 3, 1, 2), | |
weights, | |
point_clouds.features_packed().permute(1, 0), | |
**kwargs, | |
) | |
# permute so image comes at the end | |
images = images.permute(0, 2, 3, 1) | |
return images, fragments | |
# class PcloudsRenderer | |
class DepthRasterizer(torch.nn.Module): | |
""" | |
simply rasterize a mesh or point cloud to depth image | |
""" | |
def __init__(self, image_size, dtype='pc', | |
radius=0.005, points_per_pixel=1, | |
bin_size=128, | |
blur_radius=0, | |
max_faces_per_bin=50000, | |
faces_per_pixel=1,): | |
""" | |
image_size: (height, width) | |
""" | |
super(DepthRasterizer, self).__init__() | |
if dtype == 'pc': | |
raster_settings = PointsRasterizationSettings( | |
image_size=image_size, | |
radius=radius, | |
points_per_pixel=points_per_pixel, | |
bin_size=bin_size | |
) | |
self.rasterizer = PointsRasterizer(raster_settings=raster_settings) | |
elif dtype == 'mesh': | |
raster_settings = RasterizationSettings( | |
image_size=image_size, | |
blur_radius=blur_radius, | |
# blur_radius=np.log(1. / 1e-4 - 1.) * sigma, # this will create large sphere for each face | |
faces_per_pixel=faces_per_pixel, | |
clip_barycentric_coords=False, | |
max_faces_per_bin=max_faces_per_bin | |
) | |
self.rasterizer=MeshRasterizer(raster_settings=raster_settings) | |
else: | |
raise NotImplemented | |
def forward(self, data, to_np=True, **kwargs): | |
fragments = self.rasterizer(data, **kwargs) | |
if to_np: | |
zbuf = fragments.zbuf # (B, H, W, points_per_pixel) | |
return zbuf[0, ..., 0].cpu().numpy() | |
return fragments.zbuf | |
def test_depth_rasterizer(): | |
from psbody.mesh import Mesh | |
import cv2 | |
m = Mesh() | |
m.load_from_file("/BS/xxie-4/work/kindata/Sep29_shuo_chairwood_hand/t0003.000/person/person.ply") | |
device = 'cuda:0' | |
pc = Pointclouds([torch.from_numpy(m.v).float().to(device)], | |
features=[torch.from_numpy(m.vc).float().to(device)]) | |
rasterizer = DepthRasterizer(image_size=(480, 640)) | |
camera = get_kinect_camera(device) | |
depth = rasterizer(pc, cameras=camera) | |
std = torch.std(depth, -1) | |
print('max std', torch.max(std)) # maximum std is up to 1.7m, too much! | |
print('min std', torch.min(std)) | |
print(depth.shape) | |
dmap = depth[0, ..., 0].cpu().numpy() | |
dmap[dmap<0] = 0 | |
cv2.imwrite('debug/depth.png', (dmap*1000).astype(np.uint16)) | |
def test_mesh_rasterizer(): | |
from psbody.mesh import Mesh | |
import cv2 | |
m = Mesh() | |
m.load_from_file("/BS/xxie-4/work/kindata/Sep29_shuo_chairwood_hand/t0003.000/person/fit02/person_fit.ply") | |
device = 'cuda:0' | |
mesh = Meshes([torch.from_numpy(m.v).float().to(device)], | |
[torch.from_numpy(m.f.astype(int)).to(device)]) | |
rasterizer = DepthRasterizer(image_size=(480, 640), dtype='mesh') | |
camera = get_kinect_camera(device) | |
depth = rasterizer(mesh, to_np=False, cameras=camera) | |
print(depth.shape) | |
dmap = depth[0, ..., 0].cpu().numpy() | |
dmap[dmap < 0] = 0 | |
cv2.imwrite('debug/depth_mesh.png', (dmap * 1000).astype(np.uint16)) | |
if __name__ == '__main__': | |
# test_depth_rasterizer() | |
test_mesh_rasterizer() | |