# SceneDINO: sscbench/point_utils.py
import numpy as np
from fusion import TSDFVolume, rigid_transform
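# TSDFVolume.vox2world, TSDFVolume.cam2pix and rigid_transform are assumed to be the
# voxel-grid / camera-projection helpers from the repo's local fusion.py (MonoScene-style
# TSDF fusion utilities); they are not defined in this file.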


def get_fov_mask():
    calib = read_calib()
    T_velo_2_cam = calib["Tr"]
    _, fov_mask = generate_point_grid(
        vox_origin=np.array([0, -25.6, -2]),
        scene_size=(51.2, 51.2, 6.4),
        voxel_size=0.2,
        cam_E=T_velo_2_cam,
        cam_k=get_cam_k(),
    )
    # 51.2 m / 0.2 m = 256 voxels in x and y, 6.4 m / 0.2 m = 32 voxels in z
    return fov_mask.reshape(256, 256, 32)


def generate_point_grid(cam_E, vox_origin, voxel_size, scene_size, cam_k, img_W=1408, img_H=376):
    """
    Compute the 2D projection of voxel centroids.

    Parameters
    ----------
    cam_E: 4x4
        = camera pose in case of NYUv2 dataset
        = transformation from lidar to camera coordinates in case of SemanticKITTI
    cam_k: 3x3
        camera intrinsics
    vox_origin: (3,)
        world (NYU) / lidar (SemanticKITTI) coordinates of the voxel at index (0, 0, 0)
    voxel_size: float
        edge length of a voxel in meters
    scene_size: (3,)
        scene size in meters: (51.2, 51.2, 6.4) for SemanticKITTI and (4.8, 4.8, 2.88) for NYUv2
    img_W: int
        image width
    img_H: int
        image height

    Returns
    -------
    cam_pts: (N, 3)
        voxel centroids transformed into camera coordinates
    fov_mask: (N,)
        boolean mask indicating which voxels project inside the image's FOV
    """
    # Compute the x, y, z bounds of the scene in meters
    vol_bnds = np.zeros((3, 2))
    vol_bnds[:, 0] = vox_origin
    vol_bnds[:, 1] = vox_origin + np.array(scene_size)

    # Build the voxel index grid
    vol_dim = np.ceil((vol_bnds[:, 1] - vol_bnds[:, 0]) / voxel_size).copy(order='C').astype(int)
    xv, yv, zv = np.meshgrid(
        range(vol_dim[0]),
        range(vol_dim[1]),
        range(vol_dim[2]),
        indexing='ij'
    )
    vox_coords = np.concatenate([
        xv.reshape(1, -1),
        yv.reshape(1, -1),
        zv.reshape(1, -1)
    ], axis=0).astype(int).T

    # Convert voxel indices to lidar-frame centroid coordinates, then to camera coordinates
    cam_pts = TSDFVolume.vox2world(vox_origin, vox_coords, voxel_size)
    cam_pts = rigid_transform(cam_pts, cam_E)

    # Project camera coordinates to pixel positions
    projected_pix = TSDFVolume.cam2pix(cam_pts, cam_k)
    pix_x, pix_y = projected_pix[:, 0], projected_pix[:, 1]

    # Keep only voxels that project inside the image and lie in front of the camera
    pix_z = cam_pts[:, 2]
    fov_mask = (
        (pix_x >= 0)
        & (pix_x < img_W)
        & (pix_y >= 0)
        & (pix_y < img_H)
        & (pix_z > 0)
    )
    return cam_pts, fov_mask
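
# Note: TSDFVolume.cam2pix is assumed (it lives in the local fusion.py, not in this
# file) to apply the standard pinhole projection with intrinsics
# K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]:
#   pix_x = fx * X / Z + cx,   pix_y = fy * Y / Z + cy
# so fov_mask keeps exactly the voxels whose projection lands inside the
# img_W x img_H image and which lie in front of the camera (Z > 0).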


def read_calib():
    """
    Return hard-coded calibration matrices (no calibration file is read):
    "P2", the 3x4 projection matrix of the left camera, and "Tr", the 4x4
    velodyne-to-camera rigid transform.
    """
    # 3x4 projection matrix of the left camera
    P = np.array(
        [
            [552.554261, 0.000000, 682.049453, 0.000000],
            [0.000000, 552.554261, 238.769549, 0.000000],
            [0.000000, 0.000000, 1.000000, 0.000000],
        ]
    )
    # Camera-to-velodyne extrinsics (3x4); inverted below to obtain velodyne-to-camera
    cam2velo = np.array(
        [
            [0.04307104361, -0.08829286498, 0.995162929, 0.8043914418],
            [-0.999004371, 0.007784614041, 0.04392796942, 0.2993489574],
            [-0.01162548558, -0.9960641394, -0.08786966659, -0.1770225824],
        ]
    )
    C2V = np.concatenate(
        [cam2velo, np.array([0, 0, 0, 1]).reshape(1, 4)], axis=0
    )
    V2C = np.linalg.inv(C2V)[:3, :]

    calib_out = {}
    # 3x4 projection matrix for the left camera
    calib_out["P2"] = P
    # 4x4 velodyne-to-camera transform
    calib_out["Tr"] = np.identity(4)
    calib_out["Tr"][:3, :4] = V2C
    return calib_out


def get_cam_k():
    """Return the 3x3 camera intrinsics (left 3x3 block of the projection matrix)."""
    cam_k = np.array(
        [
            [552.554261, 0.000000, 682.049453, 0.000000],
            [0.000000, 552.554261, 238.769549, 0.000000],
            [0.000000, 0.000000, 1.000000, 0.000000],
        ]
    )
    return cam_k[:3, :3]
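

# Minimal usage sketch, assuming a compatible fusion.py is importable and that the
# hard-coded calibration above matches the images and point clouds being processed.
if __name__ == "__main__":
    fov_mask = get_fov_mask()
    print("FOV mask shape:", fov_mask.shape)  # (256, 256, 32)
    print("Voxels inside FOV: %.1f%%" % (100.0 * fov_mask.mean()))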