Egrt's picture
init
424188c
"""
Adapted from https://github.com/thusiyuan/cooperative_scene_parsing/blob/master/utils/sunrgbd_utils.py
"""
import numpy as np
def normalize(vector):
    """Return ``vector`` divided by its norm.

    The norm is whatever ``np.linalg.norm`` returns for ``vector`` with
    default arguments. No zero-length guard is applied, so a zero vector
    yields NaNs.
    """
    magnitude = np.linalg.norm(vector)
    return vector / magnitude
def parse_camera_info(camera_info, height, width, inverse=False):
    """Build the rotation, translation and intrinsic matrix of a camera.

    Parameters
    ----------
    camera_info : sequence supporting slicing
        Entries ``[0:3]`` are returned unchanged as the translation,
        ``[3:6]`` is the look-at direction, ``[6:9]`` the up direction,
        ``[9]`` and ``[10]`` the x / y field-of-view values.
        (Assumed to be a 1-D numpy array - confirm against callers.)
    height, width : number
        Image size; the principal point is placed at half of each.
    inverse : bool
        When true, the rotation is the matrix inverse of the stacked
        ``(U, -V, W)`` axes instead of the stacked ``(U, V, W)`` axes.

    Returns
    -------
    rot : numpy array
        3x3 rotation built from the camera axes.
    trans : same type as ``camera_info[:3]``
        The first three entries of ``camera_info``.
    K : numpy array
        3x3 ``float32`` intrinsic matrix.
    """
    # Camera axes: W looks forward, U = W x up, V = -(W x U).
    forward = normalize(camera_info[3:6])
    up_hint = normalize(camera_info[6:9])
    right = np.cross(forward, up_hint)
    true_up = -np.cross(forward, right)

    # The translation is the same slice in both modes.
    trans = camera_info[:3]
    if inverse:
        rot = np.linalg.inv(np.vstack((right, -true_up, forward)))
    else:
        rot = np.vstack((right, true_up, forward))

    fov_x = camera_info[9]
    fov_y = camera_info[10]

    K = np.eye(3, dtype=np.float32)
    K[0, 2] = width / 2.
    K[1, 2] = height / 2.
    # The fov values are used as half-angles:
    # focal = (image_size / 2) / tan(fov), i.e. tan(fov) = size / (2 * focal).
    K[0, 0] = K[0, 2] / np.tan(fov_x)
    K[1, 1] = K[1, 2] / np.tan(fov_y)
    return rot, trans, K
def flip_towards_viewer(normals, points):
    """Negate, in place, rows of ``normals`` that point along ``points``.

    Only the first two rows of ``normals`` are tested: each is dotted with
    ``points`` scaled to unit norm, and every row whose dot product is
    strictly positive is replaced by its negation. The third row onwards
    is never tested.

    ``normals`` is modified in place and the same array is returned.
    (Assumes ``points`` is a single 3-vector and ``normals`` has 3
    columns - confirm against callers.)
    """
    view_dir = points / np.linalg.norm(points)
    alignment = view_dir.dot(normals[:2, :].T)
    # Tuple of index arrays for the rows that must be reversed.
    to_flip = np.nonzero(alignment > 0)
    normals[to_flip, :] = np.negative(normals[to_flip, :])
    return normals
def get_corners_of_bb3d(basis, coeffs, centroid):
    """Return the 8 corners of a 3D box after ordering and flipping its axes.

    Steps, in order:

    1. Rows of ``basis`` (and matching entries of ``coeffs``) are sorted
       by descending absolute value of their first component. If row 2
       did not end up last, the tail of the ordering is reversed.
    2. The reordered basis is passed through ``flip_towards_viewer``
       together with ``centroid``. The reordering uses fancy indexing,
       which copies, so the caller's ``basis`` array is not modified.
    3. Each corner is ``centroid +/- |c0|*b0 +/- |c1|*b1 +/- |c2|*b2``
       with the fixed sign pattern listed below.

    Returns
    -------
    corners : numpy array
        Array of shape (8, 3), one corner per row.
    """
    # Sign of each basis vector's contribution, one triple per corner.
    corner_signs = (
        (-1, 1, 1),
        (1, 1, 1),
        (1, -1, 1),
        (-1, -1, 1),
        (-1, 1, -1),
        (1, 1, -1),
        (1, -1, -1),
        (-1, -1, -1),
    )

    # Order the basis by the magnitude of the first component.
    order = np.argsort(np.abs(basis[:, 0]))[::-1]
    # Handles the case where equal values appear at the same time.
    if order[2] != 2:
        order[1:] = order[1:][::-1]
    basis = basis[order, :]
    coeffs = coeffs[order]

    # With the basis ordered, flip its vectors towards the viewer.
    basis = flip_towards_viewer(basis, centroid)
    half_extent = np.abs(coeffs)

    corners = np.zeros((8, 3))
    for row, (s0, s1, s2) in enumerate(corner_signs):
        corners[row, :] = (
            s0 * basis[0, :] * half_extent[0]
            + s1 * basis[1, :] * half_extent[1]
            + s2 * basis[2, :] * half_extent[2]
        )
    return corners + np.tile(centroid, (8, 1))
def get_corners_of_bb3d_no_index(basis, coeffs, centroid):
    """Return the 8 corners of a 3D box, using ``basis`` exactly as given.

    Unlike ``get_corners_of_bb3d`` no reordering or flipping of the basis
    is performed. Each corner is
    ``centroid +/- |c0|*b0 +/- |c1|*b1 +/- |c2|*b2`` where ``b_i`` is row
    ``i`` of ``basis`` and ``c_i`` is entry ``i`` of ``coeffs``.

    Returns
    -------
    corners : numpy array
        Array of shape (8, 3), one corner per row, in the fixed sign
        order listed below.
    """
    # Sign of each basis vector's contribution, one triple per corner.
    corner_signs = (
        (-1, 1, 1),
        (1, 1, 1),
        (1, -1, 1),
        (-1, -1, 1),
        (-1, 1, -1),
        (1, 1, -1),
        (1, -1, -1),
        (-1, -1, -1),
    )
    half_extent = np.abs(coeffs)

    corners = np.zeros((8, 3))
    for row, (s0, s1, s2) in enumerate(corner_signs):
        corners[row, :] = (
            s0 * basis[0, :] * half_extent[0]
            + s1 * basis[1, :] * half_extent[1]
            + s2 * basis[2, :] * half_extent[2]
        )
    return corners + np.tile(centroid, (8, 1))
def project_3d_points_to_2d(points3d, R_ex, K):
    """
    Rotate 3D points by ``R_ex`` and project them onto the image plane.

    Parameters
    ----------
    points3d : numpy array
        One 3D point per row.
    R_ex : numpy array
        Rotation applied to every point before projection.
    K : numpy array
        Intrinsic matrix; ``K[0, 0]`` / ``K[1, 1]`` are used as focal
        lengths and ``K[0, 2]`` / ``K[1, 2]`` as the principal point.

    Returns
    -------
    points2d : numpy array
        Array with two rows: row 0 holds the x image coordinates and
        row 1 the y image coordinates, one column per input point.
    """
    rotated = np.dot(R_ex, points3d.T).T

    focal_x, focal_y = K[0, 0], K[1, 1]
    center_x, center_y = K[0, 2], K[1, 2]

    # The y axis is negated and the absolute depth is used as divisor.
    cam_x = rotated[:, 0]
    cam_y = -rotated[:, 1]
    depth = np.abs(rotated[:, 2])

    pixel_x = cam_x * focal_x / depth + center_x
    pixel_y = cam_y * focal_y / depth + center_y
    return np.vstack((pixel_x, pixel_y))
def project_struct_bdb_to_2d(basis, coeffs, center, R_ex, K):
    """
    Project a 3D bounding box to its enclosing 2D bounding box.

    The 8 box corners are obtained from ``get_corners_of_bb3d`` and
    projected with ``project_3d_points_to_2d``; the 2D box is the extent
    of the projected corners, clamped and truncated to integers.

    Parameters
    ----------
    basis, coeffs, center
        Box description forwarded to ``get_corners_of_bb3d``.
    R_ex : numpy array
        Rotation forwarded to ``project_3d_points_to_2d``.
    K : numpy array
        Intrinsic camera matrix. Twice its principal point
        (``2 * K[0, 2]``, ``2 * K[1, 2]``) is used as the upper clamp.

    Returns
    -------
    bdb2d : dict
        Keys: {'x1', 'y1', 'x2', 'y2'}.
        (x1, y1) is the top left corner, clamped to be at least 1;
        (x2, y2) is the bottom right corner, clamped to be at most
        twice the principal point.
    """
    corners3d = get_corners_of_bb3d(basis, coeffs, center)
    projected = project_3d_points_to_2d(corners3d, R_ex, K)
    xs = projected[0, :]
    ys = projected[1, :]

    bdb2d = {
        'x1': int(max(np.min(xs), 1)),
        'y1': int(max(np.min(ys), 1)),
        'x2': int(min(np.max(xs), 2*K[0, 2])),
        'y2': int(min(np.max(ys), 2*K[1, 2])),
    }
    return bdb2d