File size: 5,240 Bytes
424188c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
"""
Adapted from https://github.com/thusiyuan/cooperative_scene_parsing/blob/master/utils/sunrgbd_utils.py
"""
import numpy as np
def normalize(vector):
return vector / np.linalg.norm(vector)
def parse_camera_info(camera_info, height, width, inverse=False):
""" extract intrinsic and extrinsic matrix
"""
lookat = normalize(camera_info[3:6])
up = normalize(camera_info[6:9])
W = lookat
U = np.cross(W, up)
V = - np.cross(W, U)
if inverse:
rot = np.linalg.inv(np.vstack((U, -V, W)))
trans = camera_info[:3]
else:
rot = np.vstack((U, V, W))
trans = camera_info[:3]
xfov = camera_info[9]
yfov = camera_info[10]
K = np.diag([1, 1, 1]).astype(np.float32)
K[0, 2] = (width) / 2.
K[1, 2] = (height) / 2.
K[0, 0] = K[0, 2] / np.tan(xfov)
K[1, 1] = K[1, 2] / np.tan(yfov)
# tan_half_fov = window_height_ / (intrinsic.intrinsic_matrix_(1, 1) * 2.0);
# fov_rad = std::atan(tan_half_fov) * 2.0;
return rot, trans, K
def flip_towards_viewer(normals, points):
points = points / np.linalg.norm(points)
proj = points.dot(normals[:2, :].T)
flip = np.where(proj > 0)
normals[flip, :] = -normals[flip, :]
return normals
def get_corners_of_bb3d(basis, coeffs, centroid):
corners = np.zeros((8, 3))
# order the basis
index = np.argsort(np.abs(basis[:, 0]))[::-1]
# the case that two same value appear the same time
if index[2] != 2:
index[1:] = index[1:][::-1]
basis = basis[index, :]
coeffs = coeffs[index]
# Now, we know the basis vectors are orders X, Y, Z. Next, flip the basis vectors towards the viewer
basis = flip_towards_viewer(basis, centroid)
coeffs = np.abs(coeffs)
corners[0, :] = -basis[0, :] * coeffs[0] + basis[1, :] * coeffs[1] + basis[2, :] * coeffs[2]
corners[1, :] = basis[0, :] * coeffs[0] + basis[1, :] * coeffs[1] + basis[2, :] * coeffs[2]
corners[2, :] = basis[0, :] * coeffs[0] + -basis[1, :] * coeffs[1] + basis[2, :] * coeffs[2]
corners[3, :] = -basis[0, :] * coeffs[0] + -basis[1, :] * coeffs[1] + basis[2, :] * coeffs[2]
corners[4, :] = -basis[0, :] * coeffs[0] + basis[1, :] * coeffs[1] + -basis[2, :] * coeffs[2]
corners[5, :] = basis[0, :] * coeffs[0] + basis[1, :] * coeffs[1] + -basis[2, :] * coeffs[2]
corners[6, :] = basis[0, :] * coeffs[0] + -basis[1, :] * coeffs[1] + -basis[2, :] * coeffs[2]
corners[7, :] = -basis[0, :] * coeffs[0] + -basis[1, :] * coeffs[1] + -basis[2, :] * coeffs[2]
corners = corners + np.tile(centroid, (8, 1))
return corners
def get_corners_of_bb3d_no_index(basis, coeffs, centroid):
corners = np.zeros((8, 3))
coeffs = np.abs(coeffs)
corners[0, :] = -basis[0, :] * coeffs[0] + basis[1, :] * coeffs[1] + basis[2, :] * coeffs[2]
corners[1, :] = basis[0, :] * coeffs[0] + basis[1, :] * coeffs[1] + basis[2, :] * coeffs[2]
corners[2, :] = basis[0, :] * coeffs[0] + -basis[1, :] * coeffs[1] + basis[2, :] * coeffs[2]
corners[3, :] = -basis[0, :] * coeffs[0] + -basis[1, :] * coeffs[1] + basis[2, :] * coeffs[2]
corners[4, :] = -basis[0, :] * coeffs[0] + basis[1, :] * coeffs[1] + -basis[2, :] * coeffs[2]
corners[5, :] = basis[0, :] * coeffs[0] + basis[1, :] * coeffs[1] + -basis[2, :] * coeffs[2]
corners[6, :] = basis[0, :] * coeffs[0] + -basis[1, :] * coeffs[1] + -basis[2, :] * coeffs[2]
corners[7, :] = -basis[0, :] * coeffs[0] + -basis[1, :] * coeffs[1] + -basis[2, :] * coeffs[2]
corners = corners + np.tile(centroid, (8, 1))
return corners
def project_3d_points_to_2d(points3d, R_ex, K):
"""
Project 3d points from camera-centered coordinate to 2D image plane
Parameters
----------
points3d: numpy array
3d location of point
R_ex: numpy array
extrinsic camera parameter
K: numpy array
intrinsic camera parameter
Returns
-------
points2d: numpy array
2d location of the point
"""
points3d = R_ex.dot(points3d.T).T
x3 = points3d[:, 0]
y3 = -points3d[:, 1]
z3 = np.abs(points3d[:, 2])
xx = x3 * K[0, 0] / z3 + K[0, 2]
yy = y3 * K[1, 1] / z3 + K[1, 2]
points2d = np.vstack((xx, yy))
return points2d
def project_struct_bdb_to_2d(basis, coeffs, center, R_ex, K):
"""
Project 3d bounding box to 2d bounding box
Parameters
----------
basis, coeffs, center, R_ex, K
: K is the intrinsic camera parameter matrix
: Rtilt is the extrinsic camera parameter matrix in right hand coordinates
Returns
-------
bdb2d: dict
Keys: {'x1', 'x2', 'y1', 'y2'}
The (x1, y1) position is at the top left corner,
the (x2, y2) position is at the bottom right corner
"""
corners3d = get_corners_of_bb3d(basis, coeffs, center)
corners = project_3d_points_to_2d(corners3d, R_ex, K)
bdb2d = dict()
bdb2d['x1'] = int(max(np.min(corners[0, :]), 1)) # x1
bdb2d['y1'] = int(max(np.min(corners[1, :]), 1)) # y1
bdb2d['x2'] = int(min(np.max(corners[0, :]), 2*K[0, 2])) # x2
bdb2d['y2'] = int(min(np.max(corners[1, :]), 2*K[1, 2])) # y2
# if not check_bdb(bdb2d, 2*K[0, 2], 2*K[1, 2]):
# bdb2d = None
return bdb2d
|