File size: 4,831 Bytes
2fd6166 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
common functions for image operations
import cv2
import numpy as np
def crop(img, center, crop_size):
    """
    crop image around the given center, pad zeros for borders

    The crop window spans ``round(center - crop_size / 2)`` (inclusive) to
    ``round(center + crop_size / 2)`` (exclusive) along x and y. Every part of
    the window that falls outside the image is filled with zeros, so the output
    always has the full window size.

    :param img: np.ndarray with 2 dimensions (H, W) or 3 dimensions (H, W, C)
    :param center: np array, (x, y) pixel coordinate of the crop center
    :param crop_size: np array or a float size of the resulting crop
    :return: a square crop around the center, same dtype as img
    :raises NotImplementedError: if img is neither 2- nor 3-dimensional
    """
    assert isinstance(img, np.ndarray)
    if img.ndim not in (2, 3):
        # NotImplemented is a comparison sentinel, not an exception; raising it
        # would surface as a confusing TypeError.
        raise NotImplementedError(
            "crop supports 2D or 3D arrays, got {} dimensions".format(img.ndim))

    h, w = img.shape[:2]
    topleft = np.round(center - crop_size / 2).astype(int)
    bottom_right = np.round(center + crop_size / 2).astype(int)
    left, top = int(topleft[0]), int(topleft[1])
    right, bottom = int(bottom_right[0]), int(bottom_right[1])

    # Zero canvas with the full window size; a trailing channel axis is kept.
    padded = np.zeros((bottom - top, right - left) + img.shape[2:], dtype=img.dtype)

    # Overlap between the window and the image as half-open ranges. Clamping
    # the end to w / h (not w - 1 / h - 1) keeps the last image column / row.
    x1, x2 = max(0, left), min(w, right)
    y1, y2 = max(0, top), min(h, bottom)
    if x1 < x2 and y1 < y2:
        padded[y1 - top:y2 - top, x1 - left:x2 - left] = img[y1:y2, x1:x2]
    return padded
def resize(img, img_size, mode=cv2.INTER_LINEAR):
    """
    resize image to the given target size, requiring an unchanged aspect ratio

    :param img: image array; its first two dimensions are read as (height, width)
    :param img_size: (width, height) of the target image size
    :param mode: interpolation flag forwarded to cv2.resize
    :return: the resized image produced by cv2.resize
    """
    src_h, src_w = img.shape[:2]
    dst_w, dst_h = img_size[0], img_size[1]
    src_ratio = 1.0 * src_w / src_h
    dst_ratio = 1.0 * dst_w / dst_h
    # Only pure rescaling is allowed: the width / height ratios must be equal.
    assert src_ratio == dst_ratio, "image aspect ration not matching, given image: {}, net input: {}".format(
        img.shape, img_size)
    return cv2.resize(img, img_size, interpolation=mode)
def masks2bbox(masks, threshold=127):
    """
    bounding box around the union of all thresholded masks

    :param masks: sequence of 2D mask arrays; a pixel counts as foreground
                  where its value is strictly greater than threshold
    :param threshold: foreground threshold applied to every mask
    :return: bounding box corner coordinate (bmin, bmax), each np.array([x, y])
             holding the smallest / largest foreground column and row index
    """
    union = np.zeros_like(masks[0], dtype=bool)
    for mask in masks:
        union = np.logical_or(union, mask > threshold)
    rows, cols = np.nonzero(union)
    lower = np.array([cols.min(), rows.min()])
    upper = np.array([cols.max(), rows.max()])
    return lower, upper
def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5):
    """
    solve for an optimal translation that project gaussian in origin to the crop

    The translation t = (tx, ty, tz) is chosen such that, under the pinhole
    intrinsics selected by ``is_behave``, the origin is projected to the crop
    center and the point (std_coverage, 0, 0) is projected to the right edge of
    the crop, i.e. to (x0 + crop_size / 2, y0).

    Args:
        crop_center: (x, y) of the crop center
        crop_size: float, the size of the square crop
        is_behave: if True use the predefined kinect intrinsics, otherwise the
            intercap camera intrinsics
        std_coverage: which edge point should be projected back to the edge of the 2d crop

    Returns:
        np array of shape (3,): the estimated translation (tx, ty, tz)

    Raises:
        np.linalg.LinAlgError: if the linear system is singular, e.g. when
            crop_size is 0 or the crop center y equals the principal point cy
    """
    x0, y0 = crop_center
    # right edge of the crop, at the same height as the crop center
    x1, y1 = x0 + crop_size / 2, y0

    if is_behave:
        # predefined kinect intrinsics
        fx, fy = 979.7844, 979.840
        cx, cy = 1018.952, 779.486
    else:
        # intercap camera
        fx, fy = 918.457763671875, 918.4373779296875
        cx, cy = 956.9661865234375, 555.944580078125

    # A point (X, Y, Z) translated by t is projected to pixel (u, v) when
    #   fx * (X + tx) + (cx - u) * (Z + tz) = 0
    #   fy * (Y + ty) + (cy - v) * (Z + tz) = 0
    # Unknowns are (tx, ty, tz, z0), where z0 is a free depth offset of the
    # origin point. Giving the edge point its own free depth as well makes the
    # system rank-deficient, so its depth is fixed to 0. Since y1 == y0, rows 2
    # and 4 then force z0 = 0 and the solution has tz = 2 * std_coverage * fx / crop_size.
    #   rows 1-2: origin (0, 0, z0) is projected to the crop center
    #   rows 3-4: edge point (std_coverage, 0, 0) is projected to the crop edge
    # NOTE: row 3 previously used `fx - x1` instead of `cx - x1`, which does not
    # follow from the projection equation and left the edge point off the crop
    # edge. The returned values therefore differ from the old code; re-validate
    # anything tuned against the old output.
    A = np.array([
        [fx, 0, cx - x0, cx - x0],
        [0, fy, cy - y0, cy - y0],
        [fx, 0, cx - x1, 0],
        [0, fy, cy - y1, 0],
    ])
    b = np.array([0, 0, -std_coverage * fx, 0])
    solution = np.linalg.solve(A, b)
    return solution[:3]