xxie
add instructions
40d0f76
"""
common functions for image operations
"""
import cv2
import numpy as np
def crop(img, center, crop_size):
    """
    crop image around the given center, pad zeros for borders
    :param img: np.ndarray image with 2 (H, W) or 3 (H, W, C) dimensions
    :param center: np array, (x, y) of the crop center
    :param crop_size: np array or a float size of the resulting crop
    :return: a square crop around the center, zero padded where the crop window leaves the image
    :raises NotImplementedError: if img has more than 3 dimensions
    """
    assert isinstance(img, np.ndarray)
    h, w = img.shape[:2]
    topleft = np.round(center - crop_size / 2).astype(int)
    bottom_right = np.round(center + crop_size / 2).astype(int)

    # clamp the crop window to the image
    # NOTE: the upper bound is clamped to w - 1 / h - 1 and the slice end is exclusive, so when
    # the window reaches past the right/bottom border the last image column/row is left out and
    # covered by zero padding instead (p3/p4 below carry the matching +1). Kept as is so that
    # the crop content does not change.
    x1 = max(0, topleft[0])
    y1 = max(0, topleft[1])
    x2 = min(w - 1, bottom_right[0])
    y2 = min(h - 1, bottom_right[1])
    cropped = img[y1:y2, x1:x2]

    p1 = max(0, -topleft[0])  # padding in x, top
    p2 = max(0, -topleft[1])  # padding in y, top
    p3 = max(0, bottom_right[0] - w + 1)  # padding in x, bottom
    p4 = max(0, bottom_right[1] - h + 1)  # padding in y, bottom

    # rows (y) are padded first, then columns (x); a channel axis is never padded
    pad_width = [[p2, p4], [p1, p3]]
    if img.ndim == 3:
        pad_width.append([0, 0])
    elif img.ndim != 2:
        # NotImplemented is a comparison sentinel, not an exception: raising it is a TypeError
        raise NotImplementedError(
            "crop only supports images with 2 or 3 dimensions, got {}".format(img.ndim))
    return np.pad(cropped, pad_width)
def resize(img, img_size, mode=cv2.INTER_LINEAR):
    """
    resize image to the given size, the aspect ratio must stay the same
    :param img: image array, the first two dimensions are (height, width)
    :param img_size: (width, height) of the target image size
    :param mode: interpolation flag passed on to cv2.resize
    :return: the resized image
    """
    src_h, src_w = img.shape[:2]
    dst_w, dst_h = img_size[0], img_size[1]
    src_aspect = 1.0 * src_w / src_h
    dst_aspect = 1.0 * dst_w / dst_h
    # the two ratios are compared exactly, any mismatch is rejected
    assert src_aspect == dst_aspect, \
        "image aspect ration not matching, given image: {}, net input: {}".format(img.shape, img_size)
    return cv2.resize(img, img_size, interpolation=mode)
def masks2bbox(masks, threshold=127):
    """
    compute the bounding box enclosing the foreground of all given masks
    :param masks: sequence of 2D mask arrays, a pixel is foreground if its value is above threshold
    :param threshold: foreground cutoff, compared with a strict >
    :return: bounding box corner coordinate, (bmin, bmax), each an np array (x, y)
    """
    # union of the thresholded masks
    foreground = np.zeros_like(masks[0], dtype=bool)
    for mask in masks:
        foreground = np.logical_or(foreground, mask > threshold)

    # row indices are y, column indices are x
    rows, cols = np.nonzero(foreground)
    lower = np.array([cols.min(), rows.min()])
    upper = np.array([cols.max(), rows.max()])
    return lower, upper
def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5):
    """
    solve for an optimal translation that project gaussian in origin to the crop

    Parameters
    ----------
    crop_center: (x, y) of the crop center
    crop_size: float, the size of the square crop
    is_behave: if True use the predefined kinect intrinsics, otherwise the intercap camera intrinsics
    std_coverage: which edge point should be projected back to the edge of the 2d crop

    Returns
    -------
    the estimated translation: a flat array holding the first three entries of the solved 4-vector

    Raises
    ------
    np.linalg.LinAlgError: if the linear system is singular
    """
    x0, y0 = crop_center
    # edge point of the crop: half a crop size to the right of the center
    x1, y1 = x0 + crop_size / 2, y0

    # predefined kinect intrinsics
    if is_behave:
        fx = 979.7844
        fy = 979.840
        cx = 1018.952
        cy = 779.486
    else:
        # intercap camera
        fx, fy = 918.457763671875, 918.4373779296875
        cx, cy = 956.9661865234375, 555.944580078125

    # Construct the matrix
    # First two equations: origin (0, 0, 0) is projected to the crop center
    # Last two equations: edge point (std_coverage, 0, z) is projected to the edge of crop
    # NOTE(review): the third row uses fx - x1 while the other rows use cx/cy minus a pixel
    # coordinate; this may have been meant as cx - x1. Left untouched because it changes the
    # solved depth -- confirm before modifying.
    A = np.array([
        [fx, 0, cx - x0, cx - x0],
        [0, fy, cy - y0, cy - y0],
        [fx, 0, fx - x1, 0],
        [0, fy, cy - y1, 0],
    ])
    b = np.array([0, 0, -std_coverage * fx, 0]).reshape((-1, 1))  # 3.5->half of 7.0

    # Solve A x = b directly instead of forming the explicit inverse.
    # A is full rank for typical inputs, but rows 2 and 4 coincide when y0 == cy,
    # in which case numpy raises LinAlgError.
    x = np.linalg.solve(A, b)
    return x.flatten()[:3]