Spaces:
Sleeping
Sleeping
""" | |
Common functions for image operations: cropping, resizing, mask bounding boxes and crop-based translation estimation.
""" | |
import cv2 | |
import numpy as np | |
def crop(img, center, crop_size):
    """
    Crop a window around the given center, zero-padding whatever falls outside the image.

    :param img: np.ndarray of shape (H, W) or (H, W, C)
    :param center: (x, y) center of the crop, np array (any array-like is accepted)
    :param crop_size: float, or np array (size_x, size_y), size of the resulting crop
    :return: crop whose extent along x and y is
        round(center + crop_size / 2) - round(center - crop_size / 2);
        every pixel that lies outside ``img`` is zero
    :raises NotImplementedError: if ``img`` is neither 2- nor 3-dimensional
    """
    assert isinstance(img, np.ndarray)
    if img.ndim not in (2, 3):
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # raising it produces a TypeError instead of the intended error.
        raise NotImplementedError(
            "crop supports 2D or 3D arrays only, got {} dimensions".format(img.ndim)
        )

    h, w = img.shape[:2]
    half_size = np.asarray(crop_size, dtype=float) / 2
    center = np.asarray(center, dtype=float)
    topleft = np.round(center - half_size).astype(int)
    bottom_right = np.round(center + half_size).astype(int)
    extent = bottom_right - topleft  # (x, y) size of the returned crop
    bounds = np.array([w, h])

    # Clamp both corners into [0, w] x [0, h]. Slice ends are exclusive, so the
    # upper bound is w / h (not w - 1 / h - 1); otherwise the last image
    # column/row would be dropped and replaced by padding. Clamping at zero
    # also keeps negative values from being read as from-the-end indices.
    lo = np.clip(topleft, 0, bounds)
    hi = np.clip(bottom_right, 0, bounds)
    cropped = img[lo[1]:hi[1], lo[0]:hi[0]]

    # Padding is the part of the window outside the image, capped by the
    # window extent so a window fully outside the image still has the right size.
    pad_before = np.clip(-topleft, 0, extent)
    pad_after = np.clip(bottom_right - bounds, 0, extent)

    pad_width = [
        (pad_before[1], pad_after[1]),  # rows (y)
        (pad_before[0], pad_after[0]),  # columns (x)
    ]
    if img.ndim == 3:
        pad_width.append((0, 0))  # never pad the channel axis
    return np.pad(cropped, pad_width)
def resize(img, img_size, mode=cv2.INTER_LINEAR):
    """
    Rescale an image to the requested size; the aspect ratio must already match.

    :param img: image array whose first two axes are (height, width)
    :param img_size: (width, height) of the target image size
    :param mode: interpolation flag forwarded to cv2.resize
    :return: the resized image
    """
    src_h, src_w = img.shape[:2]
    dst_w, dst_h = img_size[0], img_size[1]
    src_aspect = float(src_w) / src_h
    dst_aspect = float(dst_w) / dst_h
    # Refuse to distort the image: source and target aspect ratios must be equal.
    assert src_aspect == dst_aspect, \
        "image aspect ration not matching, given image: {}, net input: {}".format(img.shape, img_size)
    return cv2.resize(img, img_size, interpolation=mode)
def masks2bbox(masks, threshold=127):
    """
    Compute the tight bounding box around the foreground of all given masks.

    :param masks: non-empty sequence of 2D mask arrays
    :param threshold: pixels strictly greater than this value count as foreground
    :return: (bmin, bmax), np arrays holding the (x, y) indices of the smallest
        and largest foreground pixel coordinates
    """
    # Union of the thresholded masks.
    foreground = np.zeros_like(masks[0], dtype=bool)
    for mask in masks:
        foreground = np.logical_or(foreground, mask > threshold)

    rows, cols = np.nonzero(foreground)
    lower = np.array([cols.min(), rows.min()])
    upper = np.array([cols.max(), rows.max()])
    return lower, upper
def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5):
    """
    solve for an optimal translation that projects a gaussian at the origin to the crop

    Parameters
    ----------
    crop_center: (x, y) of the crop center
    crop_size: float, the size of the square crop
    is_behave: if True use the first (kinect) intrinsics preset, otherwise the
        intercap camera preset
    std_coverage: which edge point should be projected back to the edge of the 2d crop

    Returns
    -------
    the estimated translation, np array with 3 values

    Raises
    ------
    numpy.linalg.LinAlgError
        if the linear system is singular (e.g. the crop center y equals cy,
        which makes the second and fourth equations identical)
    """
    x0, y0 = crop_center
    # Right edge midpoint of the crop: same row as the center, half a crop to the right.
    x1, y1 = x0 + crop_size / 2, y0

    # predefined kinect intrinsics
    if is_behave:
        fx = 979.7844
        fy = 979.840
        cx = 1018.952
        cy = 779.486
    else:
        # intercap camera
        fx, fy = 918.457763671875, 918.4373779296875
        cx, cy = 956.9661865234375, 555.944580078125

    # Construct the matrix
    # First two equations: origin (0, 0, 0) is projected to the crop center
    # Last two equations: edge point (std_coverage, 0, z) is projected to the edge of crop
    # NOTE(review): the third row uses `fx - x1`, while the other rows use the
    # principal point (`cx - ...`, `cy - ...`); this looks like it was meant to
    # be `cx - x1`. Left unchanged because consumers may rely on the current
    # values - confirm before changing.
    A = np.array([
        [fx, 0, cx - x0, cx - x0],
        [0, fy, cy - y0, cy - y0],
        [fx, 0, fx - x1, 0],
        [0, fy, cy - y1, 0],
    ])
    b = np.array([0, 0, -std_coverage * fx, 0])  # default 3.5 -> half of 7.0

    # Solve A x = b directly rather than forming the explicit inverse.
    solution = np.linalg.solve(A, b)
    # Only the first three unknowns form the translation.
    return solution[:3]