File size: 4,831 Bytes
2fd6166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""
common functions for image operations
"""

import cv2
import numpy as np


def crop(img, center, crop_size):
    """
    crop image around the given center, pad zeros for borders
    :param img: np.ndarray with 2 (H, W) or 3 (H, W, C) dimensions
    :param center: np array, (x, y) pixel coordinate of the crop center
    :param crop_size: np array or a float size of the resulting crop
    :return: the crop around the center, parts of the crop that fall outside the image are zero
    :raises NotImplementedError: if img has neither 2 nor 3 dimensions
    """
    assert isinstance(img, np.ndarray)
    h, w = img.shape[:2]
    topleft = np.round(center - crop_size / 2).astype(int)
    bottom_right = np.round(center + crop_size / 2).astype(int)

    # bottom_right is used as an *exclusive* slice end, hence it is clamped to (w, h)
    # and not to (w - 1, h - 1); otherwise the last image row/column is never copied
    # NOTE(review): assumes the crop overlaps the image, a crop lying completely
    # outside of the image is not handled
    x1 = int(max(0, topleft[0]))
    y1 = int(max(0, topleft[1]))
    x2 = int(min(w, bottom_right[0]))
    y2 = int(min(h, bottom_right[1]))
    cropped = img[y1:y2, x1:x2]

    # number of zero pixels to add on each side so that the output keeps the requested size
    pad_left = int(max(0, -topleft[0]))
    pad_top = int(max(0, -topleft[1]))
    pad_right = int(max(0, bottom_right[0] - w))
    pad_bottom = int(max(0, bottom_right[1] - h))

    dim = img.ndim
    if dim == 3:
        # do not pad the channel axis
        padded = np.pad(cropped, [[pad_top, pad_bottom], [pad_left, pad_right], [0, 0]])
    elif dim == 2:
        padded = np.pad(cropped, [[pad_top, pad_bottom], [pad_left, pad_right]])
    else:
        # NotImplemented is not an exception and cannot be raised
        raise NotImplementedError("crop only supports 2D or 3D arrays, got {} dimensions".format(dim))
    return padded


def resize(img, img_size, mode=cv2.INTER_LINEAR):
    """
    resize image to the given target size, the aspect ratio must be preserved
    :param img: image array, the first two dimensions are (height, width)
    :param img_size: (width, height) of the target image size
    :param mode: interpolation flag passed on to cv2.resize
    :return: the resized image
    """
    src_h, src_w = img.shape[:2]
    src_aspect = 1.0 * src_w / src_h
    dst_aspect = 1.0 * img_size[0] / img_size[1]
    # refuse to distort the image: source and target must have the same aspect ratio
    assert src_aspect == dst_aspect, \
        "image aspect ration not matching, given image: {}, net input: {}".format(img.shape, img_size)
    return cv2.resize(img, img_size, interpolation=mode)


def masks2bbox(masks, threshold=127):
    """
    compute the tight bounding box around the foreground of all given masks
    :param masks: sequence of mask arrays, pixels strictly above threshold count as foreground
    :param threshold: foreground threshold applied to every mask
    :return: bounding box corner coordinate, (x, y) of the min corner and (x, y) of the max corner
    """
    # union of the foreground pixels of all masks
    foreground = np.zeros_like(masks[0], dtype=bool)
    for mask in masks:
        foreground = np.logical_or(foreground, mask > threshold)

    rows, cols = np.nonzero(foreground)
    corner_min = np.array([cols.min(), rows.min()])
    corner_max = np.array([cols.max(), rows.max()])
    return corner_min, corner_max


def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5):
    """
    solve for an optimal translation that project gaussian in origin to the crop

    With the pinhole projection u = fx * X / Z + cx, v = fy * Y / Z + cy, the
    translation t = (tx, ty, tz) is determined by two constraints:
      - the origin (0, 0, 0) + t is projected to the crop center (x0, y0)
      - the edge point (std_coverage, 0, 0) + t is projected to the crop edge
        (x0 + crop_size / 2, y0)

    Parameters
    ----------
    crop_center: (x, y) of the crop center
    crop_size: float, the size of the square crop
    is_behave: if True use the predefined behave (kinect) intrinsics, otherwise the intercap camera intrinsics
    std_coverage: which edge point should be projected back to the edge of the 2d crop

    Returns
    -------
    the estimated translation, np array of shape (3,)

    Raises
    ------
    np.linalg.LinAlgError if the linear system is singular, e.g. when the y coordinate
    of the crop center is exactly equal to cy

    """
    x0, y0 = crop_center
    # image location the edge point has to be projected to
    x1, y1 = x0 + crop_size / 2, y0
    # predefined kinect intrinsics
    if is_behave:
        fx = 979.7844
        fy = 979.840
        cx = 1018.952
        cy = 779.486
    else:
        # intercap camera
        fx, fy = 918.457763671875, 918.4373779296875
        cx, cy = 956.9661865234375, 555.944580078125

    # A formulation with 6 unknowns (translation + one depth offset per constrained point)
    # has a rank deficient coefficient matrix (rank 5), i.e. it is underconstrained.
    # Hence the depth offset of the edge point is fixed to 0, which leaves 4 unknowns:
    # (tx, ty, tz, depth offset of the origin).
    # first two equations: origin (0, 0, 0) is projected to the crop center
    # last two equations: edge point (std_coverage, 0, 0) is projected to the edge of crop
    # NOTE: every depth coefficient is (principal point - target pixel); with fx in place
    # of cx in the third row the edge point is projected (cx - fx) pixels off the crop edge
    A = np.array([
        [fx, 0, cx - x0, cx - x0],
        [0, fy, cy - y0, cy - y0],
        [fx, 0, cx - x1, 0],
        [0, fy, cy - y1, 0]
    ])
    b = np.array([0, 0, -std_coverage * fx, 0])  # 3.5->half of 7.0
    # solve A x = b directly instead of forming the explicit inverse
    solution = np.linalg.solve(A, b)

    # drop the auxiliary depth offset, only the translation is of interest
    return solution[:3]