# Listing recovered from a hosted "Spaces" page (page status: "Build error").
# File size: 8,196 bytes; revision 858279b.
# The page's line-number gutter (1-212) has been dropped.
import cv2 as cv
import numpy as np
# Module-level scratch buffers. lk_track() clears and refills every one of
# them on each call, so they always describe the most recent tracking run.

# Image pyramids of the source and target frames (filled by craft_pyramid).
pyramid_source, pyramid_target = [], []

# Per-level lists of source patches, weighted transposed Jacobians and
# inverse Hessians (one entry per window at each level).
patch_source_pyramid, JT_source_pyramid, Hinv_source_pyramid = [], [], []

# One Window per tracked landmark.
windows = []
class Window:
    """Square sampling window centred on a landmark.

    The window owns two float32 coordinate grids (``map_x`` / ``map_y``) of
    shape ``(window_size, window_size)`` around its centre, plus a displacement
    ``(Dx, Dy)`` that is added to the grids whenever a patch is cropped.
    """

    def __init__(self, center_x, center_y, window_size):
        """
        :param center_x: x coordinate (column direction) of the window centre.
        :param center_y: y coordinate (row direction) of the window centre.
        :param window_size: (Int) side length of the square window.
        """
        self.center_x = center_x
        self.center_y = center_y
        self.window_size = window_size
        # The displacement vectors.
        # They play the role of the internal vectors g and d of the pyramidal
        # algorithm and also hold the final result.
        self.Dx = 0
        self.Dy = 0
        self.map_x = None
        self.map_y = None
        self.generate_map()

    def generate_map(self):
        """Rebuild ``map_x`` / ``map_y`` around the current centre."""
        epsilon = 0.001
        size = self.window_size
        start_x = self.center_x - size // 2
        start_y = self.center_y - size // 2
        # The stop value is pulled back by epsilon so that arange yields
        # exactly `size` samples, which the reshape below relies on.
        crop_x = np.arange(start_x, start_x + size - epsilon, 1.0).astype(np.float32).reshape(1, size)
        self.map_x = np.repeat(crop_x, size, axis=0)
        crop_y = np.arange(start_y, start_y + size - epsilon, 1.0).astype(np.float32).reshape(size, 1)
        self.map_y = np.repeat(crop_y, size, axis=1)

    def pyrDown(self):
        """
        Move the window one pyramid level down (half resolution).

        The centre and the displacement are halved. The maps cannot simply be
        divided by 2 (their spacing must stay one pixel), so they are
        regenerated around the new centre.
        """
        self.center_x = self.center_x / 2
        self.center_y = self.center_y / 2
        self.Dx = self.Dx / 2
        self.Dy = self.Dy / 2
        self.generate_map()

    def pyrUp(self):
        """
        Move the window one pyramid level up (double resolution).

        The centre is doubled, and so is the displacement vector (Dx, Dy), to
        simulate the equation g_(L-1) = 2 * (g_L + d_L): d_L is what was found
        at this level and g_L is inherited from the previous level, both stored
        in the displacement vector.
        """
        self.center_x = self.center_x * 2
        self.center_y = self.center_y * 2
        self.Dx = self.Dx * 2
        self.Dy = self.Dy * 2
        self.generate_map()

    def move(self, delta_x, delta_y):
        """Add ``(delta_x, delta_y)`` to the accumulated displacement."""
        self.Dx += delta_x
        self.Dy += delta_y

    def _sampling_maps(self):
        """Return ``(map_x + Dx, map_y + Dy)`` as float32 arrays."""
        # Cast the displacement to float32 first: a NumPy float64 scalar added
        # to a float32 array yields float64 under NumPy 2 promotion rules, and
        # cv.remap does not accept float64 maps.
        shifted_x = self.map_x + np.float32(self.Dx)
        shifted_y = self.map_y + np.float32(self.Dy)
        return shifted_x, shifted_y

    def crop(self, img):
        """
        Sample the displaced window from ``img`` with bilinear interpolation.

        :param img: image passed to cv.remap.
        :return: the patch returned by cv.remap.
        """
        # Notice!!: the column map is built from x and the row map from y,
        # which is the opposite of the matrix index order.
        shifted_x, shifted_y = self._sampling_maps()
        return cv.remap(img, shifted_x, shifted_y, cv.INTER_LINEAR)
def generate_weight(patch_size):
    """
    Generate the Gaussian weight matrix centred on the patch.

    The weight at (x, y) is exp(-(((x - c) / sigma) ** 2 + ((y - c) / sigma) ** 2) / 2)
    with c = patch_size // 2 and sigma = max(patch_size // 2, 1).

    :param patch_size: (Int) The patch_size
    :return: The weight map (patch_size * patch_size * 1).
    """
    center = patch_size // 2
    # A one-pixel patch would otherwise give sigma == 0 and a NaN weight
    # from 0 / 0; clamping leaves every larger patch size unchanged.
    sigma = max(patch_size // 2, 1)
    normalized = (np.arange(patch_size) - center) / sigma
    squared = normalized ** 2
    # Outer sum of the per-axis squared distances.
    maps = squared[:, np.newaxis] + squared[np.newaxis, :]
    return np.expand_dims(np.exp(maps / -2.0), -1)
def craft_pyramid(image, level, pyramid_container):
    """
    Fill ``pyramid_container`` with an image pyramid, finest level first.

    The container is emptied, then receives ``image`` followed by
    ``level - 1`` successive ``cv.pyrDown`` reductions of it.

    :param image: the full-resolution image (stored as the first entry).
    :param level: (Int) number of pyramid levels to build.
    :param pyramid_container: list that is cleared and refilled in place.
    """
    pyramid_container.clear()
    pyramid_container.append(image)
    for _ in range(level - 1):
        # Each new level is derived from the one appended just before it.
        pyramid_container.append(cv.pyrDown(pyramid_container[-1]))
def _source_patch_terms(patch, weight_map):
    """Return (weighted transposed Jacobian, inverse Hessian) for one source patch."""
    # cv.Sobel(_, _, 1, 0, ...) differentiates along x, i.e. along the
    # columns of the matrix; (0, 1) differentiates along y, i.e. along rows.
    gradient_x = cv.Sobel(patch, cv.CV_64F, 1, 0, ksize=3)
    gradient_y = cv.Sobel(patch, cv.CV_64F, 0, 1, ksize=3)
    # weight_map is (size, size, 1). Against a single-channel (size, size)
    # gradient that would broadcast to (size, size, size), so drop the
    # channel axis in that case.
    weights = weight_map if gradient_x.ndim == 3 else weight_map[..., 0]
    J_x = np.reshape(gradient_x, (-1, 1))
    J_y = np.reshape(gradient_y, (-1, 1))
    J_x_w = np.reshape(gradient_x * weights, (-1, 1))
    J_y_w = np.reshape(gradient_y * weights, (-1, 1))
    J = np.concatenate((J_x, J_y), axis=1)
    JT_w = np.transpose(np.concatenate((J_x_w, J_y_w), axis=1))
    H = np.matmul(JT_w, J)
    try:
        Hinv = np.linalg.inv(H)
    except np.linalg.LinAlgError:
        # A textureless patch has a singular Hessian; the pseudo-inverse
        # yields a zero update there instead of aborting the whole track.
        Hinv = np.linalg.pinv(H)
    return JT_w, Hinv


def lk_track(face_source, face_target, landmarks_source, window_size, pyramid_level):
    """
    Track landmarks from ``face_source`` to ``face_target`` with a pyramidal,
    weighted Lucas-Kanade scheme.

    The module-level buffers (pyramid_source, pyramid_target, windows,
    patch_source_pyramid, JT_source_pyramid, Hinv_source_pyramid) are cleared
    and refilled on every call.

    :param face_source: image the landmarks belong to.
    :param face_target: image in which the landmarks are searched.
    :param landmarks_source: iterable of (x, y) landmark positions in face_source.
    :param window_size: (Int) side length of the square patch around each landmark.
    :param pyramid_level: (Int) number of pyramid levels.
    :return: np.array with one [x, y] prediction per landmark.
    """
    # Create the image pyramid for both source and target.
    craft_pyramid(face_source, pyramid_level, pyramid_source)
    craft_pyramid(face_target, pyramid_level, pyramid_target)

    # Generate the weight map
    weight_map = generate_weight(window_size)

    # Create windows for cropping patches.
    windows.clear()
    windows.extend(Window(x, y, window_size) for x, y in landmarks_source)

    # Pre-compute the source-side data. The windows still have d = 0 here, so
    # source and target would be cropped at the same place; afterwards only the
    # target patch moves while the source patch stays fixed.
    patch_source_pyramid.clear()
    JT_source_pyramid.clear()
    Hinv_source_pyramid.clear()
    for level in range(pyramid_level):
        patch_source = []
        for window in windows:
            patch_source.append(window.crop(pyramid_source[level]))
            # Step each window down so it matches the next (coarser) level;
            # after this loop the windows sit at the coarsest level.
            if level < pyramid_level - 1:
                window.pyrDown()

        # Jacobian and Hessian of every source patch at this level.
        JT_source = []
        Hinv_source = []
        for patch in patch_source:
            JT_w, Hinv = _source_patch_terms(patch, weight_map)
            # Note that only the weighted JT is collected.
            JT_source.append(JT_w)
            Hinv_source.append(Hinv)

        # Collect all the pre-processed data of this level.
        patch_source_pyramid.append(patch_source)
        JT_source_pyramid.append(JT_source)
        Hinv_source_pyramid.append(Hinv_source)

    # Sequential execution, from the coarsest level to the finest.
    max_iter_step = 15
    for level in range(pyramid_level - 1, -1, -1):
        epsilon_der1 = 1.0 + level
        per_window = zip(patch_source_pyramid[level], windows,
                         JT_source_pyramid[level], Hinv_source_pyramid[level])
        for patch_s, window, JT, Hinv in per_window:
            count = 1
            while True:
                # Patch of target, which moves in each iteration.
                patch_t = window.crop(pyramid_target[level])
                # Residual and its first derivative.
                r = np.reshape(patch_t - patch_s, (-1, 1))
                der1 = np.matmul(JT, r)
                if np.linalg.norm(der1) < epsilon_der1 or count > max_iter_step:
                    break
                delta = -np.matmul(Hinv, der1)
                # Plain Python floats keep the float32 sampling maps from
                # being promoted to float64 when the displacement is added.
                window.move(float(delta[0][0]), float(delta[1][0]))
                count += 1
            # The finest level (0) is final, so no up-sampling after it.
            if level != 0:
                window.pyrUp()

    predictions = [[window.center_x + window.Dx, window.center_y + window.Dy]
                   for window in windows]
    return np.array(predictions)
def track_bidirectional(faces, locations):
    """
    Track landmarks forward through the frames, then backward from the last one.

    The forward pass starts from a copy of ``locations[0]`` and tracks frame
    i-1 -> i. The backward pass starts from a copy of the last forward result
    and tracks frame i+1 -> i. Frames are divided by 255.0 before tracking.

    :param faces: sequence of frames; each must support ``/ 255.0``.
    :param locations: sequence whose first element holds the landmark positions
        of the first frame (only ``locations[0]`` is read; it must offer ``.copy()``).
    :return: (forward_pts, feedback_pts), two lists with one entry per frame.
    """
    patch_size = 15
    frames_num = len(faces)
    pyramid_level = 4

    forward_pts = [locations[0].copy()]
    for i in range(1, frames_num):
        feature_old = faces[i - 1] / 255.0
        feature_new = faces[i] / 255.0
        location_old = forward_pts[i - 1]
        forward_pt = lk_track(feature_old, feature_new, location_old, patch_size, pyramid_level)
        forward_pts.append(forward_pt)

    feedback_pts = [None] * (frames_num - 1) + [forward_pts[-1].copy()]
    for i in range(frames_num - 2, -1, -1):
        feature_old = faces[i + 1] / 255.0
        feature_new = faces[i] / 255.0
        # Going backward, the known positions belong to frame i + 1. The entry
        # at i - 1 has not been filled yet (or wraps around to the last one).
        location_old = feedback_pts[i + 1]
        feedback_pt = lk_track(feature_old, feature_new, location_old, patch_size, pyramid_level)
        feedback_pts[i] = feedback_pt

    return forward_pts, feedback_pts