Spaces:
Running
on
Zero
Running
on
Zero
lemonaddie
committed on
Commit
•
6408d03
1
Parent(s):
fbf7415
Upload bilateral_normal_integration_cupy.py
Browse files
bilateral_normal_integration/bilateral_normal_integration_cupy.py
ADDED
@@ -0,0 +1,490 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Bilateral Normal Integration (BiNI)
|
3 |
+
"""
|
4 |
+
__author__ = "Xu Cao <cao.xu@ist.osaka-u.ac.jp>; Yuliang Xiu <yuliang.xiu@tue.mpg.de>"
|
5 |
+
__copyright__ = "Copyright (C) 2022 Xu Cao; Yuliang Xiu"
|
6 |
+
__version__ = "2.0"
|
7 |
+
|
8 |
+
import pyvista as pv
|
9 |
+
import cupy as cp
|
10 |
+
import numpy as np
|
11 |
+
from cupyx.scipy.sparse import csr_matrix
|
12 |
+
from cupyx.scipy.sparse.linalg import cg
|
13 |
+
from tqdm.auto import tqdm
|
14 |
+
import time
|
15 |
+
|
16 |
+
# Memory pool whose blocks are obtained through cp.cuda.malloc_managed
# (CUDA managed / unified memory, per the CuPy documentation).
pool = cp.cuda.MemoryPool(cp.cuda.malloc_managed)
|
17 |
+
# Make the module-level `pool` the allocator CuPy uses from this point on
# (import-time side effect of this module).
cp.cuda.set_allocator(pool.malloc)
|
18 |
+
|
19 |
+
# Define helper functions for moving masks in different directions
|
20 |
+
def move_left(mask):
    """Shift a 2D array one column to the left; the vacated right column is zero."""
    padded = cp.pad(mask, ((0, 0), (0, 1)), mode='constant', constant_values=0)
    # Dropping the first column moves every remaining entry one step left.
    return padded[:, 1:]
|
21 |
+
def move_right(mask):
    """Shift a 2D array one column to the right; the vacated left column is zero."""
    padded = cp.pad(mask, ((0, 0), (1, 0)), mode='constant', constant_values=0)
    # Dropping the last column moves every remaining entry one step right.
    return padded[:, :-1]
|
22 |
+
def move_top(mask):
    """Shift a 2D array up by one row; the vacated bottom row is zero."""
    padded = cp.pad(mask, ((0, 1), (0, 0)), mode='constant', constant_values=0)
    # Dropping the first row moves every remaining entry one step up.
    return padded[1:, :]
|
23 |
+
def move_bottom(mask):
    """Shift a 2D array down by one row; the vacated top row is zero."""
    padded = cp.pad(mask, ((1, 0), (0, 0)), mode='constant', constant_values=0)
    # Dropping the last row moves every remaining entry one step down.
    return padded[:-1, :]
|
24 |
+
def move_top_left(mask):
    """Shift a 2D array up and left by one; the bottom row and right column are zero."""
    padded = cp.pad(mask, ((0, 1), (0, 1)), mode='constant', constant_values=0)
    # Dropping the first row and first column performs the diagonal shift.
    return padded[1:, 1:]
|
25 |
+
def move_top_right(mask):
    """Shift a 2D array up and right by one; the bottom row and left column are zero."""
    padded = cp.pad(mask, ((0, 1), (1, 0)), mode='constant', constant_values=0)
    # Dropping the first row and last column performs the diagonal shift.
    return padded[1:, :-1]
|
26 |
+
def move_bottom_left(mask):
    """Shift a 2D array down and left by one; the top row and right column are zero."""
    padded = cp.pad(mask, ((1, 0), (0, 1)), mode='constant', constant_values=0)
    # Dropping the last row and first column performs the diagonal shift.
    return padded[:-1, 1:]
|
27 |
+
def move_bottom_right(mask):
    """Shift a 2D array down and right by one; the top row and left column are zero."""
    padded = cp.pad(mask, ((1, 0), (1, 0)), mode='constant', constant_values=0)
    # Dropping the last row and last column performs the diagonal shift.
    return padded[:-1, :-1]
|
28 |
+
|
29 |
+
|
30 |
+
def generate_dx_dy(mask, nz_horizontal, nz_vertical, step_size=1):
    """Assemble the four one-sided finite-difference operators over the masked pixels.

    Masked pixels are numbered in row-major order; each operator is a square
    CSR matrix with one row and one column per masked pixel. A row holds two
    entries (``-nz/step_size`` and ``+nz/step_size``) when the pixel has the
    relevant neighbour inside the mask, and is empty otherwise.

    :param mask: 2D boolean array selecting the pixels that take part.
    :param nz_horizontal: 1D array indexed by masked-pixel number; scales the two horizontal operators.
    :param nz_vertical: 1D array indexed by masked-pixel number; scales the two vertical operators.
    :param step_size: divisor applied to every matrix entry. Default value is 1.
    :return: the tuple (D_horizontal_pos, D_horizontal_neg, D_vertical_pos, D_vertical_neg).
    """
    # pixel coordinates
    # ^ vertical positive
    # |
    # |
    # |
    # o ---> horizontal positive
    pixel_count = cp.sum(mask)

    # Number the masked pixels 0..pixel_count-1; cells outside the mask keep index 0.
    index_map = cp.zeros_like(mask, dtype=int)
    index_map[mask] = cp.arange(pixel_count)

    def build_operator(center_mask, nz_component, first_column_mask, second_column_mask):
        # One flag per masked pixel: does this pixel own a (two-entry) row?
        row_has_entries = center_mask[mask]
        scale = nz_component[row_has_entries]
        # Per row: the first listed column gets -nz/step, the second gets +nz/step.
        values = cp.stack([-scale / step_size, scale / step_size], -1).flatten()
        columns = cp.stack((index_map[first_column_mask],
                            index_map[second_column_mask]), -1).flatten()
        # Each non-empty row contributes exactly two stored values.
        row_starts = cp.concatenate([cp.array([0]),
                                     cp.cumsum(row_has_entries.astype(int) * 2)])
        return csr_matrix((values, columns, row_starts), shape=(pixel_count, pixel_count))

    # Pixels that, together with the named neighbour, both lie inside the mask.
    left_ok = cp.logical_and(move_right(mask), mask)
    right_ok = cp.logical_and(move_left(mask), mask)
    bottom_ok = cp.logical_and(move_top(mask), mask)
    top_ok = cp.logical_and(move_bottom(mask), mask)

    # top/bottom/left/right = vertical positive/vertical negative/horizontal negative/horizontal positive
    D_horizontal_neg = build_operator(left_ok, nz_horizontal, move_left(left_ok), left_ok)
    D_horizontal_pos = build_operator(right_ok, nz_horizontal, right_ok, move_right(right_ok))
    D_vertical_pos = build_operator(top_ok, nz_vertical, top_ok, move_top(top_ok))
    D_vertical_neg = build_operator(bottom_ok, nz_vertical, move_bottom(bottom_ok), bottom_ok)

    return D_horizontal_pos, D_horizontal_neg, D_vertical_pos, D_vertical_neg
|
81 |
+
|
82 |
+
|
83 |
+
def construct_facets_from(mask):
    """List the quad facets spanned by every fully-masked 2x2 pixel neighbourhood.

    :param mask: 2D boolean array of valid pixels.
    :return: integer array with one row per facet, laid out as
             [4, idx_top_left, idx_bottom_left, idx_bottom_right, idx_top_right],
             where the indices number the masked pixels in row-major order.
    """
    # Running index of each masked pixel; cells outside the mask keep 0.
    vertex_index = cp.zeros_like(mask, dtype=int)
    vertex_index[mask] = cp.arange(cp.sum(mask))

    # A pixel is the top-left corner of a facet when it and its three
    # neighbours (one row down, one column right, and the diagonal) are all masked.
    corner_tl = move_top(mask) * move_left(mask) * move_top_left(mask) * mask

    # The remaining three corners are the top-left corners shifted back.
    corner_tr = move_right(corner_tl)
    corner_bl = move_bottom(corner_tl)
    corner_br = move_bottom_right(corner_tl)

    facet_count = cp.sum(corner_tl).item()
    vertices_per_facet = 4 * cp.ones(facet_count)
    columns = (vertices_per_facet,
               vertex_index[corner_tl],
               vertex_index[corner_bl],
               vertex_index[corner_br],
               vertex_index[corner_tr])
    return cp.stack(columns, axis=-1).astype(int)
|
111 |
+
|
112 |
+
|
113 |
+
def map_depth_map_to_point_clouds(depth_map, mask, K=None, step_size=1):
    """Convert the masked pixels of a depth map into an (m, 3) array of 3D points.

    :param depth_map: 2D array of depths with the same shape as ``mask``.
    :param mask: 2D boolean array; only its True pixels produce points.
    :param K: (Optional) 3x3 camera intrinsic matrix. When None, the first two
              coordinates are the pixel grid scaled by ``step_size`` and the third
              is the depth. Otherwise each point is inv(K) @ [x, y, 1] times the depth.
    :param step_size: grid spacing; only used when ``K`` is None.
    :return: array of shape (number of masked pixels, 3), in row-major pixel order.
    """
    # y
    # |  z
    # | /
    # |/
    # o ---x
    height, width = mask.shape
    col_coords, row_coords = cp.meshgrid(cp.arange(width), cp.arange(height))
    # The first coordinate counts rows from the bottom of the image upwards.
    row_coords = cp.flip(row_coords, axis=0)

    if K is None:
        grid = cp.zeros((height, width, 3))
        grid[..., 0] = row_coords * step_size
        grid[..., 1] = col_coords * step_size
        grid[..., 2] = depth_map
        return grid[mask]

    homogeneous = cp.zeros((height, width, 3))
    homogeneous[..., 0] = row_coords
    homogeneous[..., 1] = col_coords
    homogeneous[..., 2] = 1
    pixels = homogeneous[mask].T  # 3 x m
    rays = cp.linalg.inv(cp.asarray(K)) @ pixels
    return rays.T * depth_map[mask, cp.newaxis]  # m x 3
|
139 |
+
|
140 |
+
|
141 |
+
def sigmoid(x, k=1):
    """Return the logistic function 1 / (1 + exp(-k * x)), evaluated element-wise.

    :param x: input value or array.
    :param k: factor multiplying ``x`` inside the exponential. Default value is 1.
    """
    decay = cp.exp(-k * x)
    denominator = 1 + decay
    return 1 / denominator
|
143 |
+
|
144 |
+
|
145 |
+
def bilateral_normal_integration_function(normal_map,
                                          normal_mask,
                                          k=2,
                                          lambda1=0,
                                          depth_map=None,
                                          depth_mask=None,
                                          K=None,
                                          step_size=1,
                                          max_iter=150,
                                          tol=1e-4,
                                          cg_max_iter=5000,
                                          cg_tol=1e-3):
    """
    This function performs the bilateral normal integration algorithm, as described in the paper.
    It takes as input the normal map, normal mask, and several optional parameters to control the integration process.

    :param normal_map: A normal map, which is an image where each pixel's color encodes the corresponding 3D surface normal.
    :param normal_mask: A binary mask that indicates the region of interest in the normal_map to be integrated.
    :param k: A parameter that controls the stiffness of the surface.
              The smaller the k value, the smoother the surface appears (fewer discontinuities).
              If set as 0, a smooth surface is obtained (No discontinuities), and the iteration should end at step 2 since the surface will not change with iterations.

    :param depth_map: (Optional) An initial depth map to guide the integration process.
    :param depth_mask: (Optional) A binary mask that indicates the valid depths in the depth_map.
                       Required whenever depth_map is given.

    :param lambda1 (Optional): A regularization parameter that controls the influence of the depth_map on the final result.
                               Required when depth map is input.
                               The larger lambda1 is, the closer the result is to the initial depth map (fine details from the normal map are less reflected).

    :param K: (Optional) A 3x3 camera intrinsic matrix, used for perspective camera models. If not provided, the algorithm assumes an orthographic camera model.
    :param step_size: (Optional) The pixel size in the world coordinates. Default value is 1.
                      Used only in the orthographic camera model.
                      Default value should be fine, unless you know the true value of the pixel size in the world coordinates.
                      Do not adjust it in perspective camera model.

    :param max_iter: (Optional) The maximum number of iterations for the optimization process. Default value is 150.
                     If set as 1, a smooth surface is obtained (No discontinuities).
                     Default value should be fine.
    :param tol: (Optional) The tolerance for the relative change in energy to determine the convergence of the optimization process. Default value is 1e-4.
                The larger, the iteration stops faster, but the discontinuity preservation quality might be worse. (fewer discontinuities)
                Default value should be fine.

    :param cg_max_iter: (Optional) The maximum number of iterations for the Conjugate Gradient solver. Default value is 5000.
                        Default value should be fine.
    :param cg_tol: (Optional) The tolerance for the Conjugate Gradient solver. Default value is 1e-3.
                   Default value should be fine.

    :return: depth_map: The resulting depth map after the bilateral normal integration process.
             surface: A pyvista PolyData mesh representing the 3D surface reconstructed from the depth map.
             wu_map: A 2D image that represents the horizontal smoothness weight for each pixel. (green for smooth, blue/red for discontinuities)
             wv_map: A 2D image that represents the vertical smoothness weight for each pixel. (green for smooth, blue/red for discontinuities)
             energy_list: A list of energy values during the optimization process.
    :raises ValueError: if depth_map is given without depth_mask.
    """
    # To avoid confusion, we list the coordinate systems in this code as follows
    #
    # pixel coordinates          camera coordinates     normal coordinates (the main paper's Fig. 1 (a))
    # u                          x                              y
    # |                          |  z                           |
    # |                          | /                            o -- x
    # |                          |/                            /
    # o --- v                    o --- y                      z
    # (bottom left)
    #                       (o is the optical center;
    #                        xy-plane is parallel to the image plane;
    #                        +z is the viewing direction.)
    #
    # The input normal map should be defined in the normal coordinates.
    # The camera matrix K should be defined in the camera coordinates.
    # K = [[fx, 0,  cx],
    #      [0,  fy, cy],
    #      [0,  0,  1]]
    # I forgot why I chose the awkward coordinate system after getting used to opencv convention :(
    # but I won't touch the working code.

    normal_map = cp.asarray(normal_map)
    normal_mask = cp.asarray(normal_mask)
    if depth_map is not None:
        if depth_mask is None:
            # Fail early with a clear message instead of an obscure error further down.
            raise ValueError("depth_mask must be provided when depth_map is given")
        depth_map = cp.asarray(depth_map)
        depth_mask = cp.asarray(depth_mask)

    num_normals = cp.sum(normal_mask).item()
    projection = "orthographic" if K is None else "perspective"
    print(f"Running bilateral normal integration with k={k} in the {projection} case. \n"
          f"The number of normal vectors is {num_normals}.")
    # transfer the normal map from the normal coordinates to the camera coordinates
    nx = normal_map[normal_mask, 1]
    ny = normal_map[normal_mask, 0]
    nz = - normal_map[normal_mask, 2]
    del normal_map

    if K is not None:  # perspective
        H, W = normal_mask.shape

        yy, xx = cp.meshgrid(cp.arange(W), cp.arange(H))
        xx = cp.flip(xx, axis=0)

        cx = K[0, 2]
        cy = K[1, 2]
        fx = K[0, 0]
        fy = K[1, 1]

        uu = xx[normal_mask] - cx
        vv = yy[normal_mask] - cy

        nz_u = uu * nx + vv * ny + fx * nz
        nz_v = uu * nx + vv * ny + fy * nz
        del xx, yy, uu, vv
    else:  # orthographic
        nz_u = nz.copy()
        nz_v = nz.copy()

    # right, left, top, bottom
    A3, A4, A1, A2 = generate_dx_dy(normal_mask, nz_horizontal=nz_v, nz_vertical=nz_u, step_size=step_size)

    # Index bookkeeping used to assemble A.T @ W @ A by hand inside the loop.
    pixel_idx = cp.zeros_like(normal_mask, dtype=int)
    pixel_idx[normal_mask] = cp.arange(num_normals)
    pixel_idx_flat = cp.arange(num_normals)
    pixel_idx_flat_indptr = cp.arange(num_normals + 1)

    has_left_mask = cp.logical_and(move_right(normal_mask), normal_mask)
    has_left_mask_left = move_left(has_left_mask)
    has_right_mask = cp.logical_and(move_left(normal_mask), normal_mask)
    has_right_mask_right = move_right(has_right_mask)
    has_bottom_mask = cp.logical_and(move_top(normal_mask), normal_mask)
    has_bottom_mask_bottom = move_bottom(has_bottom_mask)
    has_top_mask = cp.logical_and(move_bottom(normal_mask), normal_mask)
    has_top_mask_top = move_top(has_top_mask)

    has_left_mask_flat = has_left_mask[normal_mask]
    has_right_mask_flat = has_right_mask[normal_mask]
    has_bottom_mask_flat = has_bottom_mask[normal_mask]
    has_top_mask_flat = has_top_mask[normal_mask]

    has_left_mask_left_flat = has_left_mask_left[normal_mask]
    has_right_mask_right_flat = has_right_mask_right[normal_mask]
    has_bottom_mask_bottom_flat = has_bottom_mask_bottom[normal_mask]
    has_top_mask_top_flat = has_top_mask_top[normal_mask]

    nz_left_square = nz_v[has_left_mask_flat] ** 2
    nz_right_square = nz_v[has_right_mask_flat] ** 2
    nz_top_square = nz_u[has_top_mask_flat] ** 2
    nz_bottom_square = nz_u[has_bottom_mask_flat] ** 2

    pixel_idx_left_center = pixel_idx[has_left_mask]
    pixel_idx_right_right = pixel_idx[has_right_mask_right]
    pixel_idx_top_center = pixel_idx[has_top_mask]
    pixel_idx_bottom_bottom = pixel_idx[has_bottom_mask_bottom]

    pixel_idx_left_left_indptr = cp.concatenate([cp.array([0]), cp.cumsum(has_left_mask_left_flat)])
    pixel_idx_right_center_indptr = cp.concatenate([cp.array([0]), cp.cumsum(has_right_mask_flat)])
    pixel_idx_top_top_indptr = cp.concatenate([cp.array([0]), cp.cumsum(has_top_mask_top_flat)])
    pixel_idx_bottom_center_indptr = cp.concatenate([cp.array([0]), cp.cumsum(has_bottom_mask_flat)])

    # initialization
    wu = 0.5 * cp.ones(num_normals, float)
    wv = 0.5 * cp.ones(num_normals, float)
    z = cp.zeros(num_normals, float)
    energy = cp.sum(wu * (A1.dot(z) + nx) ** 2) + \
             cp.sum((1 - wu) * (A2.dot(z) + nx) ** 2) + \
             cp.sum(wv * (A3.dot(z) + ny) ** 2) + \
             cp.sum((1 - wv) * (A4.dot(z) + ny) ** 2)
    energy_list = []

    tic = time.time()

    if depth_map is not None:
        depth_mask_flat = depth_mask[normal_mask].astype(bool)  # shape: (num_normals,)
        # The perspective case works on log-depth (the result is exponentiated at the end).
        z_prior = cp.log(depth_map)[normal_mask] if K is not None else depth_map[normal_mask]  # shape: (num_normals,)
        z_prior[~depth_mask_flat] = 0

    pbar = tqdm(range(max_iter))

    for i in pbar:
        ################################################################################################################
        # I am manually computing A_mat = A.T @ W @ A here. It saves 2/3 time compared to the simplest way A.T @ W @ A.
        # A.T @ W @ A can take more time than you think when the normal map becomes larger.
        # The diagonal matrix W=diag([wu, 1-wu, wv, 1-wv]) need not be explicitly defined in this case.
        #
        data_term_top = wu[has_top_mask_flat] * nz_top_square
        data_term_bottom = (1 - wu[has_bottom_mask_flat]) * nz_bottom_square
        data_term_left = (1 - wv[has_left_mask_flat]) * nz_left_square
        data_term_right = wv[has_right_mask_flat] * nz_right_square

        diagonal_data_term = cp.zeros(num_normals)
        diagonal_data_term[has_left_mask_flat] += data_term_left
        diagonal_data_term[has_left_mask_left_flat] += data_term_left
        diagonal_data_term[has_right_mask_flat] += data_term_right
        diagonal_data_term[has_right_mask_right_flat] += data_term_right
        diagonal_data_term[has_top_mask_flat] += data_term_top
        diagonal_data_term[has_top_mask_top_flat] += data_term_top
        diagonal_data_term[has_bottom_mask_flat] += data_term_bottom
        diagonal_data_term[has_bottom_mask_bottom_flat] += data_term_bottom
        if depth_map is not None:
            diagonal_data_term[depth_mask_flat] += lambda1

        A_mat_d = csr_matrix((diagonal_data_term, pixel_idx_flat, pixel_idx_flat_indptr),
                             shape=(num_normals, num_normals))

        A_mat_left_odu = csr_matrix((-data_term_left, pixel_idx_left_center, pixel_idx_left_left_indptr),
                                    shape=(num_normals, num_normals))
        A_mat_right_odu = csr_matrix((-data_term_right, pixel_idx_right_right, pixel_idx_right_center_indptr),
                                     shape=(num_normals, num_normals))
        A_mat_top_odu = csr_matrix((-data_term_top, pixel_idx_top_center, pixel_idx_top_top_indptr),
                                   shape=(num_normals, num_normals))
        A_mat_bottom_odu = csr_matrix((-data_term_bottom, pixel_idx_bottom_bottom, pixel_idx_bottom_center_indptr),
                                      shape=(num_normals, num_normals))

        A_mat_odu = A_mat_top_odu + A_mat_bottom_odu + A_mat_right_odu + A_mat_left_odu
        A_mat = A_mat_d + A_mat_odu + A_mat_odu.T  # diagonal + upper triangle + lower triangle matrix
        ################################################################################################################

        D = csr_matrix((1 / cp.clip(diagonal_data_term, 1e-5, None), pixel_idx_flat, pixel_idx_flat_indptr),
                       shape=(num_normals, num_normals))  # Jacobi preconditioner.
        b_vec = A1.T @ (wu * (-nx)) \
                + A2.T @ ((1 - wu) * (-nx)) \
                + A3.T @ (wv * (-ny)) \
                + A4.T @ ((1 - wv) * (-ny))

        if depth_map is not None:
            b_vec += lambda1 * z_prior
            # Shift the current estimate by its mean difference from the prior before solving.
            offset = cp.mean((z_prior - z)[depth_mask_flat])
            z = z + offset

        z, _ = cg(A_mat, b_vec, x0=z, M=D, maxiter=cg_max_iter, tol=cg_tol)
        del A_mat, b_vec, wu, wv

        # Update weights
        wu = sigmoid((A2.dot(z)) ** 2 - (A1.dot(z)) ** 2, k)  # top
        wv = sigmoid((A4.dot(z)) ** 2 - (A3.dot(z)) ** 2, k)  # right

        # Check for convergence
        energy_old = energy
        energy = cp.sum(wu * (A1.dot(z) + nx) ** 2) + \
                 cp.sum((1 - wu) * (A2.dot(z) + nx) ** 2) + \
                 cp.sum(wv * (A3.dot(z) + ny) ** 2) + \
                 cp.sum((1 - wv) * (A4.dot(z) + ny) ** 2)

        energy_list.append(energy)
        relative_energy = cp.abs(energy - energy_old) / energy_old
        pbar.set_description(
            f"step {i + 1}/{max_iter} energy: {energy:.3e}"
            f" relative energy: {relative_energy:.3e}")
        if relative_energy < tol:
            break
    del A1, A2, A3, A4, nx, ny
    toc = time.time()

    print(f"Total time: {toc - tic:.3f} sec")
    # Pixels outside the mask stay NaN in the returned depth map.
    depth_map = cp.ones_like(normal_mask, float) * cp.nan
    depth_map[normal_mask] = z

    if K is not None:  # perspective
        depth_map = cp.exp(depth_map)
        vertices = cp.asnumpy(map_depth_map_to_point_clouds(depth_map, normal_mask, K=K))
    else:  # orthographic
        vertices = cp.asnumpy(map_depth_map_to_point_clouds(depth_map, normal_mask, K=None, step_size=step_size))

    facets = cp.asnumpy(construct_facets_from(normal_mask))
    if nz.mean() > 0:
        # Reverse the vertex order of every facet.
        facets = facets[:, [0, 1, 4, 3, 2]]
    surface = pv.PolyData(vertices, facets)

    # In the main paper, wu indicates the horizontal direction; wv indicates the vertical direction
    wu_map = cp.ones_like(normal_mask) * cp.nan
    wu_map[normal_mask] = wv

    wv_map = cp.ones_like(normal_mask) * cp.nan
    wv_map[normal_mask] = wu

    depth_map = cp.asnumpy(depth_map)
    wu_map = cp.asnumpy(wu_map)
    wv_map = cp.asnumpy(wv_map)

    return depth_map, surface, wu_map, wv_map, energy_list
|
421 |
+
|
422 |
+
|
423 |
+
if __name__ == '__main__':
    # Command-line entry point: reads normal_map.png (plus optional mask.png and
    # K.txt) from the directory given by --path, runs the integration, and writes
    # the energy history, the mesh and the two weight maps back into that directory.
    import cv2
    import argparse
    import os
    import warnings
    warnings.filterwarnings('ignore')
    # To ignore the possible overflow runtime warning: overflow encountered in exp return 1 / (1 + cp.exp(-k * x)).
    # This overflow issue does not affect our results as cp.exp will correctly return 0.0 when -k * x is massive.

    def dir_path(string):
        # argparse type callback: accept only existing directories.
        if os.path.isdir(string):
            return string
        else:
            raise FileNotFoundError(string)

    parser = argparse.ArgumentParser()
    # --path is mandatory: every input and output file lives in that directory.
    parser.add_argument('-p', '--path', type=dir_path, required=True)
    parser.add_argument('-k', type=float, default=2)
    parser.add_argument('-i', '--iter', type=int, default=150)
    parser.add_argument('-t', '--tol', type=float, default=1e-4)
    parser.add_argument('--cgiter', type=int, default=5000)
    parser.add_argument('--cgtol', type=float, default=1e-3)
    arg = parser.parse_args()

    normal_map_path = os.path.join(arg.path, "normal_map.png")
    normal_image = cv2.imread(normal_map_path, cv2.IMREAD_UNCHANGED)
    if normal_image is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(normal_map_path)
    normal_map = cv2.cvtColor(normal_image, cv2.COLOR_RGB2BGR)
    # Rescale the stored integer range to [-1, 1].
    if normal_map.dtype == np.uint16:
        normal_map = normal_map / 65535 * 2 - 1
    else:
        normal_map = normal_map / 255 * 2 - 1

    # A missing or unreadable mask.png means "integrate every pixel".
    mask_image = cv2.imread(os.path.join(arg.path, "mask.png"), cv2.IMREAD_GRAYSCALE)
    if mask_image is None:
        mask = np.ones(normal_map.shape[:2], bool)
    else:
        mask = mask_image.astype(bool)

    # K.txt present -> perspective model; absent -> orthographic (K=None).
    K_path = os.path.join(arg.path, "K.txt")
    K = np.loadtxt(K_path) if os.path.exists(K_path) else None

    depth_map, surface, wu_map, wv_map, energy_list = bilateral_normal_integration_function(
        normal_map=normal_map,
        normal_mask=mask,
        k=arg.k,
        K=K,
        max_iter=arg.iter,
        tol=arg.tol,
        cg_max_iter=arg.cgiter,
        cg_tol=arg.cgtol)

    # save the resultant polygon mesh and discontinuity maps.
    cp.save(os.path.join(arg.path, "energy"), cp.array(energy_list))
    surface.save(os.path.join(arg.path, f"mesh_k_{arg.k}.ply"), binary=False)
    wu_map = cv2.applyColorMap(
        (255 * wu_map).astype(np.uint8), cv2.COLORMAP_JET)
    wv_map = cv2.applyColorMap(
        (255 * wv_map).astype(np.uint8), cv2.COLORMAP_JET)
    # Paint everything outside the mask white.
    wu_map[~mask] = 255
    wv_map[~mask] = 255
    cv2.imwrite(os.path.join(arg.path, f"wu_k_{arg.k}.png"), wu_map)
    cv2.imwrite(os.path.join(arg.path, f"wv_k_{arg.k}.png"), wv_map)
    print(f"saved {arg.path}")
|