lemonaddie committed on
Commit
6408d03
1 Parent(s): fbf7415

Upload bilateral_normal_integration_cupy.py

Browse files
bilateral_normal_integration/bilateral_normal_integration_cupy.py ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bilateral Normal Integration (BiNI)
3
+ """
4
+ __author__ = "Xu Cao <cao.xu@ist.osaka-u.ac.jp>; Yuliang Xiu <yuliang.xiu@tue.mpg.de>"
5
+ __copyright__ = "Copyright (C) 2022 Xu Cao; Yuliang Xiu"
6
+ __version__ = "2.0"
7
+
8
+ import pyvista as pv
9
+ import cupy as cp
10
+ import numpy as np
11
+ from cupyx.scipy.sparse import csr_matrix
12
+ from cupyx.scipy.sparse.linalg import cg
13
+ from tqdm.auto import tqdm
14
+ import time
15
+
16
# Serve every CuPy allocation from a memory pool whose backing allocator is
# cp.cuda.malloc_managed (CUDA managed memory) rather than CuPy's default one.
pool = cp.cuda.MemoryPool(allocator=cp.cuda.malloc_managed)
cp.cuda.set_allocator(allocator=pool.malloc)
18
+
19
+ # Define helper functions for moving masks in different directions
20
def move_left(mask):
    """Shift a 2-D array one column to the left, zero-filling the right edge.

    A zero column is appended after the last column and the first column is
    dropped, so the result keeps the shape of ``mask``.
    """
    padded = cp.pad(mask, ((0, 0), (0, 1)), mode='constant', constant_values=0)
    return padded[:, 1:]
21
def move_right(mask):
    """Shift a 2-D array one column to the right, zero-filling the left edge.

    A zero column is inserted before the first column and the last column is
    dropped, so the result keeps the shape of ``mask``.
    """
    padded = cp.pad(mask, ((0, 0), (1, 0)), mode='constant', constant_values=0)
    return padded[:, :-1]
22
def move_top(mask):
    """Shift a 2-D array up by one row (towards row 0), zero-filling the last row.

    A zero row is appended after the last row and the first row is dropped,
    so the result keeps the shape of ``mask``.
    """
    padded = cp.pad(mask, ((0, 1), (0, 0)), mode='constant', constant_values=0)
    return padded[1:, :]
23
def move_bottom(mask):
    """Shift a 2-D array down by one row (away from row 0), zero-filling row 0.

    A zero row is inserted before the first row and the last row is dropped,
    so the result keeps the shape of ``mask``.
    """
    padded = cp.pad(mask, ((1, 0), (0, 0)), mode='constant', constant_values=0)
    return padded[:-1, :]
24
def move_top_left(mask):
    """Shift a 2-D array up one row and left one column.

    The last row and the last column of the result are filled with zeros;
    the result keeps the shape of ``mask``.
    """
    padded = cp.pad(mask, ((0, 1), (0, 1)), mode='constant', constant_values=0)
    return padded[1:, 1:]
25
def move_top_right(mask):
    """Shift a 2-D array up one row and right one column.

    The last row and the first column of the result are filled with zeros;
    the result keeps the shape of ``mask``.
    """
    padded = cp.pad(mask, ((0, 1), (1, 0)), mode='constant', constant_values=0)
    return padded[1:, :-1]
26
def move_bottom_left(mask):
    """Shift a 2-D array down one row and left one column.

    The first row and the last column of the result are filled with zeros;
    the result keeps the shape of ``mask``.
    """
    padded = cp.pad(mask, ((1, 0), (0, 1)), mode='constant', constant_values=0)
    return padded[:-1, 1:]
27
def move_bottom_right(mask):
    """Shift a 2-D array down one row and right one column.

    The first row and the first column of the result are filled with zeros;
    the result keeps the shape of ``mask``.
    """
    padded = cp.pad(mask, ((1, 0), (1, 0)), mode='constant', constant_values=0)
    return padded[:-1, :-1]
28
+
29
+
30
def _two_point_difference_matrix(nz, first_idx, second_idx, row_has_pair, num_pixel, step_size):
    """Assemble one sparse finite-difference operator in CSR form.

    Every row flagged in ``row_has_pair`` receives exactly two entries:
    ``-nz / step_size`` in column ``first_idx`` and ``+nz / step_size`` in
    column ``second_idx``. Rows that are not flagged stay empty.
    """
    coeff = nz / step_size
    data = cp.stack([-coeff, coeff], -1).flatten()
    indices = cp.stack((first_idx, second_idx), -1).flatten()
    # Two stored values per flagged row, none otherwise.
    indptr = cp.concatenate([cp.array([0]), cp.cumsum(row_has_pair.astype(int) * 2)])
    return csr_matrix((data, indices, indptr), shape=(num_pixel, num_pixel))


def generate_dx_dy(mask, nz_horizontal, nz_vertical, step_size=1):
    """Build the four one-sided difference operators restricted to ``mask``.

    pixel coordinates
    ^ vertical positive
    |
    |
    |
    o ---> horizontal positive

    top/bottom/left/right = vertical positive/vertical negative/
    horizontal negative/horizontal positive.

    :param mask: 2-D boolean array selecting the pixels that take part.
    :param nz_horizontal: 1-D array with one coefficient per masked pixel, used to scale the horizontal differences.
    :param nz_vertical: 1-D array with one coefficient per masked pixel, used to scale the vertical differences.
    :param step_size: divisor applied to every coefficient. Default value is 1.
    :return: ``(D_horizontal_pos, D_horizontal_neg, D_vertical_pos, D_vertical_neg)``,
             four CSR matrices of shape ``(num_pixel, num_pixel)``, where ``num_pixel``
             is the number of True entries in ``mask``. A row is non-empty only if the
             pixel has the corresponding neighbour inside the mask.
    """
    # Use a plain Python int for the size so it can serve directly as a matrix shape.
    num_pixel = int(cp.sum(mask).item())

    # Number the masked pixels 0..num_pixel-1 in row-major order.
    pixel_idx = cp.zeros_like(mask, dtype=int)
    pixel_idx[mask] = cp.arange(num_pixel)

    # Flags telling whether a masked pixel also has a masked neighbour on each side.
    has_left_mask = cp.logical_and(move_right(mask), mask)
    has_right_mask = cp.logical_and(move_left(mask), mask)
    has_bottom_mask = cp.logical_and(move_top(mask), mask)
    has_top_mask = cp.logical_and(move_bottom(mask), mask)

    # The same flags, flattened to the per-masked-pixel ordering.
    has_left_flat = has_left_mask[mask]
    has_right_flat = has_right_mask[mask]
    has_top_flat = has_top_mask[mask]
    has_bottom_flat = has_bottom_mask[mask]

    # "Negative" operators: -coeff on the neighbour, +coeff on the pixel itself.
    D_horizontal_neg = _two_point_difference_matrix(
        nz_horizontal[has_left_flat],
        pixel_idx[move_left(has_left_mask)], pixel_idx[has_left_mask],
        has_left_flat, num_pixel, step_size)

    # "Positive" operators: -coeff on the pixel itself, +coeff on the neighbour.
    D_horizontal_pos = _two_point_difference_matrix(
        nz_horizontal[has_right_flat],
        pixel_idx[has_right_mask], pixel_idx[move_right(has_right_mask)],
        has_right_flat, num_pixel, step_size)

    D_vertical_pos = _two_point_difference_matrix(
        nz_vertical[has_top_flat],
        pixel_idx[has_top_mask], pixel_idx[move_top(has_top_mask)],
        has_top_flat, num_pixel, step_size)

    D_vertical_neg = _two_point_difference_matrix(
        nz_vertical[has_bottom_flat],
        pixel_idx[move_bottom(has_bottom_mask)], pixel_idx[has_bottom_mask],
        has_bottom_flat, num_pixel, step_size)

    return D_horizontal_pos, D_horizontal_neg, D_vertical_pos, D_vertical_neg
81
+
82
+
83
def construct_facets_from(mask):
    """List the quadrilateral facets spanned by 2x2 neighbourhoods inside ``mask``.

    Masked pixels are numbered 0..n-1 in row-major order. A facet is produced
    for every pixel whose neighbours one row below, one column to the right
    and diagonally below-right all belong to the mask as well.

    :param mask: 2-D boolean array.
    :return: integer CuPy array with one row per facet, laid out as
             ``[4, idx_top_left, idx_bottom_left, idx_bottom_right, idx_top_right]``.
    """
    vertex_id = cp.zeros_like(mask, dtype=int)
    vertex_id[mask] = cp.arange(cp.sum(mask))

    # Marks the top-left corner of every complete 2x2 block of masked pixels.
    quad_anchor = move_top(mask) * move_left(mask) * move_top_left(mask) * mask

    # The remaining corners are the anchor mask shifted onto the neighbouring pixels.
    corner_masks = (
        quad_anchor,                      # top-left
        move_bottom(quad_anchor),         # bottom-left
        move_bottom_right(quad_anchor),   # bottom-right
        move_right(quad_anchor),          # top-right
    )

    num_quads = cp.sum(quad_anchor).item()
    columns = [4 * cp.ones(num_quads)]  # leading 4 = number of vertices per facet
    columns.extend(vertex_id[corner] for corner in corner_masks)
    return cp.stack(columns, axis=-1).astype(int)
111
+
112
+
113
def map_depth_map_to_point_clouds(depth_map, mask, K=None, step_size=1):
    """Turn the masked pixels of a depth map into an ``m x 3`` array of 3-D points.

    y
    |  z
    | /
    |/
    o ---x

    :param depth_map: 2-D array with the same shape as ``mask``.
    :param mask: 2-D boolean array selecting the ``m`` pixels to convert.
    :param K: (Optional) 3x3 camera matrix. When given, every pixel is back-projected
              through the inverse of ``K`` and scaled by its depth; when None, the
              grid coordinates are scaled by ``step_size`` and the depth is used as is.
    :param step_size: grid spacing, only used when ``K`` is None.
    :return: CuPy array of shape ``(m, 3)``.
    """
    height, width = mask.shape
    col_coord, row_coord = cp.meshgrid(cp.arange(width), cp.arange(height))
    # The first output coordinate counts rows starting from the last image row.
    row_coord = cp.flip(row_coord, axis=0)

    if K is None:
        grid = cp.zeros((height, width, 3))
        grid[..., 0] = row_coord * step_size
        grid[..., 1] = col_coord * step_size
        grid[..., 2] = depth_map
        return grid[mask]

    homogeneous = cp.zeros((height, width, 3))
    homogeneous[..., 0] = row_coord
    homogeneous[..., 1] = col_coord
    homogeneous[..., 2] = 1
    rays = cp.linalg.inv(cp.asarray(K)) @ homogeneous[mask].T  # 3 x m
    return rays.T * depth_map[mask, cp.newaxis]  # m x 3
139
+
140
+
141
def sigmoid(x, k=1):
    """Evaluate the logistic function ``1 / (1 + exp(-k * x))`` element-wise.

    :param x: input value(s).
    :param k: slope factor multiplying ``x``. Default value is 1.
    """
    denominator = 1 + cp.exp(-k * x)
    return 1 / denominator
143
+
144
+
145
def _weighted_energy(A1, A2, A3, A4, z, nx, ny, wu, wv):
    """Return the weighted sum of squared residuals of the four one-sided operators."""
    return cp.sum(wu * (A1.dot(z) + nx) ** 2) + \
        cp.sum((1 - wu) * (A2.dot(z) + nx) ** 2) + \
        cp.sum(wv * (A3.dot(z) + ny) ** 2) + \
        cp.sum((1 - wv) * (A4.dot(z) + ny) ** 2)


def bilateral_normal_integration_function(normal_map,
                                          normal_mask,
                                          k=2,
                                          lambda1=0,
                                          depth_map=None,
                                          depth_mask=None,
                                          K=None,
                                          step_size=1,
                                          max_iter=150,
                                          tol=1e-4,
                                          cg_max_iter=5000,
                                          cg_tol=1e-3):
    """
    This function performs the bilateral normal integration algorithm, as described in the paper.
    It takes as input the normal map, normal mask, and several optional parameters to control the integration process.

    :param normal_map: A normal map, which is an image where each pixel's color encodes the corresponding 3D surface normal.
    :param normal_mask: A binary mask that indicates the region of interest in the normal_map to be integrated.
                        It is converted to a boolean array before use.
    :param k: A parameter that controls the stiffness of the surface.
              The smaller the k value, the smoother the surface appears (fewer discontinuities).
              If set as 0, a smooth surface is obtained (No discontinuities), and the iteration should end at step 2 since the surface will not change with iterations.

    :param depth_map: (Optional) An initial depth map to guide the integration process.
    :param depth_mask: (Optional) A binary mask that indicates the valid depths in the depth_map.
                       Required when depth_map is given.

    :param lambda1 (Optional): A regularization parameter that controls the influence of the depth_map on the final result.
                               Required when depth map is input.
                               The larger lambda1 is, the closer the result is to the initial depth map (fine details from the normal map are less reflected).

    :param K: (Optional) A 3x3 camera intrinsic matrix, used for perspective camera models. If not provided, the algorithm assumes an orthographic camera model.
    :param step_size: (Optional) The pixel size in the world coordinates. Default value is 1.
                                 Used only in the orthographic camera model.
                                 Default value should be fine, unless you know the true value of the pixel size in the world coordinates.
                                 Do not adjust it in perspective camera model.

    :param max_iter: (Optional) The maximum number of iterations for the optimization process. Default value is 150.
                                If set as 1, a smooth surface is obtained (No discontinuities).
                                Default value should be fine.
    :param tol: (Optional) The tolerance for the relative change in energy to determine the convergence of the optimization process. Default value is 1e-4.
                           The larger, the iteration stops faster, but the discontinuity preservation quality might be worse. (fewer discontinuities)
                           Default value should be fine.

    :param cg_max_iter: (Optional) The maximum number of iterations for the Conjugate Gradient solver. Default value is 5000.
                                   Default value should be fine.
    :param cg_tol: (Optional) The tolerance for the Conjugate Gradient solver. Default value is 1e-3.
                              Default value should be fine.

    :return: depth_map: The resulting depth map after the bilateral normal integration process.
             surface: A pyvista PolyData mesh representing the 3D surface reconstructed from the depth map.
             wu_map: A 2D image that represents the horizontal smoothness weight for each pixel. (green for smooth, blue/red for discontinuities)
             wv_map: A 2D image that represents the vertical smoothness weight for each pixel. (green for smooth, blue/red for discontinuities)
             energy_list: A list of energy values during the optimization process.
    :raises ValueError: if depth_map is given without depth_mask.
    """
    # To avoid confusion, we list the coordinate systems in this code as follows
    #
    #    pixel coordinates          camera coordinates     normal coordinates (the main paper's Fig. 1 (a))
    # u                          x                              y
    # |                          |  z                           |
    # |                          | /                            o -- x
    # |                          |/                            /
    # o --- v                    o --- y                      z
    # (bottom left)
    #                       (o is the optical center;
    #                        xy-plane is parallel to the image plane;
    #                        +z is the viewing direction.)
    #
    # The input normal map should be defined in the normal coordinates.
    # The camera matrix K should be defined in the camera coordinates.
    # K = [[fx, 0,  cx],
    #      [0,  fy, cy],
    #      [0,  0,  1]]
    # NOTE: this convention differs from the OpenCV one; it is kept unchanged
    # because the rest of the code relies on it.

    normal_map = cp.asarray(normal_map)
    # The mask is used for boolean indexing throughout; an integer-typed mask
    # (e.g. uint8 0/1 or 0/255) would be interpreted as fancy indices instead.
    normal_mask = cp.asarray(normal_mask, dtype=bool)
    if depth_map is not None:
        if depth_mask is None:
            raise ValueError("depth_mask must be provided when depth_map is given.")
        depth_map = cp.asarray(depth_map)
        depth_mask = cp.asarray(depth_mask)

    num_normals = cp.sum(normal_mask).item()
    projection = "orthographic" if K is None else "perspective"
    print(f"Running bilateral normal integration with k={k} in the {projection} case. \n"
          f"The number of normal vectors is {num_normals}.")
    # transfer the normal map from the normal coordinates to the camera coordinates
    nx = normal_map[normal_mask, 1]
    ny = normal_map[normal_mask, 0]
    nz = - normal_map[normal_mask, 2]
    del normal_map

    if K is not None:  # perspective
        H, W = normal_mask.shape

        yy, xx = cp.meshgrid(cp.arange(W), cp.arange(H))
        xx = cp.flip(xx, axis=0)

        cx = K[0, 2]
        cy = K[1, 2]
        fx = K[0, 0]
        fy = K[1, 1]

        uu = xx[normal_mask] - cx
        vv = yy[normal_mask] - cy

        nz_u = uu * nx + vv * ny + fx * nz
        nz_v = uu * nx + vv * ny + fy * nz
        del xx, yy, uu, vv
    else:  # orthographic
        nz_u = nz.copy()
        nz_v = nz.copy()

    # right, left, top, bottom
    A3, A4, A1, A2 = generate_dx_dy(normal_mask, nz_horizontal=nz_v, nz_vertical=nz_u, step_size=step_size)

    # Index bookkeeping reused in every iteration to assemble A.T @ W @ A by hand.
    pixel_idx = cp.zeros_like(normal_mask, dtype=int)
    pixel_idx[normal_mask] = cp.arange(num_normals)
    pixel_idx_flat = cp.arange(num_normals)
    pixel_idx_flat_indptr = cp.arange(num_normals + 1)

    has_left_mask = cp.logical_and(move_right(normal_mask), normal_mask)
    has_left_mask_left = move_left(has_left_mask)
    has_right_mask = cp.logical_and(move_left(normal_mask), normal_mask)
    has_right_mask_right = move_right(has_right_mask)
    has_bottom_mask = cp.logical_and(move_top(normal_mask), normal_mask)
    has_bottom_mask_bottom = move_bottom(has_bottom_mask)
    has_top_mask = cp.logical_and(move_bottom(normal_mask), normal_mask)
    has_top_mask_top = move_top(has_top_mask)

    has_left_mask_flat = has_left_mask[normal_mask]
    has_right_mask_flat = has_right_mask[normal_mask]
    has_bottom_mask_flat = has_bottom_mask[normal_mask]
    has_top_mask_flat = has_top_mask[normal_mask]

    has_left_mask_left_flat = has_left_mask_left[normal_mask]
    has_right_mask_right_flat = has_right_mask_right[normal_mask]
    has_bottom_mask_bottom_flat = has_bottom_mask_bottom[normal_mask]
    has_top_mask_top_flat = has_top_mask_top[normal_mask]

    nz_left_square = nz_v[has_left_mask_flat] ** 2
    nz_right_square = nz_v[has_right_mask_flat] ** 2
    nz_top_square = nz_u[has_top_mask_flat] ** 2
    nz_bottom_square = nz_u[has_bottom_mask_flat] ** 2

    pixel_idx_left_center = pixel_idx[has_left_mask]
    pixel_idx_right_right = pixel_idx[has_right_mask_right]
    pixel_idx_top_center = pixel_idx[has_top_mask]
    pixel_idx_bottom_bottom = pixel_idx[has_bottom_mask_bottom]

    pixel_idx_left_left_indptr = cp.concatenate([cp.array([0]), cp.cumsum(has_left_mask_left_flat)])
    pixel_idx_right_center_indptr = cp.concatenate([cp.array([0]), cp.cumsum(has_right_mask_flat)])
    pixel_idx_top_top_indptr = cp.concatenate([cp.array([0]), cp.cumsum(has_top_mask_top_flat)])
    pixel_idx_bottom_center_indptr = cp.concatenate([cp.array([0]), cp.cumsum(has_bottom_mask_flat)])

    # initialization
    wu = 0.5 * cp.ones(num_normals, float)
    wv = 0.5 * cp.ones(num_normals, float)
    z = cp.zeros(num_normals, float)
    energy = _weighted_energy(A1, A2, A3, A4, z, nx, ny, wu, wv)
    energy_list = []

    tic = time.time()

    if depth_map is not None:
        depth_mask_flat = depth_mask[normal_mask].astype(bool)  # shape: (num_normals,)
        # The perspective case works on log-depth (the result is exponentiated at the end).
        z_prior = cp.log(depth_map)[normal_mask] if K is not None else depth_map[normal_mask]  # shape: (num_normals,)
        z_prior[~depth_mask_flat] = 0

    pbar = tqdm(range(max_iter))

    for i in pbar:
        ################################################################################################################
        # A_mat = A.T @ W @ A is computed manually here. It saves 2/3 of the time compared to the simplest way A.T @ W @ A.
        # A.T @ W @ A can take more time than you think when the normal map becomes larger.
        # The diagonal matrix W=diag([wu, 1-wu, wv, 1-wv]) need not be explicitly defined in this case.
        #
        data_term_top = wu[has_top_mask_flat] * nz_top_square
        data_term_bottom = (1 - wu[has_bottom_mask_flat]) * nz_bottom_square
        data_term_left = (1 - wv[has_left_mask_flat]) * nz_left_square
        data_term_right = wv[has_right_mask_flat] * nz_right_square

        diagonal_data_term = cp.zeros(num_normals)
        diagonal_data_term[has_left_mask_flat] += data_term_left
        diagonal_data_term[has_left_mask_left_flat] += data_term_left
        diagonal_data_term[has_right_mask_flat] += data_term_right
        diagonal_data_term[has_right_mask_right_flat] += data_term_right
        diagonal_data_term[has_top_mask_flat] += data_term_top
        diagonal_data_term[has_top_mask_top_flat] += data_term_top
        diagonal_data_term[has_bottom_mask_flat] += data_term_bottom
        diagonal_data_term[has_bottom_mask_bottom_flat] += data_term_bottom
        if depth_map is not None:
            diagonal_data_term[depth_mask_flat] += lambda1

        A_mat_d = csr_matrix((diagonal_data_term, pixel_idx_flat, pixel_idx_flat_indptr),
                             shape=(num_normals, num_normals))

        A_mat_left_odu = csr_matrix((-data_term_left, pixel_idx_left_center, pixel_idx_left_left_indptr),
                                    shape=(num_normals, num_normals))
        A_mat_right_odu = csr_matrix((-data_term_right, pixel_idx_right_right, pixel_idx_right_center_indptr),
                                     shape=(num_normals, num_normals))
        A_mat_top_odu = csr_matrix((-data_term_top, pixel_idx_top_center, pixel_idx_top_top_indptr),
                                   shape=(num_normals, num_normals))
        A_mat_bottom_odu = csr_matrix((-data_term_bottom, pixel_idx_bottom_bottom, pixel_idx_bottom_center_indptr),
                                      shape=(num_normals, num_normals))

        A_mat_odu = A_mat_top_odu + A_mat_bottom_odu + A_mat_right_odu + A_mat_left_odu
        A_mat = A_mat_d + A_mat_odu + A_mat_odu.T  # diagonal + upper triangle + lower triangle matrix
        ################################################################################################################

        # Jacobi preconditioner; the clip keeps the reciprocal finite for tiny diagonal entries.
        D = csr_matrix((1 / cp.clip(diagonal_data_term, 1e-5, None), pixel_idx_flat, pixel_idx_flat_indptr),
                       shape=(num_normals, num_normals))
        b_vec = A1.T @ (wu * (-nx)) \
            + A2.T @ ((1 - wu) * (-nx)) \
            + A3.T @ (wv * (-ny)) \
            + A4.T @ ((1 - wv) * (-ny))

        if depth_map is not None:
            b_vec += lambda1 * z_prior
            # Shift the current estimate so that it matches the prior on average
            # before using it as the CG starting point.
            offset = cp.mean((z_prior - z)[depth_mask_flat])
            z = z + offset

        z, _ = cg(A_mat, b_vec, x0=z, M=D, maxiter=cg_max_iter, tol=cg_tol)
        # Free the per-iteration arrays before the new weights are allocated.
        del A_mat, b_vec, wu, wv

        # Update weights
        wu = sigmoid((A2.dot(z)) ** 2 - (A1.dot(z)) ** 2, k)  # top
        wv = sigmoid((A4.dot(z)) ** 2 - (A3.dot(z)) ** 2, k)  # right

        # Check for convergence
        energy_old = energy
        energy = _weighted_energy(A1, A2, A3, A4, z, nx, ny, wu, wv)

        energy_list.append(energy)
        relative_energy = cp.abs(energy - energy_old) / energy_old
        pbar.set_description(
            f"step {i + 1}/{max_iter} energy: {energy:.3e}"
            f" relative energy: {relative_energy:.3e}")
        if relative_energy < tol:
            break
    del A1, A2, A3, A4, nx, ny
    toc = time.time()

    print(f"Total time: {toc - tic:.3f} sec")
    # Pixels outside the mask stay NaN in the returned depth map.
    depth_map = cp.ones_like(normal_mask, float) * cp.nan
    depth_map[normal_mask] = z

    if K is not None:  # perspective
        depth_map = cp.exp(depth_map)
        vertices = cp.asnumpy(map_depth_map_to_point_clouds(depth_map, normal_mask, K=K))
    else:  # orthographic
        vertices = cp.asnumpy(map_depth_map_to_point_clouds(depth_map, normal_mask, K=None, step_size=step_size))

    facets = cp.asnumpy(construct_facets_from(normal_mask))
    if nz.mean() > 0:
        # Reverse the vertex order of every facet.
        facets = facets[:, [0, 1, 4, 3, 2]]
    surface = pv.PolyData(vertices, facets)

    # In the main paper, wu indicates the horizontal direction; wv indicates the vertical direction
    wu_map = cp.ones_like(normal_mask) * cp.nan
    wu_map[normal_mask] = wv

    wv_map = cp.ones_like(normal_mask) * cp.nan
    wv_map[normal_mask] = wu

    depth_map = cp.asnumpy(depth_map)
    wu_map = cp.asnumpy(wu_map)
    wv_map = cp.asnumpy(wv_map)

    return depth_map, surface, wu_map, wv_map, energy_list
421
+
422
+
423
if __name__ == '__main__':
    import cv2
    import argparse
    import os
    import warnings
    warnings.filterwarnings('ignore')
    # To ignore the possible overflow runtime warning: overflow encountered in exp return 1 / (1 + cp.exp(-k * x)).
    # The overflow does not affect the results: when exp overflows to inf, the sigmoid still evaluates to 0.0.

    def dir_path(string):
        """argparse type: return ``string`` if it names an existing directory, else raise."""
        if os.path.isdir(string):
            return string
        raise FileNotFoundError(string)

    parser = argparse.ArgumentParser()
    # The data directory is mandatory: every input and output path is built from it.
    parser.add_argument('-p', '--path', type=dir_path, required=True)
    parser.add_argument('-k', type=float, default=2)
    parser.add_argument('-i', '--iter', type=int, default=150)
    parser.add_argument('-t', '--tol', type=float, default=1e-4)
    parser.add_argument('--cgiter', type=int, default=5000)
    parser.add_argument('--cgtol', type=float, default=1e-3)
    arg = parser.parse_args()

    normal_map_path = os.path.join(arg.path, "normal_map.png")
    normal_map = cv2.imread(normal_map_path, cv2.IMREAD_UNCHANGED)
    if normal_map is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(normal_map_path)
    normal_map = cv2.cvtColor(normal_map, cv2.COLOR_RGB2BGR)
    # Map the integer pixel values to the range [-1, 1].
    if normal_map.dtype == np.uint16:
        normal_map = normal_map / 65535 * 2 - 1
    else:
        normal_map = normal_map / 255 * 2 - 1

    # The mask is optional: without a readable mask.png every pixel is integrated.
    mask_image = cv2.imread(os.path.join(arg.path, "mask.png"), cv2.IMREAD_GRAYSCALE)
    if mask_image is None:
        mask = np.ones(normal_map.shape[:2], bool)
    else:
        mask = mask_image.astype(bool)

    # K.txt is optional: its presence selects the perspective model, its absence the orthographic one.
    K_path = os.path.join(arg.path, "K.txt")
    K = np.loadtxt(K_path) if os.path.exists(K_path) else None

    depth_map, surface, wu_map, wv_map, energy_list = bilateral_normal_integration_function(
        normal_map=normal_map,
        normal_mask=mask,
        k=arg.k,
        K=K,
        max_iter=arg.iter,
        tol=arg.tol,
        cg_max_iter=arg.cgiter,
        cg_tol=arg.cgtol)

    # save the resultant polygon mesh and discontinuity maps.
    cp.save(os.path.join(arg.path, "energy"), cp.array(energy_list))
    surface.save(os.path.join(arg.path, f"mesh_k_{arg.k}.ply"), binary=False)
    wu_map = cv2.applyColorMap(
        (255 * wu_map).astype(np.uint8), cv2.COLORMAP_JET)
    wv_map = cv2.applyColorMap(
        (255 * wv_map).astype(np.uint8), cv2.COLORMAP_JET)
    # Paint everything outside the mask white.
    wu_map[~mask] = 255
    wv_map[~mask] = 255
    cv2.imwrite(os.path.join(arg.path, f"wu_k_{arg.k}.png"), wu_map)
    cv2.imwrite(os.path.join(arg.path, f"wv_k_{arg.k}.png"), wv_map)
    print(f"saved {arg.path}")