Sohaib36 commited on
Commit
e502daa
1 Parent(s): 20ffb87

add: adding model helper

Browse files
Files changed (3) hide show
  1. app.py +1 -0
  2. helpers.py +157 -190
  3. images/08/3-1.jpg +0 -0
app.py CHANGED
@@ -74,6 +74,7 @@ We also released a <b>smaller</b> MonoScene model (Half resolution - w/o 3D CRP)
74
  """
75
 
76
  examples = [
 
77
  'images/08/001385.jpg',
78
  'images/08/000295.jpg',
79
  'images/08/002505.jpg',
 
74
  """
75
 
76
  examples = [
77
+ 'images/08/3-1.jpg',
78
  'images/08/001385.jpg',
79
  'images/08/000295.jpg',
80
  'images/08/002505.jpg',
helpers.py CHANGED
@@ -5,36 +5,37 @@ import pandas as pd
5
  import plotly.express as px
6
  import plotly.graph_objects as go
7
 
 
8
  def read_calib(calib_path):
9
- """
10
- Modify from https://github.com/utiasSTARS/pykitti/blob/d3e1bb81676e831886726cc5ed79ce1f049aef2c/pykitti/utils.py#L68
11
- :param calib_path: Path to a calibration text file.
12
- :return: dict with calibration matrices.
13
- """
14
- calib_all = {}
15
- with open(calib_path, "r") as f:
16
- for line in f.readlines():
17
- if line == "\n":
18
- break
19
- key, value = line.split(":", 1)
20
- calib_all[key] = np.array([float(x) for x in value.split()])
21
-
22
- # reshape matrices
23
- calib_out = {}
24
- # 3x4 projection matrix for left camera
25
- calib_out["P2"] = calib_all["P2"].reshape(3, 4)
26
- calib_out["Tr"] = np.identity(4) # 4x4 matrix
27
- calib_out["Tr"][:3, :4] = calib_all["Tr"].reshape(3, 4)
28
- return calib_out
29
-
30
-
31
- def vox2pix(cam_E, cam_k,
32
- vox_origin, voxel_size,
33
- img_W, img_H,
34
  scene_size):
35
  """
36
  compute the 2D projection of voxels centroids
37
-
38
  Parameters:
39
  ----------
40
  cam_E: 4x4
@@ -50,7 +51,7 @@ def vox2pix(cam_E, cam_k,
50
  image height
51
  scene_size: (3,)
52
  scene size in meter: (51.2, 51.2, 6.4) for SemKITTI and (4.8, 4.8, 2.88) for NYUv2
53
-
54
  Returns
55
  -------
56
  projected_pix: (N, 2)
@@ -61,23 +62,24 @@ def vox2pix(cam_E, cam_k,
61
  Voxels'distance to the sensor in meter
62
  """
63
  # Compute the x, y, z bounding of the scene in meter
64
- vol_bnds = np.zeros((3,2))
65
- vol_bnds[:,0] = vox_origin
66
- vol_bnds[:,1] = vox_origin + np.array(scene_size)
67
 
68
  # Compute the voxel centroids in lidar coordinates
69
- vol_dim = np.ceil((vol_bnds[:,1]- vol_bnds[:,0])/ voxel_size).copy(order='C').astype(int)
 
70
  xv, yv, zv = np.meshgrid(
71
- range(vol_dim[0]),
72
- range(vol_dim[1]),
73
- range(vol_dim[2]),
74
- indexing='ij'
75
- )
76
  vox_coords = np.concatenate([
77
- xv.reshape(1,-1),
78
- yv.reshape(1,-1),
79
- zv.reshape(1,-1)
80
- ], axis=0).astype(int).T
81
 
82
  # Project voxels'centroid from lidar coordinates to camera coordinates
83
  cam_pts = fusion.TSDFVolume.vox2world(vox_origin, vox_coords, voxel_size)
@@ -90,16 +92,14 @@ def vox2pix(cam_E, cam_k,
90
  # Eliminate pixels outside view frustum
91
  pix_z = cam_pts[:, 2]
92
  fov_mask = np.logical_and(pix_x >= 0,
93
- np.logical_and(pix_x < img_W,
94
- np.logical_and(pix_y >= 0,
95
- np.logical_and(pix_y < img_H,
96
- pix_z > 0))))
97
-
98
 
99
  return torch.from_numpy(projected_pix), torch.from_numpy(fov_mask), torch.from_numpy(pix_z)
100
 
101
 
102
-
103
  def get_grid_coords(dims, resolution):
104
  """
105
  :param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32])
@@ -125,18 +125,25 @@ def get_grid_coords(dims, resolution):
125
 
126
  return coords_grid
127
 
 
128
  def get_projections(img_W, img_H):
129
  scale_3ds = [1, 2]
130
  data = {}
131
  for scale_3d in scale_3ds:
132
- scene_size = (51.2, 51.2, 6.4)
133
- vox_origin = np.array([0, -25.6, -2])
134
- voxel_size = 0.2
135
-
136
- calib = read_calib("calib.txt")
137
- cam_k = calib["P2"][:3, :3]
138
- T_velo_2_cam = calib["Tr"]
139
-
 
 
 
 
 
 
140
  # compute the 3D-2D mapping
141
  projected_pix, fov_mask, pix_z = vox2pix(
142
  T_velo_2_cam,
@@ -146,26 +153,27 @@ def get_projections(img_W, img_H):
146
  img_W,
147
  img_H,
148
  scene_size,
149
- )
150
 
151
  data["projected_pix_{}".format(scale_3d)] = projected_pix
152
  data["pix_z_{}".format(scale_3d)] = pix_z
153
- data["fov_mask_{}".format(scale_3d)] = fov_mask
154
  return data
155
 
156
 
157
  def majority_pooling(grid, k_size=2):
158
  result = np.zeros(
159
- (grid.shape[0] // k_size, grid.shape[1] // k_size, grid.shape[2] // k_size)
 
160
  )
161
  for xx in range(0, int(np.floor(grid.shape[0] / k_size))):
162
  for yy in range(0, int(np.floor(grid.shape[1] / k_size))):
163
  for zz in range(0, int(np.floor(grid.shape[2] / k_size))):
164
 
165
  sub_m = grid[
166
- (xx * k_size) : (xx * k_size) + k_size,
167
- (yy * k_size) : (yy * k_size) + k_size,
168
- (zz * k_size) : (zz * k_size) + k_size,
169
  ]
170
  unique, counts = np.unique(sub_m, return_counts=True)
171
  if True in ((unique != 0) & (unique != 255)):
@@ -181,156 +189,115 @@ def majority_pooling(grid, k_size=2):
181
  return result
182
 
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  def draw(
185
  voxels,
186
- # T_velo_2_cam,
187
- # vox_origin,
188
- fov_mask,
189
- # img_size,
190
- # f,
191
- voxel_size=0.4,
192
- # d=7, # 7m - determine the size of the mesh representing the camera
193
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
- fov_mask = fov_mask.reshape(-1)
196
  # Compute the voxels coordinates
197
  grid_coords = get_grid_coords(
198
- [voxels.shape[0], voxels.shape[1], voxels.shape[2]], voxel_size
199
  )
200
 
201
-
202
  # Attach the predicted class to every voxel
203
- grid_coords = np.vstack([grid_coords.T, voxels.reshape(-1)]).T
204
-
205
- # Get the voxels inside FOV
206
- fov_grid_coords = grid_coords[fov_mask, :]
207
-
208
- # Get the voxels outside FOV
209
- outfov_grid_coords = grid_coords[~fov_mask, :]
210
 
211
  # Remove empty and unknown voxels
212
- fov_voxels = fov_grid_coords[
213
- (fov_grid_coords[:, 3] > 0) & (fov_grid_coords[:, 3] < 255), :
214
- ]
215
- # print(np.unique(fov_voxels[:, 3], return_counts=True))
216
- outfov_voxels = outfov_grid_coords[
217
- (outfov_grid_coords[:, 3] > 0) & (outfov_grid_coords[:, 3] < 255), :
218
- ]
219
-
220
- # figure = mlab.figure(size=(1400, 1400), bgcolor=(1, 1, 1))
221
  colors = np.array(
222
  [
223
- [0,0,0],
224
- [100, 150, 245],
225
- [100, 230, 245],
226
- [30, 60, 150],
227
- [80, 30, 180],
228
- [100, 80, 250],
229
- [255, 30, 30],
230
- [255, 40, 200],
231
- [150, 30, 90],
232
- [255, 0, 255],
233
- [255, 150, 255],
234
- [75, 0, 75],
235
- [175, 0, 75],
236
- [255, 200, 0],
237
- [255, 120, 50],
238
- [0, 175, 0],
239
- [135, 60, 0],
240
- [150, 240, 80],
241
- [255, 240, 150],
242
- [255, 0, 0],
243
  ]
244
  ).astype(np.uint8)
245
 
246
- pts_colors = [f'rgb({colors[int(i)][0]}, {colors[int(i)][1]}, {colors[int(i)][2]})' for i in fov_voxels[:, 3]]
247
- out_fov_colors = [f'rgb({colors[int(i)][0]//3*2}, {colors[int(i)][1]//3*2}, {colors[int(i)][2]//3*2})' for i in outfov_voxels[:, 3]]
 
 
248
  pts_colors = pts_colors + out_fov_colors
249
-
250
- fov_voxels = np.concatenate([fov_voxels, outfov_voxels], axis=0)
251
- x = fov_voxels[:, 0].flatten()
252
- y = fov_voxels[:, 1].flatten()
253
- z = fov_voxels[:, 2].flatten()
254
- # label = fov_voxels[:, 3].flatten()
255
- fig = go.Figure(data=[go.Scatter3d(x=x, y=y, z=z,mode='markers',
256
- marker=dict(
257
- size=2,
258
  color=pts_colors, # set color to an array/list of desired values
259
- # colorscale='Viridis', # choose a colorscale
260
  opacity=1.0,
261
  symbol='square'
262
  ))])
 
263
  fig.update_layout(
264
- scene = dict(
265
- aspectmode='data',
266
- xaxis = dict(
267
- backgroundcolor="rgb(255, 255, 255)",
268
- gridcolor="black",
269
- showbackground=True,
270
- zerolinecolor="black",
271
- nticks=4,
272
- visible=False,
273
- range=[-1,55],),
274
- yaxis = dict(
275
- backgroundcolor="rgb(255, 255, 255)",
276
- gridcolor="black",
277
- showbackground=True,
278
- zerolinecolor="black",
279
- visible=False,
280
- nticks=4, range=[-1,55],),
281
- zaxis = dict(
282
- backgroundcolor="rgb(255, 255, 255)",
283
- gridcolor="black",
284
- showbackground=True,
285
- zerolinecolor="black",
286
- visible=False,
287
- nticks=4, range=[-1,7],),
288
- bgcolor="black",
289
- ),
290
-
291
- )
292
 
293
- # fig = px.scatter_3d(
294
- # fov_voxels,
295
- # x=fov_voxels[:, 0], y="y", z="z", color="label")
296
- # Draw occupied inside FOV voxels
297
- # plt_plot_fov = mlab.points3d(
298
- # fov_voxels[:, 0],
299
- # fov_voxels[:, 1],
300
- # fov_voxels[:, 2],
301
- # fov_voxels[:, 3],
302
- # colormap="viridis",
303
- # scale_factor=voxel_size - 0.05 * voxel_size,
304
- # mode="cube",
305
- # opacity=1.0,
306
- # vmin=1,
307
- # vmax=19,
308
- # )
309
-
310
- # # Draw occupied outside FOV voxels
311
- # plt_plot_outfov = mlab.points3d(
312
- # outfov_voxels[:, 0],
313
- # outfov_voxels[:, 1],
314
- # outfov_voxels[:, 2],
315
- # outfov_voxels[:, 3],
316
- # colormap="viridis",
317
- # scale_factor=voxel_size - 0.05 * voxel_size,
318
- # mode="cube",
319
- # opacity=1.0,
320
- # vmin=1,
321
- # vmax=19,
322
- # )
323
-
324
-
325
-
326
- # plt_plot_fov.glyph.scale_mode = "scale_by_vector"
327
- # plt_plot_outfov.glyph.scale_mode = "scale_by_vector"
328
-
329
- # plt_plot_fov.module_manager.scalar_lut_manager.lut.table = colors
330
-
331
- # outfov_colors = colors
332
- # outfov_colors[:, :3] = outfov_colors[:, :3] // 3 * 2
333
- # plt_plot_outfov.module_manager.scalar_lut_manager.lut.table = outfov_colors
334
-
335
- # mlab.show()
336
- return fig
 
5
  import plotly.express as px
6
  import plotly.graph_objects as go
7
 
8
+
9
def read_calib(calib_path):
    """Parse a KITTI-style calibration text file into matrices.

    Adapted from https://github.com/utiasSTARS/pykitti/blob/d3e1bb81676e831886726cc5ed79ce1f049aef2c/pykitti/utils.py#L68

    :param calib_path: Path to a calibration text file.
    :return: dict with calibration matrices: "P2" (3x4 left-camera
        projection matrix) and "Tr" (4x4 homogeneous transform).
    """
    raw = {}
    with open(calib_path, "r") as handle:
        for raw_line in handle:
            # A blank line marks the end of the calibration entries.
            if raw_line == "\n":
                break
            name, numbers = raw_line.split(":", 1)
            raw[name] = np.array([float(tok) for tok in numbers.split()])

    # Reshape the flat value arrays into their matrix forms.
    out = {
        # 3x4 projection matrix for the left camera
        "P2": raw["P2"].reshape(3, 4),
        # Transform promoted to a homogeneous 4x4 matrix
        "Tr": np.identity(4),
    }
    out["Tr"][:3, :4] = raw["Tr"].reshape(3, 4)
    return out
30
+
31
+
32
+ def vox2pix(cam_E, cam_k,
33
+ vox_origin, voxel_size,
34
+ img_W, img_H,
35
  scene_size):
36
  """
37
  compute the 2D projection of voxels centroids
38
+
39
  Parameters:
40
  ----------
41
  cam_E: 4x4
 
51
  image height
52
  scene_size: (3,)
53
  scene size in meter: (51.2, 51.2, 6.4) for SemKITTI and (4.8, 4.8, 2.88) for NYUv2
54
+
55
  Returns
56
  -------
57
  projected_pix: (N, 2)
 
62
  Voxels' distance to the sensor in meters
63
  """
64
  # Compute the x, y, z bounding of the scene in meter
65
+ vol_bnds = np.zeros((3, 2))
66
+ vol_bnds[:, 0] = vox_origin
67
+ vol_bnds[:, 1] = vox_origin + np.array(scene_size)
68
 
69
  # Compute the voxel centroids in lidar coordinates
70
+ vol_dim = np.ceil((vol_bnds[:, 1] - vol_bnds[:, 0]) /
71
+ voxel_size).copy(order='C').astype(int)
72
  xv, yv, zv = np.meshgrid(
73
+ range(vol_dim[0]),
74
+ range(vol_dim[1]),
75
+ range(vol_dim[2]),
76
+ indexing='ij'
77
+ )
78
  vox_coords = np.concatenate([
79
+ xv.reshape(1, -1),
80
+ yv.reshape(1, -1),
81
+ zv.reshape(1, -1)
82
+ ], axis=0).astype(int).T
83
 
84
  # Project voxels' centroids from lidar coordinates to camera coordinates
85
  cam_pts = fusion.TSDFVolume.vox2world(vox_origin, vox_coords, voxel_size)
 
92
  # Eliminate pixels outside view frustum
93
  pix_z = cam_pts[:, 2]
94
  fov_mask = np.logical_and(pix_x >= 0,
95
+ np.logical_and(pix_x < img_W,
96
+ np.logical_and(pix_y >= 0,
97
+ np.logical_and(pix_y < img_H,
98
+ pix_z > 0))))
 
99
 
100
  return torch.from_numpy(projected_pix), torch.from_numpy(fov_mask), torch.from_numpy(pix_z)
101
 
102
 
 
103
  def get_grid_coords(dims, resolution):
104
  """
105
  :param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32])
 
125
 
126
  return coords_grid
127
 
128
+
129
  def get_projections(img_W, img_H):
130
  scale_3ds = [1, 2]
131
  data = {}
132
  for scale_3d in scale_3ds:
133
+ scene_size = (4.8, 4.8, 2.88)
134
+ vox_origin = np.array([-1.54591799, 0.8907361, -0.05])
135
+ voxel_size = 0.08
136
+
137
+ calib = read_calib("/monoscene/MonoScene/calib.txt")
138
+ cam_k = np.array([[518.8579, 0, 320], [0, 518.8579, 240], [0, 0, 1]])
139
+ cam_pose = np.asarray([[9.6699458e-01, 4.2662762e-02, 2.5120059e-01, 0.0000000e+00],
140
+ [-2.5147417e-01, 1.0867463e-03,
141
+ 9.6786356e-01, 0.0000000e+00],
142
+ [4.1018680e-02, -9.9908894e-01,
143
+ 1.1779292e-02, 1.1794727e+00],
144
+ [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00]])
145
+ T_velo_2_cam = np.linalg.inv(cam_pose)
146
+
147
  # compute the 3D-2D mapping
148
  projected_pix, fov_mask, pix_z = vox2pix(
149
  T_velo_2_cam,
 
153
  img_W,
154
  img_H,
155
  scene_size,
156
+ )
157
 
158
  data["projected_pix_{}".format(scale_3d)] = projected_pix
159
  data["pix_z_{}".format(scale_3d)] = pix_z
160
+ data["fov_mask_{}".format(scale_3d)] = fov_mask
161
  return data
162
 
163
 
164
  def majority_pooling(grid, k_size=2):
165
  result = np.zeros(
166
+ (grid.shape[0] // k_size, grid.shape[1] //
167
+ k_size, grid.shape[2] // k_size)
168
  )
169
  for xx in range(0, int(np.floor(grid.shape[0] / k_size))):
170
  for yy in range(0, int(np.floor(grid.shape[1] / k_size))):
171
  for zz in range(0, int(np.floor(grid.shape[2] / k_size))):
172
 
173
  sub_m = grid[
174
+ (xx * k_size): (xx * k_size) + k_size,
175
+ (yy * k_size): (yy * k_size) + k_size,
176
+ (zz * k_size): (zz * k_size) + k_size,
177
  ]
178
  unique, counts = np.unique(sub_m, return_counts=True)
179
  if True in ((unique != 0) & (unique != 255)):
 
189
  return result
190
 
191
 
192
def get_grid_coords(dims, resolution):
    """Compute the center coordinates of every voxel in a regular grid.

    NOTE(review): this redefines the earlier `get_grid_coords` in this
    module and shadows it — confirm whether the first definition is dead.

    :param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32])
    :param resolution: edge length of one voxel in meters
    :return coords_grid: (N, 3) array with the center coords of voxels in
        the grid, with the x and y columns swapped for display
    """
    g_xx = np.arange(0, dims[0] + 1)
    g_yy = np.arange(0, dims[1] + 1)
    g_zz = np.arange(0, dims[2] + 1)

    # Obtaining the grid with coords...
    xx, yy, zz = np.meshgrid(g_xx[:-1], g_yy[:-1], g_zz[:-1])
    coords_grid = np.array([xx.flatten(), yy.flatten(), zz.flatten()]).T
    # `np.float` was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin `float` is the documented drop-in replacement (float64).
    coords_grid = coords_grid.astype(float)

    # Shift from voxel corner indices to voxel centers in meters.
    coords_grid = (coords_grid * resolution) + resolution / 2

    # Swap the x and y columns.
    temp = np.copy(coords_grid)
    temp[:, 0] = coords_grid[:, 1]
    temp[:, 1] = coords_grid[:, 0]
    coords_grid = np.copy(temp)

    return coords_grid
216
+
217
+
218
  def draw(
219
  voxels,
220
+ cam_pose,
221
+ vox_origin,
222
+ voxel_size=0.08,
223
+ d=0.75, # 0.75m - determine the size of the mesh representing the camera
 
 
 
224
  ):
225
+ # Compute the coordinates of the mesh representing camera
226
+ y = d * 480 / (2 * 518.8579)
227
+ x = d * 640 / (2 * 518.8579)
228
+ tri_points = np.array(
229
+ [
230
+ [0, 0, 0],
231
+ [x, y, d],
232
+ [-x, y, d],
233
+ [-x, -y, d],
234
+ [x, -y, d],
235
+ ]
236
+ )
237
+ tri_points = np.hstack([tri_points, np.ones((5, 1))])
238
+
239
+ tri_points = (cam_pose @ tri_points.T).T
240
+ x = tri_points[:, 0] - vox_origin[0]
241
+ y = tri_points[:, 1] - vox_origin[1]
242
+ z = tri_points[:, 2] - vox_origin[2]
243
+ triangles = [
244
+ (0, 1, 2),
245
+ (0, 1, 4),
246
+ (0, 3, 4),
247
+ (0, 2, 3),
248
+ ]
249
 
 
250
  # Compute the voxels coordinates
251
  grid_coords = get_grid_coords(
252
+ [voxels.shape[0], voxels.shape[2], voxels.shape[1]], voxel_size
253
  )
254
 
 
255
  # Attach the predicted class to every voxel
256
+ grid_coords = np.vstack(
257
+ (grid_coords.T, np.moveaxis(voxels, [0, 1, 2], [0, 2, 1]).reshape(-1))
258
+ ).T
 
 
 
 
259
 
260
  # Remove empty and unknown voxels
261
+ occupied_voxels = grid_coords[(grid_coords[:, 3] > 0) & (grid_coords[:, 3] < 255)]
 
 
 
 
 
 
 
 
262
  colors = np.array(
263
  [
264
+ [22, 191, 206, 255],
265
+ [214, 38, 40, 255],
266
+ [43, 160, 43, 255],
267
+ [158, 216, 229, 255],
268
+ [114, 158, 206, 255],
269
+ [204, 204, 91, 255],
270
+ [255, 186, 119, 255],
271
+ [147, 102, 188, 255],
272
+ [30, 119, 181, 255],
273
+ [188, 188, 33, 255],
274
+ [255, 127, 12, 255],
275
+ [196, 175, 214, 255],
276
+ [153, 153, 153, 255],
 
 
 
 
 
 
 
277
  ]
278
  ).astype(np.uint8)
279
 
280
+ pts_colors = [
281
+ f'rgb({colors[int(i)][0]}, {colors[int(i)][1]}, {colors[int(i)][2]})' for i in occupied_voxels[:, 3]]
282
+ out_fov_colors = [
283
+ f'rgb({colors[int(i)][0]//3*2}, {colors[int(i)][1]//3*2}, {colors[int(i)][2]//3*2})' for i in occupied_voxels[:, 3]]
284
  pts_colors = pts_colors + out_fov_colors
285
+ fig = go.Figure(data=[go.Scatter3d(x=occupied_voxels[:, 0], y=occupied_voxels[:, 1], z=occupied_voxels[:, 2], mode='markers',
286
+ marker=dict(
287
+ size=4,
 
 
 
 
 
 
288
  color=pts_colors, # set color to an array/list of desired values
 
289
  opacity=1.0,
290
  symbol='square'
291
  ))])
292
+
293
  fig.update_layout(
294
+ scene=dict(
295
+ aspectmode='data',
296
+ yaxis=dict(visible=False, showticklabels=False),
297
+ bgcolor="black",
298
+ ),
299
+
300
+ )
301
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
+ return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
images/08/3-1.jpg ADDED