hugoycj committed on
Commit
fa4f45e
1 Parent(s): e08a54a

Add depth image support and point cloud generation

Browse files

- Added a new function `backproject_depth_to_pointcloud` to convert depth images to point clouds.
- Added a new function `get_intrinsics` to estimate camera intrinsics.
- Replaced the point cloud file input in the `infer` function with a depth image input.
- Updated the `infer` function to generate a point cloud from the depth image using the new `backproject_depth_to_pointcloud` function.
- Updated the Gradio interface to accept a depth image file instead of a point cloud file.
- Added a depth image file to the demo examples.

Files changed (2) hide show
  1. app.py +57 -10
  2. demo/quest2_depth.png +3 -0
app.py CHANGED
@@ -71,7 +71,52 @@ def pad_image(im, value):
71
  diff = im.shape[1] - im.shape[0]
72
  return torch.cat([im, (torch.zeros((diff, im.shape[1], im.shape[2])) + value)], dim=0)
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def normalize(seen_xyz):
76
  seen_xyz = seen_xyz / (seen_xyz[torch.isfinite(seen_xyz.sum(dim=-1))].var(dim=0) ** 0.5).mean()
77
  seen_xyz = seen_xyz - seen_xyz[torch.isfinite(seen_xyz.sum(dim=-1))].mean(axis=0)
@@ -79,15 +124,18 @@ def normalize(seen_xyz):
79
 
80
  def infer(
81
  image,
82
- point_cloud,
83
  seg,
84
  granularity,
85
  temperature,
86
  ):
87
 
 
 
88
  rgb = image
89
- obj = load_obj(point_cloud.name)
90
-
 
91
  seen_rgb = (torch.tensor(rgb).float() / 255)[..., [2, 1, 0]]
92
  H, W = seen_rgb.shape[:2]
93
  seen_rgb = torch.nn.functional.interpolate(
@@ -97,11 +145,10 @@ def infer(
97
  align_corners=False,
98
  )[0].permute(1, 2, 0)
99
 
100
- seen_xyz = obj[0].reshape(H, W, 3)
101
  seg = cv2.imread(seg.name, cv2.IMREAD_UNCHANGED)
102
  mask = torch.tensor(cv2.resize(seg, (W, H))).bool()
103
  seen_xyz[~mask] = float('inf')
104
-
105
  seen_xyz = normalize(seen_xyz)
106
 
107
  bottom, right = mask.nonzero().max(dim=0)[0]
@@ -138,7 +185,7 @@ def infer(
138
  ]
139
 
140
  pred_colors, pred_occupy, unseen_xyz = run_inference(model, samples, device, temperature, args)
141
- _masks = pred_occupy > 0.1
142
  unseen_xyz = unseen_xyz[_masks]
143
  pred_colors = pred_colors[None, ...][_masks] * 255
144
 
@@ -179,12 +226,12 @@ if __name__ == '__main__':
179
 
180
  demo = gr.Interface(fn=infer,
181
  inputs=[gr.Image(label="Input Image"),
182
- gr.File(label="Pointcloud File"),
183
  gr.File(label="Segmentation File"),
184
- gr.Slider(minimum=0.05, maximum=0.5, step=0.05, value=0.2, label="Granularity"),
185
- gr.Slider(minimum=0, maximum=1.0, step=0.1, value=0.1, label="Temperature")
186
  ],
187
  outputs=[gr.outputs.File(label="Point Cloud")],
188
- examples=[["demo/quest2.jpg", "demo/quest2.obj", "demo/quest2_seg.png", 0.2, 0.1]],
189
  cache_examples=True)
190
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
71
  diff = im.shape[1] - im.shape[0]
72
  return torch.cat([im, (torch.zeros((diff, im.shape[1], im.shape[2])) + value)], dim=0)
73
 
74
def backproject_depth_to_pointcloud(depth, rotation=None, translation=None):
    """Back-project a depth map into a per-pixel 3D point map.

    Each pixel ``(u, v)`` is lifted through the inverse of the pinhole
    intrinsics returned by ``get_intrinsics`` (principal point placed at the
    image centre), scaled by its depth value, then transformed by
    ``rotation`` and ``translation``. Finally the Y and Z coordinates of the
    result are negated.

    Args:
        depth: 2-D array of shape ``(height, width)`` with one depth value
            per pixel.
        rotation: 3x3 rotation matrix applied to the camera-space points.
            Defaults to the identity when ``None``.
        translation: Length-3 translation added after the rotation.
            Defaults to zeros when ``None``.

    Returns:
        ``numpy.ndarray`` of shape ``(height, width, 3)`` holding one XYZ
        point per input pixel.

    Raises:
        ValueError: If ``depth`` is not a 2-D array.
    """
    depth = np.asarray(depth)
    if depth.ndim != 2:
        raise ValueError(
            f"depth must be a 2-D (height, width) array, got shape {depth.shape}"
        )
    height, width = depth.shape

    # Build the defaults per call instead of sharing one array instance
    # created at function-definition time.
    rotation = np.eye(3) if rotation is None else np.asarray(rotation, dtype=float)
    if translation is None:
        translation = np.zeros(3)
    else:
        translation = np.asarray(translation, dtype=float).reshape(3)

    # The principal point is assumed to be the centre of the image.
    principal_point = [width / 2, height / 2]
    intrinsics = get_intrinsics(height, width, principal_point)

    # Homogeneous pixel coordinates (u, v, 1), flattened row by row.
    u, v = np.meshgrid(np.arange(width), np.arange(height))
    uv_homogeneous = np.stack((u, v, np.ones_like(u)), axis=-1).reshape(-1, 3)

    # Camera-space points: K^-1 @ [u, v, 1]^T scaled by the pixel's depth.
    inv_intrinsics = np.linalg.inv(intrinsics)
    points_cam = np.dot(uv_homogeneous, inv_intrinsics.T) * depth.reshape(-1, 1)

    # Apply the extrinsics directly as R @ p + t; this is equivalent to
    # multiplying homogeneous points by the 3x4 matrix [R | t].
    pointcloud = np.dot(points_cam, rotation.T) + translation

    # Negate Y and Z (presumably to switch from the image convention of
    # y-down / z-forward to the one expected downstream -- TODO confirm).
    pointcloud[:, 1:] *= -1

    # Restore the image layout so pixel (row, col) maps to pointcloud[row, col].
    return pointcloud.reshape(height, width, 3)
106
+
107
+ # estimate camera intrinsics
108
def get_intrinsics(H, W, principal_point, fov_degrees=55.0):
    """Build a 3x3 intrinsic matrix for a pinhole camera model.

    The focal length (in pixels) is derived from the image width and the
    field of view, so ``fov_degrees`` is the horizontal field of view. The
    same focal length is used for both axes.

    Args:
        H: Image height in pixels. Not used in the computation; kept so the
            call signature stays compatible with existing callers.
        W: Image width in pixels.
        principal_point: Pair ``(cx, cy)`` giving the principal point in
            pixel coordinates.
        fov_degrees: Assumed field of view in degrees. Defaults to 55.

    Returns:
        ``numpy.ndarray`` of shape ``(3, 3)``::

            [[f, 0, cx],
             [0, f, cy],
             [0, 0,  1]]
    """
    # Half the image width subtends half the field of view at distance f.
    f = 0.5 * W / np.tan(0.5 * fov_degrees * np.pi / 180.0)
    cx, cy = principal_point
    return np.array([
        [f, 0, cx],
        [0, f, cy],
        [0, 0, 1],
    ])
119
+
120
  def normalize(seen_xyz):
121
  seen_xyz = seen_xyz / (seen_xyz[torch.isfinite(seen_xyz.sum(dim=-1))].var(dim=0) ** 0.5).mean()
122
  seen_xyz = seen_xyz - seen_xyz[torch.isfinite(seen_xyz.sum(dim=-1))].mean(axis=0)
 
124
 
125
  def infer(
126
  image,
127
+ depth_image,
128
  seg,
129
  granularity,
130
  temperature,
131
  ):
132
 
133
+ args.viz_granularity = granularity
134
+
135
  rgb = image
136
+ depth_image = cv2.imread(depth_image.name, -1)
137
+ depth_image = depth_image.astype(np.float32) / 256
138
+ seen_xyz = backproject_depth_to_pointcloud(depth_image)
139
  seen_rgb = (torch.tensor(rgb).float() / 255)[..., [2, 1, 0]]
140
  H, W = seen_rgb.shape[:2]
141
  seen_rgb = torch.nn.functional.interpolate(
 
145
  align_corners=False,
146
  )[0].permute(1, 2, 0)
147
 
 
148
  seg = cv2.imread(seg.name, cv2.IMREAD_UNCHANGED)
149
  mask = torch.tensor(cv2.resize(seg, (W, H))).bool()
150
  seen_xyz[~mask] = float('inf')
151
+ seen_xyz = torch.tensor(seen_xyz).float()
152
  seen_xyz = normalize(seen_xyz)
153
 
154
  bottom, right = mask.nonzero().max(dim=0)[0]
 
185
  ]
186
 
187
  pred_colors, pred_occupy, unseen_xyz = run_inference(model, samples, device, temperature, args)
188
+ _masks = pred_occupy > 0.1
189
  unseen_xyz = unseen_xyz[_masks]
190
  pred_colors = pred_colors[None, ...][_masks] * 255
191
 
 
226
 
227
  demo = gr.Interface(fn=infer,
228
  inputs=[gr.Image(label="Input Image"),
229
+ gr.File(label="Depth Image"),
230
  gr.File(label="Segmentation File"),
231
+ gr.Slider(minimum=0.05, maximum=0.5, step=0.05, value=0.2, label="Grain Size"),
232
+ gr.Slider(minimum=0, maximum=1.0, step=0.1, value=0.1, label="Color Temperature")
233
  ],
234
  outputs=[gr.outputs.File(label="Point Cloud")],
235
+ examples=[["demo/quest2.jpg", "demo/quest2_depth.png", "demo/quest2_seg.png", 0.2, 0.1]],
236
  cache_examples=True)
237
  demo.launch(server_name="0.0.0.0", server_port=7860)
demo/quest2_depth.png ADDED

Git LFS Details

  • SHA256: 085b84c9f82155c5b1e5d7660d993f9445c08debb82b0867546a15f351c776fd
  • Pointer size: 131 Bytes
  • Size of remote file: 117 kB