Upload 2 files
- feature_solution.py +131 -22
- script.py +25 -23
feature_solution.py
CHANGED
@@ -140,7 +140,6 @@ def convert_entry_to_human_readable(entry):
         out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
     return out
 
-
 def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 50.0):
     '''Get the vertices and edges from the gestalt segmentation mask of the house'''
     vertices = []
@@ -253,14 +252,14 @@ def merge_vertices_3d_ours(vert_edge_per_image, th=0.1):
     cur_start = 0
     types = []
     for cimg_idx, (connections, vertices_3d) in vert_edge_per_image.items():
+        cur_start+=len(vertices_3d)
         all_3d_vertices.append(vertices_3d)
-        connections
-
-        # cur_start+=len(vertices_3d)
+        connections+=[(x+cur_start,y+cur_start) for (x,y) in connections]
+        connections_3d.append(connections)
     all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
     new_vertices, _ = non_maximum_suppression(all_3d_vertices, 75)
     new_connections = []
-    return new_vertices,
+    return new_vertices, connections_3d
 
 def merge_vertices_3d(vert_edge_per_image, th=0.1):
     '''Merge vertices that are close to each other in 3D space and are of same types'''
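A minimal standalone sketch of the index-offset pattern behind this hunk, with a hypothetical helper name and the assumption that each per-image entry holds image-local (i, j) edge pairs plus an (N, 3) vertex array:

import numpy as np

def merge_per_image(vert_edge_per_image):
    # vert_edge_per_image: {image_idx: (edges, vertices_3d)} with image-local edge indices
    all_vertices, all_edges, offset = [], [], 0
    for _, (edges, vertices_3d) in vert_edge_per_image.items():
        # shift this image's local indices by the number of vertices already collected
        all_edges += [(i + offset, j + offset) for (i, j) in edges]
        all_vertices.append(vertices_3d)
        offset += len(vertices_3d)  # advance the offset only after this image's vertices
    return np.concatenate(all_vertices, axis=0), all_edges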
@@ -338,10 +337,6 @@ def prune_not_connected(all_3d_vertices, connections_3d):
 
     return np.array(new_verts), connected_out
 
-checkpoint_path = "loftr_outdoor.ckpt"
-loftr_model = KF.LoFTR(pretrained=None)
-loftr_model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
-
 def loftr_matcher(gestalt_img_0, gestalt_img1, depth_images):
     import torchvision.transforms as transforms
     rgb_to_gray = transforms.Compose([
@@ -350,7 +345,7 @@ def loftr_matcher(gestalt_img_0, gestalt_img1, depth_images):
         transforms.ToTensor() # Convert back to tensor
     ])
 
-    device = 'cpu'
+    device = 'cpu'#torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
     w, h = depth_images.size
     gest_seg_0 = gestalt_img_0.resize(depth_images.size)
@@ -365,7 +360,7 @@ def loftr_matcher(gestalt_img_0, gestalt_img1, depth_images):
     gest_seg_1_tensor = K.image_to_tensor(gest_seg_1_np, False).float().to(device)
     img2 = K.geometry.resize(gest_seg_1_tensor, (int(h/4), int(w/4))) / 255
 
-
+    matcher = KF.LoFTR(pretrained="outdoor").to(device)
 
     input_dict = {
         "image0": img1,
@@ -374,7 +369,7 @@ def loftr_matcher(gestalt_img_0, gestalt_img1, depth_images):
     # print("Input dict shape", input_dict["image0"].shape, input_dict["image1"].shape)
 
     with torch.no_grad():
-        correspondences =
+        correspondences = matcher(input_dict)
 
     # mkpts0 = correspondences["keypoints0"].cpu().numpy()
     # mkpts1 = correspondences["keypoints1"].cpu().numpy()
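For context, a minimal self-contained sketch of how kornia's pretrained outdoor LoFTR is typically called (the file names are placeholders): the matcher takes 1x1xHxW grayscale tensors in [0, 1] and returns a dict whose "keypoints0"/"keypoints1" entries hold the matched pixel coordinates.

import numpy as np
import torch
import kornia as K
import kornia.feature as KF
from PIL import Image

# two views as grayscale arrays (placeholder file names)
gray0 = np.array(Image.open("view0.png").convert("L"))
gray1 = np.array(Image.open("view1.png").convert("L"))

img0 = K.image_to_tensor(gray0, False).float() / 255.0  # 1x1xHxW in [0, 1]
img1 = K.image_to_tensor(gray1, False).float() / 255.0

matcher = KF.LoFTR(pretrained="outdoor").eval()
with torch.no_grad():
    out = matcher({"image0": img0, "image1": img1})

mkpts0 = out["keypoints0"].cpu().numpy()  # (N, 2) matched pixel coords in view 0
mkpts1 = out["keypoints1"].cpu().numpy()  # (N, 2) corresponding coords in view 1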
@@ -536,6 +531,29 @@ def line_matcher(gestalt_img_0, gestalt_img1, depth_images, line_th=0.1):
 
     return matched_lines1, matched_lines2
 
+from scipy.ndimage import center_of_mass
+
+proximity_threshold = 225
+
+def find_nearest_point(target_point, points, threshold):
+    if isinstance(target_point, torch.Tensor):
+        target_point = target_point.numpy()
+    if target_point.ndim == 2 and target_point.shape[0] == 1:
+        target_point = target_point[0]
+    if points.shape[1] != target_point.shape[0]:
+        raise ValueError("Shape mismatch: points and target_point must have the same number of dimensions")
+    distances = np.linalg.norm(points - target_point, axis=1)
+    min_distance_index = np.argmin(distances)
+    if distances[min_distance_index] < threshold:
+        return points[min_distance_index], min_distance_index
+    return None, None
+
+def replace_with_center_of_mass(point, mask):
+    y, x = int(point[1]), int(point[0])
+    region_mask = (mask == mask[y, x])
+    com = center_of_mass(region_mask)
+    return np.array([com[1], com[0]]) # Return as (x, y)
+
 # Gestalt color mapping
 gestalt_color_mapping = {
     'unclassified': [215, 62, 138],
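A short usage sketch for the helper added above, assuming find_nearest_point and proximity_threshold from this hunk are in scope (the arrays are made-up values): it returns the closest candidate and its index when it lies within the threshold, and (None, None) otherwise.

import numpy as np

candidates = np.array([[10.0, 12.0], [300.0, 40.0]])  # e.g. filtered LoFTR keypoints
endpoint = np.array([12.0, 11.0])                     # e.g. one endpoint of a matched line

point, idx = find_nearest_point(endpoint, candidates, proximity_threshold)
if point is not None:
    print(f"snapped to candidate {idx} at {point}")   # nearest point within the threshold
else:
    print("no candidate within the proximity threshold")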
@@ -619,15 +637,9 @@ def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
         mkpts_filtered_0, filtered_index = non_maximum_suppression(mkpts_filtered_0, 50)
         mkpts_filtered_1 = mkpts_filtered_1[filtered_index]
 
-
         # save_image_with_keypoints(f'keypoints_{i}.png', np.array(good_entry['gestalt'][i]), mkpts_filtered_0, (255, 0, 0))
         # save_image_with_keypoints(f'keypoints_{j}.png', np.array(good_entry['gestalt'][j]), mkpts_filtered_1, (255, 0, 0))
 
-        # Line matching
-        # line_0, line_1 = line_matcher(good_entry['gestalt'][i], good_entry['gestalt'][j], good_entry['depthcm'][i])
-        # save_image_with_lines(f'line_{i}.png', np.array(good_entry['gestalt'][i]), line_0, (255, 0, 0))
-        # save_image_with_lines(f'line_{j}.png', np.array(good_entry['gestalt'][j]), line_1, (255, 0, 0))
-
         # Triangulation with matched keypoints
         R_0 = good_entry['R'][i]
         t_0 = good_entry['t'][i]
@@ -637,17 +649,114 @@ def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
 
         points_3d = triangulate_points(mkpts_filtered_0, mkpts_filtered_1, R_0, t_0, R_1, t_1, intrinsics)
 
+        # Line matching
+        line_0, line_1 = line_matcher(good_entry['gestalt'][i], good_entry['gestalt'][j], good_entry['depthcm'][i])
+
+        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 5.)
+
+        apex_points = np.array([v['xy'] for v in vertices if v['type'] == 'apex'])
+        eave_end_points = np.array([v['xy'] for v in vertices if v['type'] == 'eave_end_point'])
+
+        # Adjust lines based on proximity to points_3d, apex, and eave_end_points
+        adjusted_lines = []
+        connections_idx = set()
+        matched_lines = line_matcher(good_entry['gestalt'][i], good_entry['gestalt'][j], good_entry['depthcm'][i])
+        # matched_lines2 = line_matcher(good_entry['gestalt'][j], good_entry['gestalt'][i], good_entry['depthcm'][j])
+
+        # for line1, line2 in zip(matched_lines1, matched_lines2):
+        for line in matched_lines[0]:
+
+            line = line.numpy()
+            # for k in range(2):
+            #     nearest_point_2d = find_nearest_point(line[k], mkpts_filtered_0, proximity_threshold)
+            #     if nearest_point_2d is not None:
+            #         line[k] = nearest_point_2d
+            #     else:
+            #         nearest_apex = find_nearest_point(line[k], apex_points, proximity_threshold)
+            #         nearest_eave_end = find_nearest_point(line[k], eave_end_points, proximity_threshold) if len(eave_end_points) > 0 else None
+            #         if nearest_apex is not None:
+            #             line[k] = replace_with_center_of_mass(line[k], gest_seg_np)
+            #         elif nearest_eave_end is not None:
+            #             line[k] = replace_with_center_of_mass(line[k], gest_seg_np)
+            index_0 = -1
+            index_1 = -1
+            for k in range(2):
+                # nearest_apex = find_nearest_point(line[k], apex_points, proximity_threshold)
+                # nearest_eave_end = find_nearest_point(line[k], eave_end_points, proximity_threshold) if len(eave_end_points) > 0 else None
+                # if nearest_apex is not None:
+                #     line[k] = torch.tensor(replace_with_center_of_mass(line[k], gest_seg_np), dtype=torch.float32)
+                # elif nearest_eave_end is not None:
+                #     line[k] = torch.tensor(replace_with_center_of_mass(line[k], gest_seg_np), dtype=torch.float32)
+                # else:
+                nearest_point_2d, index = find_nearest_point(line[k], mkpts_filtered_0, proximity_threshold)
+
+                connection = None
+                if nearest_point_2d is not None:
+                    line[k] = torch.tensor(nearest_point_2d, dtype=torch.float32)
+                    if k == 0:
+                        index_0 = index
+                    if k == 1:
+                        index_1 = index
+
+            if index_0 != index_1 and index_0 != -1 and index_1 != -1:
+                connection = (index_0, index_1)
+            # append all indices of the matched lines
+            connections_idx.add(connection) if connection is not None else None
+            adjusted_lines.append(line)
+        connections_idx = list(connections_idx)
+        adjusted_lines = np.array(adjusted_lines)
+
+        # save_image_with_lines(f'line_{i}.png', np.array(good_entry['gestalt'][i]), line_0, (255, 0, 0))
+        # save_image_with_lines(f'line_{j}.png', np.array(good_entry['gestalt'][j]), line_1, (255, 0, 0))
+
+
         gest_seg = gest.resize(depth.size)
         gest_seg_np = np.array(gest_seg).astype(np.uint8)
-
+        # Metric3D
+        depth_np = np.array(depth) / 2.5 # 2.5 is the scale estimation coefficient
+        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 5.)
+        if (len(vertices) < 2) or (len(connections) < 1):
+            print (f'Not enough vertices or connections in image {i}')
+            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
+            # continue
+        uv, depth_vert = get_uv_depth(vertices, depth_np)
+
+        # monodepth
+        # r<32 scale = colmap depth / monodepth
+        # monodepth /= scale
+        # # Assuming monodepth is provided similarly as depth
+        # monodepth = ?
+        # scale = np.mean(depth_np / monodepth)
+        # monodepth /= scale
+
+        # Normalize the uv to the camera intrinsics
+        xy_local = np.ones((len(uv), 3))
+        xy_local[:, 0] = (uv[:, 0] - K[0,2]) / K[0,0]
+        xy_local[:, 1] = (uv[:, 1] - K[1,2]) / K[1,1]
+        # Get the 3D vertices
+        vertices_3d_local = depth_vert[...,None] * (xy_local/np.linalg.norm(xy_local, axis=1)[...,None])
+        world_to_cam = np.eye(4)
+        world_to_cam[:3, :3] = R
+        world_to_cam[:3, 3] = t.reshape(-1)
+        cam_to_world = np.linalg.inv(world_to_cam)
+        vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
+        vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
+        # vert_edge_per_image[i] = vertices, connections, vertices_3d
+
         # ours method
-        vert_edge_per_image[i] =
+        vert_edge_per_image[i] = connections_idx, points_3d
 
     all_3d_vertices, connections_3d = merge_vertices_3d_ours(vert_edge_per_image, 3.0)
 
-    # all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d)
     all_3d_vertices_clean = all_3d_vertices
-
+
+    concatenated_list = []
+
+    # Iterate over each sublist in connections_3d_clean and extend the main list
+    for sublist in connections_3d:
+        concatenated_list.extend(sublist)
+
+    connections_3d_clean = concatenated_list
 
     if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
         print (f'Not enough vertices or connections in the 3D vertices')
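The uv-to-3D block added in the last hunk is a pinhole back-projection followed by a camera-to-world transform. A compact standalone sketch with example values (not taken from the repo), assuming R and t map world to camera as in the code above and using the plain z-depth convention rather than the normalized-ray scaling used there:

import numpy as np

K_mat = np.array([[800.0, 0.0, 320.0],
                  [0.0, 800.0, 240.0],
                  [0.0, 0.0, 1.0]])               # camera intrinsics (example)
R = np.eye(3)                                      # world-to-camera rotation (example)
t = np.zeros(3)                                    # world-to-camera translation (example)
uv = np.array([[320.0, 240.0], [400.0, 200.0]])    # pixel coordinates of two vertices
depth = np.array([500.0, 650.0])                   # per-vertex depth, same units as t

# back-project: X_cam = depth * K^-1 [u, v, 1]^T, then map camera -> world
rays = np.concatenate([uv, np.ones((len(uv), 1))], axis=1) @ np.linalg.inv(K_mat).T
pts_cam = rays * depth[:, None]
pts_world = (pts_cam - t) @ R                      # inverse of x_cam = R @ x_world + t
print(pts_world)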
script.py
CHANGED
@@ -192,30 +192,32 @@ if __name__ == "__main__":
 
     print('------------ Now you can do your solution ---------------')
     solution = []
-    from concurrent.futures import ProcessPoolExecutor
-    with ProcessPoolExecutor(max_workers=1) as pool:
-        results = []
-        for i, sample in enumerate(tqdm(dataset)):
-            results.append(pool.submit(predict, sample, visualize=False))
+    # from concurrent.futures import ProcessPoolExecutor
+    # with ProcessPoolExecutor(max_workers=1) as pool:
+    #     results = []
+    #     for i, sample in enumerate(tqdm(dataset)):
+    #         results.append(pool.submit(predict, sample, visualize=False))
 
-        for i, result in enumerate(tqdm(results)):
-            key, pred_vertices, pred_edges = result.result()
-            solution.append({
-                '__key__': key,
-                'wf_vertices': pred_vertices.tolist(),
-                'wf_edges': pred_edges
-            })
-
-
-
-
-
-
-
-
-
-
-
+    #     for i, result in enumerate(tqdm(results)):
+    #         key, pred_vertices, pred_edges = result.result()
+    #         solution.append({
+    #             '__key__': key,
+    #             'wf_vertices': pred_vertices.tolist(),
+    #             'wf_edges': pred_edges
+    #         })
+    ####### added for removing multiprocessing ########
+    for i, sample in enumerate(tqdm(dataset)):
+        key, pred_vertices, pred_edges = predict(sample, visualize=False)
+        solution.append({
+            '__key__': key,
+            'wf_vertices': pred_vertices.tolist(),
+            'wf_edges': pred_edges
+        })
+        ####### added for removing multiprocessing ########
+        if i % 10 == 0:
+            # incrementally save the results in case we run out of time
+            print(f"Processed {i} samples")
+            # save_submission(solution, Path(params['output_path']) / "submission.parquet")
     print('------------ Saving results ---------------')
     save_submission(solution, Path(params['output_path']) / "submission.parquet")
     print("------------ Done ------------ ")
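If the commented-out incremental save in the loop above were enabled, a minimal version of the pattern would look like this (dataset, params, predict, and save_submission are the names already defined in script.py; the every-10-samples interval is illustrative only):

from pathlib import Path
from tqdm import tqdm

solution = []
for i, sample in enumerate(tqdm(dataset)):
    key, pred_vertices, pred_edges = predict(sample, visualize=False)
    solution.append({'__key__': key,
                     'wf_vertices': pred_vertices.tolist(),
                     'wf_edges': pred_edges})
    if i % 10 == 0:
        # flush partial results so a timeout still leaves a usable submission file
        save_submission(solution, Path(params['output_path']) / "submission.parquet")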