Upload 2 files
- feature_solution.py +131 -22
- script.py +25 -23
feature_solution.py
CHANGED
@@ -140,7 +140,6 @@ def convert_entry_to_human_readable(entry):
         out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
     return out
 
-
 def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 50.0):
     '''Get the vertices and edges from the gestalt segmentation mask of the house'''
     vertices = []
@@ -253,14 +252,14 @@ def merge_vertices_3d_ours(vert_edge_per_image, th=0.1):
     cur_start = 0
     types = []
     for cimg_idx, (connections, vertices_3d) in vert_edge_per_image.items():
+        cur_start+=len(vertices_3d)
         all_3d_vertices.append(vertices_3d)
-        connections
-
-        # cur_start+=len(vertices_3d)
+        connections+=[(x+cur_start,y+cur_start) for (x,y) in connections]
+        connections_3d.append(connections)
     all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
     new_vertices, _ = non_maximum_suppression(all_3d_vertices, 75)
     new_connections = []
-    return new_vertices,
+    return new_vertices, connections_3d
 
 def merge_vertices_3d(vert_edge_per_image, th=0.1):
     '''Merge vertices that are close to each other in 3D space and are of same types'''
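A minimal standalone sketch of the index-offset pattern behind this hunk, with a hypothetical helper name and the assumption that each per-image entry holds image-local (i, j) edge pairs plus an (N, 3) vertex array:

import numpy as np

def merge_per_image(vert_edge_per_image):
    # vert_edge_per_image: {image_idx: (edges, vertices_3d)} with image-local edge indices
    all_vertices, all_edges, offset = [], [], 0
    for _, (edges, vertices_3d) in vert_edge_per_image.items():
        # shift this image's local indices by the number of vertices already collected
        all_edges += [(i + offset, j + offset) for (i, j) in edges]
        all_vertices.append(vertices_3d)
        offset += len(vertices_3d)  # advance the offset only after this image's vertices
    return np.concatenate(all_vertices, axis=0), all_edges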
@@ -338,10 +337,6 @@ def prune_not_connected(all_3d_vertices, connections_3d):
 
     return np.array(new_verts), connected_out
 
-checkpoint_path = "loftr_outdoor.ckpt"
-loftr_model = KF.LoFTR(pretrained=None)
-loftr_model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
-
 def loftr_matcher(gestalt_img_0, gestalt_img1, depth_images):
     import torchvision.transforms as transforms
     rgb_to_gray = transforms.Compose([
@@ -350,7 +345,7 @@ def loftr_matcher(gestalt_img_0, gestalt_img1, depth_images):
         transforms.ToTensor() # Convert back to tensor
     ])
 
-    device = 'cpu'
+    device = 'cpu'#torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
     w, h = depth_images.size
     gest_seg_0 = gestalt_img_0.resize(depth_images.size)
@@ -365,7 +360,7 @@ def loftr_matcher(gestalt_img_0, gestalt_img1, depth_images):
     gest_seg_1_tensor = K.image_to_tensor(gest_seg_1_np, False).float().to(device)
     img2 = K.geometry.resize(gest_seg_1_tensor, (int(h/4), int(w/4))) / 255
 
-
+    matcher = KF.LoFTR(pretrained="outdoor").to(device)
 
     input_dict = {
         "image0": img1,
@@ -374,7 +369,7 @@ def loftr_matcher(gestalt_img_0, gestalt_img1, depth_images):
     # print("Input dict shape", input_dict["image0"].shape, input_dict["image1"].shape)
 
     with torch.no_grad():
-        correspondences =
+        correspondences = matcher(input_dict)
 
     # mkpts0 = correspondences["keypoints0"].cpu().numpy()
     # mkpts1 = correspondences["keypoints1"].cpu().numpy()
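For context, a minimal self-contained sketch of how kornia's pretrained outdoor LoFTR is typically called (the file names are placeholders): the matcher takes 1x1xHxW grayscale tensors in [0, 1] and returns a dict whose "keypoints0"/"keypoints1" entries hold the matched pixel coordinates.

import numpy as np
import torch
import kornia as K
import kornia.feature as KF
from PIL import Image

# two views as grayscale arrays (placeholder file names)
gray0 = np.array(Image.open("view0.png").convert("L"))
gray1 = np.array(Image.open("view1.png").convert("L"))

img0 = K.image_to_tensor(gray0, False).float() / 255.0  # 1x1xHxW in [0, 1]
img1 = K.image_to_tensor(gray1, False).float() / 255.0

matcher = KF.LoFTR(pretrained="outdoor").eval()
with torch.no_grad():
    out = matcher({"image0": img0, "image1": img1})

mkpts0 = out["keypoints0"].cpu().numpy()  # (N, 2) matched pixel coords in view 0
mkpts1 = out["keypoints1"].cpu().numpy()  # (N, 2) corresponding coords in view 1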
@@ -536,6 +531,29 @@ def line_matcher(gestalt_img_0, gestalt_img1, depth_images, line_th=0.1):
 
     return matched_lines1, matched_lines2
 
+from scipy.ndimage import center_of_mass
+
+proximity_threshold = 225
+
+def find_nearest_point(target_point, points, threshold):
+    if isinstance(target_point, torch.Tensor):
+        target_point = target_point.numpy()
+    if target_point.ndim == 2 and target_point.shape[0] == 1:
+        target_point = target_point[0]
+    if points.shape[1] != target_point.shape[0]:
+        raise ValueError("Shape mismatch: points and target_point must have the same number of dimensions")
+    distances = np.linalg.norm(points - target_point, axis=1)
+    min_distance_index = np.argmin(distances)
+    if distances[min_distance_index] < threshold:
+        return points[min_distance_index], min_distance_index
+    return None, None
+
+def replace_with_center_of_mass(point, mask):
+    y, x = int(point[1]), int(point[0])
+    region_mask = (mask == mask[y, x])
+    com = center_of_mass(region_mask)
+    return np.array([com[1], com[0]]) # Return as (x, y)
+
 # Gestalt color mapping
 gestalt_color_mapping = {
     'unclassified': [215, 62, 138],
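A short usage sketch for the helper added above, assuming find_nearest_point and proximity_threshold from this hunk are in scope (the arrays are made-up values): it returns the closest candidate and its index when it lies within the threshold, and (None, None) otherwise.

import numpy as np

candidates = np.array([[10.0, 12.0], [300.0, 40.0]])  # e.g. filtered LoFTR keypoints
endpoint = np.array([12.0, 11.0])                     # e.g. one endpoint of a matched line

point, idx = find_nearest_point(endpoint, candidates, proximity_threshold)
if point is not None:
    print(f"snapped to candidate {idx} at {point}")   # nearest point within the threshold
else:
    print("no candidate within the proximity threshold")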
@@ -619,15 +637,9 @@ def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
         mkpts_filtered_0, filtered_index = non_maximum_suppression(mkpts_filtered_0, 50)
         mkpts_filtered_1 = mkpts_filtered_1[filtered_index]
 
-
         # save_image_with_keypoints(f'keypoints_{i}.png', np.array(good_entry['gestalt'][i]), mkpts_filtered_0, (255, 0, 0))
         # save_image_with_keypoints(f'keypoints_{j}.png', np.array(good_entry['gestalt'][j]), mkpts_filtered_1, (255, 0, 0))
 
-        # Line matching
-        # line_0, line_1 = line_matcher(good_entry['gestalt'][i], good_entry['gestalt'][j], good_entry['depthcm'][i])
-        # save_image_with_lines(f'line_{i}.png', np.array(good_entry['gestalt'][i]), line_0, (255, 0, 0))
-        # save_image_with_lines(f'line_{j}.png', np.array(good_entry['gestalt'][j]), line_1, (255, 0, 0))
-
         # Triangulation with matched keypoints
         R_0 = good_entry['R'][i]
         t_0 = good_entry['t'][i]
@@ -637,17 +649,114 @@ def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
 
         points_3d = triangulate_points(mkpts_filtered_0, mkpts_filtered_1, R_0, t_0, R_1, t_1, intrinsics)
 
+        # Line matching
+        line_0, line_1 = line_matcher(good_entry['gestalt'][i], good_entry['gestalt'][j], good_entry['depthcm'][i])
+
+        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 5.)
+
+        apex_points = np.array([v['xy'] for v in vertices if v['type'] == 'apex'])
+        eave_end_points = np.array([v['xy'] for v in vertices if v['type'] == 'eave_end_point'])
+
+        # Adjust lines based on proximity to points_3d, apex, and eave_end_points
+        adjusted_lines = []
+        connections_idx = set()
+        matched_lines = line_matcher(good_entry['gestalt'][i], good_entry['gestalt'][j], good_entry['depthcm'][i])
+        # matched_lines2 = line_matcher(good_entry['gestalt'][j], good_entry['gestalt'][i], good_entry['depthcm'][j])
+
+        # for line1, line2 in zip(matched_lines1, matched_lines2):
+        for line in matched_lines[0]:
+
+            line = line.numpy()
+            # for k in range(2):
+            #     nearest_point_2d = find_nearest_point(line[k], mkpts_filtered_0, proximity_threshold)
+            #     if nearest_point_2d is not None:
+            #         line[k] = nearest_point_2d
+            #     else:
+            #         nearest_apex = find_nearest_point(line[k], apex_points, proximity_threshold)
+            #         nearest_eave_end = find_nearest_point(line[k], eave_end_points, proximity_threshold) if len(eave_end_points) > 0 else None
+            #         if nearest_apex is not None:
+            #             line[k] = replace_with_center_of_mass(line[k], gest_seg_np)
+            #         elif nearest_eave_end is not None:
+            #             line[k] = replace_with_center_of_mass(line[k], gest_seg_np)
+            index_0 = -1
+            index_1 = -1
+            for k in range(2):
+                # nearest_apex = find_nearest_point(line[k], apex_points, proximity_threshold)
+                # nearest_eave_end = find_nearest_point(line[k], eave_end_points, proximity_threshold) if len(eave_end_points) > 0 else None
+                # if nearest_apex is not None:
+                #     line[k] = torch.tensor(replace_with_center_of_mass(line[k], gest_seg_np), dtype=torch.float32)
+                # elif nearest_eave_end is not None:
+                #     line[k] = torch.tensor(replace_with_center_of_mass(line[k], gest_seg_np), dtype=torch.float32)
+                # else:
+                nearest_point_2d, index = find_nearest_point(line[k], mkpts_filtered_0, proximity_threshold)
+
+                connection = None
+                if nearest_point_2d is not None:
+                    line[k] = torch.tensor(nearest_point_2d, dtype=torch.float32)
+                    if k == 0:
+                        index_0 = index
+                    if k == 1:
+                        index_1 = index
+
+            if index_0 != index_1 and index_0 != -1 and index_1 != -1:
+                connection = (index_0, index_1)
+            # append all indices of the matched lines
+            connections_idx.add(connection) if connection is not None else None
+            adjusted_lines.append(line)
+        connections_idx = list(connections_idx)
+        adjusted_lines = np.array(adjusted_lines)
+
+        # save_image_with_lines(f'line_{i}.png', np.array(good_entry['gestalt'][i]), line_0, (255, 0, 0))
+        # save_image_with_lines(f'line_{j}.png', np.array(good_entry['gestalt'][j]), line_1, (255, 0, 0))
+
+
         gest_seg = gest.resize(depth.size)
         gest_seg_np = np.array(gest_seg).astype(np.uint8)
-
+        # Metric3D
+        depth_np = np.array(depth) / 2.5 # 2.5 is the scale estimation coefficient
+        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 5.)
+        if (len(vertices) < 2) or (len(connections) < 1):
+            print (f'Not enough vertices or connections in image {i}')
+            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
+            # continue
+        uv, depth_vert = get_uv_depth(vertices, depth_np)
+
+        # monodepth
+        # r<32 scale = colmap depth / monodepth
+        # monodepth /= scale
+        # # Assuming monodepth is provided similarly as depth
+        # monodepth = ?
+        # scale = np.mean(depth_np / monodepth)
+        # monodepth /= scale
+
+        # Normalize the uv to the camera intrinsics
+        xy_local = np.ones((len(uv), 3))
+        xy_local[:, 0] = (uv[:, 0] - K[0,2]) / K[0,0]
+        xy_local[:, 1] = (uv[:, 1] - K[1,2]) / K[1,1]
+        # Get the 3D vertices
+        vertices_3d_local = depth_vert[...,None] * (xy_local/np.linalg.norm(xy_local, axis=1)[...,None])
+        world_to_cam = np.eye(4)
+        world_to_cam[:3, :3] = R
+        world_to_cam[:3, 3] = t.reshape(-1)
+        cam_to_world = np.linalg.inv(world_to_cam)
+        vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
+        vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
+        # vert_edge_per_image[i] = vertices, connections, vertices_3d
+
         # ours method
-        vert_edge_per_image[i] =
+        vert_edge_per_image[i] = connections_idx, points_3d
 
     all_3d_vertices, connections_3d = merge_vertices_3d_ours(vert_edge_per_image, 3.0)
 
-    # all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d)
     all_3d_vertices_clean = all_3d_vertices
-
+
+    concatenated_list = []
+
+    # Iterate over each sublist in connections_3d_clean and extend the main list
+    for sublist in connections_3d:
+        concatenated_list.extend(sublist)
+
+    connections_3d_clean = concatenated_list
 
     if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
         print (f'Not enough vertices or connections in the 3D vertices')
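The uv-to-3D block added in the last hunk is a pinhole back-projection followed by a camera-to-world transform. A compact standalone sketch with example values (not taken from the repo), assuming R and t map world to camera as in the code above and using the plain z-depth convention rather than the normalized-ray scaling used there:

import numpy as np

K_mat = np.array([[800.0, 0.0, 320.0],
                  [0.0, 800.0, 240.0],
                  [0.0, 0.0, 1.0]])               # camera intrinsics (example)
R = np.eye(3)                                      # world-to-camera rotation (example)
t = np.zeros(3)                                    # world-to-camera translation (example)
uv = np.array([[320.0, 240.0], [400.0, 200.0]])    # pixel coordinates of two vertices
depth = np.array([500.0, 650.0])                   # per-vertex depth, same units as t

# back-project: X_cam = depth * K^-1 [u, v, 1]^T, then map camera -> world
rays = np.concatenate([uv, np.ones((len(uv), 1))], axis=1) @ np.linalg.inv(K_mat).T
pts_cam = rays * depth[:, None]
pts_world = (pts_cam - t) @ R                      # inverse of x_cam = R @ x_world + t
print(pts_world)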
script.py
CHANGED
@@ -192,30 +192,32 @@ if __name__ == "__main__":
 
     print('------------ Now you can do your solution ---------------')
     solution = []
-    from concurrent.futures import ProcessPoolExecutor
-    with ProcessPoolExecutor(max_workers=1) as pool:
-        results = []
-        for i, sample in enumerate(tqdm(dataset)):
-            results.append(pool.submit(predict, sample, visualize=False))
+    # from concurrent.futures import ProcessPoolExecutor
+    # with ProcessPoolExecutor(max_workers=1) as pool:
+    #     results = []
+    #     for i, sample in enumerate(tqdm(dataset)):
+    #         results.append(pool.submit(predict, sample, visualize=False))
 
-        for i, result in enumerate(tqdm(results)):
-            key, pred_vertices, pred_edges = result.result()
-            solution.append({
-                '__key__': key,
-                'wf_vertices': pred_vertices.tolist(),
-                'wf_edges': pred_edges
-            })
-
-
-
-
-
-
-
-
-
-
-
+    #     for i, result in enumerate(tqdm(results)):
+    #         key, pred_vertices, pred_edges = result.result()
+    #         solution.append({
+    #             '__key__': key,
+    #             'wf_vertices': pred_vertices.tolist(),
+    #             'wf_edges': pred_edges
+    #         })
+    ####### added for removing multiprocessing ########
+    for i, sample in enumerate(tqdm(dataset)):
+        key, pred_vertices, pred_edges = predict(sample, visualize=False)
+        solution.append({
+            '__key__': key,
+            'wf_vertices': pred_vertices.tolist(),
+            'wf_edges': pred_edges
+        })
+        ####### added for removing multiprocessing ########
+        if i % 10 == 0:
+            # incrementally save the results in case we run out of time
+            print(f"Processed {i} samples")
+            # save_submission(solution, Path(params['output_path']) / "submission.parquet")
     print('------------ Saving results ---------------')
     save_submission(solution, Path(params['output_path']) / "submission.parquet")
     print("------------ Done ------------ ")
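If the commented-out incremental save in the loop above were enabled, a minimal version of the pattern would look like this (dataset, params, predict, and save_submission are the names already defined in script.py; the every-10-samples interval is illustrative only):

from pathlib import Path
from tqdm import tqdm

solution = []
for i, sample in enumerate(tqdm(dataset)):
    key, pred_vertices, pred_edges = predict(sample, visualize=False)
    solution.append({'__key__': key,
                     'wf_vertices': pred_vertices.tolist(),
                     'wf_edges': pred_edges})
    if i % 10 == 0:
        # flush partial results so a timeout still leaves a usable submission file
        save_submission(solution, Path(params['output_path']) / "submission.parquet")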