added line-component separation using vertex masks
handcrafted_solution.py (+91 -74)
@@ -1,15 +1,15 @@
 # Description: This file contains the handcrafted solution for the task of wireframe reconstruction
 
 import io
-from PIL import Image as PImage
-import numpy as np
 from collections import defaultdict
-import cv2
 from typing import Tuple, List
-from scipy.spatial.distance import cdist
 
+import cv2
+import numpy as np
+from PIL import Image as PImage
+from hoho.color_mappings import gestalt_color_mapping
 from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
-from hoho.color_mappings import gestalt_color_mapping
+from scipy.spatial.distance import cdist
 
 apex_color = gestalt_color_mapping["apex"]
 eave_end_point = gestalt_color_mapping["eave_end_point"]
@@ -19,10 +19,10 @@ apex_color, eave_end_point, flashing_end_point = [np.array(i) for i in [apex_col
 unclassified = np.array([(215, 62, 138)])
 line_classes = ['eave', 'ridge', 'rake', 'valley']
 
+
 def empty_solution():
     '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
-    return np.zeros((2,3)), [(0, 1)]
-
+    return np.zeros((2, 3)), [(0, 1)]
 
 
 def undesired_objects(image):
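For context, predict falls back to this stub whenever reconstruction fails, so the pipeline always emits a structurally valid wireframe. A minimal sketch of the degenerate output (shapes only, no hoho dependencies):

import numpy as np

def empty_solution():
    '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
    return np.zeros((2, 3)), [(0, 1)]

vertices, edges = empty_solution()
print(vertices.shape, edges)  # (2, 3) [(0, 1)]: two origin vertices, one edge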
@@ -40,6 +40,7 @@ def undesired_objects(image):
     img2[output == max_label] = 1
     return img2
 
+
 def clean_image(image_gestalt) -> np.ndarray:
     # clears image in from of unclassified and disconected components
     image_gestalt = np.array(image_gestalt)
@@ -55,10 +56,10 @@ def clean_image(image_gestalt) -> np.ndarray:
 
 
 def get_vertices(image_gestalt, *, color_range=4., dialations=3, erosions=1, kernel_size=13):
-
     apex_mask = cv2.inRange(image_gestalt, apex_color - color_range, apex_color + color_range)
     eave_end_point_mask = cv2.inRange(image_gestalt, eave_end_point - color_range, eave_end_point + color_range)
-    flashing_end_point_mask = cv2.inRange(image_gestalt, flashing_end_point - color_range, flashing_end_point + color_range)
+    flashing_end_point_mask = cv2.inRange(image_gestalt, flashing_end_point - color_range,
+                                          flashing_end_point + color_range)
     eave_end_point_mask = cv2.bitwise_or(eave_end_point_mask, flashing_end_point_mask)
 
     kernel = np.ones((kernel_size, kernel_size), np.uint8)
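For readers unfamiliar with the color-window trick used in get_vertices: each vertex class is extracted by thresholding the gestalt image within +/- color_range of its class color, then fusing fragments with a dilate-then-erode pass. A self-contained toy sketch (the (250, 50, 50) apex color is a made-up value, not the real mapping):

import cv2
import numpy as np

apex_color = np.array((250, 50, 50), dtype=np.float64)  # hypothetical class color
color_range = 4.

img = np.zeros((64, 64, 3), dtype=np.uint8)
img[20:30, 20:30] = apex_color.astype(np.uint8)  # one apex-colored blob

# Same windowed threshold as get_vertices: keep pixels within +/- color_range
mask = cv2.inRange(img, apex_color - color_range, apex_color + color_range)

# Dilate-then-erode fuses nearby fragments before centroid extraction
kernel = np.ones((13, 13), np.uint8)
mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, kernel, iterations=3)
mask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, kernel, iterations=1)

print(mask.max())  # 255 inside the recovered blob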
@@ -69,14 +70,16 @@ def get_vertices(image_gestalt, *, color_range=4., dialations=3, erosions=1, kernel_size=13):
     eave_end_point_mask = cv2.morphologyEx(eave_end_point_mask, cv2.MORPH_DILATE, kernel, iterations=dialations)
     eave_end_point_mask = cv2.morphologyEx(eave_end_point_mask, cv2.MORPH_ERODE, kernel, iterations=erosions)
 
-    *_, apex_centroids = cv2.connectedComponentsWithStats(apex_mask, connectivity=8)
-    *_, other_centroids = cv2.connectedComponentsWithStats(eave_end_point_mask, connectivity=8)
+    *_, apex_centroids = cv2.connectedComponentsWithStats(apex_mask, connectivity=8, stats=cv2.CV_32S)
+    *_, other_centroids = cv2.connectedComponentsWithStats(eave_end_point_mask, connectivity=8, stats=cv2.CV_32S)
+
+    return apex_centroids[1:], other_centroids[1:], apex_mask, eave_end_point_mask
 
-    return [apex_centroids[1:], other_centroids[1:]]
 
 def convert_entry_to_human_readable(entry):
     out = {}
-    already_good = ['__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces', 'face_semantics', 'K', 'R', 't']
+    already_good = ['__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces',
+                    'face_semantics', 'K', 'R', 't']
     for k, v in entry.items():
         if k in already_good:
             out[k] = v
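Reviewer note on the centroid extraction above: in OpenCV's Python bindings the label dtype parameter of connectedComponentsWithStats is ltype, while stats= names one of the optional output arrays, so stats=cv2.CV_32S very likely does not do what the keyword suggests (the call still appears to run because OpenCV reallocates its output buffers). A sketch of the documented spelling, on a synthetic mask:

import cv2
import numpy as np

mask = np.zeros((32, 32), dtype=np.uint8)
mask[4:9, 4:9] = 255      # component 1
mask[20:26, 18:25] = 255  # component 2

# retval counts labels including background label 0;
# centroids[0] is the background centroid, hence the [1:] slicing in get_vertices
n, labels, stats, centroids = cv2.connectedComponentsWithStats(
    mask, connectivity=8, ltype=cv2.CV_32S)
print(n - 1, centroids[1:])  # 2 foreground components and their (x, y) centroids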
@@ -88,35 +91,47 @@ def convert_entry_to_human_readable(entry):
         if k == 'images':
             out[k] = read_images_binary(fid=io.BytesIO(v))
         if k in ['ade20k', 'gestalt']:
-            out[k] = [PImage.open(io.BytesIO(x)) for x in v]
+            out[k] = [PImage.open(io.BytesIO(x)).convert('RGB') for x in v]
         if k == 'depthcm':
             out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
     return out
 
 
-def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 50.0):
+def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=50.0):
     '''Get the vertices and edges from the gestalt segmentation mask of the house'''
-    connections = []
     # Apex
+    color_range = 4.
+    connections = []
+
     gest_seg_np = clean_image(gest_seg_np)
-    apex_centroids, eave_end_point_centroids = get_vertices(gest_seg_np)
+    apex_centroids, eave_end_point_centroids, apex_mask, eave_end_point_mask = get_vertices(gest_seg_np)
+    apex_mask = cv2.morphologyEx(apex_mask,
+                                 cv2.MORPH_DILATE, np.ones((11, 11)), iterations=4)
+    eave_end_point_mask = cv2.morphologyEx(eave_end_point_mask,
+                                           cv2.MORPH_DILATE, np.ones((5, 5)), iterations=4)
+    vertex_mask = cv2.bitwise_not(cv2.bitwise_or(apex_mask, eave_end_point_mask))
+
     apex_pts = np.concatenate([apex_centroids, eave_end_point_centroids])
-
-
-    for edge_class in ['eave', 'ridge', 'rake', 'valley']:
+
+    for edge_class in ['eave', 'ridge', 'rake', 'valley', 'flashing']:
         edge_color = np.array(gestalt_color_mapping[edge_class])
-        mask = cv2.morphologyEx(cv2.inRange(gest_seg_np,
-                                            edge_color - 0.5,
-                                            edge_color + 0.5),
-                                cv2.MORPH_DILATE, np.ones((11, 11)))
-        line_img = np.zeros_like(gest_seg_np)
-        if mask.sum() > 0:
+
+        mask = cv2.inRange(gest_seg_np,
+                           edge_color - color_range,
+                           edge_color + color_range)
+
+        if np.any(mask):  # does not really make sense to dilate something if it is empty
+            mask = cv2.bitwise_and(mask, vertex_mask)
+
+            mask = cv2.morphologyEx(mask,
+                                    cv2.MORPH_DILATE, np.ones((11, 11)), iterations=3)
+            line_img = np.zeros_like(gest_seg_np)
             output = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
             (numLabels, labels, stats, centroids) = output
             stats, centroids = stats[1:], centroids[1:]
             edges = []
             for i in range(1, numLabels):
-                y,x = np.where(labels == i)
+                y, x = np.where(labels == i)
                 xleft_idx = np.argmin(x)
                 x_left = x[xleft_idx]
                 y_left = y[xleft_idx]
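This hunk is the change the commit title refers to: dilated vertex blobs are cut out of every edge-class mask, so edge pixels that merely meet at a corner no longer form one connected component. A self-contained sketch of the effect on two synthetic segments sharing a vertex:

import cv2
import numpy as np

edge_mask = np.zeros((100, 100), dtype=np.uint8)
cv2.line(edge_mask, (10, 50), (50, 50), 255, 3)  # segment A
cv2.line(edge_mask, (50, 50), (90, 20), 255, 3)  # segment B, sharing vertex (50, 50)

# One blob of vertex pixels around the shared corner, dilated as in the new code
vertex_mask = np.zeros_like(edge_mask)
vertex_mask[48:53, 48:53] = 255
vertex_mask = cv2.morphologyEx(vertex_mask, cv2.MORPH_DILATE,
                               np.ones((11, 11), np.uint8), iterations=4)

n_before, *_ = cv2.connectedComponentsWithStats(edge_mask, connectivity=8)
separated = cv2.bitwise_and(edge_mask, cv2.bitwise_not(vertex_mask))
n_after, *_ = cv2.connectedComponentsWithStats(separated, connectivity=8)

print(n_before - 1, n_after - 1)  # 1 component before, 2 after masking the vertex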
@@ -126,21 +141,22 @@ def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 50.0):
                 edges.append((x_left, y_left, x_right, y_right))
                 cv2.line(line_img, (x_left, y_left), (x_right, y_right), (255, 255, 255), 2)
             edges = np.array(edges)
-            if (len(apex_pts) < 2) or len(edges) <1:
+            if (len(apex_pts) < 2) or len(edges) < 1:
                 continue
-            pts_to_edges_dist = np.minimum(cdist(apex_pts, edges[:,:2]), cdist(apex_pts, edges[:,2:]))
+            pts_to_edges_dist = np.minimum(cdist(apex_pts, edges[:, :2]), cdist(apex_pts, edges[:, 2:]))
             connectivity_mask = pts_to_edges_dist <= edge_th
             edge_connects = connectivity_mask.sum(axis=0)
             for edge_idx, edgesum in enumerate(edge_connects):
-                if edgesum>=2:
-                    connected_verts = np.where(connectivity_mask[:,edge_idx])[0]
+                if edgesum >= 2:
+                    connected_verts = np.where(connectivity_mask[:, edge_idx])[0]
                     for a_i, a in enumerate(connected_verts):
-                        for b in connected_verts[a_i+1:]:
+                        for b in connected_verts[a_i + 1:]:
                             connections.append((a, b))
     vertices = [{"xy": v, "type": "apex"} for v in apex_centroids]
     vertices += [{"xy": v, "type": "eave_end_point"} for v in eave_end_point_centroids]
     return vertices, connections
 
+
 def get_uv_depth(vertices, depth):
     '''Get the depth of the vertices from the depth image'''
     uv = []
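The endpoint test above pairs each segment's leftmost/rightmost pixels with nearby vertices: cdist against the (x_left, y_left) columns and the (x_right, y_right) columns, elementwise minimum, then a threshold at edge_th. A small numeric sketch with made-up coordinates:

import numpy as np
from scipy.spatial.distance import cdist

apex_pts = np.array([[10., 50.], [90., 20.], [200., 200.]])  # candidate vertices
edges = np.array([[12., 52., 88., 22.]])  # one segment: (x_left, y_left, x_right, y_right)

# Distance of every vertex to the nearer of the segment's two endpoints
pts_to_edges_dist = np.minimum(cdist(apex_pts, edges[:, :2]),
                               cdist(apex_pts, edges[:, 2:]))
connectivity_mask = pts_to_edges_dist <= 50.0  # edge_th

# A segment becomes a wireframe edge only if >= 2 vertices sit near its endpoints
connected_verts = np.where(connectivity_mask[:, 0])[0]
print(connected_verts)  # [0 1]: vertices 0 and 1 are joined, vertex 2 is too far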
@@ -149,9 +165,9 @@ def get_uv_depth(vertices, depth):
     uv = np.array(uv)
     uv_int = uv.astype(np.int32)
     H, W = depth.shape[:2]
-    uv_int[:, 0] = np.clip( uv_int[:, 0], 0, W-1)
-    uv_int[:, 1] = np.clip( uv_int[:, 1], 0, H-1)
-    vertex_depth = depth[(uv_int[:, 1] , uv_int[:, 0])]
+    uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)
+    uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)
+    vertex_depth = depth[(uv_int[:, 1], uv_int[:, 0])]
     return uv, vertex_depth
 
 
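The clipping lines read the depth map with NumPy's row-major indexing: uv stores (x, y), so the lookup is depth[y, x] after clamping both coordinates into the image. A minimal sketch:

import numpy as np

depth = np.arange(12, dtype=np.float32).reshape(3, 4)  # H=3, W=4 toy depth map
uv = np.array([[1.2, 2.7], [5.0, -1.0]])               # (x, y) points, some out of range

uv_int = uv.astype(np.int32)
H, W = depth.shape[:2]
uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)  # clamp x into [0, W-1]
uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)  # clamp y into [0, H-1]

# Row index first (y), then column (x)
vertex_depth = depth[(uv_int[:, 1], uv_int[:, 0])]
print(vertex_depth)  # [9. 3.]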
@@ -163,16 +179,16 @@ def merge_vertices_3d(vert_edge_per_image, th=0.1):
     cur_start = 0
     types = []
     for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
-        types += [int(v['type']=='apex') for v in vertices]
+        types += [int(v['type'] == 'apex') for v in vertices]
         all_3d_vertices.append(vertices_3d)
-        connections_3d+=[(x+cur_start,y+cur_start) for (x,y) in connections]
-        cur_start+=len(vertices_3d)
+        connections_3d += [(x + cur_start, y + cur_start) for (x, y) in connections]
+        cur_start += len(vertices_3d)
     all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
-    #print (connections_3d)
+    # print (connections_3d)
     distmat = cdist(all_3d_vertices, all_3d_vertices)
-    types = np.array(types).reshape(-1,1)
+    types = np.array(types).reshape(-1, 1)
     same_types = cdist(types, types)
-    mask_to_merge = (distmat <= th) & (same_types==0)
+    mask_to_merge = (distmat <= th) & (same_types == 0)
     new_vertices = []
     new_connections = []
     to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
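Note how the type gating works in this hunk: types is a column of 0/1 flags (1 for apex), so cdist(types, types) is zero exactly where two vertices share a type; ANDing with the distance threshold restricts merging to same-type, nearby vertices. A compact sketch:

import numpy as np
from scipy.spatial.distance import cdist

vertices_3d = np.array([[0., 0., 0.], [0.05, 0., 0.], [0.05, 0., 0.]])
types = np.array([1, 1, 0]).reshape(-1, 1)  # apex, apex, eave_end_point

distmat = cdist(vertices_3d, vertices_3d)
same_types = cdist(types, types)            # 0 where the type flags match

mask_to_merge = (distmat <= 0.1) & (same_types == 0)
print(mask_to_merge[0])  # [ True  True False]: vertex 2 is close but a different type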
@@ -180,10 +196,10 @@ def merge_vertices_3d(vert_edge_per_image, th=0.1):
     for i in range(len(all_3d_vertices)):
         for j in to_merge:
             if i in j:
-                to_merge_final[i]+=j
+                to_merge_final[i] += j
     for k, v in to_merge_final.items():
         to_merge_final[k] = list(set(v))
-    already_there = set()
+    already_there = set()
     merged = []
     for k, v in to_merge_final.items():
         if k in already_there:
@@ -192,24 +208,25 @@ def merge_vertices_3d(vert_edge_per_image, th=0.1):
             for vv in v:
                 already_there.add(vv)
     old_idx_to_new = {}
-    count=0
+    count = 0
     for idxs in merged:
         new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
         for idx in idxs:
             old_idx_to_new[idx] = count
-        count +=1
-    #print (connections_3d)
-    new_vertices=np.array(new_vertices)
-    #print (connections_3d)
+        count += 1
+    # print (connections_3d)
+    new_vertices = np.array(new_vertices)
+    # print (connections_3d)
     for conn in connections_3d:
         new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
         if new_con[0] == new_con[1]:
             continue
         if new_con not in new_connections:
             new_connections.append(new_con)
-    #print (f'{len(new_vertices)} left after merging {len(all_3d_vertices)} with {th=}')
+    # print (f'{len(new_vertices)} left after merging {len(all_3d_vertices)} with {th=}')
     return new_vertices, new_connections
 
+
 def prune_not_connected(all_3d_vertices, connections_3d):
     '''Prune vertices that are not connected to any other vertex'''
     connected = defaultdict(list)
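After clusters are averaged, old vertex indices are remapped through old_idx_to_new, and edges that collapse to self-loops or duplicates are dropped. A toy sketch of that remap-and-dedup step (the index values are made up):

old_idx_to_new = {0: 0, 1: 0, 2: 1}          # vertices 0 and 1 were merged
connections_3d = [(0, 1), (0, 2), (1, 2)]

new_connections = []
for conn in connections_3d:
    new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
    if new_con[0] == new_con[1]:
        continue                              # (0, 1) collapses to a self-loop: drop
    if new_con not in new_connections:
        new_connections.append(new_con)       # (0, 2) and (1, 2) dedup to one edge
print(new_connections)  # [[0, 1]]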
@@ -219,16 +236,16 @@ def prune_not_connected(all_3d_vertices, connections_3d):
     new_indexes = {}
     new_verts = []
     connected_out = []
-    for k,v in connected.items():
+    for k, v in connected.items():
         vert = all_3d_vertices[k]
         if tuple(vert) not in new_verts:
             new_verts.append(tuple(vert))
-        new_indexes[k]=len(new_verts) -1
-    for k,v in connected.items():
+        new_indexes[k] = len(new_verts) - 1
+    for k, v in connected.items():
         for vv in v:
-            connected_out.append((new_indexes[vv[0]],new_indexes[vv[1]]))
-    connected_out=list(set(connected_out))
-
+            connected_out.append((new_indexes[vv[0]], new_indexes[vv[1]]))
+    connected_out = list(set(connected_out))
+
     return np.array(new_verts), connected_out
 
 
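prune_not_connected keeps only vertices that occur in some connection and reindexes the edges; a toy sketch mirroring its adjacency build (one deliberately isolated vertex, set-based dedup as in the function):

import numpy as np
from collections import defaultdict

all_3d_vertices = np.array([[0., 0., 0.], [1., 0., 0.], [9., 9., 9.]])  # vertex 2 is isolated
connections_3d = [(0, 1)]

connected = defaultdict(list)
for c in connections_3d:
    connected[c[0]].append(c)
    connected[c[1]].append(c)                 # vertex 2 never appears: it gets pruned

new_indexes, new_verts = {}, []
for k in connected:
    vert = tuple(all_3d_vertices[k])
    if vert not in new_verts:
        new_verts.append(vert)
    new_indexes[k] = len(new_verts) - 1

connected_out = sorted({(new_indexes[a], new_indexes[b])
                        for v in connected.values() for a, b in v})
print(np.array(new_verts).shape, connected_out)  # (2, 3) [(0, 1)]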
@@ -236,43 +253,43 @@ def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
     good_entry = convert_entry_to_human_readable(entry)
     vert_edge_per_image = {}
     for i, (gest, depth, K, R, t) in enumerate(zip(good_entry['gestalt'],
-                                                   good_entry['depthcm'],
-                                                   good_entry['K'],
-                                                   good_entry['R'],
-                                                   good_entry['t']
-                                                   )):
+                                                   good_entry['depthcm'],
+                                                   good_entry['K'],
+                                                   good_entry['R'],
+                                                   good_entry['t']
+                                                   )):
         gest_seg = gest.resize(depth.size)
         gest_seg_np = np.array(gest_seg).astype(np.uint8)
         # Metric3D
-        depth_np = np.array(depth) / 2.5
-        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 50.)
+        depth_np = np.array(depth) / 2.5  # 2.5 is the scale estimation coefficient
+        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=70.)
         if (len(vertices) < 2) or (len(connections) < 1):
-            print (f'Not enough vertices or connections in image {i}')
+            print(f'Not enough vertices or connections in image {i}')
             vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
             continue
         uv, depth_vert = get_uv_depth(vertices, depth_np)
         # Normalize the uv to the camera intrinsics
         xy_local = np.ones((len(uv), 3))
-        xy_local[:, 0] = (uv[:, 0] - K[0,2]) / K[0,0]
-        xy_local[:, 1] = (uv[:, 1] - K[1,2]) / K[1,1]
+        xy_local[:, 0] = (uv[:, 0] - K[0, 2]) / K[0, 0]
+        xy_local[:, 1] = (uv[:, 1] - K[1, 2]) / K[1, 1]
         # Get the 3D vertices
-        vertices_3d_local = depth_vert[...,None] * (xy_local/np.linalg.norm(xy_local, axis=1)[...,None])
+        vertices_3d_local = depth_vert[..., None] * (xy_local / np.linalg.norm(xy_local, axis=1)[..., None])
         world_to_cam = np.eye(4)
         world_to_cam[:3, :3] = R
         world_to_cam[:3, 3] = t.reshape(-1)
-        cam_to_world =  np.linalg.inv(world_to_cam)
+        cam_to_world = np.linalg.inv(world_to_cam)
         vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
         vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
         vert_edge_per_image[i] = vertices, connections, vertices_3d
     all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 3.0)
-    all_3d_vertices_clean, connections_3d_clean  = prune_not_connected(all_3d_vertices, connections_3d)
+    all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d)
     if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
-        print (f'Not enough vertices or connections in the 3D vertices')
+        print(f'Not enough vertices or connections in the 3D vertices')
         return (good_entry['__key__'], *empty_solution())
     if visualize:
         from hoho.viz3d import plot_estimate_and_gt
-        plot_estimate_and_gt( all_3d_vertices_clean,
-                              connections_3d_clean,
-                              good_entry['wf_vertices'],
-                              good_entry['wf_edges'])
-    return good_entry['__key__'], all_3d_vertices_clean, connections_3d_clean
+        plot_estimate_and_gt(all_3d_vertices_clean,
+                             connections_3d_clean,
+                             good_entry['wf_vertices'],
+                             good_entry['wf_edges'])
+    return good_entry['__key__'], all_3d_vertices_clean, connections_3d_clean
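Finally, the 3D lifting in predict: pixels become rays via the intrinsics, each ray is normalized and scaled by the depth value (so depth is treated as Euclidean distance along the ray rather than z-depth, which is worth double-checking against the Metric3D convention), and points are mapped to world space with the inverse of the [R|t] extrinsic. A standalone sketch with synthetic K, R, t and no cv2 homogeneous helpers:

import numpy as np

K = np.array([[500., 0., 320.],
              [0., 500., 240.],
              [0., 0., 1.]])
R, t = np.eye(3), np.array([0., 0., -2.])    # toy world-to-camera extrinsics

uv = np.array([[320., 240.], [400., 300.]])  # pixel vertices
depth_vert = np.array([5., 5.])              # depth per vertex

# Pixel -> unit ray through the pinhole model, as in predict
xy_local = np.ones((len(uv), 3))
xy_local[:, 0] = (uv[:, 0] - K[0, 2]) / K[0, 0]
xy_local[:, 1] = (uv[:, 1] - K[1, 2]) / K[1, 1]
rays = xy_local / np.linalg.norm(xy_local, axis=1)[..., None]
vertices_3d_local = depth_vert[..., None] * rays  # depth read as ray length

# cam -> world: invert the 4x4 world-to-cam matrix, same as the diff
world_to_cam = np.eye(4)
world_to_cam[:3, :3], world_to_cam[:3, 3] = R, t
cam_to_world = np.linalg.inv(world_to_cam)

homog = np.hstack([vertices_3d_local, np.ones((len(uv), 1))])
vertices_3d = (cam_to_world @ homog.T).T[:, :3]
print(vertices_3d[0])  # [0. 0. 7.]: principal-point pixel lands on the optical axis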