Spaces: Realcat — Running
Commit 57c1094 • committed by Realcat
Parent(s): d521fb7

update: sfm
Browse files
- common/app_class.py +119 -76
- common/config.yaml +8 -0
- common/sfm.py +164 -0
- hloc/colmap_from_nvm.py +21 -5
- hloc/extract_features.py +5 -1
- hloc/extractors/eigenplaces.py +57 -0
- hloc/localize_inloc.py +6 -2
- hloc/localize_sfm.py +10 -3
- hloc/match_dense.py +34 -10
- hloc/matchers/mast3r.py +5 -7
- hloc/matchers/superglue.py +3 -1
- hloc/pairs_from_exhaustive.py +3 -1
- hloc/pairs_from_poses.py +3 -1
- hloc/pairs_from_retrieval.py +6 -2
- hloc/reconstruction.py +8 -3
- hloc/triangulation.py +21 -7
- hloc/utils/viz.py +1 -1
- hloc/visualization.py +27 -8
- requirements.txt +3 -2
common/app_class.py
CHANGED
@@ -3,7 +3,10 @@ from typing import Any, Dict, Optional, Tuple

 import gradio as gr
 import numpy as np
+from easydict import EasyDict as edict
+from omegaconf import OmegaConf

+from common.sfm import SfmEngine
 from common.utils import (
     GRADIO_VERSION,
     gen_examples,

@@ -115,7 +118,7 @@ class ImageMatchingApp:
                        label="Match thres.",
                        value=0.1,
                    )
+                    match_setting_max_keypoints = gr.Slider(
                        minimum=10,
                        maximum=10000,
                        step=10,

@@ -199,7 +202,7 @@ class ImageMatchingApp:
                    input_image0,
                    input_image1,
                    match_setting_threshold,
+                    match_setting_max_keypoints,
                    detect_keypoints_threshold,
                    matcher_list,
                    ransac_method,

@@ -314,7 +317,7 @@ class ImageMatchingApp:
                    input_image0,
                    input_image1,
                    match_setting_threshold,
+                    match_setting_max_keypoints,
                    detect_keypoints_threshold,
                    matcher_list,
                    input_image0,

@@ -378,14 +381,14 @@ class ImageMatchingApp:
                    outputs=[output_wrapped, geometry_result],
                )
            with gr.Tab("Structure from Motion(under-dev)"):
+                sfm_ui = AppSfmUI(  # noqa: F841
+                    {
+                        **self.cfg,
+                        "matcher_zoo": self.matcher_zoo,
+                        "outputs": "experiments/sfm",
+                    }
+                )
+                # sfm_ui.call()

    def run(self):
        self.app.queue().launch(

@@ -459,7 +462,7 @@ class ImageMatchingApp:
                self.cfg["defaults"][
                    "match_threshold"
                ],  # matching_threshold: float
+                self.cfg["defaults"]["max_keypoints"],  # max_keypoints: int
                self.cfg["defaults"][
                    "keypoint_threshold"
                ],  # keypoint_threshold: float

@@ -546,8 +549,9 @@


class AppBaseUI:
+    def __init__(self, cfg: Dict[str, Any] = {}):
+        self.cfg = OmegaConf.create(cfg)
+        self.inputs = edict({})

    def _init_ui(self):
        NotImplemented

@@ -559,9 +563,16 @@ class AppBaseUI:
class AppSfmUI(AppBaseUI):
    def __init__(self, cfg: Dict[str, Any] = None):
        super().__init__(cfg)
+        assert "matcher_zoo" in self.cfg
+        self.matcher_zoo = self.cfg["matcher_zoo"]
+        self.sfm_engine = SfmEngine(cfg)
+
+    def init_retrieval_dropdown(self):
+        algos = []
+        for k, v in self.cfg["retrieval_zoo"].items():
+            if v.get("enable", True):
+                algos.append(k)
+        return algos

    def _update_options(self, option):
        if option == "sparse":

@@ -571,15 +582,6 @@ class AppSfmUI(AppBaseUI):
        else:
            return gr.Textbox("not set", visible=True)

-    def set_local_features(self, features):
-        self.features = features
-
-    def set_global_features(self, features):
-        self.global_features = features
-
-    def set_matchers(self, matchers):
-        self.matchers = matchers
-
    def _on_select_custom_params(self, value: bool = False):
        return gr.Textbox(
            label="Camera Params",

@@ -592,15 +594,18 @@ class AppSfmUI(AppBaseUI):
        with gr.Row():
            # data settting and camera settings
            with gr.Column():
-                input_images = gr.File(
-                    label="SfM",
+                self.inputs.input_images = gr.File(
+                    label="SfM",
+                    interactive=True,
+                    file_count="multiple",
+                    min_width=300,
                )
                # camera setting
                with gr.Accordion("Camera Settings", open=True):
                    with gr.Column():
                        with gr.Row():
                            with gr.Column():
-                                camera_model = gr.Dropdown(
+                                self.inputs.camera_model = gr.Dropdown(
                                    choices=[
                                        "PINHOLE",
                                        "SIMPLE_RADIAL",

@@ -622,7 +627,7 @@ class AppSfmUI(AppBaseUI):
                                    interactive=True,
                                )
                            with gr.Row():
-                                camera_params = gr.Textbox(
+                                self.inputs.camera_params = gr.Textbox(
                                    label="Camera Params",
                                    value="0,0,0,0",
                                    interactive=False,

@@ -631,30 +636,15 @@ class AppSfmUI(AppBaseUI):
                        camera_custom_params_cb.select(
                            fn=self._on_select_custom_params,
                            inputs=camera_custom_params_cb,
-                            outputs=camera_params,
+                            outputs=self.inputs.camera_params,
                        )

                with gr.Accordion("Matching Settings", open=True):
                    # feature extraction and matching setting
                    with gr.Row():
-                        feature_type = gr.Radio(
-                            ["sparse", "dense"],
-                            label="Feature Type",
-                            value="sparse",
-                            interactive=True,
-                        )
-                        feature_details = gr.Textbox(
-                            label="Feature Details",
-                            visible=False,
-                        )
-                        # feature_type.change(
-                        #     fn=self._update_options,
-                        #     inputs=feature_type,
-                        #     outputs=feature_details,
-                        # )
                        # matcher setting
+                        self.inputs.matcher_key = gr.Dropdown(
+                            choices=self.matcher_zoo.keys(),
                            value="disk+lightglue",
                            label="Matching Model",
                            interactive=True,

@@ -662,17 +652,29 @@ class AppSfmUI(AppBaseUI):
                with gr.Row():
                    with gr.Accordion("Advanced Settings", open=False):
                        with gr.Column():
                            with gr.Row():
                                # matching setting
+                                self.inputs.max_keypoints = gr.Slider(
+                                    label="Max Keypoints",
                                    minimum=100,
                                    maximum=10000,
                                    value=1000,
                                    interactive=True,
                                )
+                                self.inputs.keypoint_threshold = gr.Slider(
+                                    label="Keypoint Threshold",
+                                    minimum=0,
+                                    maximum=1,
+                                    value=0.01,
+                                )
+                            with gr.Row():
+                                self.inputs.match_threshold = gr.Slider(
+                                    label="Match Threshold",
+                                    minimum=0.01,
+                                    maximum=12.0,
+                                    value=0.2,
+                                )
+                                self.inputs.ransac_threshold = gr.Slider(
                                    label="Ransac Threshold",
                                    minimum=0.01,
                                    maximum=12.0,

@@ -682,7 +684,7 @@ class AppSfmUI(AppBaseUI):
                                )

                            with gr.Row():
-                                ransac_confidence = gr.Slider(
+                                self.inputs.ransac_confidence = gr.Slider(
                                    label="Ransac Confidence",
                                    minimum=0.01,
                                    maximum=1.0,

@@ -690,7 +692,7 @@ class AppSfmUI(AppBaseUI):
                                    step=0.0001,
                                    interactive=True,
                                )
-                                ransac_max_iter = gr.Slider(
+                                self.inputs.ransac_max_iter = gr.Slider(
                                    label="Ransac Max Iter",
                                    minimum=1,
                                    maximum=100,

@@ -700,7 +702,7 @@ class AppSfmUI(AppBaseUI):
                                )
                    with gr.Accordion("Scene Graph Settings", open=True):
                        # mapping setting
-                        scene_graph = gr.Dropdown(
+                        self.inputs.scene_graph = gr.Dropdown(
                            choices=["all", "swin", "oneref"],
                            value="all",
                            label="Scene Graph",

@@ -708,14 +710,20 @@ class AppSfmUI(AppBaseUI):
                        )

                        # global feature setting
-                        global_feature = gr.Dropdown(
+                        self.inputs.global_feature = gr.Dropdown(
+                            choices=self.init_retrieval_dropdown(),
                            value="netvlad",
                            label="Global features",
                            interactive=True,
                        )
+                        self.inputs.top_k = gr.Slider(
+                            label="Number of Images per Image to Match",
+                            minimum=1,
+                            maximum=100,
+                            value=10,
+                            step=1,
+                        )
+                        # button_match = gr.Button("Run Matching", variant="primary")

            # mapping setting
            with gr.Column():

@@ -723,26 +731,61 @@ class AppSfmUI(AppBaseUI):
                with gr.Row():
                    with gr.Accordion("Buddle Settings", open=True):
                        with gr.Row():
+                            self.inputs.mapper_refine_focal_length = (
+                                gr.Checkbox(
+                                    label="Refine Focal Length",
+                                    value=False,
+                                    interactive=True,
+                                )
                            )
+                            self.inputs.mapper_refine_principle_points = (
+                                gr.Checkbox(
+                                    label="Refine Principle Points",
+                                    value=False,
+                                    interactive=True,
+                                )
                            )
+                            self.inputs.mapper_refine_extra_params = (
+                                gr.Checkbox(
+                                    label="Refine Extra Params",
+                                    value=False,
+                                    interactive=True,
+                                )
                            )
-                    with gr.Accordion(
-                        "Retriangluation Settings", open=True
-                    ):
+                    with gr.Accordion("Retriangluation Settings", open=True):
                        gr.Textbox(
                            label="Retriangluation Details",
                        )
-                gr.Button("Run SFM", variant="primary")
-                model_3d = gr.Model3D(
+                button_sfm = gr.Button("Run SFM", variant="primary")
+                model_3d = gr.Model3D(
+                    interactive=True,
+                )
+                output_image = gr.Image(
+                    label="SFM Visualize",
+                    type="numpy",
+                    image_mode="RGB",
+                    interactive=False,
+                )
+
+            button_sfm.click(
+                fn=self.sfm_engine.call,
+                inputs=[
+                    self.inputs.matcher_key,
+                    self.inputs.input_images,  # images
+                    self.inputs.camera_model,
+                    self.inputs.camera_params,
+                    self.inputs.max_keypoints,
+                    self.inputs.keypoint_threshold,
+                    self.inputs.match_threshold,
+                    self.inputs.ransac_threshold,
+                    self.inputs.ransac_confidence,
+                    self.inputs.ransac_max_iter,
+                    self.inputs.scene_graph,
+                    self.inputs.global_feature,
+                    self.inputs.top_k,
+                    self.inputs.mapper_refine_focal_length,
+                    self.inputs.mapper_refine_principle_points,
+                    self.inputs.mapper_refine_extra_params,
+                ],
+                outputs=[model_3d, output_image],
+            )
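The rewritten AppSfmUI keeps every Gradio component on an EasyDict (self.inputs) and wraps its config with OmegaConf, which is what lets button_sfm.click collect components by attribute access. A minimal sketch of that pattern outside Gradio, with illustrative field names only (not taken from the repository):

from easydict import EasyDict as edict
from omegaconf import OmegaConf

# Illustrative values, not the repository's real config.
cfg = OmegaConf.create({"outputs": "experiments/sfm", "defaults": {"max_keypoints": 2000}})
inputs = edict({})
inputs.max_keypoints = cfg.defaults.max_keypoints  # attribute access works on both objects
inputs.scene_graph = "all"

# Collect members in a fixed order, as the UI does for button_sfm.click(inputs=[...]).
print([inputs.max_keypoints, inputs.scene_graph])  # [2000, 'all']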
common/config.yaml
CHANGED
@@ -403,3 +403,11 @@ matcher_zoo:
    paper: https://arxiv.org/abs/2304.14845
    project: https://feixue94.github.io/
    display: true
+
+retrieval_zoo:
+  netvlad:
+    enable: true
+  openibl:
+    enable: true
+  cosplace:
+    enable: true
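The new retrieval_zoo block is what AppSfmUI.init_retrieval_dropdown iterates to populate the "Global features" dropdown: only entries with enable: true are offered. A small sketch of that filtering, with the YAML inlined and one entry disabled purely for illustration:

import yaml

text = """
retrieval_zoo:
  netvlad:
    enable: true
  openibl:
    enable: true
  cosplace:
    enable: false
"""
cfg = yaml.safe_load(text)
algos = [k for k, v in cfg["retrieval_zoo"].items() if v.get("enable", True)]
print(algos)  # ['netvlad', 'openibl']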
common/sfm.py
ADDED
@@ -0,0 +1,164 @@
import shutil
import tempfile
from pathlib import Path
from typing import Any, Dict, List

import pycolmap

from hloc import (
    extract_features,
    logger,
    match_features,
    pairs_from_retrieval,
    reconstruction,
    visualization,
)

from .viz import fig2im


class SfmEngine:
    def __init__(self, cfg: Dict[str, Any] = None):
        self.cfg = cfg
        if "outputs" in cfg and Path(cfg["outputs"]):
            outputs = Path(cfg["outputs"])
            outputs.mkdir(parents=True, exist_ok=True)
        else:
            outputs = tempfile.mkdtemp()
        self.outputs = Path(outputs)

    def call(
        self,
        key: str,
        images: Path,
        camera_model: str,
        camera_params: List[float],
        max_keypoints: int,
        keypoint_threshold: float,
        match_threshold: float,
        ransac_threshold: int,
        ransac_confidence: float,
        ransac_max_iter: int,
        scene_graph: bool,
        global_feature: str,
        top_k: int = 10,
        mapper_refine_focal_length: bool = False,
        mapper_refine_principle_points: bool = False,
        mapper_refine_extra_params: bool = False,
    ):
        """
        Call a list of functions to perform feature extraction, matching, and reconstruction.

        Args:
            key (str): The key to retrieve the matcher and feature models.
            images (Path): The directory containing the images.
            outputs (Path): The directory to store the outputs.
            camera_model (str): The camera model.
            camera_params (List[float]): The camera parameters.
            max_keypoints (int): The maximum number of features.
            match_threshold (float): The match threshold.
            ransac_threshold (int): The RANSAC threshold.
            ransac_confidence (float): The RANSAC confidence.
            ransac_max_iter (int): The maximum number of RANSAC iterations.
            scene_graph (bool): Whether to compute the scene graph.
            global_feature (str): Whether to compute the global feature.
            top_k (int): The number of image-pair to use.
            mapper_refine_focal_length (bool): Whether to refine the focal length.
            mapper_refine_principle_points (bool): Whether to refine the principle points.
            mapper_refine_extra_params (bool): Whether to refine the extra parameters.

        Returns:
            Path: The directory containing the SfM results.
        """
        if len(images) == 0:
            logger.error(f"{images} does not exist.")

        temp_images = Path(tempfile.mkdtemp())
        # copy images
        logger.info(f"Copying images to {temp_images}.")
        for image in images:
            shutil.copy(image, temp_images)

        matcher_zoo = self.cfg["matcher_zoo"]
        model = matcher_zoo[key]
        match_conf = model["matcher"]
        match_conf["model"]["max_keypoints"] = max_keypoints
        match_conf["model"]["match_threshold"] = match_threshold

        feature_conf = model["feature"]
        feature_conf["model"]["max_keypoints"] = max_keypoints
        feature_conf["model"]["keypoint_threshold"] = keypoint_threshold

        # retrieval
        retrieval_name = self.cfg.get("retrieval_name", "netvlad")
        retrieval_conf = extract_features.confs[retrieval_name]

        mapper_options = {
            "ba_refine_extra_params": mapper_refine_extra_params,
            "ba_refine_focal_length": mapper_refine_focal_length,
            "ba_refine_principal_point": mapper_refine_principle_points,
            "ba_local_max_num_iterations": 40,
            "ba_local_max_refinements": 3,
            "ba_global_max_num_iterations": 100,
            # below 3 options are for individual/video data, for internet photos, they should be left
            # default
            "min_focal_length_ratio": 0.1,
            "max_focal_length_ratio": 10,
            "max_extra_param": 1e15,
        }

        sfm_dir = self.outputs / "sfm_{}".format(key)
        sfm_pairs = self.outputs / "pairs-sfm.txt"
        sfm_dir.mkdir(exist_ok=True, parents=True)

        # extract features
        retrieval_path = extract_features.main(
            retrieval_conf, temp_images, self.outputs
        )
        pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=top_k)

        feature_path = extract_features.main(
            feature_conf, temp_images, self.outputs
        )
        # match features
        match_path = match_features.main(
            match_conf, sfm_pairs, feature_conf["output"], self.outputs
        )
        # reconstruction
        already_sfm = False
        if sfm_dir.exists():
            try:
                model = pycolmap.Reconstruction(str(sfm_dir))
                already_sfm = True
            except ValueError:
                logger.info(f"sfm_dir not exists model: {sfm_dir}")
        if not already_sfm:
            model = reconstruction.main(
                sfm_dir,
                temp_images,
                sfm_pairs,
                feature_path,
                match_path,
                mapper_options=mapper_options,
            )

        vertices = []
        for point3D_id, point3D in model.points3D.items():
            vertices.append([point3D.xyz, point3D.color])

        model_3d = sfm_dir / "points3D.obj"
        with open(model_3d, "w") as f:
            for p, c in vertices:
                # Write vertex position
                f.write("v {} {} {}\n".format(p[0], p[1], p[2]))
                # Write vertex normal (color)
                f.write(
                    "vn {} {} {}\n".format(
                        c[0] / 255.0, c[1] / 255.0, c[2] / 255.0
                    )
                )
        viz_2d = visualization.visualize_sfm_2d(
            model, temp_images, color_by="visibility", n=2, dpi=300
        )

        return model_3d, fig2im(viz_2d) / 255.0
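SfmEngine.call chains hloc's retrieval, pair selection, feature extraction, matching and COLMAP reconstruction, then exports the sparse points as a .obj for gr.Model3D. A hedged usage sketch follows; the matcher_zoo entry is assembled here from hloc's built-in confs purely for illustration (the web app passes its own zoo from common/config.yaml), and the conf names and image paths are assumptions:

from pathlib import Path

from common.sfm import SfmEngine
from hloc import extract_features, match_features

# Assumed conf names; the real zoo lives in common/config.yaml.
cfg = {
    "matcher_zoo": {
        "disk+lightglue": {
            "feature": extract_features.confs["disk"],
            "matcher": match_features.confs["disk+lightglue"],
        }
    },
    "outputs": "experiments/sfm",
}
engine = SfmEngine(cfg)

model_3d_path, viz_image = engine.call(
    key="disk+lightglue",
    images=sorted(Path("datasets/my_scene").glob("*.jpg")),  # placeholder path
    camera_model="PINHOLE",
    camera_params="0,0,0,0",
    max_keypoints=1000,
    keypoint_threshold=0.01,
    match_threshold=0.2,
    ransac_threshold=4,
    ransac_confidence=0.9999,
    ransac_max_iter=32,
    scene_graph="all",
    global_feature="netvlad",
    top_k=10,
)
print(model_3d_path)  # .../sfm_disk+lightglue/points3D.obj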
hloc/colmap_from_nvm.py
CHANGED
@@ -25,7 +25,9 @@ def recover_database_images_and_ids(database_path):
        images[name] = image_id
        cameras[name] = camera_id
    db.close()
+    logger.info(
+        f"Found {len(images)} images and {len(cameras)} cameras in database."
+    )
    return images, cameras

@@ -34,9 +36,21 @@ def quaternion_to_rotation_matrix(qvec):
    w, x, y, z = qvec
    R = np.array(
        [
+            [
+                1 - 2 * y * y - 2 * z * z,
+                2 * x * y - 2 * z * w,
+                2 * x * z + 2 * y * w,
+            ],
+            [
+                2 * x * y + 2 * z * w,
+                1 - 2 * x * x - 2 * z * z,
+                2 * y * z - 2 * x * w,
+            ],
+            [
+                2 * x * z - 2 * y * w,
+                2 * y * z + 2 * x * w,
+                1 - 2 * x * x - 2 * y * y,
+            ],
        ]
    )
    return R

@@ -47,7 +61,9 @@ def camera_center_to_translation(c, qvec):
    return (-1) * np.matmul(R, c)


+def read_nvm_model(
+    nvm_path, intrinsics_path, image_ids, camera_ids, skip_points=False
+):
    with open(intrinsics_path, "r") as f:
        raw_intrinsics = f.readlines()
hloc/extract_features.py
CHANGED
@@ -1,6 +1,5 @@
 import argparse
 import collections.abc as collections
-import glob
 import pprint
 from pathlib import Path
 from types import SimpleNamespace

@@ -330,6 +329,11 @@ confs = {
        "model": {"name": "cosplace"},
        "preprocessing": {"resize_max": 1024},
    },
+    "eigenplaces": {
+        "output": "global-feats-eigenplaces",
+        "model": {"name": "eigenplaces"},
+        "preprocessing": {"resize_max": 1024},
+    },
}
hloc/extractors/eigenplaces.py
ADDED
@@ -0,0 +1,57 @@
"""
Code for loading models trained with EigenPlaces (or CosPlace) as a global
features extractor for geolocalization through image retrieval.
Multiple models are available with different backbones. Below is a summary of
models available (backbone : list of available output descriptors
dimensionality). For example you can use a model based on a ResNet50 with
descriptors dimensionality 1024.

EigenPlaces trained models:
    ResNet18:  [ 256, 512]
    ResNet50:  [128, 256, 512, 2048]
    ResNet101: [128, 256, 512, 2048]
    VGG16:     [ 512]

CosPlace trained models:
    ResNet18:  [32, 64, 128, 256, 512]
    ResNet50:  [32, 64, 128, 256, 512, 1024, 2048]
    ResNet101: [32, 64, 128, 256, 512, 1024, 2048]
    ResNet152: [32, 64, 128, 256, 512, 1024, 2048]
    VGG16:     [ 64, 128, 256, 512]

EigenPlaces paper (ICCV 2023): https://arxiv.org/abs/2308.10832
CosPlace paper (CVPR 2022): https://arxiv.org/abs/2204.02287
"""

import torch
import torchvision.transforms as tvf

from ..utils.base_model import BaseModel


class EigenPlaces(BaseModel):
    default_conf = {
        "variant": "EigenPlaces",
        "backbone": "ResNet101",
        "fc_output_dim": 2048,
    }
    required_inputs = ["image"]

    def _init(self, conf):
        self.net = torch.hub.load(
            "gmberton/" + conf["variant"],
            "get_trained_model",
            backbone=conf["backbone"],
            fc_output_dim=conf["fc_output_dim"],
        ).eval()

        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        self.norm_rgb = tvf.Normalize(mean=mean, std=std)

    def _forward(self, data):
        image = self.norm_rgb(data["image"])
        desc = self.net(image)
        return {
            "global_descriptor": desc,
        }
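The extractor defers all weights to the torch.hub entry point published by the EigenPlaces/CosPlace authors ("gmberton/" + variant, "get_trained_model"). A standalone sketch of the same load with the default conf above; the model is downloaded on first use and the random tensor stands in for a normalized RGB batch:

import torch

net = torch.hub.load(
    "gmberton/EigenPlaces",
    "get_trained_model",
    backbone="ResNet101",
    fc_output_dim=2048,
).eval()

image = torch.rand(1, 3, 512, 512)  # placeholder for a normalized RGB batch
with torch.no_grad():
    descriptor = net(image)
print(descriptor.shape)  # torch.Size([1, 2048])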
hloc/localize_inloc.py
CHANGED
@@ -24,7 +24,9 @@ def interpolate_scan(scan, kp):

    # To maximize the number of points that have depth:
    # do bilinear interpolation first and then nearest for the remaining points
+    interp_lin = grid_sample(scan, kp, align_corners=True, mode="bilinear")[
+        0, :, 0
+    ]
    interp_nn = torch.nn.functional.grid_sample(
        scan, kp, align_corners=True, mode="nearest"
    )[0, :, 0]

@@ -64,7 +66,9 @@ def get_scan_pose(dataset_dir, rpath):
    return P_after_GICP


+def pose_from_cluster(
+    dataset_dir, q, retrieved, feature_file, match_file, skip=None
+):
    height, width = cv2.imread(str(dataset_dir / q)).shape[:2]
    cx = 0.5 * width
    cy = 0.5 * height
hloc/localize_sfm.py
CHANGED
@@ -40,7 +40,9 @@ def do_covisibility_clustering(
                obs.image_id
                for p2D in observed
                if p2D.has_point3D()
+                for obs in reconstruction.points3D[
+                    p2D.point3D_id
+                ].track.elements
            }
            connected_frames &= set(frame_ids)
            connected_frames -= visited

@@ -149,7 +151,10 @@ def main(
    reference_sfm = pycolmap.Reconstruction(reference_sfm)
    db_name_to_id = {img.name: i for i, img in reference_sfm.images.items()}

+    config = {
+        "estimation": {"ransac": {"max_error": ransac_thresh}},
+        **(config or {}),
+    }
    localizer = QueryLocalizer(reference_sfm, config)

    cam_from_world = {}

@@ -162,7 +167,9 @@ def main(
    logger.info("Starting localization...")
    for qname, qcam in tqdm(queries):
        if qname not in retrieval_dict:
+            logger.warning(
+                f"No images retrieved for query image {qname}. Skipping..."
+            )
            continue
        db_names = retrieval_dict[qname]
        db_ids = []
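The restored config merge builds a default RANSAC max_error and then unpacks the caller's config last, so a caller-supplied "estimation" block replaces the default. A small sketch of that behaviour:

ransac_thresh = 12.0

config = None
merged = {"estimation": {"ransac": {"max_error": ransac_thresh}}, **(config or {})}
print(merged)  # default kept: {'estimation': {'ransac': {'max_error': 12.0}}}

config = {"estimation": {"ransac": {"max_error": 4.0}}}
merged = {"estimation": {"ransac": {"max_error": ransac_thresh}}, **(config or {})}
print(merged)  # caller's value wins: {'estimation': {'ransac': {'max_error': 4.0}}}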
hloc/match_dense.py
CHANGED
@@ -13,8 +13,9 @@ import torch
 import torchvision.transforms.functional as F
 from scipy.spatial import KDTree
 from tqdm import tqdm
+
 from . import logger, matchers
+from .extract_features import read_image, resize_image
 from .match_features import find_unique_new_pairs
 from .utils.base_model import dynamic_load
 from .utils.io import list_h5_names

@@ -288,6 +289,7 @@ confs = {
    },
}

+
def to_cpts(kpts, ps):
    if ps > 0.0:
        kpts = np.round(np.round((kpts + 0.5) / ps) * ps - 0.5, 2)

@@ -379,11 +381,13 @@ def kpids_to_matches0(kpt_ids0, kpt_ids1, scores):
    matches, scores = get_unique_matches(matches, scores)
    return matches_to_matches0(matches, scores)

+
def scale_keypoints(kpts, scale):
    if np.any(scale != 1.0):
        kpts *= kpts.new_tensor(scale)
    return kpts

+
class ImagePairDataset(torch.utils.data.Dataset):
    default_conf = {
        "grayscale": True,

@@ -398,7 +402,9 @@ class ImagePairDataset(torch.utils.data.Dataset):
        self.pairs = pairs
        if self.conf.cache_images:
            image_names = set(sum(pairs, ()))  # unique image names in pairs
+            logger.info(
+                f"Loading and caching {len(image_names)} unique images."
+            )
            self.images = {}
            self.scales = {}
            for name in tqdm(image_names):

@@ -570,7 +576,9 @@ def aggregate_matches(
    required_queries -= set(list_h5_names(feature_path))

    # if an entry in cpdict is provided as np.ndarray we assume it is fixed
+    required_queries -= set(
+        [k for k, v in cpdict.items() if isinstance(v, np.ndarray)]
+    )

    # sort pairs for reduced RAM
    pairs_per_q = Counter(list(chain(*pairs)))

@@ -578,7 +586,9 @@ def aggregate_matches(
    pairs = [p for _, p in sorted(zip(pairs_score, pairs))]

    if len(required_queries) > 0:
+        logger.info(
+            f"Aggregating keypoints for {len(required_queries)} images."
+        )
        n_kps = 0
        with h5py.File(str(match_path), "a") as fd:
            for name0, name1 in tqdm(pairs, smoothing=0.1):

@@ -756,6 +766,7 @@ def match_and_assign(
    logger.info(f'Reassign matches with max_error={conf["max_error"]}.')
    assign_matches(pairs, match_path, cpdict, max_error=conf["max_error"])

+
def scale_lines(lines, scale):
    if np.any(scale != 1.0):
        lines *= lines.new_tensor(scale)

@@ -972,6 +983,7 @@ def match_images(model, image_0, image_1, conf, device="cpu"):
        torch.cuda.empty_cache()
    return ret

+
@torch.no_grad()
def main(
    conf: Dict,

@@ -985,7 +997,8 @@ def main(
    overwrite: bool = False,
) -> Path:
    logger.info(
+        "Extracting semi-dense features with configuration:"
+        f"\n{pprint.pformat(conf)}"
    )

    if features is None:

@@ -995,7 +1008,8 @@ def main(
        features_q = features
        if matches is None:
            raise ValueError(
+                "Either provide both features and matches as Path"
+                " or both as names."
            )
    else:
        if export_dir is None:

@@ -1017,7 +1031,14 @@ def main(
        raise TypeError(str(features_ref))

    match_and_assign(
+        conf,
+        pairs,
+        image_dir,
+        matches,
+        features_q,
+        features_ref,
+        max_kps,
+        overwrite,
    )

    return features_q, matches

@@ -1028,11 +1049,15 @@ if __name__ == "__main__":
    parser.add_argument("--pairs", type=Path, required=True)
    parser.add_argument("--image_dir", type=Path, required=True)
    parser.add_argument("--export_dir", type=Path, required=True)
+    parser.add_argument(
+        "--matches", type=Path, default=confs["loftr"]["output"]
+    )
    parser.add_argument(
        "--features", type=str, default="feats_" + confs["loftr"]["output"]
    )
+    parser.add_argument(
+        "--conf", type=str, default="loftr", choices=list(confs.keys())
+    )
    args = parser.parse_args()
    main(
        confs[args.conf],

@@ -1042,4 +1067,3 @@ if __name__ == "__main__":
        args.matches,
        args.features,
    )
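With the two restored arguments (--matches and --conf), the module's CLI defaults line up with the "loftr" conf, and the same pipeline can be driven from Python. A hedged sketch, passing arguments positionally exactly as the __main__ block above does; the paths are placeholders:

from pathlib import Path

from hloc import match_dense

features_path, matches_path = match_dense.main(
    match_dense.confs["loftr"],
    Path("outputs/pairs-sfm.txt"),  # pairs file, placeholder
    Path("datasets/my_scene"),      # image directory, placeholder
    Path("outputs"),                # export directory, placeholder
)
print(features_path, matches_path)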
hloc/matchers/mast3r.py
CHANGED
@@ -8,7 +8,6 @@ import torch
 import torchvision.transforms as tfm

 from .. import logger
-from ..utils.base_model import BaseModel

 mast3r_path = Path(__file__).parent / "../../third_party/mast3r"
 sys.path.append(str(mast3r_path))

@@ -16,12 +15,11 @@ sys.path.append(str(mast3r_path))
 dust3r_path = Path(__file__).parent / "../../third_party/dust3r"
 sys.path.append(str(dust3r_path))

-from mast3r.model import AsymmetricMASt3R
-from mast3r.fast_nn import fast_reciprocal_NNs
-
 from dust3r.image_pairs import make_pairs
 from dust3r.inference import inference
+from mast3r.fast_nn import fast_reciprocal_NNs
+from mast3r.model import AsymmetricMASt3R
+
 from hloc.matchers.duster import Duster

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -70,8 +68,8 @@ class Mast3r(Duster):
        output = inference(pairs, self.net, device, batch_size=1)

        # at this stage, you have the raw dust3r predictions
+        _, pred1 = output["view1"], output["pred1"]
+        _, pred2 = output["view2"], output["pred2"]

        desc1, desc2 = (
            pred1["desc"][1].squeeze(0).detach(),
hloc/matchers/superglue.py
CHANGED
@@ -4,7 +4,9 @@ from pathlib import Path
from ..utils.base_model import BaseModel

sys.path.append(str(Path(__file__).parent / "../../third_party"))
+from SuperGluePretrainedNetwork.models.superglue import (  # noqa: E402
+    SuperGlue as SG,
+)


class SuperGlue(BaseModel):
hloc/pairs_from_exhaustive.py
CHANGED
@@ -34,7 +34,9 @@ def main(
    elif isinstance(image_list, collections.Iterable):
        names_ref = list(ref_list)
    else:
+        raise ValueError(
+            f"Unknown type for reference image list: {ref_list}"
+        )
    elif ref_features is not None:
        names_ref = list_h5_names(ref_features)
    else:
hloc/pairs_from_poses.py
CHANGED
@@ -63,6 +63,8 @@ if __name__ == "__main__":
    parser.add_argument("--model", required=True, type=Path)
    parser.add_argument("--output", required=True, type=Path)
    parser.add_argument("--num_matched", required=True, type=int)
+    parser.add_argument(
+        "--rotation_threshold", default=DEFAULT_ROT_THRESH, type=float
+    )
    args = parser.parse_args()
    main(**args.__dict__)
hloc/pairs_from_retrieval.py
CHANGED
@@ -19,7 +19,9 @@ def parse_names(prefix, names, names_all):
        prefix = tuple(prefix)
        names = [n for n in names_all if n.startswith(prefix)]
        if len(names) == 0:
+            raise ValueError(
+                f"Could not find any image with the prefix `{prefix}`."
+            )
    elif names is not None:
        if isinstance(names, (str, Path)):
            names = parse_image_lists(names)

@@ -90,7 +92,9 @@ def main(
        db_descriptors = descriptors
    if isinstance(db_descriptors, (Path, str)):
        db_descriptors = [db_descriptors]
+    name2db = {
+        n: i for i, p in enumerate(db_descriptors) for n in list_h5_names(p)
+    }
    db_names_h5 = list(name2db.keys())
    query_names_h5 = list_h5_names(descriptors)
hloc/reconstruction.py
CHANGED
@@ -93,13 +93,16 @@ def run_reconstruction(
            largest_num_images = num_images
    assert largest_index is not None
    logger.info(
+        f"Largest model is #{largest_index} "
+        f"with {largest_num_images} images."
    )

    for filename in ["images.bin", "cameras.bin", "points3D.bin"]:
        if (sfm_dir / filename).exists():
            (sfm_dir / filename).unlink()
+        shutil.move(
+            str(models_path / str(largest_index) / filename), str(sfm_dir)
+        )
    return reconstructions[largest_index]

@@ -172,7 +175,9 @@ if __name__ == "__main__":
        "--image_options",
        nargs="+",
        default=[],
+        help="List of key=value from {}".format(
+            pycolmap.ImageReaderOptions().todict()
+        ),
    )
    parser.add_argument(
        "--mapper_options",
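The restored help string is generated from pycolmap's own ImageReaderOptions, so the accepted --image_options keys track the installed pycolmap version. A one-line sketch to list them:

import pycolmap

print(sorted(pycolmap.ImageReaderOptions().todict().keys()))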
hloc/triangulation.py
CHANGED
@@ -118,7 +118,9 @@ def estimation_and_geometric_verification(
    pycolmap.verify_matches(
        database_path,
        pairs_path,
+        options=dict(
+            ransac=dict(max_num_trials=20000, min_inlier_ratio=0.1)
+        ),
    )

@@ -142,7 +144,9 @@ def geometric_verification(
        id0 = image_ids[name0]
        image0 = reference.images[id0]
        cam0 = reference.cameras[image0.camera_id]
+        kps0, noise0 = get_keypoints(
+            features_path, name0, return_uncertainty=True
+        )
        noise0 = 1.0 if noise0 is None else noise0
        if len(kps0) > 0:
            kps0 = np.stack(cam0.cam_from_img(kps0))

@@ -153,7 +157,9 @@ def geometric_verification(
            id1 = image_ids[name1]
            image1 = reference.images[id1]
            cam1 = reference.cameras[image1.camera_id]
+            kps1, noise1 = get_keypoints(
+                features_path, name1, return_uncertainty=True
+            )
            noise1 = 1.0 if noise1 is None else noise1
            if len(kps1) > 0:
                kps1 = np.stack(cam1.cam_from_img(kps1))

@@ -170,7 +176,9 @@ def geometric_verification(
                db.add_two_view_geometry(id0, id1, matches)
                continue

+            cam1_from_cam0 = (
+                image1.cam_from_world * image0.cam_from_world.inverse()
+            )
            errors0, errors1 = compute_epipolar_errors(
                cam1_from_cam0, kps0[matches[:, 0]], kps1[matches[:, 1]]
            )

@@ -209,7 +217,11 @@ def run_triangulation(
    with OutputCapture(verbose):
        with pycolmap.ostream():
            reconstruction = pycolmap.triangulate_points(
+                reference_model,
+                database_path,
+                image_dir,
+                model_path,
+                options=options,
            )
    return reconstruction

@@ -257,7 +269,8 @@ def main(
        sfm_dir, database, image_dir, reference, verbose, mapper_options
    )
    logger.info(
+        "Finished the triangulation with statistics:\n%s",
+        reconstruction.summary(),
    )
    return reconstruction

@@ -278,7 +291,8 @@ def parse_option_args(args: List[str], default_options) -> Dict[str, Any]:
        target_type = type(getattr(default_options, key))
        if not isinstance(value, target_type):
            raise ValueError(
+                f'Incorrect type for option "{key}":'
+                f" {type(value)} vs {target_type}"
            )
        options[key] = value
    return options
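The restored relative pose composes two world-to-camera transforms, cam1_from_cam0 = cam1_from_world * cam0_from_world.inverse(). A sketch of the same composition written with 4x4 homogeneous matrices instead of pycolmap's Rigid3d objects; the poses here are made up:

import numpy as np

def make_pose(R, t):
    T = np.eye(4)
    T[:3, :3] = R
    T[:3, 3] = t
    return T

cam0_from_world = make_pose(np.eye(3), np.array([0.0, 0.0, 1.0]))
Rz = np.array([[0.0, -1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])  # 90 deg about z
cam1_from_world = make_pose(Rz, np.array([0.5, 0.0, 1.0]))

cam1_from_cam0 = cam1_from_world @ np.linalg.inv(cam0_from_world)
# Maps a point given in camera-0 coordinates into camera-1 coordinates.
print(cam1_from_cam0 @ np.array([1.0, 2.0, 3.0, 1.0]))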
hloc/utils/viz.py
CHANGED
@@ -49,7 +49,7 @@ def plot_images(
        if titles:
            ax.set_title(titles[i])
    fig.tight_layout(pad=pad)
+    return fig

def plot_keypoints(kpts, colors="lime", ps=4):
    """Plot keypoints for existing images.
hloc/visualization.py
CHANGED
@@ -6,11 +6,23 @@ import pycolmap
 from matplotlib import cm

 from .utils.io import read_image
+from .utils.viz import (
+    add_text,
+    cm_RdGn,
+    plot_images,
+    plot_keypoints,
+    plot_matches,
+)


 def visualize_sfm_2d(
+    reconstruction,
+    image_dir,
+    color_by="visibility",
+    selected=[],
+    n=1,
+    seed=0,
+    dpi=75,
 ):
     assert image_dir.exists()
     if not isinstance(reconstruction, pycolmap.Reconstruction):

@@ -31,9 +43,11 @@ def visualize_sfm_2d(
    elif color_by == "track_length":
        tl = np.array(
            [
+                (
+                    reconstruction.points3D[p.point3D_id].track.length()
+                    if p.has_point3D()
+                    else 1
+                )
                for p in image.points2D
            ]
        )

@@ -57,10 +71,11 @@ def visualize_sfm_2d(
        raise NotImplementedError(f"Coloring not implemented: {color_by}.")

    name = image.name
-    plot_images([read_image(image_dir / name)], dpi=dpi)
+    fig = plot_images([read_image(image_dir / name)], dpi=dpi)
    plot_keypoints([keypoints], colors=[color], ps=4)
    add_text(0, text)
    add_text(0, name, pos=(0.01, 0.01), fs=5, lcolor=None, va="bottom")
+    return fig


def visualize_loc(

@@ -121,7 +136,9 @@ def visualize_loc_from_log(
    counts = np.zeros(n)
    dbs_kp_q_db = [[] for _ in range(n)]
    inliers_dbs = [[] for _ in range(n)]
+    for i, (inl, (p3D_id, db_idxs)) in enumerate(
+        zip(inliers, kp_to_3D_to_db)
+    ):
        track = reconstruction.points3D[p3D_id].track
        track = {el.image_id: el.point2D_idx for el in track.elements}
        for db_idx in db_idxs:

@@ -133,7 +150,9 @@ def visualize_loc_from_log(
        # for inloc the database keypoints are already in the logs
        assert "keypoints_db" in loc
        assert "indices_db" in loc
+        counts = np.array(
+            [np.sum(loc["indices_db"][inliers] == i) for i in range(n)]
+        )

    # display the database images with the most inlier matches
    db_sort = np.argsort(-counts)
requirements.txt
CHANGED
@@ -16,7 +16,7 @@ opencv-python==4.6.0.66
pandas==2.0.3
plotly==5.15.0
protobuf==4.23.2
+pycolmap==0.6.0
pytlsd==0.0.2
pytorch-lightning==1.4.9
PyYAML==6.0
@@ -34,4 +34,5 @@ onnxruntime
poselib
roma #dust3r
huggingface_hub
-psutil
+psutil
+easydict
|