Realcat committed
Commit 57c1094
1 Parent(s): d521fb7

update: sfm

common/app_class.py CHANGED

@@ -3,7 +3,10 @@ from typing import Any, Dict, Optional, Tuple

import gradio as gr
import numpy as np
+ from easydict import EasyDict as edict
+ from omegaconf import OmegaConf

+ from common.sfm import SfmEngine
from common.utils import (
GRADIO_VERSION,
gen_examples,

@@ -115,7 +118,7 @@ class ImageMatchingApp:
label="Match thres.",
value=0.1,
)
- match_setting_max_features = gr.Slider(
+ match_setting_max_keypoints = gr.Slider(
minimum=10,
maximum=10000,
step=10,

@@ -199,7 +202,7 @@ class ImageMatchingApp:
input_image0,
input_image1,
match_setting_threshold,
- match_setting_max_features,
+ match_setting_max_keypoints,
detect_keypoints_threshold,
matcher_list,
ransac_method,

@@ -314,7 +317,7 @@ class ImageMatchingApp:
input_image0,
input_image1,
match_setting_threshold,
- match_setting_max_features,
+ match_setting_max_keypoints,
detect_keypoints_threshold,
matcher_list,
input_image0,

@@ -378,14 +381,14 @@ class ImageMatchingApp:
outputs=[output_wrapped, geometry_result],
)
with gr.Tab("Structure from Motion(under-dev)"):
- self.init_tab_sfm()
-
- def init_tab_sfm(self):
- sfm_ui = AppSfmUI()
- sfm_ui.set_local_features(["disk", "superpoint"])
- sfm_ui.set_matchers(["disk+lightglue", "superpoint+lightglue"])
- sfm_ui.set_global_features(["netvlad", "mixvpr"])
- sfm_ui.call()
+ sfm_ui = AppSfmUI(  # noqa: F841
+ {
+ **self.cfg,
+ "matcher_zoo": self.matcher_zoo,
+ "outputs": "experiments/sfm",
+ }
+ )
+ # sfm_ui.call()

def run(self):
self.app.queue().launch(

@@ -459,7 +462,7 @@ class ImageMatchingApp:
self.cfg["defaults"][
"match_threshold"
], # matching_threshold: float
- self.cfg["defaults"]["max_keypoints"], # max_features: int
+ self.cfg["defaults"]["max_keypoints"], # max_keypoints: int
self.cfg["defaults"][
"keypoint_threshold"
], # keypoint_threshold: float

@@ -546,8 +549,9 @@ class ImageMatchingApp:


class AppBaseUI:
- def __init__(self, cfg: Dict[str, Any] = None):
- self.cfg = cfg
+ def __init__(self, cfg: Dict[str, Any] = {}):
+ self.cfg = OmegaConf.create(cfg)
+ self.inputs = edict({})

def _init_ui(self):
NotImplemented

@@ -559,9 +563,16 @@ class AppBaseUI:
class AppSfmUI(AppBaseUI):
def __init__(self, cfg: Dict[str, Any] = None):
super().__init__(cfg)
- self.matchers = None
- self.features = None
- self.global_features = None
+ assert "matcher_zoo" in self.cfg
+ self.matcher_zoo = self.cfg["matcher_zoo"]
+ self.sfm_engine = SfmEngine(cfg)
+
+ def init_retrieval_dropdown(self):
+ algos = []
+ for k, v in self.cfg["retrieval_zoo"].items():
+ if v.get("enable", True):
+ algos.append(k)
+ return algos

def _update_options(self, option):
if option == "sparse":

@@ -571,15 +582,6 @@ class AppSfmUI(AppBaseUI):
else:
return gr.Textbox("not set", visible=True)

- def set_local_features(self, features):
- self.features = features
-
- def set_global_features(self, features):
- self.global_features = features
-
- def set_matchers(self, matchers):
- self.matchers = matchers
-
def _on_select_custom_params(self, value: bool = False):
return gr.Textbox(
label="Camera Params",

@@ -592,15 +594,18 @@ class AppSfmUI(AppBaseUI):
with gr.Row():
# data settting and camera settings
with gr.Column():
- input_images = gr.File(
- label="SfM", interactive=True, file_count="multiple"
+ self.inputs.input_images = gr.File(
+ label="SfM",
+ interactive=True,
+ file_count="multiple",
+ min_width=300,
)
# camera setting
with gr.Accordion("Camera Settings", open=True):
with gr.Column():
with gr.Row():
with gr.Column():
- camera_model = gr.Dropdown(
+ self.inputs.camera_model = gr.Dropdown(
choices=[
"PINHOLE",
"SIMPLE_RADIAL",

@@ -622,7 +627,7 @@ class AppSfmUI(AppBaseUI):
interactive=True,
)
with gr.Row():
- camera_params = gr.Textbox(
+ self.inputs.camera_params = gr.Textbox(
label="Camera Params",
value="0,0,0,0",
interactive=False,

@@ -631,30 +636,15 @@ class AppSfmUI(AppBaseUI):
camera_custom_params_cb.select(
fn=self._on_select_custom_params,
inputs=camera_custom_params_cb,
- outputs=camera_params,
+ outputs=self.inputs.camera_params,
)

with gr.Accordion("Matching Settings", open=True):
# feature extraction and matching setting
with gr.Row():
- feature_type = gr.Radio(
- ["sparse", "dense"],
- label="Feature Type",
- value="sparse",
- interactive=True,
- )
- feature_details = gr.Textbox(
- label="Feature Details",
- visible=False,
- )
- # feature_type.change(
- # fn=self._update_options,
- # inputs=feature_type,
- # outputs=feature_details,
- # )
# matcher setting
- matcher_name = gr.Dropdown(
- choices=self.matchers,
+ self.inputs.matcher_key = gr.Dropdown(
+ choices=self.matcher_zoo.keys(),
value="disk+lightglue",
label="Matching Model",
interactive=True,

@@ -662,17 +652,29 @@ class AppSfmUI(AppBaseUI):
with gr.Row():
with gr.Accordion("Advanced Settings", open=False):
with gr.Column():
-
with gr.Row():
# matching setting
- max_features = gr.Slider(
- label="Max Features",
+ self.inputs.max_keypoints = gr.Slider(
+ label="Max Keypoints",
minimum=100,
maximum=10000,
value=1000,
interactive=True,
)
- ransac_threshold = gr.Slider(
+ self.inputs.keypoint_threshold = gr.Slider(
+ label="Keypoint Threshold",
+ minimum=0,
+ maximum=1,
+ value=0.01,
+ )
+ with gr.Row():
+ self.inputs.match_threshold = gr.Slider(
+ label="Match Threshold",
+ minimum=0.01,
+ maximum=12.0,
+ value=0.2,
+ )
+ self.inputs.ransac_threshold = gr.Slider(
label="Ransac Threshold",
minimum=0.01,
maximum=12.0,

@@ -682,7 +684,7 @@ class AppSfmUI(AppBaseUI):
)

with gr.Row():
- ransac_confidence = gr.Slider(
+ self.inputs.ransac_confidence = gr.Slider(
label="Ransac Confidence",
minimum=0.01,
maximum=1.0,

@@ -690,7 +692,7 @@ class AppSfmUI(AppBaseUI):
step=0.0001,
interactive=True,
)
- ransac_max_iter = gr.Slider(
+ self.inputs.ransac_max_iter = gr.Slider(
label="Ransac Max Iter",
minimum=1,
maximum=100,

@@ -700,7 +702,7 @@ class AppSfmUI(AppBaseUI):
)
with gr.Accordion("Scene Graph Settings", open=True):
# mapping setting
- scene_graph = gr.Dropdown(
+ self.inputs.scene_graph = gr.Dropdown(
choices=["all", "swin", "oneref"],
value="all",
label="Scene Graph",

@@ -708,14 +710,20 @@ class AppSfmUI(AppBaseUI):
)

# global feature setting
- global_feature = gr.Dropdown(
- choices=self.global_features,
+ self.inputs.global_feature = gr.Dropdown(
+ choices=self.init_retrieval_dropdown(),
value="netvlad",
label="Global features",
interactive=True,
)
-
- button_match = gr.Button("Run Matching", variant="primary")
+ self.inputs.top_k = gr.Slider(
+ label="Number of Images per Image to Match",
+ minimum=1,
+ maximum=100,
+ value=10,
+ step=1,
+ )
+ # button_match = gr.Button("Run Matching", variant="primary")

# mapping setting
with gr.Column():

@@ -723,26 +731,61 @@ class AppSfmUI(AppBaseUI):
with gr.Row():
with gr.Accordion("Buddle Settings", open=True):
with gr.Row():
- mapper_refine_focal_length = gr.Checkbox(
- label="Refine Focal Length",
- value=False,
- interactive=True,
+ self.inputs.mapper_refine_focal_length = (
+ gr.Checkbox(
+ label="Refine Focal Length",
+ value=False,
+ interactive=True,
+ )
)
- mapper_refine_principle_points = gr.Checkbox(
- label="Refine Principle Points",
- value=False,
- interactive=True,
+ self.inputs.mapper_refine_principle_points = (
+ gr.Checkbox(
+ label="Refine Principle Points",
+ value=False,
+ interactive=True,
+ )
)
- mapper_refine_extra_params = gr.Checkbox(
- label="Refine Extra Params",
- value=False,
- interactive=True,
+ self.inputs.mapper_refine_extra_params = (
+ gr.Checkbox(
+ label="Refine Extra Params",
+ value=False,
+ interactive=True,
+ )
)
- with gr.Accordion(
- "Retriangluation Settings", open=True
- ):
+ with gr.Accordion("Retriangluation Settings", open=True):
gr.Textbox(
label="Retriangluation Details",
)
- gr.Button("Run SFM", variant="primary")
- model_3d = gr.Model3D()
+ button_sfm = gr.Button("Run SFM", variant="primary")
+ model_3d = gr.Model3D(
+ interactive=True,
+ )
+ output_image = gr.Image(
+ label="SFM Visualize",
+ type="numpy",
+ image_mode="RGB",
+ interactive=False,
+ )
+
+ button_sfm.click(
+ fn=self.sfm_engine.call,
+ inputs=[
+ self.inputs.matcher_key,
+ self.inputs.input_images,  # images
+ self.inputs.camera_model,
+ self.inputs.camera_params,
+ self.inputs.max_keypoints,
+ self.inputs.keypoint_threshold,
+ self.inputs.match_threshold,
+ self.inputs.ransac_threshold,
+ self.inputs.ransac_confidence,
+ self.inputs.ransac_max_iter,
+ self.inputs.scene_graph,
+ self.inputs.global_feature,
+ self.inputs.top_k,
+ self.inputs.mapper_refine_focal_length,
+ self.inputs.mapper_refine_principle_points,
+ self.inputs.mapper_refine_extra_params,
+ ],
+ outputs=[model_3d, output_image],
+ )
common/config.yaml CHANGED

@@ -403,3 +403,11 @@ matcher_zoo:
paper: https://arxiv.org/abs/2304.14845
project: https://feixue94.github.io/
display: true
+
+ retrieval_zoo:
+   netvlad:
+     enable: true
+   openibl:
+     enable: true
+   cosplace:
+     enable: true
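For reference, a minimal sketch (not part of this commit) of how the new retrieval_zoo section is consumed by AppSfmUI.init_retrieval_dropdown above: only entries whose enable flag is true (the default) end up in the "Global features" dropdown. The disabled cosplace entry below is a hypothetical illustration.

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "retrieval_zoo": {
            "netvlad": {"enable": True},
            "openibl": {"enable": True},
            "cosplace": {"enable": False},  # hypothetical: hide this algorithm
        }
    }
)

# same filtering logic as AppSfmUI.init_retrieval_dropdown
algos = [k for k, v in cfg["retrieval_zoo"].items() if v.get("enable", True)]
print(algos)  # ['netvlad', 'openibl']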
common/sfm.py ADDED

@@ -0,0 +1,164 @@
+ import shutil
+ import tempfile
+ from pathlib import Path
+ from typing import Any, Dict, List
+
+ import pycolmap
+
+ from hloc import (
+ extract_features,
+ logger,
+ match_features,
+ pairs_from_retrieval,
+ reconstruction,
+ visualization,
+ )
+
+ from .viz import fig2im
+
+
+ class SfmEngine:
+ def __init__(self, cfg: Dict[str, Any] = None):
+ self.cfg = cfg
+ if "outputs" in cfg and Path(cfg["outputs"]):
+ outputs = Path(cfg["outputs"])
+ outputs.mkdir(parents=True, exist_ok=True)
+ else:
+ outputs = tempfile.mkdtemp()
+ self.outputs = Path(outputs)
+
+ def call(
+ self,
+ key: str,
+ images: Path,
+ camera_model: str,
+ camera_params: List[float],
+ max_keypoints: int,
+ keypoint_threshold: float,
+ match_threshold: float,
+ ransac_threshold: int,
+ ransac_confidence: float,
+ ransac_max_iter: int,
+ scene_graph: bool,
+ global_feature: str,
+ top_k: int = 10,
+ mapper_refine_focal_length: bool = False,
+ mapper_refine_principle_points: bool = False,
+ mapper_refine_extra_params: bool = False,
+ ):
+ """
+ Call a list of functions to perform feature extraction, matching, and reconstruction.
+
+ Args:
+ key (str): The key to retrieve the matcher and feature models.
+ images (Path): The directory containing the images.
+ outputs (Path): The directory to store the outputs.
+ camera_model (str): The camera model.
+ camera_params (List[float]): The camera parameters.
+ max_keypoints (int): The maximum number of features.
+ match_threshold (float): The match threshold.
+ ransac_threshold (int): The RANSAC threshold.
+ ransac_confidence (float): The RANSAC confidence.
+ ransac_max_iter (int): The maximum number of RANSAC iterations.
+ scene_graph (bool): Whether to compute the scene graph.
+ global_feature (str): Whether to compute the global feature.
+ top_k (int): The number of image-pair to use.
+ mapper_refine_focal_length (bool): Whether to refine the focal length.
+ mapper_refine_principle_points (bool): Whether to refine the principle points.
+ mapper_refine_extra_params (bool): Whether to refine the extra parameters.
+
+ Returns:
+ Path: The directory containing the SfM results.
+ """
+ if len(images) == 0:
+ logger.error(f"{images} does not exist.")
+
+ temp_images = Path(tempfile.mkdtemp())
+ # copy images
+ logger.info(f"Copying images to {temp_images}.")
+ for image in images:
+ shutil.copy(image, temp_images)
+
+ matcher_zoo = self.cfg["matcher_zoo"]
+ model = matcher_zoo[key]
+ match_conf = model["matcher"]
+ match_conf["model"]["max_keypoints"] = max_keypoints
+ match_conf["model"]["match_threshold"] = match_threshold
+
+ feature_conf = model["feature"]
+ feature_conf["model"]["max_keypoints"] = max_keypoints
+ feature_conf["model"]["keypoint_threshold"] = keypoint_threshold
+
+ # retrieval
+ retrieval_name = self.cfg.get("retrieval_name", "netvlad")
+ retrieval_conf = extract_features.confs[retrieval_name]
+
+ mapper_options = {
+ "ba_refine_extra_params": mapper_refine_extra_params,
+ "ba_refine_focal_length": mapper_refine_focal_length,
+ "ba_refine_principal_point": mapper_refine_principle_points,
+ "ba_local_max_num_iterations": 40,
+ "ba_local_max_refinements": 3,
+ "ba_global_max_num_iterations": 100,
+ # below 3 options are for individual/video data, for internet photos, they should be left
+ # default
+ "min_focal_length_ratio": 0.1,
+ "max_focal_length_ratio": 10,
+ "max_extra_param": 1e15,
+ }
+
+ sfm_dir = self.outputs / "sfm_{}".format(key)
+ sfm_pairs = self.outputs / "pairs-sfm.txt"
+ sfm_dir.mkdir(exist_ok=True, parents=True)
+
+ # extract features
+ retrieval_path = extract_features.main(
+ retrieval_conf, temp_images, self.outputs
+ )
+ pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=top_k)
+
+ feature_path = extract_features.main(
+ feature_conf, temp_images, self.outputs
+ )
+ # match features
+ match_path = match_features.main(
+ match_conf, sfm_pairs, feature_conf["output"], self.outputs
+ )
+ # reconstruction
+ already_sfm = False
+ if sfm_dir.exists():
+ try:
+ model = pycolmap.Reconstruction(str(sfm_dir))
+ already_sfm = True
+ except ValueError:
+ logger.info(f"sfm_dir not exists model: {sfm_dir}")
+ if not already_sfm:
+ model = reconstruction.main(
+ sfm_dir,
+ temp_images,
+ sfm_pairs,
+ feature_path,
+ match_path,
+ mapper_options=mapper_options,
+ )
+
+ vertices = []
+ for point3D_id, point3D in model.points3D.items():
+ vertices.append([point3D.xyz, point3D.color])
+
+ model_3d = sfm_dir / "points3D.obj"
+ with open(model_3d, "w") as f:
+ for p, c in vertices:
+ # Write vertex position
+ f.write("v {} {} {}\n".format(p[0], p[1], p[2]))
+ # Write vertex normal (color)
+ f.write(
+ "vn {} {} {}\n".format(
+ c[0] / 255.0, c[1] / 255.0, c[2] / 255.0
+ )
+ )
+ viz_2d = visualization.visualize_sfm_2d(
+ model, temp_images, color_by="visibility", n=2, dpi=300
+ )
+
+ return model_3d, fig2im(viz_2d) / 255.0
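As a usage note, here is a minimal sketch (not part of this commit) of driving the new SfmEngine without the Gradio tab. It assumes the config is loaded from common/config.yaml with yaml.safe_load, that cfg["matcher_zoo"] is already in the processed form the app passes (each entry carrying hloc "matcher" and "feature" configs), that "disk+lightglue" is one of its keys, and that datasets/demo holds the input images; the argument values mirror the SfM tab defaults.

from pathlib import Path

import yaml

from common.sfm import SfmEngine

with open("common/config.yaml") as f:
    cfg = yaml.safe_load(f)  # assumption: matcher_zoo already in engine-ready form

engine = SfmEngine({**cfg, "outputs": "experiments/sfm"})

# arguments follow SfmEngine.call above
model_3d, viz_2d = engine.call(
    key="disk+lightglue",  # entry of cfg["matcher_zoo"]
    images=sorted(Path("datasets/demo").glob("*.jpg")),
    camera_model="PINHOLE",
    camera_params="0,0,0,0",
    max_keypoints=1000,
    keypoint_threshold=0.01,
    match_threshold=0.2,
    ransac_threshold=4,
    ransac_confidence=0.9999,
    ransac_max_iter=32,
    scene_graph="all",
    global_feature="netvlad",
    top_k=10,
)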
hloc/colmap_from_nvm.py CHANGED

@@ -25,7 +25,9 @@ def recover_database_images_and_ids(database_path):
images[name] = image_id
cameras[name] = camera_id
db.close()
- logger.info(f"Found {len(images)} images and {len(cameras)} cameras in database.")
+ logger.info(
+ f"Found {len(images)} images and {len(cameras)} cameras in database."
+ )
return images, cameras


@@ -34,9 +36,21 @@ def quaternion_to_rotation_matrix(qvec):
w, x, y, z = qvec
R = np.array(
[
- [1 - 2 * y * y - 2 * z * z, 2 * x * y - 2 * z * w, 2 * x * z + 2 * y * w],
- [2 * x * y + 2 * z * w, 1 - 2 * x * x - 2 * z * z, 2 * y * z - 2 * x * w],
- [2 * x * z - 2 * y * w, 2 * y * z + 2 * x * w, 1 - 2 * x * x - 2 * y * y],
+ [
+ 1 - 2 * y * y - 2 * z * z,
+ 2 * x * y - 2 * z * w,
+ 2 * x * z + 2 * y * w,
+ ],
+ [
+ 2 * x * y + 2 * z * w,
+ 1 - 2 * x * x - 2 * z * z,
+ 2 * y * z - 2 * x * w,
+ ],
+ [
+ 2 * x * z - 2 * y * w,
+ 2 * y * z + 2 * x * w,
+ 1 - 2 * x * x - 2 * y * y,
+ ],
]
)
return R

@@ -47,7 +61,9 @@ def camera_center_to_translation(c, qvec):
return (-1) * np.matmul(R, c)


- def read_nvm_model(nvm_path, intrinsics_path, image_ids, camera_ids, skip_points=False):
+ def read_nvm_model(
+ nvm_path, intrinsics_path, image_ids, camera_ids, skip_points=False
+ ):
with open(intrinsics_path, "r") as f:
raw_intrinsics = f.readlines()
hloc/extract_features.py CHANGED

@@ -1,6 +1,5 @@
import argparse
import collections.abc as collections
- import glob
import pprint
from pathlib import Path
from types import SimpleNamespace

@@ -330,6 +329,11 @@ confs = {
"model": {"name": "cosplace"},
"preprocessing": {"resize_max": 1024},
},
+ "eigenplaces": {
+ "output": "global-feats-eigenplaces",
+ "model": {"name": "eigenplaces"},
+ "preprocessing": {"resize_max": 1024},
+ },
}

hloc/extractors/eigenplaces.py ADDED

@@ -0,0 +1,57 @@
+ """
+ Code for loading models trained with EigenPlaces (or CosPlace) as a global
+ features extractor for geolocalization through image retrieval.
+ Multiple models are available with different backbones. Below is a summary of
+ models available (backbone : list of available output descriptors
+ dimensionality). For example you can use a model based on a ResNet50 with
+ descriptors dimensionality 1024.
+
+ EigenPlaces trained models:
+ ResNet18: [ 256, 512]
+ ResNet50: [128, 256, 512, 2048]
+ ResNet101: [128, 256, 512, 2048]
+ VGG16: [ 512]
+
+ CosPlace trained models:
+ ResNet18: [32, 64, 128, 256, 512]
+ ResNet50: [32, 64, 128, 256, 512, 1024, 2048]
+ ResNet101: [32, 64, 128, 256, 512, 1024, 2048]
+ ResNet152: [32, 64, 128, 256, 512, 1024, 2048]
+ VGG16: [ 64, 128, 256, 512]
+
+ EigenPlaces paper (ICCV 2023): https://arxiv.org/abs/2308.10832
+ CosPlace paper (CVPR 2022): https://arxiv.org/abs/2204.02287
+ """
+
+ import torch
+ import torchvision.transforms as tvf
+
+ from ..utils.base_model import BaseModel
+
+
+ class EigenPlaces(BaseModel):
+ default_conf = {
+ "variant": "EigenPlaces",
+ "backbone": "ResNet101",
+ "fc_output_dim": 2048,
+ }
+ required_inputs = ["image"]
+
+ def _init(self, conf):
+ self.net = torch.hub.load(
+ "gmberton/" + conf["variant"],
+ "get_trained_model",
+ backbone=conf["backbone"],
+ fc_output_dim=conf["fc_output_dim"],
+ ).eval()
+
+ mean = [0.485, 0.456, 0.406]
+ std = [0.229, 0.224, 0.225]
+ self.norm_rgb = tvf.Normalize(mean=mean, std=std)
+
+ def _forward(self, data):
+ image = self.norm_rgb(data["image"])
+ desc = self.net(image)
+ return {
+ "global_descriptor": desc,
+ }
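For context, a minimal sketch (not part of this commit) of using the new "eigenplaces" conf the same way SfmEngine uses "netvlad" above: extract global descriptors, then build retrieval pairs. The image and output directories are placeholders.

from pathlib import Path

from hloc import extract_features, pairs_from_retrieval

images = Path("datasets/demo")  # assumed image directory
outputs = Path("experiments/retrieval")
outputs.mkdir(parents=True, exist_ok=True)

retrieval_conf = extract_features.confs["eigenplaces"]  # new conf added above
retrieval_path = extract_features.main(retrieval_conf, images, outputs)
pairs_from_retrieval.main(
    retrieval_path, outputs / "pairs-retrieval.txt", num_matched=10
)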
hloc/localize_inloc.py CHANGED

@@ -24,7 +24,9 @@ def interpolate_scan(scan, kp):

# To maximize the number of points that have depth:
# do bilinear interpolation first and then nearest for the remaining points
- interp_lin = grid_sample(scan, kp, align_corners=True, mode="bilinear")[0, :, 0]
+ interp_lin = grid_sample(scan, kp, align_corners=True, mode="bilinear")[
+ 0, :, 0
+ ]
interp_nn = torch.nn.functional.grid_sample(
scan, kp, align_corners=True, mode="nearest"
)[0, :, 0]

@@ -64,7 +66,9 @@ def get_scan_pose(dataset_dir, rpath):
return P_after_GICP


- def pose_from_cluster(dataset_dir, q, retrieved, feature_file, match_file, skip=None):
+ def pose_from_cluster(
+ dataset_dir, q, retrieved, feature_file, match_file, skip=None
+ ):
height, width = cv2.imread(str(dataset_dir / q)).shape[:2]
cx = 0.5 * width
cy = 0.5 * height
hloc/localize_sfm.py CHANGED

@@ -40,7 +40,9 @@ def do_covisibility_clustering(
obs.image_id
for p2D in observed
if p2D.has_point3D()
- for obs in reconstruction.points3D[p2D.point3D_id].track.elements
+ for obs in reconstruction.points3D[
+ p2D.point3D_id
+ ].track.elements
}
connected_frames &= set(frame_ids)
connected_frames -= visited

@@ -149,7 +151,10 @@ def main(
reference_sfm = pycolmap.Reconstruction(reference_sfm)
db_name_to_id = {img.name: i for i, img in reference_sfm.images.items()}

- config = {"estimation": {"ransac": {"max_error": ransac_thresh}}, **(config or {})}
+ config = {
+ "estimation": {"ransac": {"max_error": ransac_thresh}},
+ **(config or {}),
+ }
localizer = QueryLocalizer(reference_sfm, config)

cam_from_world = {}

@@ -162,7 +167,9 @@ def main(
logger.info("Starting localization...")
for qname, qcam in tqdm(queries):
if qname not in retrieval_dict:
- logger.warning(f"No images retrieved for query image {qname}. Skipping...")
+ logger.warning(
+ f"No images retrieved for query image {qname}. Skipping..."
+ )
continue
db_names = retrieval_dict[qname]
db_ids = []
hloc/match_dense.py CHANGED

@@ -13,8 +13,9 @@ import torch
import torchvision.transforms.functional as F
from scipy.spatial import KDTree
from tqdm import tqdm
- from .extract_features import read_image, resize_image
+
from . import logger, matchers
+ from .extract_features import read_image, resize_image
from .match_features import find_unique_new_pairs
from .utils.base_model import dynamic_load
from .utils.io import list_h5_names

@@ -288,6 +289,7 @@ confs = {
},
}

+
def to_cpts(kpts, ps):
if ps > 0.0:
kpts = np.round(np.round((kpts + 0.5) / ps) * ps - 0.5, 2)

@@ -379,11 +381,13 @@ def kpids_to_matches0(kpt_ids0, kpt_ids1, scores):
matches, scores = get_unique_matches(matches, scores)
return matches_to_matches0(matches, scores)

+
def scale_keypoints(kpts, scale):
if np.any(scale != 1.0):
kpts *= kpts.new_tensor(scale)
return kpts

+
class ImagePairDataset(torch.utils.data.Dataset):
default_conf = {
"grayscale": True,

@@ -398,7 +402,9 @@ class ImagePairDataset(torch.utils.data.Dataset):
self.pairs = pairs
if self.conf.cache_images:
image_names = set(sum(pairs, ()))  # unique image names in pairs
- logger.info(f"Loading and caching {len(image_names)} unique images.")
+ logger.info(
+ f"Loading and caching {len(image_names)} unique images."
+ )
self.images = {}
self.scales = {}
for name in tqdm(image_names):

@@ -570,7 +576,9 @@ def aggregate_matches(
required_queries -= set(list_h5_names(feature_path))

# if an entry in cpdict is provided as np.ndarray we assume it is fixed
- required_queries -= set([k for k, v in cpdict.items() if isinstance(v, np.ndarray)])
+ required_queries -= set(
+ [k for k, v in cpdict.items() if isinstance(v, np.ndarray)]
+ )

# sort pairs for reduced RAM
pairs_per_q = Counter(list(chain(*pairs)))

@@ -578,7 +586,9 @@ def aggregate_matches(
pairs = [p for _, p in sorted(zip(pairs_score, pairs))]

if len(required_queries) > 0:
- logger.info(f"Aggregating keypoints for {len(required_queries)} images.")
+ logger.info(
+ f"Aggregating keypoints for {len(required_queries)} images."
+ )
n_kps = 0
with h5py.File(str(match_path), "a") as fd:
for name0, name1 in tqdm(pairs, smoothing=0.1):

@@ -756,6 +766,7 @@ def match_and_assign(
logger.info(f'Reassign matches with max_error={conf["max_error"]}.')
assign_matches(pairs, match_path, cpdict, max_error=conf["max_error"])

+
def scale_lines(lines, scale):
if np.any(scale != 1.0):
lines *= lines.new_tensor(scale)

@@ -972,6 +983,7 @@ def match_images(model, image_0, image_1, conf, device="cpu"):
torch.cuda.empty_cache()
return ret

+
@torch.no_grad()
def main(
conf: Dict,

@@ -985,7 +997,8 @@ def main(
overwrite: bool = False,
) -> Path:
logger.info(
- "Extracting semi-dense features with configuration:" f"\n{pprint.pformat(conf)}"
+ "Extracting semi-dense features with configuration:"
+ f"\n{pprint.pformat(conf)}"
)

if features is None:

@@ -995,7 +1008,8 @@ def main(
features_q = features
if matches is None:
raise ValueError(
- "Either provide both features and matches as Path" " or both as names."
+ "Either provide both features and matches as Path"
+ " or both as names."
)
else:
if export_dir is None:

@@ -1017,7 +1031,14 @@ def main(
raise TypeError(str(features_ref))

match_and_assign(
- conf, pairs, image_dir, matches, features_q, features_ref, max_kps, overwrite
+ conf,
+ pairs,
+ image_dir,
+ matches,
+ features_q,
+ features_ref,
+ max_kps,
+ overwrite,
)

return features_q, matches

@@ -1028,11 +1049,15 @@ if __name__ == "__main__":
parser.add_argument("--pairs", type=Path, required=True)
parser.add_argument("--image_dir", type=Path, required=True)
parser.add_argument("--export_dir", type=Path, required=True)
- parser.add_argument("--matches", type=Path, default=confs["loftr"]["output"])
+ parser.add_argument(
+ "--matches", type=Path, default=confs["loftr"]["output"]
+ )
parser.add_argument(
"--features", type=str, default="feats_" + confs["loftr"]["output"]
)
- parser.add_argument("--conf", type=str, default="loftr", choices=list(confs.keys()))
+ parser.add_argument(
+ "--conf", type=str, default="loftr", choices=list(confs.keys())
+ )
args = parser.parse_args()
main(
confs[args.conf],

@@ -1042,4 +1067,3 @@ if __name__ == "__main__":
args.matches,
args.features,
)
-
hloc/matchers/mast3r.py CHANGED

@@ -8,7 +8,6 @@ import torch
import torchvision.transforms as tfm

from .. import logger
- from ..utils.base_model import BaseModel

mast3r_path = Path(__file__).parent / "../../third_party/mast3r"
sys.path.append(str(mast3r_path))

@@ -16,12 +15,11 @@ sys.path.append(str(mast3r_path))
dust3r_path = Path(__file__).parent / "../../third_party/dust3r"
sys.path.append(str(dust3r_path))

- from mast3r.model import AsymmetricMASt3R
- from mast3r.fast_nn import fast_reciprocal_NNs
-
from dust3r.image_pairs import make_pairs
from dust3r.inference import inference
- from dust3r.utils.image import load_images
+ from mast3r.fast_nn import fast_reciprocal_NNs
+ from mast3r.model import AsymmetricMASt3R
+
from hloc.matchers.duster import Duster

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -70,8 +68,8 @@ class Mast3r(Duster):
output = inference(pairs, self.net, device, batch_size=1)

# at this stage, you have the raw dust3r predictions
- view1, pred1 = output["view1"], output["pred1"]
- view2, pred2 = output["view2"], output["pred2"]
+ _, pred1 = output["view1"], output["pred1"]
+ _, pred2 = output["view2"], output["pred2"]

desc1, desc2 = (
pred1["desc"][1].squeeze(0).detach(),
hloc/matchers/superglue.py CHANGED

@@ -4,7 +4,9 @@ from pathlib import Path
from ..utils.base_model import BaseModel

sys.path.append(str(Path(__file__).parent / "../../third_party"))
- from SuperGluePretrainedNetwork.models.superglue import SuperGlue as SG  # noqa: E402
+ from SuperGluePretrainedNetwork.models.superglue import (  # noqa: E402
+ SuperGlue as SG,
+ )


class SuperGlue(BaseModel):
hloc/pairs_from_exhaustive.py CHANGED

@@ -34,7 +34,9 @@ def main(
elif isinstance(image_list, collections.Iterable):
names_ref = list(ref_list)
else:
- raise ValueError(f"Unknown type for reference image list: {ref_list}")
+ raise ValueError(
+ f"Unknown type for reference image list: {ref_list}"
+ )
elif ref_features is not None:
names_ref = list_h5_names(ref_features)
else:
hloc/pairs_from_poses.py CHANGED

@@ -63,6 +63,8 @@ if __name__ == "__main__":
parser.add_argument("--model", required=True, type=Path)
parser.add_argument("--output", required=True, type=Path)
parser.add_argument("--num_matched", required=True, type=int)
- parser.add_argument("--rotation_threshold", default=DEFAULT_ROT_THRESH, type=float)
+ parser.add_argument(
+ "--rotation_threshold", default=DEFAULT_ROT_THRESH, type=float
+ )
args = parser.parse_args()
main(**args.__dict__)
hloc/pairs_from_retrieval.py CHANGED

@@ -19,7 +19,9 @@ def parse_names(prefix, names, names_all):
prefix = tuple(prefix)
names = [n for n in names_all if n.startswith(prefix)]
if len(names) == 0:
- raise ValueError(f"Could not find any image with the prefix `{prefix}`.")
+ raise ValueError(
+ f"Could not find any image with the prefix `{prefix}`."
+ )
elif names is not None:
if isinstance(names, (str, Path)):
names = parse_image_lists(names)

@@ -90,7 +92,9 @@ def main(
db_descriptors = descriptors
if isinstance(db_descriptors, (Path, str)):
db_descriptors = [db_descriptors]
- name2db = {n: i for i, p in enumerate(db_descriptors) for n in list_h5_names(p)}
+ name2db = {
+ n: i for i, p in enumerate(db_descriptors) for n in list_h5_names(p)
+ }
db_names_h5 = list(name2db.keys())
query_names_h5 = list_h5_names(descriptors)
hloc/reconstruction.py CHANGED

@@ -93,13 +93,16 @@ def run_reconstruction(
largest_num_images = num_images
assert largest_index is not None
logger.info(
- f"Largest model is #{largest_index} " f"with {largest_num_images} images."
+ f"Largest model is #{largest_index} "
+ f"with {largest_num_images} images."
)

for filename in ["images.bin", "cameras.bin", "points3D.bin"]:
if (sfm_dir / filename).exists():
(sfm_dir / filename).unlink()
- shutil.move(str(models_path / str(largest_index) / filename), str(sfm_dir))
+ shutil.move(
+ str(models_path / str(largest_index) / filename), str(sfm_dir)
+ )
return reconstructions[largest_index]


@@ -172,7 +175,9 @@ if __name__ == "__main__":
"--image_options",
nargs="+",
default=[],
- help="List of key=value from {}".format(pycolmap.ImageReaderOptions().todict()),
+ help="List of key=value from {}".format(
+ pycolmap.ImageReaderOptions().todict()
+ ),
)
parser.add_argument(
"--mapper_options",
hloc/triangulation.py CHANGED

@@ -118,7 +118,9 @@ def estimation_and_geometric_verification(
pycolmap.verify_matches(
database_path,
pairs_path,
- options=dict(ransac=dict(max_num_trials=20000, min_inlier_ratio=0.1)),
+ options=dict(
+ ransac=dict(max_num_trials=20000, min_inlier_ratio=0.1)
+ ),
)


@@ -142,7 +144,9 @@ def geometric_verification(
id0 = image_ids[name0]
image0 = reference.images[id0]
cam0 = reference.cameras[image0.camera_id]
- kps0, noise0 = get_keypoints(features_path, name0, return_uncertainty=True)
+ kps0, noise0 = get_keypoints(
+ features_path, name0, return_uncertainty=True
+ )
noise0 = 1.0 if noise0 is None else noise0
if len(kps0) > 0:
kps0 = np.stack(cam0.cam_from_img(kps0))

@@ -153,7 +157,9 @@ def geometric_verification(
id1 = image_ids[name1]
image1 = reference.images[id1]
cam1 = reference.cameras[image1.camera_id]
- kps1, noise1 = get_keypoints(features_path, name1, return_uncertainty=True)
+ kps1, noise1 = get_keypoints(
+ features_path, name1, return_uncertainty=True
+ )
noise1 = 1.0 if noise1 is None else noise1
if len(kps1) > 0:
kps1 = np.stack(cam1.cam_from_img(kps1))

@@ -170,7 +176,9 @@ def geometric_verification(
db.add_two_view_geometry(id0, id1, matches)
continue

- cam1_from_cam0 = image1.cam_from_world * image0.cam_from_world.inverse()
+ cam1_from_cam0 = (
+ image1.cam_from_world * image0.cam_from_world.inverse()
+ )
errors0, errors1 = compute_epipolar_errors(
cam1_from_cam0, kps0[matches[:, 0]], kps1[matches[:, 1]]
)

@@ -209,7 +217,11 @@ def run_triangulation(
with OutputCapture(verbose):
with pycolmap.ostream():
reconstruction = pycolmap.triangulate_points(
- reference_model, database_path, image_dir, model_path, options=options
+ reference_model,
+ database_path,
+ image_dir,
+ model_path,
+ options=options,
)
return reconstruction

@@ -257,7 +269,8 @@ def main(
sfm_dir, database, image_dir, reference, verbose, mapper_options
)
logger.info(
- "Finished the triangulation with statistics:\n%s", reconstruction.summary()
+ "Finished the triangulation with statistics:\n%s",
+ reconstruction.summary(),
)
return reconstruction

@@ -278,7 +291,8 @@ def parse_option_args(args: List[str], default_options) -> Dict[str, Any]:
target_type = type(getattr(default_options, key))
if not isinstance(value, target_type):
raise ValueError(
- f'Incorrect type for option "{key}":' f" {type(value)} vs {target_type}"
+ f'Incorrect type for option "{key}":'
+ f" {type(value)} vs {target_type}"
)
options[key] = value
return options
hloc/utils/viz.py CHANGED

@@ -49,7 +49,7 @@ def plot_images(
if titles:
ax.set_title(titles[i])
fig.tight_layout(pad=pad)
-
+ return fig

def plot_keypoints(kpts, colors="lime", ps=4):
"""Plot keypoints for existing images.
hloc/visualization.py CHANGED

@@ -6,11 +6,23 @@ import pycolmap
from matplotlib import cm

from .utils.io import read_image
- from .utils.viz import add_text, cm_RdGn, plot_images, plot_keypoints, plot_matches
+ from .utils.viz import (
+ add_text,
+ cm_RdGn,
+ plot_images,
+ plot_keypoints,
+ plot_matches,
+ )


def visualize_sfm_2d(
- reconstruction, image_dir, color_by="visibility", selected=[], n=1, seed=0, dpi=75
+ reconstruction,
+ image_dir,
+ color_by="visibility",
+ selected=[],
+ n=1,
+ seed=0,
+ dpi=75,
):
assert image_dir.exists()
if not isinstance(reconstruction, pycolmap.Reconstruction):

@@ -31,9 +43,11 @@ def visualize_sfm_2d(
elif color_by == "track_length":
tl = np.array(
[
- reconstruction.points3D[p.point3D_id].track.length()
- if p.has_point3D()
- else 1
+ (
+ reconstruction.points3D[p.point3D_id].track.length()
+ if p.has_point3D()
+ else 1
+ )
for p in image.points2D
]
)

@@ -57,10 +71,11 @@ def visualize_sfm_2d(
raise NotImplementedError(f"Coloring not implemented: {color_by}.")

name = image.name
- plot_images([read_image(image_dir / name)], dpi=dpi)
+ fig = plot_images([read_image(image_dir / name)], dpi=dpi)
plot_keypoints([keypoints], colors=[color], ps=4)
add_text(0, text)
add_text(0, name, pos=(0.01, 0.01), fs=5, lcolor=None, va="bottom")
+ return fig


def visualize_loc(

@@ -121,7 +136,9 @@ def visualize_loc_from_log(
counts = np.zeros(n)
dbs_kp_q_db = [[] for _ in range(n)]
inliers_dbs = [[] for _ in range(n)]
- for i, (inl, (p3D_id, db_idxs)) in enumerate(zip(inliers, kp_to_3D_to_db)):
+ for i, (inl, (p3D_id, db_idxs)) in enumerate(
+ zip(inliers, kp_to_3D_to_db)
+ ):
track = reconstruction.points3D[p3D_id].track
track = {el.image_id: el.point2D_idx for el in track.elements}
for db_idx in db_idxs:

@@ -133,7 +150,9 @@ def visualize_loc_from_log(
# for inloc the database keypoints are already in the logs
assert "keypoints_db" in loc
assert "indices_db" in loc
- counts = np.array([np.sum(loc["indices_db"][inliers] == i) for i in range(n)])
+ counts = np.array(
+ [np.sum(loc["indices_db"][inliers] == i) for i in range(n)]
+ )

# display the database images with the most inlier matches
db_sort = np.argsort(-counts)
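These return values exist so the SfM tab can display the 2D visualization: a minimal sketch (not part of this commit) of the chain SfmEngine now relies on, with the reconstruction path and image directory as placeholders.

from pathlib import Path

import pycolmap

from common.viz import fig2im
from hloc.visualization import visualize_sfm_2d

rec = pycolmap.Reconstruction("experiments/sfm/sfm_disk+lightglue")
fig = visualize_sfm_2d(
    rec, Path("datasets/demo"), color_by="visibility", n=2, dpi=300
)
image = fig2im(fig) / 255.0  # numpy array suitable for gr.Image(type="numpy")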
requirements.txt CHANGED

@@ -16,7 +16,7 @@ opencv-python==4.6.0.66
pandas==2.0.3
plotly==5.15.0
protobuf==4.23.2
- pycolmap==0.5.0
+ pycolmap==0.6.0
pytlsd==0.0.2
pytorch-lightning==1.4.9
PyYAML==6.0

@@ -34,4 +34,5 @@ onnxruntime
poselib
roma #dust3r
huggingface_hub
- psutil
+ psutil
+ easydict