Vincentqyw committed on
Commit
e73df10
·
1 Parent(s): e2c1f36

add: superglue and dedode

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. third_party/DeDoDe/.gitignore +162 -0
  2. third_party/DeDoDe/DeDoDe/__init__.py +1 -0
  3. third_party/DeDoDe/DeDoDe/benchmarks/__init__.py +3 -0
  4. third_party/DeDoDe/DeDoDe/benchmarks/mega_pose_est.py +114 -0
  5. third_party/DeDoDe/DeDoDe/benchmarks/mega_pose_est_mnn.py +119 -0
  6. third_party/DeDoDe/DeDoDe/benchmarks/num_inliers.py +76 -0
  7. third_party/DeDoDe/DeDoDe/checkpoint.py +59 -0
  8. third_party/DeDoDe/DeDoDe/datasets/__init__.py +0 -0
  9. third_party/DeDoDe/DeDoDe/datasets/megadepth.py +269 -0
  10. third_party/DeDoDe/DeDoDe/decoder.py +90 -0
  11. third_party/DeDoDe/DeDoDe/descriptors/__init__.py +0 -0
  12. third_party/DeDoDe/DeDoDe/descriptors/dedode_descriptor.py +49 -0
  13. third_party/DeDoDe/DeDoDe/descriptors/descriptor_loss.py +75 -0
  14. third_party/DeDoDe/DeDoDe/detectors/__init__.py +0 -0
  15. third_party/DeDoDe/DeDoDe/detectors/dedode_detector.py +75 -0
  16. third_party/DeDoDe/DeDoDe/detectors/loss.py +275 -0
  17. third_party/DeDoDe/DeDoDe/encoder.py +47 -0
  18. third_party/DeDoDe/DeDoDe/matchers/__init__.py +0 -0
  19. third_party/DeDoDe/DeDoDe/matchers/dual_softmax_matcher.py +38 -0
  20. third_party/DeDoDe/DeDoDe/model_zoo/__init__.py +3 -0
  21. third_party/DeDoDe/DeDoDe/model_zoo/dedode_models.py +177 -0
  22. third_party/DeDoDe/DeDoDe/train.py +76 -0
  23. third_party/DeDoDe/DeDoDe/utils.py +759 -0
  24. third_party/DeDoDe/LICENSE +21 -0
  25. third_party/DeDoDe/README.md +74 -0
  26. third_party/DeDoDe/assets/dedode_roma.png +3 -0
  27. third_party/DeDoDe/assets/im_A.jpg +3 -0
  28. third_party/DeDoDe/assets/im_B.jpg +3 -0
  29. third_party/DeDoDe/assets/matches.jpg +3 -0
  30. third_party/DeDoDe/assets/teaser.png +3 -0
  31. third_party/DeDoDe/data_prep/prep_keypoints.py +100 -0
  32. third_party/DeDoDe/demo/demo_kpts.py +20 -0
  33. third_party/DeDoDe/demo/demo_match.py +45 -0
  34. third_party/DeDoDe/demo/demo_scoremap.py +20 -0
  35. third_party/DeDoDe/pretrained/dedode_descriptor_B.pth +3 -0
  36. third_party/DeDoDe/pretrained/dedode_detector_L.pth +3 -0
  37. third_party/DeDoDe/requirements.txt +9 -0
  38. third_party/DeDoDe/setup.py +10 -0
  39. third_party/SuperGluePretrainedNetwork/.gitignore +3 -0
  40. third_party/SuperGluePretrainedNetwork/LICENSE +48 -0
  41. third_party/SuperGluePretrainedNetwork/README.md +388 -0
  42. third_party/SuperGluePretrainedNetwork/assets/freiburg_matches.gif +3 -0
  43. third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847980.722988.png +3 -0
  44. third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847981.726650.png +3 -0
  45. third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847982.730674.png +3 -0
  46. third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847983.738736.png +3 -0
  47. third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847984.743352.png +3 -0
  48. third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847985.746954.png +3 -0
  49. third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847986.762616.png +3 -0
  50. third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847987.758741.png +3 -0
third_party/DeDoDe/.gitignore ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ .vscode*
third_party/DeDoDe/DeDoDe/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .model_zoo import dedode_detector_B, dedode_detector_L, dedode_descriptor_B
third_party/DeDoDe/DeDoDe/benchmarks/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .num_inliers import NumInliersBenchmark
2
+ from .mega_pose_est import MegaDepthPoseEstimationBenchmark
3
+ from .mega_pose_est_mnn import MegaDepthPoseMNNBenchmark
third_party/DeDoDe/DeDoDe/benchmarks/mega_pose_est.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from DeDoDe.utils import *
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import torch.nn.functional as F
7
+
8
class MegaDepthPoseEstimationBenchmark:
    """Relative-pose benchmark over MegaDepth scene pairs using a dense matcher.

    Every scene is an ``.npz`` archive from which the keys ``pair_infos``,
    ``intrinsics``, ``poses`` and ``image_paths`` are read.
    """

    def __init__(self, data_root="data/megadepth", scene_names = None) -> None:
        """Load the scene archives.

        Args:
            data_root: Directory holding the scene ``.npz`` files; the image
                paths stored in the archives are resolved relative to it.
            scene_names: File names of the scenes to evaluate. ``None``
                selects the five default scenes listed below.
        """
        if scene_names is None:
            self.scene_names = [
                "0015_0.1_0.3.npz",
                "0015_0.3_0.5.npz",
                "0022_0.1_0.3.npz",
                "0022_0.3_0.5.npz",
                "0022_0.5_0.7.npz",
            ]
        else:
            self.scene_names = scene_names
        # NOTE(review): allow_pickle=True unpickles archive contents; only
        # point this at trusted scene files.
        self.scenes = [
            np.load(f"{data_root}/{scene}", allow_pickle=True)
            for scene in self.scene_names
        ]
        self.data_root = data_root

    def benchmark(self, keypoint_model, matching_model, model_name = None, resolution = None, scale_intrinsics = True, calibrated = True):
        """Estimate a relative pose for every pair and aggregate the errors.

        For each pair, keypoints are detected in both images (20 000 requested
        per image), matched through the warp returned by ``matching_model``,
        converted to pixel coordinates and handed to ``estimate_pose``. A pair
        whose estimation raises is scored with a 90 degree error.

        Args:
            keypoint_model: Object exposing ``detect_from_path``.
            matching_model: Object exposing ``get_output_resolution``,
                ``match``, ``match_keypoints`` and ``to_pixel_coordinates``.
            model_name: Label used only in the printed summary.
            resolution: Unused; kept for interface compatibility.
            scale_intrinsics: When true, the intrinsics and the image size
                given to ``to_pixel_coordinates`` are rescaled by
                ``1200 / max(width, height)`` of the respective image.
            calibrated: Must be true; only the calibrated estimator exists.

        Returns:
            Dict with ``auc_5``/``auc_10``/``auc_20`` (from ``pose_auc`` at
            thresholds 5, 10, 20) and ``map_5``/``map_10``/``map_20`` (running
            means of the accuracies at 5, 10, 15 and 20 degrees).

        Raises:
            NotImplementedError: If ``calibrated`` is false. Previously this
                path ran no estimator at all and the resulting ``NameError``
                was swallowed, reporting 90 degrees for every pair.
        """
        if not calibrated:
            raise NotImplementedError(
                "Only calibrated pose estimation is implemented (calibrated=True)."
            )
        # Call kept from the original sequence; its result is not used here.
        matching_model.get_output_resolution()
        data_root = self.data_root
        thresholds = [5, 10, 20]
        ransac_threshold = 0.5
        tot_e_pose = []
        with torch.no_grad():
            for scene in self.scenes:
                pairs = scene["pair_infos"]
                intrinsics = scene["intrinsics"]
                poses = scene["poses"]
                im_paths = scene["image_paths"]
                for pairind in tqdm(range(len(pairs))):
                    idx1, idx2 = pairs[pairind][0]
                    K1 = intrinsics[idx1].copy()
                    T1 = poses[idx1].copy()
                    R1, t1 = T1[:3, :3], T1[:3, 3]
                    K2 = intrinsics[idx2].copy()
                    T2 = poses[idx2].copy()
                    R2, t2 = T2[:3, :3], T2[:3, 3]
                    R, t = compute_relative_pose(R1, t1, R2, t2)
                    im_A_path = f"{data_root}/{im_paths[idx1]}"
                    im_B_path = f"{data_root}/{im_paths[idx2]}"

                    keypoints_A = keypoint_model.detect_from_path(im_A_path, num_keypoints = 20_000)["keypoints"][0]
                    keypoints_B = keypoint_model.detect_from_path(im_B_path, num_keypoints = 20_000)["keypoints"][0]
                    warp, certainty = matching_model.match(im_A_path, im_B_path)
                    matches = matching_model.match_keypoints(keypoints_A, keypoints_B, warp, certainty, return_tuple = False)
                    # Only the sizes are needed; close the files right away
                    # instead of leaking one handle per image.
                    with Image.open(im_A_path) as im_A:
                        w1, h1 = im_A.size
                    with Image.open(im_B_path) as im_B:
                        w2, h2 = im_B.size
                    if scale_intrinsics:
                        scale1 = 1200 / max(w1, h1)
                        scale2 = 1200 / max(w2, h2)
                        w1, h1 = scale1 * w1, scale1 * h1
                        w2, h2 = scale2 * w2, scale2 * h2
                        K1, K2 = K1.copy(), K2.copy()
                        K1[:2] = K1[:2] * scale1
                        K2[:2] = K2[:2] * scale2
                    kpts1, kpts2 = matching_model.to_pixel_coordinates(matches, h1, w1, h2, w2)
                    # Shuffle the correspondences, same permutation on both sides.
                    shuffling = np.random.permutation(np.arange(len(kpts1)))
                    kpts1 = kpts1[shuffling]
                    kpts2 = kpts2[shuffling]
                    try:
                        # Pixel threshold divided by the summed mean absolute
                        # value of the upper-left 2x2 of both intrinsics.
                        norm_threshold = ransac_threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
                        R_est, t_est, mask = estimate_pose(
                            kpts1.cpu().numpy(),
                            kpts2.cpu().numpy(),
                            K1,
                            K2,
                            norm_threshold,
                            conf=0.99999,
                        )
                        T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
                        e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                        e_pose = max(e_t, e_R)
                    except Exception as e:
                        # Best effort: a failed estimation counts as a 90 degree error.
                        print(repr(e))
                        e_t, e_R = 90, 90
                        e_pose = max(e_t, e_R)
                    tot_e_pose.append(e_pose)
            tot_e_pose = np.array(tot_e_pose)
            auc = pose_auc(tot_e_pose, thresholds)
            acc_5 = (tot_e_pose < 5).mean()
            acc_10 = (tot_e_pose < 10).mean()
            acc_15 = (tot_e_pose < 15).mean()
            acc_20 = (tot_e_pose < 20).mean()
            map_5 = acc_5
            map_10 = np.mean([acc_5, acc_10])
            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
            print(f"{model_name} auc: {auc}")
            return {
                "auc_5": auc[0],
                "auc_10": auc[1],
                "auc_20": auc[2],
                "map_5": map_5,
                "map_10": map_10,
                "map_20": map_20,
            }
third_party/DeDoDe/DeDoDe/benchmarks/mega_pose_est_mnn.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from DeDoDe.utils import *
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import torch.nn.functional as F
7
+
8
class MegaDepthPoseMNNBenchmark:
    """Relative-pose benchmark over MegaDepth pairs for detector/descriptor/matcher pipelines.

    Every scene is an ``.npz`` archive from which the keys ``pair_infos``,
    ``intrinsics``, ``poses`` and ``image_paths`` are read.
    """

    def __init__(self, data_root="data/megadepth", scene_names = None) -> None:
        """Load the scene archives.

        Args:
            data_root: Directory holding the scene ``.npz`` files; the image
                paths stored in the archives are resolved relative to it.
            scene_names: File names of the scenes to evaluate. ``None``
                selects the five default scenes listed below.
        """
        if scene_names is None:
            self.scene_names = [
                "0015_0.1_0.3.npz",
                "0015_0.3_0.5.npz",
                "0022_0.1_0.3.npz",
                "0022_0.3_0.5.npz",
                "0022_0.5_0.7.npz",
            ]
        else:
            self.scene_names = scene_names
        # NOTE(review): allow_pickle=True unpickles archive contents; only
        # point this at trusted scene files.
        self.scenes = [
            np.load(f"{data_root}/{scene}", allow_pickle=True)
            for scene in self.scene_names
        ]
        self.data_root = data_root

    def benchmark(self, detector_model, descriptor_model, matcher_model, model_name = None, resolution = None, scale_intrinsics = True, calibrated = True):
        """Estimate a relative pose for every pair and aggregate the errors.

        For each pair, keypoints are detected and described in both images,
        matched with ``matcher_model.match`` (``normalize=True``,
        ``inv_temp=20``, ``threshold=0.01``), converted to pixel coordinates
        and handed to ``estimate_pose``. A pair whose estimation raises is
        scored with a 90 degree error.

        Args:
            detector_model: Object exposing ``detect_from_path`` whose result
                has ``"keypoints"`` and ``"confidence"`` entries.
            descriptor_model: Object exposing ``describe_keypoints_from_path``
                whose result has a ``"descriptions"`` entry.
            matcher_model: Object exposing ``match`` and ``to_pixel_coords``.
            model_name: Label used only in the printed summary.
            resolution: Unused; kept for interface compatibility.
            scale_intrinsics: When true, the intrinsics and the image size
                given to ``to_pixel_coords`` are rescaled by
                ``1200 / max(width, height)`` of the respective image.
            calibrated: Must be true; only the calibrated estimator exists.

        Returns:
            Dict with ``auc_5``/``auc_10``/``auc_20`` (from ``pose_auc`` at
            thresholds 5, 10, 20) and ``map_5``/``map_10``/``map_20`` (running
            means of the accuracies at 5, 10, 15 and 20 degrees).

        Raises:
            NotImplementedError: If ``calibrated`` is false. Previously this
                path ran no estimator at all and the resulting ``NameError``
                was swallowed, reporting 90 degrees for every pair.
        """
        if not calibrated:
            raise NotImplementedError(
                "Only calibrated pose estimation is implemented (calibrated=True)."
            )
        data_root = self.data_root
        thresholds = [5, 10, 20]
        ransac_threshold = 0.5
        tot_e_pose = []
        with torch.no_grad():
            for scene in self.scenes:
                pairs = scene["pair_infos"]
                intrinsics = scene["intrinsics"]
                poses = scene["poses"]
                im_paths = scene["image_paths"]
                for pairind in tqdm(range(len(pairs))):
                    idx1, idx2 = pairs[pairind][0]
                    K1 = intrinsics[idx1].copy()
                    T1 = poses[idx1].copy()
                    R1, t1 = T1[:3, :3], T1[:3, 3]
                    K2 = intrinsics[idx2].copy()
                    T2 = poses[idx2].copy()
                    R2, t2 = T2[:3, :3], T2[:3, 3]
                    R, t = compute_relative_pose(R1, t1, R2, t2)
                    im_A_path = f"{data_root}/{im_paths[idx1]}"
                    im_B_path = f"{data_root}/{im_paths[idx2]}"
                    detections_A = detector_model.detect_from_path(im_A_path)
                    keypoints_A, P_A = detections_A["keypoints"], detections_A["confidence"]
                    detections_B = detector_model.detect_from_path(im_B_path)
                    keypoints_B, P_B = detections_B["keypoints"], detections_B["confidence"]
                    description_A = descriptor_model.describe_keypoints_from_path(im_A_path, keypoints_A)["descriptions"]
                    description_B = descriptor_model.describe_keypoints_from_path(im_B_path, keypoints_B)["descriptions"]
                    matches_A, matches_B, batch_ids = matcher_model.match(keypoints_A, description_A,
                        keypoints_B, description_B,
                        P_A = P_A, P_B = P_B,
                        normalize = True, inv_temp=20, threshold = 0.01)

                    # Only the sizes are needed; close the files right away
                    # instead of leaking one handle per image.
                    with Image.open(im_A_path) as im_A:
                        w1, h1 = im_A.size
                    with Image.open(im_B_path) as im_B:
                        w2, h2 = im_B.size
                    if scale_intrinsics:
                        scale1 = 1200 / max(w1, h1)
                        scale2 = 1200 / max(w2, h2)
                        w1, h1 = scale1 * w1, scale1 * h1
                        w2, h2 = scale2 * w2, scale2 * h2
                        K1, K2 = K1.copy(), K2.copy()
                        K1[:2] = K1[:2] * scale1
                        K2[:2] = K2[:2] * scale2
                    kpts1, kpts2 = matcher_model.to_pixel_coords(matches_A, matches_B, h1, w1, h2, w2)
                    # Shuffle the correspondences, same permutation on both sides.
                    shuffling = np.random.permutation(np.arange(len(kpts1)))
                    kpts1 = kpts1[shuffling]
                    kpts2 = kpts2[shuffling]
                    try:
                        # Pixel threshold divided by the summed mean absolute
                        # value of the upper-left 2x2 of both intrinsics.
                        norm_threshold = ransac_threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
                        R_est, t_est, mask = estimate_pose(
                            kpts1.cpu().numpy(),
                            kpts2.cpu().numpy(),
                            K1,
                            K2,
                            norm_threshold,
                            conf=0.99999,
                        )
                        T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
                        e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                        e_pose = max(e_t, e_R)
                    except Exception as e:
                        # Best effort: a failed estimation counts as a 90 degree error.
                        print(repr(e))
                        e_t, e_R = 90, 90
                        e_pose = max(e_t, e_R)
                    tot_e_pose.append(e_pose)
            tot_e_pose = np.array(tot_e_pose)
            auc = pose_auc(tot_e_pose, thresholds)
            acc_5 = (tot_e_pose < 5).mean()
            acc_10 = (tot_e_pose < 10).mean()
            acc_15 = (tot_e_pose < 15).mean()
            acc_20 = (tot_e_pose < 20).mean()
            map_5 = acc_5
            map_10 = np.mean([acc_5, acc_10])
            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
            print(f"{model_name} auc: {auc}")
            return {
                "auc_5": auc[0],
                "auc_10": auc[1],
                "auc_20": auc[2],
                "map_5": map_5,
                "map_10": map_10,
                "map_20": map_20,
            }
third_party/DeDoDe/DeDoDe/benchmarks/num_inliers.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from DeDoDe.utils import *
4
+ import DeDoDe
5
+
6
class NumInliersBenchmark(nn.Module):
    """Measure how many detected keypoints in image A have a nearby keypoint in image B.

    Keypoints of A are transferred to B with the ground-truth warp returned
    by ``get_gt_warp``; the distance to the closest keypoint of B is then
    compared against several fixed thresholds.
    """

    def __init__(self, dataset, num_samples = 1000, batch_size = 8, num_keypoints = 10_000, device = "cuda") -> None:
        """Build the sampling dataloader.

        Args:
            dataset: Map-style dataset of image pairs.
            num_samples: Number of items drawn without replacement. Values
                larger than ``len(dataset)`` are clamped, since sampling
                without replacement cannot return more items than exist.
            batch_size: Batch size; also used as the number of loader workers.
            num_keypoints: Keypoints requested from the detector per image.
            device: Unused here; kept for interface compatibility.
        """
        super().__init__()
        num_samples = min(num_samples, len(dataset))
        sampler = torch.utils.data.WeightedRandomSampler(
            torch.ones(len(dataset)), replacement=False, num_samples=num_samples
        )
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, num_workers=batch_size, sampler=sampler
        )
        self.dataloader = dataloader
        self.tracked_metrics = {}
        self.batch_size = batch_size
        self.N = len(dataloader)
        self.num_keypoints = num_keypoints

    def compute_batch_metrics(self, outputs, batch, device = "cuda"):
        """Accumulate inlier statistics of one batch into ``self.tracked_metrics``.

        Args:
            outputs: Dict with ``"keypoints_A"`` and ``"keypoints_B"``.
            batch: Dict with ``"im_A"`` (4-D), ``"im_A_depth"``,
                ``"im_B_depth"``, ``"T_1to2"``, ``"K1"`` and ``"K2"``.
            device: Kept for interface compatibility; the histogram bins are
                created on the device of the computed distances so the
                comparison cannot hit a device mismatch.

        Returns:
            ``None``. Nothing is accumulated when no keypoint of A lands on a
            valid ground-truth location.
        """
        kpts_A, kpts_B = outputs["keypoints_A"], outputs["keypoints_B"]
        B, _channels, H, W = batch["im_A"].shape
        gt_warp_A_to_B, valid_mask_A_to_B = get_gt_warp(
            batch["im_A_depth"],
            batch["im_B_depth"],
            batch["T_1to2"],
            batch["K1"],
            batch["K2"],
            H=H,
            W=W,
        )
        # Sample the last channels of the warp (from index 2 on) at the keypoints of A.
        kpts_A_to_B = F.grid_sample(gt_warp_A_to_B[...,2:].float().permute(0,3,1,2), kpts_A[...,None,:],
            align_corners=False, mode = 'bilinear')[...,0].mT
        legit_A_to_B = F.grid_sample(valid_mask_A_to_B.reshape(B,1,H,W), kpts_A[...,None,:],
            align_corners=False, mode = 'bilinear')[...,0,:,0]
        if legit_A_to_B.sum() == 0:
            return
        dists = (torch.cdist(kpts_A_to_B, kpts_B).min(dim=-1).values[legit_A_to_B > 0.]).float()

        inlier_bins = torch.linspace(0, 0.002, steps = 100, device = dists.device)[None]
        batch_metrics = {
            "inlier_counts": (dists[...,None] < inlier_bins).float().mean(dim=0),
            "percent_inliers_at_1": (dists < 0.02).float().mean(),
            "percent_inliers_at_05": (dists < 0.01).float().mean(),
            "percent_inliers_at_025": (dists < 0.005).float().mean(),
            "percent_inliers_at_01": (dists < 0.002).float().mean(),
            "percent_inliers_at_005": (dists < 0.001).float().mean(),
        }
        # Running sum weighted by 1/N; benchmark() rescales by the number of
        # batches actually seen.
        for name, value in batch_metrics.items():
            self.tracked_metrics[name] = self.tracked_metrics.get(name, 0) + 1/self.N * value

    def benchmark(self, detector):
        """Run the detector over the sampled pairs, plot and print the inlier rates.

        Args:
            detector: Object exposing ``detect(batch, num_keypoints=...)``
                whose result has a ``"keypoints"`` entry; the first
                ``batch_size`` entries are taken as image A, the rest as B.

        Side effects:
            Writes the inlier curve to ``vis/inlier_counts`` and prints every
            tracked metric whose name contains ``"percent"``.
        """
        import os
        import matplotlib.pyplot as plt
        import numpy as np
        from tqdm import tqdm

        self.tracked_metrics = {}
        print("Evaluating percent inliers...")
        num_batches = 0
        for idx, batch in tqdm(enumerate(self.dataloader), mininterval = 10.):
            num_batches = idx + 1
            batch = to_cuda(batch)
            outputs = detector.detect(batch, num_keypoints = self.num_keypoints)
            keypoints_A, keypoints_B = outputs["keypoints"][:self.batch_size], outputs["keypoints"][self.batch_size:]
            if isinstance(outputs["keypoints"], (tuple, list)):
                keypoints_A, keypoints_B = torch.stack(keypoints_A), torch.stack(keypoints_B)
            outputs = {"keypoints_A": keypoints_A, "keypoints_B": keypoints_B}
            self.compute_batch_metrics(outputs, batch)
        if "inlier_counts" not in self.tracked_metrics:
            # Empty loader, or no batch produced a valid correspondence.
            print("No valid keypoints were evaluated; nothing to report.")
            return
        plt.plot(torch.linspace(0, 0.002, steps = 100), self.tracked_metrics["inlier_counts"].cpu())
        x = np.linspace(0,0.002, 100)
        sigma = 0.52 * 2 / 512
        # Reference curve 1 - exp(-x^2 / (2 sigma^2)) drawn next to the measured one.
        reference_curve = 1 - np.exp(-x**2 / (2*sigma**2))
        plt.plot(x, reference_curve)
        os.makedirs("vis", exist_ok=True)
        plt.savefig("vis/inlier_counts")
        for name, metric in self.tracked_metrics.items():
            if "percent" in name:
                print(name, metric.item() * self.N / num_batches)
third_party/DeDoDe/DeDoDe/checkpoint.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from torch.nn.parallel.data_parallel import DataParallel
4
+ from torch.nn.parallel.distributed import DistributedDataParallel
5
+ import gc
6
+
7
+ import DeDoDe
8
+
9
class CheckPoint:
    """Save and restore training state in a single ``<dir><name>_latest.pth`` file.

    The path is built by plain string concatenation, so ``dir`` must end with
    a path separator when it denotes a directory.
    """

    def __init__(self, dir=None, name="tmp"):
        """Remember the target location and create the directory if needed.

        Args:
            dir: Directory prefix of the checkpoint file. ``None`` (which used
                to crash in ``os.makedirs``) or ``""`` places the file in the
                current working directory.
            name: File-name stem placed in front of ``_latest.pth``.
        """
        self.name = name
        self.dir = "" if dir is None else dir
        if self.dir:
            os.makedirs(self.dir, exist_ok=True)

    def _latest_path(self):
        """Return the checkpoint path shared by ``save`` and ``load``."""
        return self.dir + self.name + "_latest.pth"

    def save(
        self,
        model,
        optimizer,
        lr_scheduler,
        n,
    ):
        """Write model, optimizer, scheduler and step counter to disk.

        Only the process with ``DeDoDe.RANK == 0`` writes. A
        ``DataParallel``/``DistributedDataParallel`` wrapper is unwrapped
        first so the stored keys belong to the inner module.
        """
        if DeDoDe.RANK != 0:
            return
        assert model is not None
        if isinstance(model, (DataParallel, DistributedDataParallel)):
            model = model.module
        states = {
            "model": model.state_dict(),
            "n": n,
            "optimizer": optimizer.state_dict(),
            "lr_scheduler": lr_scheduler.state_dict(),
        }
        torch.save(states, self._latest_path())
        print(f"Saved states {list(states.keys())}, at step {n}")

    def load(
        self,
        model,
        optimizer,
        lr_scheduler,
        n,
    ):
        """Restore whatever the checkpoint file contains, if it exists.

        The file is read only when it exists and ``DeDoDe.RANK == 0``;
        otherwise the arguments are returned untouched. A falsy stored step
        keeps the passed-in ``n``. A failure to restore the optimizer is
        reported and skipped.

        Returns:
            Tuple ``(model, optimizer, lr_scheduler, n)``.
        """
        path = self._latest_path()
        if os.path.exists(path) and DeDoDe.RANK == 0:
            states = torch.load(path)
            if "model" in states:
                model.load_state_dict(states["model"])
            if "n" in states:
                n = states["n"] if states["n"] else n
            if "optimizer" in states:
                try:
                    optimizer.load_state_dict(states["optimizer"])
                except Exception as e:
                    print(f"Failed to load states for optimizer, with error {e}")
            if "lr_scheduler" in states:
                lr_scheduler.load_state_dict(states["lr_scheduler"])
            print(f"Loaded states {list(states.keys())}, at step {n}")
            # Release the loaded tensors before training continues.
            del states
            gc.collect()
            torch.cuda.empty_cache()
        return model, optimizer, lr_scheduler, n
third_party/DeDoDe/DeDoDe/datasets/__init__.py ADDED
File without changes
third_party/DeDoDe/DeDoDe/datasets/megadepth.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PIL import Image
3
+ import h5py
4
+ import numpy as np
5
+ import torch
6
+ import torchvision.transforms.functional as tvf
7
+ from tqdm import tqdm
8
+
9
+ from DeDoDe.utils import get_depth_tuple_transform_ops, get_tuple_transform_ops
10
+ import DeDoDe
11
+ from DeDoDe.utils import *
12
+
13
class MegadepthScene:
    """Dataset of image pairs from one MegaDepth scene.

    Each item bundles the two resized images and depth maps, rescaled
    intrinsics, poses, relative pose, 2D detections and 3D tracks.
    """

    def __init__(
        self,
        data_root,
        scene_info,
        ht=512,
        wt=512,
        min_overlap=0.0,
        max_overlap=1.0,
        shake_t=0,
        scene_info_detections=None,
        scene_info_detections3D=None,
        normalize=True,
        max_num_pairs = 100_000,
        scene_name = None,
        use_horizontal_flip_aug = False,
        grayscale = False,
        clahe = False,
    ) -> None:
        """Select the usable pairs and build the image/depth transforms.

        Args:
            data_root: Root directory that image and depth paths are joined to.
            scene_info: Mapping with ``image_paths``, ``depth_paths``,
                ``intrinsics``, ``poses``, ``pairs`` and ``overlaps``.
            ht, wt: Target height and width after resizing.
            min_overlap, max_overlap: Pairs are kept when their overlap lies
                strictly between these bounds.
            shake_t: Maximum absolute translation in pixels of the random shake.
            scene_info_detections: Per-image 2D detections, indexed by image
                index; the last column is used as an index into the 3D tracks.
            scene_info_detections3D: Array of 3D tracks.
            normalize: Forwarded to the image transform.
            max_num_pairs: Larger scenes are randomly subsampled to this size.
            scene_name: Scene file name; its stem is used in ``self.scene_name``.
            use_horizontal_flip_aug: Flip both images with probability 0.5.
            grayscale: Average the colour channels of both images.
            clahe: Forwarded to the image transform.
        """
        self.data_root = data_root
        self.scene_name = os.path.splitext(scene_name)[0]+f"_{min_overlap}_{max_overlap}"
        self.image_paths = scene_info["image_paths"]
        self.depth_paths = scene_info["depth_paths"]
        self.intrinsics = scene_info["intrinsics"]
        self.poses = scene_info["poses"]
        self.pairs = scene_info["pairs"]
        self.overlaps = scene_info["overlaps"]
        threshold = (self.overlaps > min_overlap) & (self.overlaps < max_overlap)
        self.pairs = self.pairs[threshold]
        self.overlaps = self.overlaps[threshold]
        self.detections = scene_info_detections
        self.tracks3D = scene_info_detections3D
        if len(self.pairs) > max_num_pairs:
            pairinds = np.random.choice(
                np.arange(0, len(self.pairs)), max_num_pairs, replace=False
            )
            self.pairs = self.pairs[pairinds]
            self.overlaps = self.overlaps[pairinds]
        self.im_transform_ops = get_tuple_transform_ops(
            resize=(ht, wt), normalize=normalize, clahe = clahe,
        )
        self.depth_transform_ops = get_depth_tuple_transform_ops(
            resize=(ht, wt), normalize=False
        )
        self.wt, self.ht = wt, ht
        self.shake_t = shake_t
        self.use_horizontal_flip_aug = use_horizontal_flip_aug
        self.grayscale = grayscale

    def load_im(self, im_B, crop=None):
        """Open the image at path ``im_B``; ``crop`` is unused."""
        im = Image.open(im_B)
        return im

    def horizontal_flip(self, im_A, im_B, depth_A, depth_B, K_A, K_B):
        """Mirror images and depths along the last axis and update the intrinsics.

        The intrinsics are left-multiplied by a matrix mapping ``x`` to
        ``self.wt - x``.
        """
        im_A = im_A.flip(-1)
        im_B = im_B.flip(-1)
        depth_A, depth_B = depth_A.flip(-1), depth_B.flip(-1)
        flip_mat = torch.tensor([[-1, 0, self.wt],[0,1,0],[0,0,1.]]).to(K_A.device)
        K_A = flip_mat@K_A
        K_B = flip_mat@K_B

        return im_A, im_B, depth_A, depth_B, K_A, K_B

    def _hflip_detections(self, detections, num_valid):
        """Mirror the x coordinate of the first ``num_valid`` rows about ``self.wt``.

        Rows past ``num_valid`` are padding (filled with -1) and stay untouched.
        """
        detections[:num_valid, 0] = self.wt - detections[:num_valid, 0]
        return detections

    def load_depth(self, depth_ref, crop=None):
        """Read the ``"depth"`` dataset of an HDF5 file into a tensor; ``crop`` is unused."""
        # Copy the data out and close the file so handles do not pile up.
        with h5py.File(depth_ref, "r") as depth_file:
            depth = np.array(depth_file["depth"])
        return torch.from_numpy(depth)

    def __len__(self):
        return len(self.pairs)

    def scale_intrinsic(self, K, wi, hi):
        """Rescale intrinsics ``K`` from an image of size ``(wi, hi)`` to the target size."""
        sx, sy = self.wt / wi, self.ht / hi
        sK = torch.tensor([[sx, 0, 0], [0, sy, 0], [0, 0, 1]])
        return sK @ K

    def scale_detections(self, detections, wi, hi):
        """Rescale ``(x, y)`` detections from an image of size ``(wi, hi)`` to the target size."""
        sx, sy = self.wt / wi, self.ht / hi
        return detections * torch.tensor([[sx,sy]])

    def rand_shake(self, *things):
        """Translate all ``things`` by one shared random integer offset.

        Returns:
            ``(shifted_things, t)`` where ``t`` holds the two offsets drawn
            from ``[-shake_t, shake_t]``.
        """
        t = np.random.choice(range(-self.shake_t, self.shake_t + 1), size=(2))
        return [
            tvf.affine(thing, angle=0.0, translate=list(t), scale=1.0, shear=[0.0, 0.0])
            for thing in things
        ], t

    def tracks_to_detections(self, tracks3D, pose, intrinsics, H, W):
        """Project 3D tracks into an image.

        Tracks are transformed with the rotation and translation parts of
        ``pose``, projected with ``intrinsics`` and dehomogenised.

        Returns:
            ``(pixel_coords, legit)``. ``legit`` is true where both
            coordinates are positive, ``x < W - 1``, ``y < H - 1`` and every
            component of the 3D track is non-zero.
        """
        tracks3D = tracks3D.double()
        intrinsics = intrinsics.double()
        bearing_vectors = pose[...,:3,:3] @ tracks3D.mT + pose[...,:3,3:]
        hom_pixel_coords = (intrinsics @ bearing_vectors).mT
        pixel_coords = hom_pixel_coords[...,:2] / (hom_pixel_coords[...,2:]+1e-12)
        legit_detections = (pixel_coords > 0).prod(dim = -1) * (pixel_coords[...,0] < W - 1) * (pixel_coords[...,1] < H - 1) * (tracks3D != 0).prod(dim=-1)
        return pixel_coords.float(), legit_detections.bool()

    def __getitem__(self, pair_idx):
        """Load and preprocess pair ``pair_idx``.

        On any exception the failure is printed and a random pair of the same
        scene is returned instead.
        """
        try:
            # Intrinsics at the original image size.
            idx1, idx2 = self.pairs[pair_idx]
            K1 = torch.tensor(self.intrinsics[idx1].copy(), dtype=torch.float).reshape(3, 3)
            K2 = torch.tensor(self.intrinsics[idx2].copy(), dtype=torch.float).reshape(3, 3)

            # Relative pose from camera 1 to camera 2.
            T1 = self.poses[idx1]
            T2 = self.poses[idx2]
            T_1to2 = torch.tensor(np.matmul(T2, np.linalg.inv(T1)), dtype=torch.float)[
                :4, :4
            ]  # (4, 4)

            # Load images and depth maps.
            im_A, im_B = self.image_paths[idx1], self.image_paths[idx2]
            depth1, depth2 = self.depth_paths[idx1], self.depth_paths[idx2]
            im_A_ref = os.path.join(self.data_root, im_A)
            im_B_ref = os.path.join(self.data_root, im_B)
            depth_A_ref = os.path.join(self.data_root, depth1)
            depth_B_ref = os.path.join(self.data_root, depth2)
            im_A = self.load_im(im_A_ref)
            im_B = self.load_im(im_B_ref)
            depth_A = self.load_depth(depth_A_ref)
            depth_B = self.load_depth(depth_B_ref)

            W_A, H_A = im_A.width, im_A.height
            W_B, H_B = im_B.width, im_B.height

            detections2D_A = self.detections[idx1]
            detections2D_B = self.detections[idx2]

            # Fixed-size buffers: at most K detections per image, padded with
            # zeros (tracks) and -1 (2D detections).
            K = 10000
            num_A = min(len(detections2D_A), K)
            num_B = min(len(detections2D_B), K)
            tracks3D_A = torch.zeros(K,3)
            tracks3D_B = torch.zeros(K,3)
            tracks3D_A[:len(detections2D_A)] = torch.tensor(self.tracks3D[detections2D_A[:K,-1].astype(np.int32)])
            tracks3D_B[:len(detections2D_B)] = torch.tensor(self.tracks3D[detections2D_B[:K,-1].astype(np.int32)])

            # Intrinsics follow the resize to (wt, ht).
            K1 = self.scale_intrinsic(K1, W_A, H_A)
            K2 = self.scale_intrinsic(K2, W_B, H_B)

            # Resize/normalise, then apply the random shake.
            im_A, im_B = self.im_transform_ops((im_A, im_B))
            depth_A, depth_B = self.depth_transform_ops(
                (depth_A[None, None], depth_B[None, None])
            )
            [im_A, depth_A], t_A = self.rand_shake(im_A, depth_A)
            [im_B, depth_B], t_B = self.rand_shake(im_B, depth_B)

            detections_A = -torch.ones(K,2)
            detections_B = -torch.ones(K,2)
            detections_A[:len(self.detections[idx1])] = self.scale_detections(torch.tensor(detections2D_A[:K,:2]), W_A, H_A) + t_A
            detections_B[:len(self.detections[idx2])] = self.scale_detections(torch.tensor(detections2D_B[:K,:2]), W_B, H_B) + t_B

            # The shake moves the principal point by the same offset.
            K1[:2, 2] += t_A
            K2[:2, 2] += t_B

            if self.use_horizontal_flip_aug:
                if np.random.rand() > 0.5:
                    im_A, im_B, depth_A, depth_B, K1, K2 = self.horizontal_flip(im_A, im_B, depth_A, depth_B, K1, K2)
                    # Mirror the detections consistently with the intrinsics.
                    # The previous code used an undefined ``W`` here, so every
                    # flip raised and the sample was replaced by a random one.
                    detections_A = self._hflip_detections(detections_A, num_A)
                    detections_B = self._hflip_detections(detections_B, num_B)

            if DeDoDe.DEBUG_MODE:
                tensor_to_pil(im_A[0], unnormalize=True).save(
                    f"vis/im_A.jpg")
                tensor_to_pil(im_B[0], unnormalize=True).save(
                    f"vis/im_B.jpg")
            if self.grayscale:
                im_A = im_A.mean(dim=-3,keepdim=True)
                im_B = im_B.mean(dim=-3,keepdim=True)
            data_dict = {
                "im_A": im_A,
                "im_A_identifier": self.image_paths[idx1].split("/")[-1].split(".jpg")[0],
                "im_B": im_B,
                "im_B_identifier": self.image_paths[idx2].split("/")[-1].split(".jpg")[0],
                "im_A_depth": depth_A[0, 0],
                "im_B_depth": depth_B[0, 0],
                "pose_A": T1,
                "pose_B": T2,
                "detections_A": detections_A,
                "detections_B": detections_B,
                "tracks3D_A": tracks3D_A,
                "tracks3D_B": tracks3D_B,
                "K1": K1,
                "K2": K2,
                "T_1to2": T_1to2,
                "im_A_path": im_A_ref,
                "im_B_path": im_B_ref,
            }
        except Exception as e:
            # Best effort: fall back to another pair of the same scene.
            print(e)
            print(f"Failed to load image pair {self.pairs[pair_idx]}")
            print("Loading a random pair in scene instead")
            rand_ind = np.random.choice(range(len(self)))
            return self[rand_ind]
        return data_dict
211
+
212
+
213
class MegadepthBuilder:
    """Create ``MegadepthScene`` datasets from the preprocessed scene-info files."""

    def __init__(self, data_root="data/megadepth", loftr_ignore=True, imc21_ignore = True) -> None:
        """List the available scenes and set up the split definitions.

        Args:
            data_root: MegaDepth root; scene files are read from its
                ``prep_scene_info`` sub-directory.
            loftr_ignore: Skip the scenes listed in ``loftr_ignore_scenes``.
            imc21_ignore: Skip the scenes listed in ``imc21_scenes``.
        """
        self.data_root = data_root
        self.scene_info_root = os.path.join(data_root, "prep_scene_info")
        self.all_scenes = os.listdir(self.scene_info_root)
        self.test_scenes = ["0017.npy", "0004.npy", "0048.npy", "0013.npy"]
        # LoFTR did the D2-Net preprocessing differently than we did and ended
        # up with more ignored scenes; those can optionally be ignored too.
        self.loftr_ignore_scenes = set(['0121.npy', '0133.npy', '0168.npy', '0178.npy', '0229.npy', '0349.npy', '0412.npy', '0430.npy', '0443.npy', '1001.npy', '5014.npy', '5015.npy', '5016.npy'])
        self.imc21_scenes = set(['0008.npy', '0019.npy', '0021.npy', '0024.npy', '0025.npy', '0032.npy', '0063.npy', '1589.npy'])
        self.test_scenes_loftr = ["0015.npy", "0022.npy"]
        self.loftr_ignore = loftr_ignore
        self.imc21_ignore = imc21_ignore

    def build_scenes(self, split="train", min_overlap=0.0, scene_names = None, **kwargs):
        """Build one ``MegadepthScene`` per scene of the requested split.

        Args:
            split: ``"train"``, ``"train_loftr"``, ``"test"``,
                ``"test_loftr"`` or ``"custom"``.
            min_overlap: Forwarded to every ``MegadepthScene``.
            scene_names: Scene file names; required for (and only used by)
                the ``"custom"`` split.
            **kwargs: Forwarded to every ``MegadepthScene``.

        Returns:
            List of ``MegadepthScene`` objects. Ignored scenes and names
            without ``".npy"`` are skipped.

        Raises:
            ValueError: For an unknown split, or for ``"custom"`` without
                ``scene_names``.
        """
        # The train splits come from a set difference; sort them so the scene
        # order does not depend on string hashing and is the same in every
        # process.
        if split == "train":
            scene_names = sorted(set(self.all_scenes) - set(self.test_scenes))
        elif split == "train_loftr":
            scene_names = sorted(set(self.all_scenes) - set(self.test_scenes_loftr))
        elif split == "test":
            scene_names = self.test_scenes
        elif split == "test_loftr":
            scene_names = self.test_scenes_loftr
        elif split == "custom":
            if scene_names is None:
                raise ValueError("Split custom requires scene_names")
        else:
            raise ValueError(f"Split {split} not available")
        scenes = []
        for scene_name in tqdm(scene_names):
            if self.loftr_ignore and scene_name in self.loftr_ignore_scenes:
                continue
            if self.imc21_ignore and scene_name in self.imc21_scenes:
                continue
            if ".npy" not in scene_name:
                continue
            # NOTE(review): allow_pickle=True unpickles file contents; only
            # use trusted scene-info files.
            scene_info = np.load(
                os.path.join(self.scene_info_root, scene_name), allow_pickle=True
            ).item()
            scene_info_detections = np.load(
                os.path.join(self.scene_info_root, "detections", f"detections_{scene_name}"), allow_pickle=True
            ).item()
            scene_info_detections3D = np.load(
                os.path.join(self.scene_info_root, "detections3D", f"detections3D_{scene_name}"), allow_pickle=True
            )

            scenes.append(
                MegadepthScene(
                    self.data_root, scene_info, scene_info_detections = scene_info_detections, scene_info_detections3D = scene_info_detections3D, min_overlap=min_overlap,scene_name = scene_name, **kwargs
                )
            )
        return scenes

    def weight_scenes(self, concat_dataset, alpha=0.5):
        """Return one sampling weight per item of ``concat_dataset``.

        Every item of a sub-dataset with ``n`` items gets weight
        ``1 / n**alpha``; the weights of all sub-datasets are concatenated in
        the order of ``concat_dataset.datasets``.
        """
        sizes = [len(dataset) for dataset in concat_dataset.datasets]
        return torch.cat([torch.ones(n) / n**alpha for n in sizes])
third_party/DeDoDe/DeDoDe/decoder.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torchvision.models as tvm
4
+
5
+
6
class Decoder(nn.Module):
    """Dispatch features to a per-scale sub-network and split its output.

    ``layers`` is a mapping from a scale key to a module. The first
    ``num_prototypes`` output channels of that module are returned as logits,
    the remaining channels as context for the next scale.
    """

    def __init__(self, layers, *args, super_resolution = False, num_prototypes = 1, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.layers = layers
        # Scale keys in the mapping's own iteration order.
        self.scales = self.layers.keys()
        self.super_resolution = super_resolution
        self.num_prototypes = num_prototypes

    def forward(self, features, context = None, scale = None):
        """Run the sub-network registered for ``scale``.

        When ``context`` is given it is concatenated to ``features`` along the
        channel dimension before the sub-network is applied.

        Returns:
            ``(logits, context)``: the output split along dim 1 at
            ``num_prototypes``.
        """
        if context is None:
            net_input = features
        else:
            net_input = torch.cat((features, context), dim = 1)
        prediction = self.layers[scale](net_input)
        split_at = self.num_prototypes
        return prediction[:, :split_at], prediction[:, split_at:]
19
+
20
class ConvRefiner(nn.Module):
    """Small convolutional head: a 1x1 stem, a stack of hidden blocks and a 1x1 output conv.

    Each block is ``Conv2d -> norm -> ReLU -> 1x1 Conv2d``. The whole forward
    pass runs under ``torch.autocast("cuda", ...)`` controlled by ``amp`` and
    ``amp_dtype``.
    """

    def __init__(
        self,
        in_dim=6,
        hidden_dim=16,
        out_dim=2,
        dw=True,
        kernel_size=5,
        hidden_blocks=5,
        amp = True,
        residual = False,
        amp_dtype = torch.float16,
    ):
        super().__init__()
        # Stem: pointwise (1x1), never depthwise.
        self.block1 = self.create_block(in_dim, hidden_dim, dw=False, kernel_size=1)
        # Hidden stack, built in order so parameter names stay
        # hidden_blocks.<i>.<j>.*
        stack = []
        for _ in range(hidden_blocks):
            stack.append(
                self.create_block(hidden_dim, hidden_dim, dw=dw, kernel_size=kernel_size)
            )
        self.hidden_blocks = nn.Sequential(*stack)
        self.out_conv = nn.Conv2d(hidden_dim, out_dim, 1, 1, 0)
        self.amp = amp
        self.amp_dtype = amp_dtype
        self.residual = residual

    def create_block(
        self,
        in_dim,
        out_dim,
        dw=True,
        kernel_size=5,
        bias = True,
        norm_type = nn.BatchNorm2d,
    ):
        """Build one ``Conv2d -> norm -> ReLU -> 1x1 Conv2d`` block.

        With ``dw=True`` the first convolution is grouped with
        ``groups=in_dim`` (depthwise), which requires ``out_dim`` to be a
        multiple of ``in_dim``.
        """
        if dw:
            assert (
                out_dim % in_dim == 0
            ), "outdim must be divisible by indim for depthwise"
            groups = in_dim
        else:
            groups = 1
        spatial_conv = nn.Conv2d(
            in_dim,
            out_dim,
            kernel_size=kernel_size,
            stride=1,
            padding=kernel_size // 2,
            groups=groups,
            bias=bias,
        )
        # BatchNorm2d takes the channel count positionally; any other norm
        # type is called with the keyword ``num_channels``.
        if norm_type is nn.BatchNorm2d:
            norm = norm_type(out_dim)
        else:
            norm = norm_type(num_channels = out_dim)
        return nn.Sequential(
            spatial_conv,
            norm,
            nn.ReLU(inplace=True),
            nn.Conv2d(out_dim, out_dim, 1, 1, 0),
        )

    def forward(self, feats):
        """Apply stem, hidden stack (optionally with a skip from the stem) and output conv."""
        # Unpacking doubles as a check that the input is 4-dimensional.
        batch, channels, height, width = feats.shape
        with torch.autocast("cuda", enabled=self.amp, dtype = self.amp_dtype):
            stem = self.block1(feats)
            refined = self.hidden_blocks(stem)
            if self.residual:
                # Skip connection, rescaled by the constant 1.4.
                refined = (refined + stem)/1.4
            return self.out_conv(refined)
third_party/DeDoDe/DeDoDe/descriptors/__init__.py ADDED
File without changes
third_party/DeDoDe/DeDoDe/descriptors/dedode_descriptor.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ import torch.nn as nn
4
+ import torchvision.models as tvm
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+
8
class DeDoDeDescriptor(nn.Module):
    """Dense descriptor network: an encoder followed by a coarse-to-fine decoder."""

    def __init__(self, encoder, decoder, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.encoder = encoder
        self.decoder = decoder
        import torchvision.transforms as transforms
        self.normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def forward(
        self,
        batch,
    ):
        """Compute the dense description grid.

        Args:
            batch: Dict with either ``"im_A"`` and ``"im_B"`` (concatenated
                along dim 0) or a single ``"image"`` entry.

        Returns:
            ``{"description_grid": tensor}`` at the resolution of the last
            processed feature map.
        """
        if "im_A" in batch:
            images = torch.cat((batch["im_A"], batch["im_B"]))
        else:
            images = batch["image"]
        features, sizes = self.encoder(images)
        descriptor = 0
        context = None
        scales = self.decoder.scales
        # Walk the encoder features in reverse order, accumulating the
        # per-scale descriptor deltas.
        for idx, (feature_map, scale) in enumerate(zip(reversed(features), scales)):
            delta_descriptor, context = self.decoder(feature_map, scale = scale, context = context)
            descriptor = descriptor + delta_descriptor
            if idx < len(scales) - 1:
                # Resize to the spatial size of the next feature map.
                size = sizes[-(idx+2)]
                descriptor = F.interpolate(descriptor, size = size, mode = "bilinear", align_corners = False)
                context = F.interpolate(context, size = size, mode = "bilinear", align_corners = False)
        return {"description_grid" : descriptor}

    @torch.inference_mode()
    def describe_keypoints(self, batch, keypoints):
        """Sample the description grid at ``keypoints``.

        Switches the module to eval mode as a side effect. ``keypoints`` is
        passed to ``F.grid_sample`` as the sampling grid.

        Returns:
            ``{"descriptions": tensor}`` with the keypoint axis before the
            channel axis (the sampled result is transposed with ``.mT``).
        """
        self.train(False)
        description_grid = self.forward(batch)["description_grid"]
        described_keypoints = F.grid_sample(description_grid.float(), keypoints[:,None], mode = "bilinear", align_corners = False)[:,:,0].mT
        return {"descriptions": described_keypoints}

    def read_image(self, im_path, H = 560, W = 560):
        """Load an image, resize it to ``(H, W)``, normalize it and move it to CUDA.

        The image is converted to RGB first so that grayscale, palette and
        RGBA files yield the three channels the normalizer expects, and the
        file handle is closed once the pixels are loaded.

        Returns:
            Float tensor of shape ``(1, 3, H, W)`` on the CUDA device.
        """
        with Image.open(im_path) as im:
            rgb = im.convert("RGB").resize((W, H))
        pixels = torch.from_numpy(np.array(rgb)/255.).permute(2,0,1)
        return self.normalizer(pixels).cuda().float()[None]

    def describe_keypoints_from_path(self, im_path, keypoints, H = 768, W = 768):
        """Read the image at ``im_path`` and describe ``keypoints`` in it."""
        batch = {"image": self.read_image(im_path, H = H, W = W)}
        return self.describe_keypoints(batch, keypoints)
third_party/DeDoDe/DeDoDe/descriptors/descriptor_loss.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import math
4
+ import torch.nn.functional as F
5
+
6
+ from DeDoDe.utils import *
7
+ import DeDoDe
8
+
9
class DescriptorLoss(nn.Module):
    """Negative log-likelihood loss over mutual-nearest-neighbour keypoint matches.

    Keypoints come from ``detector``; ground-truth correspondences are obtained
    by warping them with depth and pose (``"im_A_depth"`` batches) or with a
    homography (``"Homog_A_to_B"`` batches).
    """

    def __init__(self, detector, num_keypoints = 5000, normalize_descriptions = False, inv_temp = 1, device = "cuda") -> None:
        super().__init__()
        self.detector = detector
        # Exponential moving averages of logged quantities.
        self.tracked_metrics = {}
        self.num_keypoints = num_keypoints
        self.normalize_descriptions = normalize_descriptions
        self.inv_temp = inv_temp

    def warp_from_depth(self, batch, kpts_A, kpts_B):
        """Warp keypoints in both directions with ``warp_kpts`` (depth, pose and intrinsics).

        Returns:
            ``((mask_A_to_B, kpts_A_to_B), (mask_B_to_A, kpts_B_to_A))``.
        """
        mask_A_to_B, kpts_A_to_B = warp_kpts(kpts_A,
                    batch["im_A_depth"],
                    batch["im_B_depth"],
                    batch["T_1to2"],
                    batch["K1"],
                    batch["K2"],)
        mask_B_to_A, kpts_B_to_A = warp_kpts(kpts_B,
                    batch["im_B_depth"],
                    batch["im_A_depth"],
                    batch["T_1to2"].inverse(),
                    batch["K2"],
                    batch["K1"],)
        return (mask_A_to_B, kpts_A_to_B), (mask_B_to_A, kpts_B_to_A)

    def warp_from_homog(self, batch, kpts_A, kpts_B):
        """Warp keypoints in both directions with the homography and its inverse.

        Returns the same structure as ``warp_from_depth`` with ``None`` masks.
        """
        kpts_A_to_B = homog_transform(batch["Homog_A_to_B"], kpts_A)
        kpts_B_to_A = homog_transform(batch["Homog_A_to_B"].inverse(), kpts_B)
        return (None, kpts_A_to_B), (None, kpts_B_to_A)

    def supervised_loss(self, outputs, batch):
        """Compute the matching loss for one batch.

        Args:
            outputs: Dict holding ``"description_grid"`` for the A and B
                images stacked along dim 0.
            batch: Dict holding either ``"im_A_depth"`` (plus pose and
                intrinsics) or ``"Homog_A_to_B"``.

        Returns:
            Mean negative log-likelihood of the ground-truth matches.

        Raises:
            ValueError: If ``batch`` holds neither supervision signal.
        """
        # Pick the warp up front so an unsupported batch fails with a clear
        # message before any detector work is done.
        if "im_A_depth" in batch:
            warp = self.warp_from_depth
        elif "Homog_A_to_B" in batch:
            warp = self.warp_from_homog
        else:
            raise ValueError('batch must contain either "im_A_depth" or "Homog_A_to_B"')

        kpts_A, kpts_B = self.detector.detect(batch, num_keypoints = self.num_keypoints)['keypoints'].clone().chunk(2)
        desc_grid_A, desc_grid_B = outputs["description_grid"].chunk(2)
        desc_A = F.grid_sample(desc_grid_A.float(), kpts_A[:,None], mode = "bilinear", align_corners = False)[:,:,0].mT
        desc_B = F.grid_sample(desc_grid_B.float(), kpts_B[:,None], mode = "bilinear", align_corners = False)[:,:,0].mT
        # The validity masks returned by the warp are not used by this loss.
        (_, kpts_A_to_B), (_, kpts_B_to_A) = warp(batch, kpts_A, kpts_B)

        with torch.no_grad():
            D_B = torch.cdist(kpts_A_to_B, kpts_B)
            D_A = torch.cdist(kpts_A, kpts_B_to_A)
            # Ground-truth matches: mutual nearest neighbours whose distance
            # is below 0.01 in both images.
            inds = torch.nonzero((D_B == D_B.min(dim=-1, keepdim = True).values)
                                 * (D_A == D_A.min(dim=-2, keepdim = True).values)
                                 * (D_B < 0.01)
                                 * (D_A < 0.01))

        logP_A_B = dual_log_softmax_matcher(desc_A, desc_B,
                                            normalize = self.normalize_descriptions,
                                            inv_temperature = self.inv_temp)
        neg_log_likelihood = -logP_A_B[inds[:,0], inds[:,1], inds[:,2]].mean()
        # Exponential moving average (factor 0.99) of the loss for logging.
        self.tracked_metrics["neg_log_likelihood"] = (0.99 * self.tracked_metrics.get("neg_log_likelihood", neg_log_likelihood.detach().item()) + 0.01 * neg_log_likelihood.detach().item())
        # Print the running average on roughly 1% of the calls.
        # NOTE(review): `np` is not imported by name in this file; presumably
        # it arrives through `from DeDoDe.utils import *` -- confirm.
        if np.random.rand() > 0.99:
            print(self.tracked_metrics["neg_log_likelihood"])
        return neg_log_likelihood

    def forward(self, outputs, batch):
        """Return the supervised descriptor loss for ``outputs`` and ``batch``."""
        losses = self.supervised_loss(outputs, batch)
        return losses
third_party/DeDoDe/DeDoDe/detectors/__init__.py ADDED
File without changes
third_party/DeDoDe/DeDoDe/detectors/dedode_detector.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ import torch.nn as nn
4
+ import torchvision.models as tvm
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+
8
+ from DeDoDe.utils import sample_keypoints, to_pixel_coords, to_normalized_coords
9
+
10
+
11
+
12
class DeDoDeDetector(nn.Module):
    """Keypoint detector: an encoder followed by a coarse-to-fine logit decoder."""

    def __init__(self, encoder, decoder, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.encoder = encoder
        self.decoder = decoder
        import torchvision.transforms as transforms
        self.normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def forward(
        self,
        batch,
    ):
        """Compute dense keypoint logits.

        Args:
            batch: Dict with either ``"im_A"`` and ``"im_B"`` (concatenated
                along dim 0) or a single ``"image"`` entry.

        Returns:
            ``{"keypoint_logits": float32 tensor}`` at the resolution of the
            last processed feature map.
        """
        if "im_A" in batch:
            images = torch.cat((batch["im_A"], batch["im_B"]))
        else:
            images = batch["image"]
        features, sizes = self.encoder(images)
        logits = 0
        context = None
        scales = ["8", "4", "2", "1"]
        # Walk the encoder features in reverse order, accumulating the
        # per-scale logit deltas.
        for idx, (feature_map, scale) in enumerate(zip(reversed(features), scales)):
            delta_logits, context = self.decoder(feature_map, context = context, scale = scale)
            # Cast to float32 before interpolating (original author's note:
            # bf16 has no F.interpolate support).
            logits = logits + delta_logits.float()
            if idx < len(scales) - 1:
                # Resize to the spatial size of the next feature map.
                size = sizes[-(idx+2)]
                logits = F.interpolate(logits, size = size, mode = "bicubic", align_corners = False)
                context = F.interpolate(context.float(), size = size, mode = "bilinear", align_corners = False)
        return {"keypoint_logits" : logits.float()}

    @torch.inference_mode()
    def detect(self, batch, num_keypoints = 10_000):
        """Detect ``num_keypoints`` keypoints per image.

        Switches the module to eval mode as a side effect. The logits are
        turned into one probability map per image (softmax over all channels
        and pixels, summed over channels) and handed to ``sample_keypoints``.

        Returns:
            ``{"keypoints": ..., "confidence": ...}`` as produced by
            ``sample_keypoints``.
        """
        self.train(False)
        keypoint_logits = self.forward(batch)["keypoint_logits"]
        B,K,H,W = keypoint_logits.shape
        keypoint_p = keypoint_logits.reshape(B, K*H*W).softmax(dim=-1).reshape(B, K, H*W).sum(dim=1)
        keypoints, confidence = sample_keypoints(keypoint_p.reshape(B,H,W),
                                  use_nms = False, sample_topk = True, num_samples = num_keypoints,
                                  return_scoremap=True, sharpen = False, upsample = False,
                                  increase_coverage=True)
        return {"keypoints": keypoints, "confidence": confidence}

    @torch.inference_mode()
    def detect_dense(self, batch):
        """Return the raw dense logits; switches the module to eval mode."""
        self.train(False)
        keypoint_logits = self.forward(batch)["keypoint_logits"]
        return {"dense_keypoint_logits": keypoint_logits}

    def read_image(self, im_path, H = 560, W = 560):
        """Load an image, resize it to ``(H, W)``, normalize it and move it to CUDA.

        The image is converted to RGB first so that grayscale, palette and
        RGBA files yield the three channels the normalizer expects, and the
        file handle is closed once the pixels are loaded.

        Returns:
            Float tensor of shape ``(1, 3, H, W)`` on the CUDA device.
        """
        with Image.open(im_path) as im:
            pil_im = im.convert("RGB").resize((W, H))
        standard_im = np.array(pil_im)/255.
        return self.normalizer(torch.from_numpy(standard_im).permute(2,0,1)).cuda().float()[None]

    def detect_from_path(self, im_path, num_keypoints = 30_000, H = 768, W = 768, dense = False):
        """Read the image at ``im_path`` and run ``detect`` (or ``detect_dense`` when ``dense``)."""
        batch = {"image": self.read_image(im_path, H = H, W = W)}
        if dense:
            return self.detect_dense(batch)
        else:
            return self.detect(batch, num_keypoints = num_keypoints)

    def to_pixel_coords(self, x, H, W):
        """Thin wrapper around ``DeDoDe.utils.to_pixel_coords``."""
        return to_pixel_coords(x, H, W)

    def to_normalized_coords(self, x, H, W):
        """Thin wrapper around ``DeDoDe.utils.to_normalized_coords``."""
        return to_normalized_coords(x, H, W)
third_party/DeDoDe/DeDoDe/detectors/loss.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import math
4
+
5
+ from DeDoDe.utils import *
6
+ import DeDoDe
7
+
8
class KeyPointLoss(nn.Module):
    """Training loss for the keypoint detector.

    Combines a joint negative log-likelihood between the logits of one image
    and the logits of the other image warped into it with a "matchability"
    prior, and tracks an inlier statistic for logging.
    """

    def __init__(self, smoothing_size = 1, use_max_logit = False, entropy_target = 80,
                 num_matches = 1024, jacobian_density_adjustment = False,
                 matchability_weight = 1, device = "cuda") -> None:
        super().__init__()
        # 1-D Gaussian smoothing kernel over [-1, 1], normalized to sum to 1.
        X = torch.linspace(-1,1,smoothing_size, device = device)
        G = (-X**2 / (2 *1/2**2)).exp()
        G = G/G.sum()
        self.use_max_logit = use_max_logit
        self.entropy_target = entropy_target
        # Stored with shape (1, 1, 1, smoothing_size). It is a plain tensor
        # attribute, not a registered buffer.
        self.smoothing_kernel = G[None, None, None,:]
        self.smoothing_size = smoothing_size
        # Exponential moving averages of logged quantities.
        self.tracked_metrics = {}
        self.center = None
        self.num_matches = num_matches
        self.jacobian_density_adjustment = jacobian_density_adjustment
        self.matchability_weight = matchability_weight

    def compute_consistency(self, logits_A, logits_B_to_A, mask = None):
        """Symmetrised KL divergence between the two masked softmax distributions."""

        masked_logits_A = torch.full_like(logits_A, -torch.inf)
        masked_logits_A[mask] = logits_A[mask]

        masked_logits_B_to_A = torch.full_like(logits_B_to_A, -torch.inf)
        masked_logits_B_to_A[mask] = logits_B_to_A[mask]

        log_p_A = masked_logits_A.log_softmax(dim=-1)[mask]
        log_p_B_to_A = masked_logits_B_to_A.log_softmax(dim=-1)[mask]

        return self.compute_jensen_shannon_div(log_p_A, log_p_B_to_A)

    def compute_joint_neg_log_likelihood(self, logits_A, logits_B_to_A, detections_A = None, detections_B_to_A = None, mask = None, device = "cuda", dtype = torch.float32, num_matches = None):
        """Cross-entropy of both distributions against a shared top-k target.

        Args:
            logits_A: Logits of shape ``(B, K, HW)``.
            logits_B_to_A: Logits of the other image warped into this one,
                same shape.
            detections_A, detections_B_to_A: Optional detection maps that are
                reshaped to ``(B, HW)`` and added to the target score with
                weight 50.
            mask: Boolean validity mask of shape ``(B, HW)``.
            device: Unused.
            dtype: Dtype the logits are cast to.
            num_matches: Target count; defaults to ``self.num_matches``.

        Returns:
            Sum of the two cross-entropies.
        """
        B, K, HW = logits_A.shape
        logits_A, logits_B_to_A = logits_A.to(dtype), logits_B_to_A.to(dtype)
        mask = mask[:,None].expand(B, K, HW).reshape(B, K*HW)
        log_p_B_to_A = self.masked_log_softmax(logits_B_to_A.reshape(B,K*HW), mask = mask)
        log_p_A = self.masked_log_softmax(logits_A.reshape(B,K*HW), mask = mask)
        log_p = log_p_A + log_p_B_to_A
        if detections_A is None:
            detections_A = torch.zeros_like(log_p_A)
        if detections_B_to_A is None:
            detections_B_to_A = torch.zeros_like(log_p_B_to_A)
        # NOTE(review): the reshapes to (B, HW) and the (B, K*HW) mask only
        # line up when K == 1 -- confirm that is the only supported case.
        detections_A = detections_A.reshape(B, HW)
        detections_A[~mask] = 0
        detections_B_to_A = detections_B_to_A.reshape(B, HW)
        detections_B_to_A[~mask] = 0
        log_p_target = log_p.detach() + 50*detections_A + 50*detections_B_to_A
        num_matches = self.num_matches if num_matches is None else num_matches
        # Threshold at the (B * num_matches)-th largest target score, taken
        # over the whole flattened batch.
        best_k = -(-log_p_target).flatten().kthvalue(k = B * num_matches, dim=-1).values
        p_target = (log_p_target > best_k[..., None]).float().reshape(B,K*HW)/num_matches
        return self.compute_cross_entropy(log_p_A[mask], p_target[mask]) + self.compute_cross_entropy(log_p_B_to_A[mask], p_target[mask])

    def compute_jensen_shannon_div(self, log_p, log_q):
        """Mean of ``KL(p, q)`` and ``KL(q, p)``."""
        return 1/2 * (self.compute_kl_div(log_p, log_q) + self.compute_kl_div(log_q, log_p))

    def compute_kl_div(self, log_p, log_q):
        """``sum(p * (log p - log q))`` over the last dimension."""
        return (log_p.exp()*(log_p-log_q)).sum(dim=-1)

    def masked_log_softmax(self, logits, mask):
        """Log-softmax over the last dimension with entries outside ``mask`` set to ``-inf``."""
        masked_logits = torch.full_like(logits, -torch.inf)
        masked_logits[mask] = logits[mask]
        log_p = masked_logits.log_softmax(dim=-1)
        return log_p

    def masked_softmax(self, logits, mask):
        """Softmax over the last dimension with entries outside ``mask`` set to ``-inf``."""
        masked_logits = torch.full_like(logits, -torch.inf)
        masked_logits[mask] = logits[mask]
        p = masked_logits.softmax(dim=-1)
        return p

    def compute_entropy(self, logits, mask = None):
        """Entropy of the masked softmax distribution, evaluated on the masked entries."""
        p = self.masked_softmax(logits, mask)[mask]
        log_p = self.masked_log_softmax(logits, mask)[mask]
        return -(log_p * p).sum(dim=-1)

    def compute_detection_img(self, detections, mask, B, H, W, device = "cuda"):
        """Rasterise detections into a ``(B, 1, H, W)`` image and blur it.

        ``detections[b][mask[b]]`` is truncated to integers and indexed as
        ``(column, row)``; each hit pixel is set to 1 before a separable
        5-tap Gaussian blur is applied.
        """
        kernel_size = 5
        X = torch.linspace(-2,2,kernel_size, device = device)
        G = (-X**2 / (2 * (1/2)**2)).exp() # half pixel std
        G = G/G.sum()
        det_smoothing_kernel = G[None, None, None,:]
        det_img = torch.zeros((B,1,H,W), device = device)
        for b in range(B):
            valid_detections = (detections[b][mask[b]]).int()
            det_img[b,0][valid_detections[:,1], valid_detections[:,0]] = 1
        # NOTE(review): in both passes the padding is applied to the axis the
        # kernel does NOT span (the (1, 5) kernel gets height padding, the
        # transposed (5, 1) kernel gets width padding). The output is still
        # (H, W), but the outermost columns are produced by padding rather
        # than by the blur. Left unchanged to preserve the training
        # behaviour -- confirm whether this is intended.
        det_img = F.conv2d(det_img, weight = det_smoothing_kernel, padding = (kernel_size//2, 0))
        det_img = F.conv2d(det_img, weight = det_smoothing_kernel.mT, padding = (0, kernel_size//2))
        return det_img

    def compute_cross_entropy(self, log_p_hat, p):
        """``-sum(p * log_p_hat)`` over the last dimension."""
        return -(log_p_hat * p).sum(dim=-1)

    def compute_matchability(self, keypoint_p, has_depth, B, K, H, W, device = "cuda"):
        """KL-style divergence between the smoothed keypoint distribution and the smoothed depth-validity map.

        Returns the cross-entropy of the prediction against the normalized
        ``has_depth`` map minus the entropy of that map.
        """
        # NOTE(review): the padding axes of these two convolutions are swapped
        # relative to the kernel orientation (compare with the has_depth pair
        # below). This has no effect for the default smoothing_size of 1.
        smooth_keypoint_p = F.conv2d(keypoint_p.reshape(B,1,H,W), weight = self.smoothing_kernel, padding = (self.smoothing_size//2,0))
        smooth_keypoint_p = F.conv2d(smooth_keypoint_p, weight = self.smoothing_kernel.mT, padding = (0,self.smoothing_size//2))
        log_p_hat = (smooth_keypoint_p+1e-8).log().reshape(B,H*W).log_softmax(dim=-1)
        smooth_has_depth = F.conv2d(has_depth.reshape(B,1,H,W), weight = self.smoothing_kernel, padding = (0,self.smoothing_size//2))
        smooth_has_depth = F.conv2d(smooth_has_depth, weight = self.smoothing_kernel.mT, padding = (self.smoothing_size//2,0)).reshape(B,H*W)
        p = smooth_has_depth/smooth_has_depth.sum(dim=-1,keepdim=True)
        return self.compute_cross_entropy(log_p_hat, p) - self.compute_cross_entropy((p+1e-12).log(), p)

    def tracks_to_detections(self, tracks3D, pose, intrinsics, H, W):
        """Project 3D tracks into the image and flag the ones inside it.

        Returns:
            ``(pixel_coords, legit)``: float pixel coordinates and a boolean
            mask that is true for projections strictly inside the image whose
            3D coordinates are all non-zero.
        """
        tracks3D = tracks3D.double()
        intrinsics = intrinsics.double()
        # NOTE(review): `pose` is not cast to double here; presumably callers
        # pass it as float64 already -- confirm.
        bearing_vectors = pose[:,:3,:3] @ tracks3D.mT + pose[:,:3,3:]
        hom_pixel_coords = (intrinsics @ bearing_vectors).mT
        pixel_coords = hom_pixel_coords[...,:2] / (hom_pixel_coords[...,2:]+1e-12)
        legit_detections = (pixel_coords > 0).prod(dim = -1) * (pixel_coords[...,0]  < W - 1) * (pixel_coords[...,1] < H - 1) * (tracks3D != 0).prod(dim=-1)
        return pixel_coords.float(), legit_detections.bool()

    def self_supervised_loss(self, outputs, batch):
        """Joint log-likelihood loss for homography-related image pairs."""
        keypoint_logits_A, keypoint_logits_B = outputs["keypoint_logits"].chunk(2)
        B, K, H, W = keypoint_logits_A.shape
        keypoint_logits_A = keypoint_logits_A.reshape(B, K, H*W)
        keypoint_logits_B = keypoint_logits_B.reshape(B, K, H*W)
        keypoint_logits = torch.cat((keypoint_logits_A, keypoint_logits_B))

        warp_A_to_B, mask_A_to_B = get_homog_warp(
            batch["Homog_A_to_B"], H, W
        )
        warp_B_to_A, mask_B_to_A = get_homog_warp(
            torch.linalg.inv(batch["Homog_A_to_B"]), H, W
        )
        # From here on B counts both directions (A->B and B->A).
        B = 2*B

        warp = torch.cat((warp_A_to_B, warp_B_to_A)).reshape(B, H*W, 4)
        mask = torch.cat((mask_A_to_B, mask_B_to_A)).reshape(B,H*W)

        # Sample the other image's logits at the warped coordinates (the last
        # two warp channels).
        keypoint_logits_backwarped = F.grid_sample(torch.cat((keypoint_logits_B, keypoint_logits_A)).reshape(B,K,H,W),
            warp[...,-2:].reshape(B,H,W,2).float(), align_corners = False, mode = "bicubic")

        keypoint_logits_backwarped = keypoint_logits_backwarped.reshape(B,K,H*W)
        joint_log_likelihood_loss = self.compute_joint_neg_log_likelihood(keypoint_logits, keypoint_logits_backwarped,
                                                                          mask = mask.bool(), num_matches = 5_000).mean()
        return joint_log_likelihood_loss

    def supervised_loss(self, outputs, batch):
        """Depth-supervised loss: joint log-likelihood plus weighted matchability.

        Also updates ``self.tracked_metrics["mega_percent_inliers"]`` and, on
        roughly 0.5% of the calls, writes debug images to the ``vis``
        directory.
        """
        keypoint_logits_A, keypoint_logits_B = outputs["keypoint_logits"].chunk(2)
        B, K, H, W = keypoint_logits_A.shape

        detections_A, detections_B = batch["detections_A"], batch["detections_B"]

        gt_warp_A_to_B, valid_mask_A_to_B = get_gt_warp(
            batch["im_A_depth"],
            batch["im_B_depth"],
            batch["T_1to2"],
            batch["K1"],
            batch["K2"],
            H=H,
            W=W,
        )
        gt_warp_B_to_A, valid_mask_B_to_A = get_gt_warp(
            batch["im_B_depth"],
            batch["im_A_depth"],
            batch["T_1to2"].inverse(),
            batch["K2"],
            batch["K1"],
            H=H,
            W=W,
        )
        keypoint_logits_A = keypoint_logits_A.reshape(B, K, H*W)
        keypoint_logits_B = keypoint_logits_B.reshape(B, K, H*W)
        keypoint_logits = torch.cat((keypoint_logits_A, keypoint_logits_B))

        # From here on B counts both directions (A->B and B->A).
        B = 2*B
        gt_warp = torch.cat((gt_warp_A_to_B, gt_warp_B_to_A))
        valid_mask = torch.cat((valid_mask_A_to_B, valid_mask_B_to_A))
        valid_mask = valid_mask.reshape(B,H*W)
        binary_mask = valid_mask == 1
        if self.jacobian_density_adjustment:
            j_logdet = jacobi_determinant(gt_warp.reshape(B,H,W,4), valid_mask.reshape(B,H,W).float())[:,None]
        else:
            j_logdet = 0

        detections = torch.cat((detections_A, detections_B))
        legit_detections = ((detections > 0).prod(dim = -1) * (detections[...,0] < W) * (detections[...,1] < H)).bool()
        det_imgs_A, det_imgs_B = self.compute_detection_img(detections, legit_detections, B, H, W).chunk(2)
        det_imgs = torch.cat((det_imgs_A, det_imgs_B))
        # Warp the other image's detection map and logits into this image.
        det_imgs_backwarped = F.grid_sample(torch.cat((det_imgs_B, det_imgs_A)).reshape(B,1,H,W),
            gt_warp[...,-2:].reshape(B,H,W,2).float(), align_corners = False, mode = "bicubic")

        keypoint_logits_backwarped = F.grid_sample(torch.cat((keypoint_logits_B, keypoint_logits_A)).reshape(B,K,H,W),
            gt_warp[...,-2:].reshape(B,H,W,2).float(), align_corners = False, mode = "bicubic")

        # Adjust for the viewpoint change by the log-Jacobian of the warp
        # (j_logdet is 0 unless jacobian_density_adjustment is enabled).
        keypoint_logits_backwarped = (keypoint_logits_backwarped + j_logdet).reshape(B,K,H*W)

        depth = F.interpolate(torch.cat((batch["im_A_depth"][:,None],batch["im_B_depth"][:,None]),dim=0), size = (H,W), mode = "bilinear", align_corners=False)
        has_depth = (depth > 0).float().reshape(B,H*W)

        joint_log_likelihood_loss = self.compute_joint_neg_log_likelihood(keypoint_logits, keypoint_logits_backwarped,
                                                                          mask = binary_mask, detections_A = det_imgs,
                                                                          detections_B_to_A = det_imgs_backwarped).mean()
        keypoint_p = keypoint_logits.reshape(B, K*H*W).softmax(dim=-1).reshape(B, K, H*W).sum(dim=1)
        matchability_loss = self.compute_matchability(keypoint_p, has_depth, B, K, H, W).mean()

        # Back to the per-direction batch size for the inlier statistic.
        B = B//2
        kpts_A = sample_keypoints(keypoint_p[:B].reshape(B,H,W),
                                use_nms = False, sample_topk = True, num_samples = 4*2048)
        kpts_B = sample_keypoints(keypoint_p[B:].reshape(B,H,W),
                                use_nms = False, sample_topk = True, num_samples = 4*2048)
        kpts_A_to_B = F.grid_sample(gt_warp_A_to_B[...,2:].float().permute(0,3,1,2), kpts_A[...,None,:],
                                    align_corners=False, mode = 'bilinear')[...,0].mT
        legit_A_to_B = F.grid_sample(valid_mask_A_to_B.reshape(B,1,H,W), kpts_A[...,None,:],
                                    align_corners=False, mode = 'bilinear')[...,0,:,0]
        # Fraction of validly warped A keypoints that land within 0.01 of
        # some B keypoint, tracked as an exponential moving average.
        percent_inliers = (torch.cdist(kpts_A_to_B, kpts_B).min(dim=-1).values[legit_A_to_B > 0] < 0.01).float().mean()
        self.tracked_metrics["mega_percent_inliers"] = (0.9 * self.tracked_metrics.get("mega_percent_inliers", percent_inliers) + 0.1 * percent_inliers)

        # Occasionally dump debug visualisations. matplotlib is imported only
        # here so that ordinary training steps do not depend on it.
        if torch.rand(1) > 0.995:
            keypoint_logits_A_to_B = keypoint_logits_backwarped[:B]
            import matplotlib.pyplot as plt
            import os
            os.makedirs("vis",exist_ok = True)
            for b in range(0, B, 2):
                plt.scatter(kpts_A_to_B[b,:,0].cpu(),-kpts_A_to_B[b,:,1].cpu(), s = 1)
                plt.scatter(kpts_B[b,:,0].cpu(),-kpts_B[b,:,1].cpu(), s = 1)
                plt.xlim(-1,1)
                plt.ylim(-1,1)
                plt.savefig(f"vis/keypoints_A_to_B_vs_B_{b}.png")
                plt.close()
                tensor_to_pil(keypoint_logits_A[b].reshape(1,H,W).expand(3,H,W).detach().cpu(),
                            autoscale = True).save(f"vis/logits_A_{b}.png")
                tensor_to_pil(keypoint_logits_B[b].reshape(1,H,W).expand(3,H,W).detach().cpu(),
                            autoscale = True).save(f"vis/logits_B_{b}.png")
                tensor_to_pil(keypoint_logits_A_to_B[b].reshape(1,H,W).expand(3,H,W).detach().cpu(),
                            autoscale = True).save(f"vis/logits_A_to_B{b}.png")
                tensor_to_pil(keypoint_logits_A[b].softmax(dim=-1).reshape(1,H,W).expand(3,H,W).detach().cpu(),
                            autoscale = True).save(f"vis/keypoint_p_A_{b}.png")
                tensor_to_pil(keypoint_logits_B[b].softmax(dim=-1).reshape(1,H,W).expand(3,H,W).detach().cpu(),
                            autoscale = True).save(f"vis/keypoint_p_B_{b}.png")
                tensor_to_pil(has_depth[b].reshape(1,H,W).expand(3,H,W).detach().cpu(), autoscale=True).save(f"vis/has_depth_A_{b}.png")
                tensor_to_pil(valid_mask_A_to_B[b].reshape(1,H,W).expand(3,H,W).detach().cpu(), autoscale=True).save(f"vis/valid_mask_A_to_B_{b}.png")
                tensor_to_pil(batch['im_A'][b], unnormalize=True).save(
                                    f"vis/im_A_{b}.jpg")
                tensor_to_pil(batch['im_B'][b], unnormalize=True).save(
                                    f"vis/im_B_{b}.jpg")
                plt.close()
        tot_loss = joint_log_likelihood_loss + self.matchability_weight * matchability_loss
        # Disabled debug output: torch.rand samples from [0, 1), so this
        # branch never runs. The draw is kept so the RNG stream seen by the
        # rest of training is unchanged.
        if torch.rand(1) > 1:
            print(f"Precent Inlier: {self.tracked_metrics.get('mega_percent_inliers', 0)}")
            print(f"{joint_log_likelihood_loss=} {matchability_loss=}")
            print(f"Total Loss: {tot_loss.item()}")
        return tot_loss

    def forward(self, outputs, batch):
        """Sum the loss over one or several network outputs.

        Batches with a ``"Homog_A_to_B"`` entry use the self-supervised loss,
        all others the depth-supervised loss.
        """

        if not isinstance(outputs, list):
            outputs = [outputs]
        losses = 0
        for output in outputs:
            if "Homog_A_to_B" in batch:
                losses = losses + self.self_supervised_loss(output, batch)
            else:
                losses = losses + self.supervised_loss(output, batch)
        return losses
third_party/DeDoDe/DeDoDe/encoder.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torchvision.models as tvm
4
+
5
+
6
class VGG19(nn.Module):
    """Feature pyramid built from the first 40 feature layers of torchvision's ``vgg19_bn``.

    ``forward`` returns the activation (and its spatial size) seen right
    before every max-pooling layer.
    """

    def __init__(self, pretrained=False, amp = False, amp_dtype = torch.float16) -> None:
        super().__init__()
        backbone = tvm.vgg19_bn(pretrained=pretrained)
        # Maxpool layers: 6, 13, 26, 39
        self.layers = nn.ModuleList(backbone.features[:40])
        self.amp = amp
        self.amp_dtype = amp_dtype

    def forward(self, x, **kwargs):
        """Return ``(feats, sizes)``: the inputs of each max-pool layer and their ``(H, W)`` sizes."""
        feats, sizes = [], []
        with torch.autocast("cuda", enabled=self.amp, dtype = self.amp_dtype):
            for layer in self.layers:
                if isinstance(layer, nn.MaxPool2d):
                    # Record the activation before it is downsampled.
                    feats.append(x)
                    sizes.append(x.shape[-2:])
                x = layer(x)
        return feats, sizes
24
+
25
class VGG(nn.Module):
    """Feature pyramid built from a batch-normalised torchvision VGG of selectable depth.

    Args:
        size: ``"11"``, ``"13"`` or ``"19"``; selects ``vgg11_bn``,
            ``vgg13_bn`` or ``vgg19_bn``.
        pretrained: Forwarded to the torchvision constructor.
        amp: Whether ``forward`` runs under CUDA autocast.
        amp_dtype: Autocast dtype.

    Raises:
        ValueError: If ``size`` is not one of the supported values.
    """

    def __init__(self, size = "19", pretrained=False, amp = False, amp_dtype = torch.float16) -> None:
        super().__init__()
        if size == "11":
            self.layers = nn.ModuleList(tvm.vgg11_bn(pretrained=pretrained).features[:22])
        elif size == "13":
            self.layers = nn.ModuleList(tvm.vgg13_bn(pretrained=pretrained).features[:28])
        elif size == "19":
            self.layers = nn.ModuleList(tvm.vgg19_bn(pretrained=pretrained).features[:40])
        else:
            # Without this, self.layers would stay unset and the failure
            # would only surface as an AttributeError in forward().
            raise ValueError(f'Unsupported VGG size {size!r}; expected "11", "13" or "19"')
        # Maxpool layers (vgg19_bn): 6, 13, 26, 39
        self.amp = amp
        self.amp_dtype = amp_dtype

    def forward(self, x, **kwargs):
        """Return ``(feats, sizes)``: the inputs of each max-pool layer and their ``(H, W)`` sizes."""
        with torch.autocast("cuda", enabled=self.amp, dtype = self.amp_dtype):
            feats = []
            sizes = []
            for layer in self.layers:
                if isinstance(layer, nn.MaxPool2d):
                    # Record the activation before it is downsampled.
                    feats.append(x)
                    sizes.append(x.shape[-2:])
                x = layer(x)
            return feats, sizes
third_party/DeDoDe/DeDoDe/matchers/__init__.py ADDED
File without changes
third_party/DeDoDe/DeDoDe/matchers/dual_softmax_matcher.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ import torch.nn as nn
4
+ import torchvision.models as tvm
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+ from DeDoDe.utils import dual_softmax_matcher, to_pixel_coords, to_normalized_coords
8
+
9
class DualSoftMaxMatcher(nn.Module):
    """Mutual-nearest-neighbour matcher on top of ``dual_softmax_matcher`` scores."""

    @torch.inference_mode()
    def match(self, keypoints_A, descriptions_A,
              keypoints_B, descriptions_B, P_A = None, P_B = None,
              normalize = False, inv_temp = 1, threshold = 0.0):
        """Match keypoints of A and B by their descriptions.

        When ``descriptions_A`` is a list, every image pair is matched on its
        own and the results are concatenated, with the list position added to
        the returned batch indices. ``P_A`` and ``P_B`` are accepted but not
        used.

        Returns:
            ``(matches_A, matches_B, batch_inds)``.
        """
        if isinstance(descriptions_A, list):
            found_A, found_B, found_inds = [], [], []
            image_pairs = zip(keypoints_A, descriptions_A, keypoints_B, descriptions_B)
            for image_idx, (k_A, d_A, k_B, d_B) in enumerate(image_pairs):
                m_A, m_B, m_inds = self.match(
                    k_A[None], d_A[None], k_B[None], d_B[None],
                    normalize = normalize, inv_temp = inv_temp, threshold = threshold,
                )
                found_A.append(m_A)
                found_B.append(m_B)
                # Each per-pair call sees a batch of one, so shift its
                # indices by the position in the list.
                found_inds.append(m_inds + image_idx)
            return torch.cat(found_A), torch.cat(found_B), torch.cat(found_inds)

        P = dual_softmax_matcher(descriptions_A, descriptions_B,
                                 normalize = normalize, inv_temperature=inv_temp,
                                 )
        # Keep pairs that are the maximum of both their row and their column
        # and whose score exceeds the threshold.
        best_in_row = P == P.max(dim=-1, keepdim = True).values
        best_in_col = P == P.max(dim=-2, keepdim = True).values
        inds = torch.nonzero(best_in_row * best_in_col * (P > threshold))
        batch_inds = inds[:,0]
        matches_A = keypoints_A[batch_inds, inds[:,1]]
        matches_B = keypoints_B[batch_inds, inds[:,2]]
        return matches_A, matches_B, batch_inds

    def to_pixel_coords(self, x_A, x_B, H_A, W_A, H_B, W_B):
        """Apply ``DeDoDe.utils.to_pixel_coords`` to both point sets."""
        pixels_A = to_pixel_coords(x_A, H_A, W_A)
        pixels_B = to_pixel_coords(x_B, H_B, W_B)
        return pixels_A, pixels_B

    def to_normalized_coords(self, x_A, x_B, H_A, W_A, H_B, W_B):
        """Apply ``DeDoDe.utils.to_normalized_coords`` to both point sets."""
        normalized_A = to_normalized_coords(x_A, H_A, W_A)
        normalized_B = to_normalized_coords(x_B, H_B, W_B)
        return normalized_A, normalized_B
third_party/DeDoDe/DeDoDe/model_zoo/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .dedode_models import dedode_detector_B, dedode_detector_L, dedode_descriptor_B
2
+
3
+
third_party/DeDoDe/DeDoDe/model_zoo/dedode_models.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from DeDoDe.detectors.dedode_detector import DeDoDeDetector
5
+ from DeDoDe.descriptors.dedode_descriptor import DeDoDeDescriptor
6
+ from DeDoDe.decoder import ConvRefiner, Decoder
7
+ from DeDoDe.encoder import VGG19, VGG
8
+
9
+
10
+
11
def dedode_detector_B(device = "cuda", weights = None):
    """Build the "B" DeDoDe detector and optionally load a state dict.

    Args:
        device: device the assembled model is moved to.
        weights: state dict passed to ``load_state_dict`` when not None.
    """
    NUM_PROTOTYPES = 1
    amp = True
    amp_dtype = torch.float16
    refiner_kwargs = dict(
        hidden_blocks=5,
        residual=True,
        amp=amp,
        amp_dtype=amp_dtype,
    )
    # Positional ConvRefiner arguments per scale, coarse to fine
    # (presumably in/hidden/out channel counts -- confirm against ConvRefiner).
    refiner_dims = {
        "8": (512, 512, 256 + NUM_PROTOTYPES),
        "4": (256 + 256, 256, 128 + NUM_PROTOTYPES),
        "2": (128 + 128, 64, 32 + NUM_PROTOTYPES),
        "1": (64 + 32, 32, 1 + NUM_PROTOTYPES),
    }
    conv_refiner = nn.ModuleDict(
        {scale: ConvRefiner(*dims, **refiner_kwargs) for scale, dims in refiner_dims.items()}
    )
    encoder = VGG19(pretrained=False, amp=amp, amp_dtype=amp_dtype)
    decoder = Decoder(conv_refiner)
    model = DeDoDeDetector(encoder=encoder, decoder=decoder).to(device)
    if weights is not None:
        model.load_state_dict(weights)
    return model
65
+
66
+
67
def dedode_detector_L(device = "cuda", weights = None):
    """Build the "L" DeDoDe detector and optionally load a state dict.

    Args:
        device: device the assembled model is moved to.
        weights: state dict passed to ``load_state_dict`` when not None.
    """
    NUM_PROTOTYPES = 1
    amp = True
    amp_dtype = torch.float16
    refiner_kwargs = dict(
        hidden_blocks=8,
        residual=True,
        amp=amp,
        amp_dtype=amp_dtype,
    )
    # Positional ConvRefiner arguments per scale, coarse to fine
    # (presumably in/hidden/out channel counts -- confirm against ConvRefiner).
    refiner_dims = {
        "8": (512, 512, 256 + NUM_PROTOTYPES),
        "4": (256 + 256, 256, 128 + NUM_PROTOTYPES),
        "2": (128 + 128, 128, 64 + NUM_PROTOTYPES),
        "1": (64 + 64, 64, 1 + NUM_PROTOTYPES),
    }
    conv_refiner = nn.ModuleDict(
        {scale: ConvRefiner(*dims, **refiner_kwargs) for scale, dims in refiner_dims.items()}
    )
    encoder = VGG19(pretrained=False, amp=amp, amp_dtype=amp_dtype)
    decoder = Decoder(conv_refiner)
    model = DeDoDeDetector(encoder=encoder, decoder=decoder).to(device)
    if weights is not None:
        model.load_state_dict(weights)
    return model
121
+
122
+
123
+
124
def dedode_descriptor_B(device = "cuda", weights = None):
    """Build the "B" DeDoDe descriptor and optionally load a state dict.

    Args:
        device: device the assembled model is moved to.
        weights: state dict passed to ``load_state_dict`` when not None.
    """
    NUM_PROTOTYPES = 256 # == descriptor size
    amp = True
    amp_dtype = torch.float16
    refiner_kwargs = dict(
        hidden_blocks=5,
        residual=True,
        amp=amp,
        amp_dtype=amp_dtype,
    )
    # Positional ConvRefiner arguments per scale, coarse to fine
    # (presumably in/hidden/out channel counts -- confirm against ConvRefiner).
    refiner_dims = {
        "8": (512, 512, 256 + NUM_PROTOTYPES),
        "4": (256 + 256, 256, 128 + NUM_PROTOTYPES),
        "2": (128 + 128, 64, 32 + NUM_PROTOTYPES),
        "1": (64 + 32, 32, 1 + NUM_PROTOTYPES),
    }
    conv_refiner = nn.ModuleDict(
        {scale: ConvRefiner(*dims, **refiner_kwargs) for scale, dims in refiner_dims.items()}
    )
    encoder = VGG(size="19", pretrained=False, amp=amp, amp_dtype=amp_dtype)
    decoder = Decoder(conv_refiner, num_prototypes=NUM_PROTOTYPES)
    model = DeDoDeDescriptor(encoder=encoder, decoder=decoder).to(device)
    if weights is not None:
        model.load_state_dict(weights)
    return model
third_party/DeDoDe/DeDoDe/train.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from tqdm import tqdm
3
+ from DeDoDe.utils import to_cuda
4
+
5
+
6
def train_step(train_batch, model, objective, optimizer, grad_scaler = None,**kwargs):
    """Run one optimisation step and return the model output and scalar loss.

    With a ``grad_scaler`` the loss is scaled before backward, gradients are
    unscaled and clipped to norm 0.01, and the scaler drives the optimizer
    step. Without one a plain backward/step is done (no clipping). Extra
    keyword arguments are accepted and ignored.
    """
    optimizer.zero_grad()
    out = model(train_batch)
    loss = objective(out, train_batch)
    if grad_scaler is None:
        loss.backward()
        optimizer.step()
    else:
        grad_scaler.scale(loss).backward()
        grad_scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.01)
        grad_scaler.step(optimizer)
        grad_scaler.update()
    return {"train_out": out, "train_loss": loss.item()}
20
+
21
+
22
def train_k_steps(
    n_0, k, dataloader, model, objective, optimizer, lr_scheduler, grad_scaler = None, progress_bar=True
):
    """Run ``k`` training steps numbered ``n_0 .. n_0 + k - 1``.

    ``dataloader`` must be an iterator (it is advanced with ``next``). The
    learning-rate scheduler is stepped after every training step.
    """
    step_numbers = tqdm(range(n_0, n_0 + k), disable=not progress_bar, mininterval = 10.)
    for n in step_numbers:
        batch = next(dataloader)
        model.train(True)
        batch = to_cuda(batch)
        step_kwargs = dict(
            train_batch=batch,
            model=model,
            objective=objective,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            n=n,
            grad_scaler=grad_scaler,
        )
        train_step(**step_kwargs)
        lr_scheduler.step()
39
+
40
+
41
def train_epoch(
    dataloader=None,
    model=None,
    objective=None,
    optimizer=None,
    lr_scheduler=None,
    epoch=None,
):
    """Train for one full pass over ``dataloader`` and return the training state.

    The scheduler is stepped after every batch. No gradient scaler is used.
    """
    model.train(True)
    print(f"At epoch {epoch}")
    for batch in tqdm(dataloader, mininterval=5.0):
        cuda_batch = to_cuda(batch)
        train_step(
            train_batch=cuda_batch,
            model=model,
            objective=objective,
            optimizer=optimizer,
        )
        lr_scheduler.step()
    state = {
        "model": model,
        "optimizer": optimizer,
        "lr_scheduler": lr_scheduler,
        "epoch": epoch,
    }
    return state
63
+
64
+
65
def train_k_epochs(
    start_epoch, end_epoch, dataloader, model, objective, optimizer, lr_scheduler
):
    """Run ``train_epoch`` for every epoch from ``start_epoch`` to ``end_epoch`` inclusive."""
    shared = dict(
        dataloader=dataloader,
        model=model,
        objective=objective,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
    )
    epoch = start_epoch
    while epoch <= end_epoch:
        train_epoch(epoch=epoch, **shared)
        epoch += 1
third_party/DeDoDe/DeDoDe/utils.py ADDED
@@ -0,0 +1,759 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ import numpy as np
3
+ import math
4
+ import cv2
5
+ import torch
6
+ from torchvision import transforms
7
+ from torchvision.transforms.functional import InterpolationMode
8
+ import torch.nn.functional as F
9
+ from PIL import Image
10
+ from einops import rearrange
11
+ import torch
12
+ from time import perf_counter
13
+
14
def recover_pose(E, kpts0, kpts1, K0, K1, mask):
    """Pick the pose (R, t) with the most inliers among stacked essential matrices.

    Args:
        E: stacked 3x3 essential-matrix candidates of shape (3k, 3), or None.
        kpts0, kpts1: (N, 2) pixel keypoints in image 0 / image 1.
        K0, K1: 3x3 camera intrinsics.
        mask: inlier mask handed to ``cv2.recoverPose``.

    Returns:
        ``(R, t, inlier_mask)`` for the best candidate, or ``None`` when ``E``
        is None/empty or no candidate produces any inlier (the previous code
        raised ZeroDivisionError / UnboundLocalError in those cases).
    """
    ret = None
    if E is None:
        return ret
    num_candidates = len(E) // 3
    if num_candidates == 0:
        return ret

    best_num_inliers = 0
    K0inv = np.linalg.inv(K0[:2,:2])
    K1inv = np.linalg.inv(K1[:2,:2])

    # Move keypoints to normalized camera coordinates so that the identity
    # can be used as camera matrix below.
    kpts0_n = (K0inv @ (kpts0-K0[None,:2,2]).T).T
    kpts1_n = (K1inv @ (kpts1-K1[None,:2,2]).T).T

    for _E in np.split(E, num_candidates):
        n, R, t, _ = cv2.recoverPose(_E, kpts0_n, kpts1_n, np.eye(3), 1e9, mask=mask)
        if n > best_num_inliers:
            best_num_inliers = n
            ret = (R, t, mask.ravel() > 0)
    return ret
28
+
29
+
30
+
31
+ # Code taken from https://github.com/PruneTruong/DenseMatching/blob/40c29a6b5c35e86b9509e65ab0cd12553d998e5f/validation/utils_pose_estimation.py
32
+ # --- GEOMETRY ---
33
def estimate_pose(kpts0, kpts1, K0, K1, norm_thresh, conf=0.99999):
    """Estimate the relative pose from matched keypoints via an essential matrix.

    Returns ``(R, t, inlier_mask)`` or ``None`` when fewer than 5 matches are
    given, no essential matrix is found, or no candidate has inliers.
    """
    if len(kpts0) < 5:
        return None

    # Normalize pixel coordinates with the intrinsics (focal part + principal point).
    inv_focal0 = np.linalg.inv(K0[:2,:2])
    inv_focal1 = np.linalg.inv(K1[:2,:2])
    kpts0 = (inv_focal0 @ (kpts0-K0[None,:2,2]).T).T
    kpts1 = (inv_focal1 @ (kpts1-K1[None,:2,2]).T).T

    E, mask = cv2.findEssentialMat(
        kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf
    )
    if E is None:
        return None

    best = None
    most_inliers = 0
    # E may hold several stacked 3x3 candidates; keep the one with most inliers.
    for candidate in np.split(E, len(E) / 3):
        n, R, t, _ = cv2.recoverPose(candidate, kpts0, kpts1, np.eye(3), 1e9, mask=mask)
        if n > most_inliers:
            most_inliers = n
            best = (R, t, mask.ravel() > 0)
    return best
55
+
56
+
57
def get_grid(B,H,W, device = "cuda"):
    """Return a (B, H*W, 2) grid of normalized (x, y) pixel-centre coordinates.

    Each axis of length n is sampled at ``linspace(-1 + 1/n, 1 - 1/n, n)``.
    """
    axes = []
    for n in (B, H, W):
        axes.append(torch.linspace(-1 + 1 / n, 1 - 1 / n, n, device=device))
    _, ys, xs = torch.meshgrid(*axes)
    # x (width) first, then y (height).
    return torch.stack((xs, ys), dim=-1).reshape(B, H * W, 2)
68
+
69
@torch.no_grad()
def finite_diff_hessian(f: tuple(["B", "H", "W"]), device = "cuda"):
    """Per-pixel 2x2 finite-difference Hessian of ``f``.

    Uses 3x3 stencils with zero padding; the result has shape
    ``(*f.shape, 2, 2)`` laid out as ``[[Hxx, Hxy], [Hxy, Hyy]]``.
    """
    dxx = torch.tensor([[0,0,0],[1,-2,1],[0,0,0]], device = device)[None,None]/2
    dxy = torch.tensor([[1,0,-1],[0,0,0],[-1,0,1]], device = device)[None,None]/4
    dyy = dxx.mT
    f_with_channel = f[:,None]
    Hxx, Hxy, Hyy = (
        F.conv2d(f_with_channel, kernel, padding = 1)[:,0]
        for kernel in (dxx, dxy, dyy)
    )
    entries = torch.stack((Hxx, Hxy, Hxy, Hyy), dim = -1)
    return entries.reshape(*f.shape,2,2)
79
+
80
def finite_diff_grad(f: tuple(["B", "H", "W"]), device = "cuda"):
    """Central-difference gradient of ``f``; returns (B, 2, H, W) as (d/dx, d/dy)."""
    dx = torch.tensor([[0,0,0],[-1,0,1],[0,0,0]],device = device)[None,None]/2
    dy = dx.mT
    f_with_channel = f[:,None]
    components = [F.conv2d(f_with_channel, kernel, padding = 1) for kernel in (dx, dy)]
    return torch.cat(components, dim = 1)
87
+
88
def fast_inv_2x2(matrix: tuple[...,2,2], eps = 1e-10):
    """Closed-form inverse of a batch of 2x2 matrices (adjugate / (det + eps))."""
    det = torch.linalg.det(matrix)[...,None,None]
    adjugate = torch.stack(
        (
            matrix[...,1,1], -matrix[...,0,1],
            -matrix[...,1,0], matrix[...,0,0],
        ),
        dim=-1,
    ).reshape(*matrix.shape)
    return 1/(det+eps) * adjugate
91
+
92
def newton_step(f:tuple["B","H","W"], inds, device = "cuda"):
    """Compute a Newton-style update from finite differences of ``f`` at ``inds``.

    The Hessian and gradient of ``f`` are gathered at ``inds`` and the step is
    ``inv(Hess - I) @ grad``. ``device`` is now forwarded to the
    finite-difference helpers; previously they always used their own default
    ("cuda") regardless of the value passed here.

    NOTE(review): the gradient tensor (B, 2, H, W) is reshaped directly to
    (B, H*W, 2) without a permute, and ``inds`` is used with different
    trailing shapes for the two gathers -- confirm the expected shape of
    ``inds`` and the intended memory layout with the author.
    """
    B,H,W = f.shape
    Hess = finite_diff_hessian(f, device = device).reshape(B,H*W,2,2)
    Hess = torch.gather(Hess, dim = 1, index = inds[...,None].expand(B,-1,2,2))
    grad = finite_diff_grad(f, device = device).reshape(B,H*W,2)
    grad = torch.gather(grad, dim = 1, index = inds)
    Hessinv = fast_inv_2x2(Hess-torch.eye(2, device = device)[None,None])
    step = (Hessinv @ grad[...,None])
    return step[...,0]
101
+
102
@torch.no_grad()
def sample_keypoints(scoremap, num_samples = 8192, device = "cuda", use_nms = True,
                     sample_topk = False, return_scoremap = False, sharpen = False, upsample = False,
                     increase_coverage = False,):
    """Sample keypoint locations from a (B, H, W) score map.

    Optional steps, applied in this order: 3x bicubic upsampling in log
    space, down-weighting dense regions (``increase_coverage``), Laplacian
    sharpening, 3x3 non-maximum suppression. Keypoints are then chosen
    either as the top-k scores or by multinomial sampling without
    replacement.

    Returns:
        Normalized keypoints of shape (B, num_samples, 2), plus the sampled
        scores when ``return_scoremap`` is true.
    """
    log_scoremap = (scoremap+1e-10).log()
    if upsample:
        log_scoremap = F.interpolate(log_scoremap[:,None], scale_factor = 3, mode = "bicubic", align_corners = False)[:,0]
    scoremap = log_scoremap.exp()
    B,H,W = scoremap.shape

    if increase_coverage:
        kernel_size = 51
        half = kernel_size // 2
        weights = (-torch.linspace(-2, 2, steps = kernel_size, device = device)**2).exp()[None,None]
        # Separable Gaussian-like blur; the 10000 factor is arbitrary scaling.
        density_x = F.conv2d((scoremap[:,None]+1e-6)*10000, weights[...,None,:], padding = (0, half))
        density = F.conv2d(density_x, weights[...,None], padding = (half, 0))[:,0]
        scoremap = scoremap * (density+1e-8)**(-1/2)

    grid = get_grid(B,H,W, device=device).reshape(B,H*W,2)

    if sharpen:
        laplace_operator = torch.tensor([[[[0,1,0],[1,-4,1],[0,1,0]]]], device = device)/4
        sharpened = scoremap[:,None] - 0.5 * F.conv2d(scoremap[:,None], weight = laplace_operator, padding = 1)
        scoremap = sharpened[:,0].clamp(min = 0)

    if use_nms:
        is_local_max = scoremap == F.max_pool2d(scoremap, (3, 3), stride = 1, padding = 1)
        scoremap = scoremap * is_local_max

    flat_scores = scoremap.reshape(B,H*W)
    if sample_topk:
        inds = torch.topk(flat_scores, k = num_samples).indices
    else:
        inds = torch.multinomial(flat_scores, num_samples = num_samples, replacement=False)
    kps = torch.gather(grid, dim = 1, index = inds[...,None].expand(B,num_samples,2))
    if return_scoremap:
        return kps, torch.gather(flat_scores, dim = 1, index = inds)
    return kps
133
+
134
@torch.no_grad()
def jacobi_determinant(warp, certainty, R = 3, device = "cuda", dtype = torch.float32):
    """Robust log |det J| of a dense warp, from a weighted local affine fit.

    For every pixel the R x R neighbourhood of the warp is gathered, offset
    by the centre value, standardized, and a certainty-weighted least-squares
    2x2 map between the first and last two channels is solved. The log of the
    absolute determinant is clamped to [-3, 3].

    NOTE(review): assumes ``warp`` is (B, H, W, 4) and ``certainty`` is
    (B, H, W); the border assignment only matches for R = 3 -- confirm.
    """
    *dims, _ = warp.shape
    warp = warp.to(dtype)
    certainty = certainty.to(dtype)
    dtype = warp.dtype

    # Neighbourhoods of the warp; border pixels keep zeros.
    patches = torch.zeros((*dims, 4, R, R), device = device).to(dtype)
    patches[:,1:-1, 1:-1] = warp.unfold(1,R,1).unfold(2,R,1)
    patches = rearrange(patches,"B H W D R1 R2 -> B H W (R1 R2) D") - warp[...,None,:]

    patch_cert = torch.zeros((*dims, R, R), device = device).to(dtype)
    patch_cert[:,1:-1, 1:-1] = certainty.unfold(1,R,1).unfold(2,R,1)
    patch_cert = rearrange(patch_cert,"B H W R1 R2 -> B H W (R1 R2)")[..., None]

    *dims, N, D = patches.shape
    # standardize:
    mu = patches.mean(dim=(-2,-1), keepdim = True)
    sigma = patches.std(dim=(-2,-1),keepdim=True)
    patches = (patches-mu)/(sigma+1e-6)
    x_a, x_b = patches.chunk(2,-1)

    # Block design matrix of the weighted least-squares problem.
    weighted_x_a = x_a * patch_cert
    A = torch.zeros((*dims,2*x_a.shape[-2],4), device = device).to(dtype)
    A[...,::2,:2] = weighted_x_a
    A[...,1::2,2:] = weighted_x_a

    a_block = A[...,::2,:2]
    atainv = fast_inv_2x2(a_block.mT @ a_block)
    ATA_inv = torch.zeros((*dims, 4, 4), device = device, dtype = dtype)
    ATA_inv[...,:2,:2] = atainv
    ATA_inv[...,2:,2:] = atainv
    atb = A.mT @ (patch_cert*x_b).reshape(*dims,N*2,1)
    theta = ATA_inv @ atb

    J = theta.reshape(*dims, 2, 2)
    regularizer = 1e-8*torch.eye(2,2,device = device).expand(*dims,2,2)
    # Should be positive for correct warps; abs is taken anyway.
    abs_J_det = torch.linalg.det(J+regularizer).abs()
    abs_J_logdet = (abs_J_det+1e-12).log()
    # Handle outliers: exp(3) is roughly an 8x zoom.
    return abs_J_logdet.clamp(-3, 3)
188
+
189
def get_gt_warp(depth1, depth2, T_1to2, K1, K2, depth_interpolation_mode = 'bilinear', relative_depth_error_threshold = 0.05, H = None, W = None):
    """Ground-truth dense warp from image 1 to image 2, computed from depth and pose.

    A normalized pixel-centre grid of size (H, W) is warped with
    ``warp_kpts`` in double precision.

    Returns:
        (warp, prob): ``warp`` is (B, H, W, 4) holding the source grid and
        its warped position; ``prob`` is the validity mask as float (B, H, W).
    """
    B = depth1.shape[0]
    if H is None:
        _, H, W = depth1.shape
    with torch.no_grad():
        axes = [
            torch.linspace(-1 + 1 / n, 1 - 1 / n, n, device=depth1.device)
            for n in (B, H, W)
        ]
        _, ys, xs = torch.meshgrid(*axes)
        x1_n = torch.stack((xs, ys), dim=-1).reshape(B, H * W, 2)
        mask, x2 = warp_kpts(
            x1_n.double(),
            depth1.double(),
            depth2.double(),
            T_1to2.double(),
            K1.double(),
            K2.double(),
            depth_interpolation_mode = depth_interpolation_mode,
            relative_depth_error_threshold = relative_depth_error_threshold,
        )
        prob = mask.float().reshape(B, H, W)
        x2 = x2.reshape(B, H, W, 2)
        source_grid = x1_n.reshape(B,H,W,2)
        return torch.cat((source_grid, x2), dim=-1), prob
218
+
219
def recover_pose(E, kpts0, kpts1, K0, K1, mask):
    """Pick the pose (R, t) with the most inliers among stacked essential matrices.

    Args:
        E: stacked 3x3 essential-matrix candidates of shape (3k, 3), or None.
        kpts0, kpts1: (N, 2) pixel keypoints in image 0 / image 1.
        K0, K1: 3x3 camera intrinsics.
        mask: inlier mask handed to ``cv2.recoverPose``.

    Returns:
        ``(R, t, inlier_mask)`` for the best candidate, or ``None`` when ``E``
        is None/empty or no candidate produces any inlier (the previous code
        raised ZeroDivisionError / UnboundLocalError in those cases).
    """
    ret = None
    if E is None:
        return ret
    num_candidates = len(E) // 3
    if num_candidates == 0:
        return ret

    best_num_inliers = 0
    K0inv = np.linalg.inv(K0[:2,:2])
    K1inv = np.linalg.inv(K1[:2,:2])

    # Move keypoints to normalized camera coordinates so that the identity
    # can be used as camera matrix below.
    kpts0_n = (K0inv @ (kpts0-K0[None,:2,2]).T).T
    kpts1_n = (K1inv @ (kpts1-K1[None,:2,2]).T).T

    for _E in np.split(E, num_candidates):
        n, R, t, _ = cv2.recoverPose(_E, kpts0_n, kpts1_n, np.eye(3), 1e9, mask=mask)
        if n > best_num_inliers:
            best_num_inliers = n
            ret = (R, t, mask.ravel() > 0)
    return ret
233
+
234
+
235
+
236
+ # Code taken from https://github.com/PruneTruong/DenseMatching/blob/40c29a6b5c35e86b9509e65ab0cd12553d998e5f/validation/utils_pose_estimation.py
237
+ # --- GEOMETRY ---
238
def estimate_pose(kpts0, kpts1, K0, K1, norm_thresh, conf=0.99999, ):
    """Estimate the relative pose from matched keypoints (USAC_ACCURATE essential matrix).

    Returns ``(R, t, inlier_mask)`` or ``None`` when fewer than 5 matches are
    given, no essential matrix is found, or no candidate has inliers.
    """
    if len(kpts0) < 5:
        return None

    # Normalize pixel coordinates with the intrinsics (focal part + principal point).
    inv_focal0 = np.linalg.inv(K0[:2,:2])
    inv_focal1 = np.linalg.inv(K1[:2,:2])
    kpts0 = (inv_focal0 @ (kpts0-K0[None,:2,2]).T).T
    kpts1 = (inv_focal1 @ (kpts1-K1[None,:2,2]).T).T

    method = cv2.USAC_ACCURATE
    E, mask = cv2.findEssentialMat(
        kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf, method=method
    )
    if E is None:
        return None

    best = None
    most_inliers = 0
    # E may hold several stacked 3x3 candidates; keep the one with most inliers.
    for candidate in np.split(E, len(E) / 3):
        n, R, t, _ = cv2.recoverPose(candidate, kpts0, kpts1, np.eye(3), 1e9, mask=mask)
        if n > most_inliers:
            most_inliers = n
            best = (R, t, mask.ravel() > 0)
    return best
261
+
262
def estimate_pose_uncalibrated(kpts0, kpts1, K0, K1, norm_thresh, conf=0.99999):
    """Estimate the relative pose via a fundamental matrix fitted in pixel space.

    The fundamental matrix is found with USAC_ACCURATE, converted to an
    essential matrix with the intrinsics (``E = K1^T F K0``) and decomposed
    with ``cv2.recoverPose`` on normalized keypoints.

    Returns:
        ``(R, t, inlier_mask)`` or ``None`` when fewer than 5 matches are
        given, no fundamental matrix is found, or no candidate has inliers.
    """
    if len(kpts0) < 5:
        return None
    method = cv2.USAC_ACCURATE
    # Named ``fundamental`` so the module-level ``F`` (torch.nn.functional)
    # is not shadowed.
    fundamental, mask = cv2.findFundamentalMat(
        kpts0, kpts1, ransacReprojThreshold=norm_thresh, confidence=conf, method=method, maxIters=10000
    )
    # Check before using it: the old ``E is not None`` test came after the
    # matrix product and could never fail.
    if fundamental is None:
        return None
    E = K1.T@fundamental@K0

    ret = None
    best_num_inliers = 0
    K0inv = np.linalg.inv(K0[:2,:2])
    K1inv = np.linalg.inv(K1[:2,:2])

    kpts0_n = (K0inv @ (kpts0-K0[None,:2,2]).T).T
    kpts1_n = (K1inv @ (kpts1-K1[None,:2,2]).T).T

    for _E in np.split(E, len(E) // 3):
        n, R, t, _ = cv2.recoverPose(_E, kpts0_n, kpts1_n, np.eye(3), 1e9, mask=mask)
        if n > best_num_inliers:
            best_num_inliers = n
            ret = (R, t, mask.ravel() > 0)
    return ret
285
+
286
def unnormalize_coords(x_n,h,w):
    """Map normalized (x, y) in [-1, 1] to pixel coordinates of an h x w image."""
    # [-1+1/h, 1-1/h] -> [0.5, h-0.5]
    x_pix = w * (x_n[..., 0] + 1) / 2
    y_pix = h * (x_n[..., 1] + 1) / 2
    return torch.stack((x_pix, y_pix), dim=-1)
291
+
292
+
293
def rotate_intrinsic(K, n):
    """Left-multiply ``K`` by the n-th power of a fixed 90-degree in-plane rotation."""
    quarter_turn = np.array([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
    return np.linalg.matrix_power(quarter_turn, n) @ K
297
+
298
+
299
def rotate_pose_inplane(i_T_w, rot):
    """Left-multiply a 4x4 pose by an in-plane rotation chosen by index ``rot``.

    Index 0..3 selects a rotation about z of 0, 270, 180 and 90 degrees.
    """
    angles_deg = (0, 270, 180, 90)
    r = np.deg2rad(angles_deg[rot])
    c, s = np.cos(r), np.sin(r)
    rotation = np.array(
        [
            [c, -s, 0.0, 0.0],
            [s, c, 0.0, 0.0],
            [0.0, 0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0, 1.0],
        ],
        dtype=np.float32,
    )
    return np.dot(rotation, i_T_w)
313
+
314
+
315
def scale_intrinsics(K, scales):
    """Divide the first two rows of ``K`` by ``scales[0]`` and ``scales[1]``."""
    sx, sy = scales[0], scales[1]
    scaling = np.diag([1.0 / sx, 1.0 / sy, 1.0])
    return np.dot(scaling, K)
318
+
319
def angle_error_mat(R1, R2):
    """Angle in degrees of the relative rotation between ``R1`` and ``R2``."""
    relative = np.dot(R1.T, R2)
    cos = (np.trace(relative) - 1) / 2
    # Numerical errors can push the cosine slightly out of [-1, 1].
    cos = np.clip(cos, -1.0, 1.0)
    angle_rad = np.abs(np.arccos(cos))
    return np.rad2deg(angle_rad)
323
+
324
+
325
def angle_error_vec(v1, v2):
    """Angle in degrees between vectors ``v1`` and ``v2``."""
    n = np.linalg.norm(v1) * np.linalg.norm(v2)
    cos = np.clip(np.dot(v1, v2) / n, -1.0, 1.0)
    return np.rad2deg(np.arccos(cos))
328
+
329
+
330
def compute_pose_error(T_0to1, R, t):
    """Return (translation angle error, rotation angle error) in degrees.

    The translation error is folded to at most 90 degrees because the sign
    of ``t`` is ambiguous for an essential-matrix estimate.
    """
    R_gt, t_gt = T_0to1[:3, :3], T_0to1[:3, 3]
    error_t = angle_error_vec(t.squeeze(), t_gt)
    error_t = np.minimum(error_t, 180 - error_t)  # ambiguity of E estimation
    error_R = angle_error_mat(R, R_gt)
    return error_t, error_R
337
+
338
+
339
def pose_auc(errors, thresholds):
    """Area under the recall-vs-error curve, normalized per threshold.

    Args:
        errors: sequence (list, tuple or array) of pose errors.
        thresholds: iterable of error thresholds.

    Returns:
        List with one AUC value in [0, 1] per threshold.
    """
    # ``np.trapz`` is deprecated in NumPy 2 in favour of ``np.trapezoid``;
    # use whichever this NumPy provides.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    sort_idx = np.argsort(errors)
    # ``np.array`` copies, so tuples work too and the input is untouched.
    errors = np.array(errors)[sort_idx]
    recall = (np.arange(len(errors)) + 1) / len(errors)
    errors = np.r_[0.0, errors]
    recall = np.r_[0.0, recall]
    aucs = []
    for t in thresholds:
        last_index = np.searchsorted(errors, t)
        # Extend the curve flat up to the threshold.
        r = np.r_[recall[:last_index], recall[last_index - 1]]
        e = np.r_[errors[:last_index], t]
        aucs.append(trapezoid(r, x=e) / t)
    return aucs
352
+
353
+
354
+ # From Patch2Pix https://github.com/GrumpyZhou/patch2pix
355
def get_depth_tuple_transform_ops(resize=None, normalize=True, unscale=False):
    """Build the transform pipeline for depth tuples.

    Only an optional bilinear, non-antialiased resize is applied;
    ``normalize`` and ``unscale`` are accepted but not used.
    """
    if resize:
        ops = [TupleResize(resize, mode=InterpolationMode.BILINEAR, antialias = False)]
    else:
        ops = []
    return TupleCompose(ops)
360
+
361
+
362
def get_tuple_transform_ops(resize=None, normalize=True, unscale=False, clahe = False):
    """Build the transform pipeline for image tuples.

    Order: optional antialiased resize, optional CLAHE, then tensor
    conversion. With ``normalize`` the tensors are scaled to [0, 1] and
    ImageNet-normalized; otherwise ``unscale`` selects raw or [0, 1] values.
    """
    ops = []
    if resize:
        ops.append(TupleResize(resize, antialias = True))
    if clahe:
        ops.append(TupleClahe())
    if normalize:
        ops.extend(
            [
                TupleToTensorScaled(),
                # Imagenet mean/std
                TupleNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        )
    elif unscale:
        ops.append(TupleToTensorUnscaled())
    else:
        ops.append(TupleToTensorScaled())
    return TupleCompose(ops)
379
+
380
class Clahe:
    """Apply CLAHE to the V channel (HSV) of an RGB image and return a PIL image."""

    def __init__(self, cliplimit = 2, blocksize = 8) -> None:
        tile_grid = (blocksize, blocksize)
        self.clahe = cv2.createCLAHE(cliplimit, tile_grid)

    def __call__(self, im):
        hsv = cv2.cvtColor(np.array(im), cv2.COLOR_RGB2HSV)
        # Equalize brightness only; hue and saturation are untouched.
        hsv[..., 2] = self.clahe.apply(hsv[:, :, 2])
        rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
        return Image.fromarray(rgb)
389
+
390
class TupleClahe:
    """Apply ``Clahe`` to every image of a tuple."""

    def __init__(self, cliplimit = 8, blocksize = 8) -> None:
        self.clahe = Clahe(cliplimit, blocksize)

    def __call__(self, ims):
        return list(map(self.clahe, ims))
395
+
396
class ToTensorScaled(object):
    """Convert a RGB PIL Image to a CHW ordered Tensor, scale the range to [0, 1]"""

    def __call__(self, im):
        # Tensors are passed through unchanged.
        if isinstance(im, torch.Tensor):
            return im
        chw = np.array(im, dtype=np.float32).transpose((2, 0, 1))
        chw /= 255.0
        return torch.from_numpy(chw)

    def __repr__(self):
        return "ToTensorScaled(./255)"
409
+
410
+
411
class TupleToTensorScaled(object):
    """Apply ``ToTensorScaled`` to every image of a tuple."""

    def __init__(self):
        self.to_tensor = ToTensorScaled()

    def __call__(self, im_tuple):
        return list(map(self.to_tensor, im_tuple))

    def __repr__(self):
        return "TupleToTensorScaled(./255)"
420
+
421
+
422
class ToTensorUnscaled(object):
    """Convert a RGB PIL Image to a CHW ordered Tensor"""

    def __call__(self, im):
        hwc = np.array(im, dtype=np.float32)
        chw = hwc.transpose((2, 0, 1))
        return torch.from_numpy(chw)

    def __repr__(self):
        return "ToTensorUnscaled()"
430
+
431
+
432
class TupleToTensorUnscaled(object):
    """Convert each RGB PIL Image of a tuple to a CHW ordered Tensor"""

    def __init__(self):
        self.to_tensor = ToTensorUnscaled()

    def __call__(self, im_tuple):
        return list(map(self.to_tensor, im_tuple))

    def __repr__(self):
        return "TupleToTensorUnscaled()"
443
+
444
+
445
class TupleResize(object):
    """Resize every image of a tuple with a shared ``transforms.Resize``."""

    def __init__(self, size, mode=InterpolationMode.BICUBIC, antialias = None):
        self.size = size
        self.resize = transforms.Resize(size, mode, antialias = antialias)

    def __call__(self, im_tuple):
        return list(map(self.resize, im_tuple))

    def __repr__(self):
        return "TupleResize(size={})".format(self.size)
455
+
456
class Normalize:
    """Standardize an image per channel using its own mean and std over dims 1 and 2."""

    def __call__(self,im):
        reduce_dims = (1, 2)
        mean = im.mean(dim=reduce_dims, keepdims=True)
        std = im.std(dim=reduce_dims, keepdims=True)
        centered = im - mean
        return centered / std
461
+
462
+
463
class TupleNormalize(object):
    """Normalize the first three channels of every image in a tuple with a fixed mean/std."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std
        self.normalize = transforms.Normalize(mean=mean, std=std)

    def __call__(self, im_tuple):
        # The channel count is read from the first image only.
        c, h, w = im_tuple[0].shape
        if c > 3:
            warnings.warn(f"Number of channels {c=} > 3, assuming first 3 are rgb")
        normalized = []
        for im in im_tuple:
            normalized.append(self.normalize(im[:3]))
        return normalized

    def __repr__(self):
        return "TupleNormalize(mean={}, std={})".format(self.mean, self.std)
477
+
478
+
479
class TupleCompose(object):
    """Chain tuple transforms, feeding each one the output of the previous."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, im_tuple):
        result = im_tuple
        for transform in self.transforms:
            result = transform(result)
        return result

    def __repr__(self):
        # One transform per line between "ClassName(" and ")".
        body = "".join("\n" + " {0}".format(t) for t in self.transforms)
        return self.__class__.__name__ + "(" + body + "\n)"
495
+
496
+
497
@torch.no_grad()
def warp_kpts(kpts0, depth0, depth1, T_0to1, K0, K1, smooth_mask = False, return_relative_depth_error = False, depth_interpolation_mode = "bilinear", relative_depth_error_threshold = 0.05):
    """Warp kpts0 from I0 to I1 with depth, K and Rt.

    Also checks covisibility and depth consistency. Depth is consistent when
    the relative error is below ``relative_depth_error_threshold``.
    Adapted from
    https://github.com/zju3dv/LoFTR/blob/94e98b695be18acb43d5d3250f52226a8e36f839/src/loftr/utils/geometry.py

    Args:
        kpts0 (torch.Tensor): [N, L, 2] - <x, y>, should be normalized in (-1,1)
        depth0 (torch.Tensor): [N, H, W],
        depth1 (torch.Tensor): [N, H, W],
        T_0to1 (torch.Tensor): [N, 3, 4],
        K0 (torch.Tensor): [N, 3, 3],
        K1 (torch.Tensor): [N, 3, 3],
        smooth_mask: when truthy, the consistency term becomes
            ``exp(-relative_error / smooth_mask)`` instead of a hard threshold.
        return_relative_depth_error: return the relative depth error in place
            of the validity mask.
        depth_interpolation_mode: ``grid_sample`` mode, or "combined" to fill
            holes of the bilinear result with nearest-neighbour sampling.
        relative_depth_error_threshold: threshold for the hard consistency check.
    Returns:
        calculable_mask (torch.Tensor): [N, L]
        warped_keypoints0 (torch.Tensor): [N, L, 2] <x0_hat, y1_hat>
    """
    n, h, w = depth0.shape
    if depth_interpolation_mode == "combined":
        # Inspired by approach in inloc, try to fill holes from bilinear interpolation by nearest neighbour interpolation
        if smooth_mask:
            raise NotImplementedError("Combined bilinear and NN warp not implemented")
        shared = dict(
            smooth_mask = smooth_mask,
            return_relative_depth_error = return_relative_depth_error,
            relative_depth_error_threshold = relative_depth_error_threshold,
        )
        valid_bilinear, warp_bilinear = warp_kpts(
            kpts0, depth0, depth1, T_0to1, K0, K1,
            depth_interpolation_mode = "bilinear", **shared)
        # F.grid_sample only accepts 'bilinear', 'nearest' and 'bicubic';
        # the former "nearest-exact" is an F.interpolate mode and was rejected.
        valid_nearest, warp_nearest = warp_kpts(
            kpts0, depth0, depth1, T_0to1, K0, K1,
            depth_interpolation_mode = "nearest", **shared)
        nearest_valid_bilinear_invalid = (~valid_bilinear).logical_and(valid_nearest)
        warp = warp_bilinear.clone()
        warp[nearest_valid_bilinear_invalid] = warp_nearest[nearest_valid_bilinear_invalid]
        valid = valid_bilinear | valid_nearest
        return valid, warp

    # Sample depth at the keypoints, get calculable_mask on depth != 0
    kpts0_depth = F.grid_sample(
        depth0[:, None], kpts0[:, :, None], mode = depth_interpolation_mode, align_corners=False
    )[:, 0, :, 0]
    kpts0 = torch.stack(
        (w * (kpts0[..., 0] + 1) / 2, h * (kpts0[..., 1] + 1) / 2), dim=-1
    )  # [-1+1/h, 1-1/h] -> [0.5, h-0.5]
    nonzero_mask = kpts0_depth != 0

    # Unproject
    kpts0_h = (
        torch.cat([kpts0, torch.ones_like(kpts0[:, :, [0]])], dim=-1)
        * kpts0_depth[..., None]
    )  # (N, L, 3)
    kpts0_cam = K0.inverse() @ kpts0_h.transpose(2, 1)  # (N, 3, L)

    # Rigid Transform
    w_kpts0_cam = T_0to1[:, :3, :3] @ kpts0_cam + T_0to1[:, :3, [3]]  # (N, 3, L)
    w_kpts0_depth_computed = w_kpts0_cam[:, 2, :]

    # Project
    w_kpts0_h = (K1 @ w_kpts0_cam).transpose(2, 1)  # (N, L, 3)
    w_kpts0 = w_kpts0_h[:, :, :2] / (
        w_kpts0_h[:, :, [2]] + 1e-4
    )  # (N, L, 2), +1e-4 to avoid zero depth

    # Covisible Check
    h, w = depth1.shape[1:3]
    covisible_mask = (
        (w_kpts0[:, :, 0] > 0)
        * (w_kpts0[:, :, 0] < w - 1)
        * (w_kpts0[:, :, 1] > 0)
        * (w_kpts0[:, :, 1] < h - 1)
    )
    w_kpts0 = torch.stack(
        (2 * w_kpts0[..., 0] / w - 1, 2 * w_kpts0[..., 1] / h - 1), dim=-1
    )  # from [0.5,h-0.5] -> [-1+1/h, 1-1/h]

    # Depth consistency between the projected depth and the depth map of I1
    w_kpts0_depth = F.grid_sample(
        depth1[:, None], w_kpts0[:, :, None], mode=depth_interpolation_mode, align_corners=False
    )[:, 0, :, 0]
    relative_depth_error = (
        (w_kpts0_depth - w_kpts0_depth_computed) / w_kpts0_depth
    ).abs()
    if not smooth_mask:
        consistent_mask = relative_depth_error < relative_depth_error_threshold
    else:
        consistent_mask = (-relative_depth_error/smooth_mask).exp()
    valid_mask = nonzero_mask * covisible_mask * consistent_mask
    if return_relative_depth_error:
        return relative_depth_error, w_kpts0
    else:
        return valid_mask, w_kpts0
596
+
597
# Per-channel ImageNet statistics (RGB order), used to undo normalization.
imagenet_mean = torch.tensor((0.485, 0.456, 0.406))
imagenet_std = torch.tensor((0.229, 0.224, 0.225))
599
+
600
+
601
def numpy_to_pil(x: np.ndarray):
    """Convert an (h, w, c) array (or tensor) to a PIL image.

    Args:
        x: Assumed to be of shape (h,w,c). Values with a maximum of at most
            1.01 are treated as [0, 1] and scaled to [0, 255].

    The input array is no longer modified: the scaling used to be done in
    place (``x *= 255``), silently changing the caller's data.
    """
    if isinstance(x, torch.Tensor):
        x = x.detach().cpu().numpy()
    if x.max() <= 1.01:
        x = x * 255
    x = x.astype(np.uint8)
    return Image.fromarray(x)
612
+
613
+
614
def tensor_to_pil(x, unnormalize=False, autoscale = False):
    """Convert a CHW tensor to a PIL image.

    Args:
        x: image tensor in channel-first layout.
        unnormalize: undo ImageNet mean/std normalization first.
        autoscale: min-max rescale to [0, 1]; warns and skips when the
            tensor is constant.
    """
    if unnormalize:
        std = imagenet_std[:, None, None].to(x.device)
        mean = imagenet_mean[:, None, None].to(x.device)
        x = x * (std) + (mean)
    if autoscale:
        if x.max() == x.min():
            warnings.warn("x max == x min, cant autoscale")
        else:
            x = (x-x.min())/(x.max()-x.min())

    hwc = x.detach().permute(1, 2, 0).cpu().numpy()
    return numpy_to_pil(np.clip(hwc, 0.0, 1.0))
626
+
627
+
628
def to_cuda(batch):
    """Move every tensor value of ``batch`` to CUDA in place and return ``batch``."""
    for key in batch:
        if isinstance(batch[key], torch.Tensor):
            batch[key] = batch[key].cuda()
    return batch
633
+
634
+
635
def to_cpu(batch):
    """Move every tensor value of ``batch`` to the CPU in place and return ``batch``."""
    for key in batch:
        if isinstance(batch[key], torch.Tensor):
            batch[key] = batch[key].cpu()
    return batch
640
+
641
+
642
def get_pose(calib):
    """Unpack a calibration mapping into ``(K, R, T transposed, h, w)``.

    Reads the keys "imsize" (first row is ``(w, h)``), "K", "R" and "T".
    """
    width, height = np.array(calib["imsize"])[0]
    K = np.array(calib["K"])
    R = np.array(calib["R"])
    T = np.array(calib["T"]).T
    return K, R, T, height, width
645
+
646
+
647
def compute_relative_pose(R1, t1, R2, t2):
    """Relative pose from camera 1 to camera 2: ``R = R2 R1^T``, ``t = t2 - R t1``."""
    relative_rotation = R2 @ (R1.T)
    relative_translation = -relative_rotation @ t1 + t2
    return relative_rotation, relative_translation
651
+
652
def to_pixel_coords(flow, h1, w1):
    """Map normalized (x, y) in [-1, 1] to pixel coordinates of an h1 x w1 image."""
    x_pix = w1 * (flow[..., 0] + 1) / 2
    y_pix = h1 * (flow[..., 1] + 1) / 2
    return torch.stack((x_pix, y_pix), dim=-1)
663
+
664
def to_normalized_coords(flow, h1, w1):
    """Map pixel (x, y) coordinates of an h1 x w1 image to the range [-1, 1]."""
    x_norm = 2 * (flow[..., 0]) / w1 - 1
    y_norm = 2 * (flow[..., 1]) / h1 - 1
    return torch.stack((x_norm, y_norm), dim=-1)
675
+
676
+
677
def warp_to_pixel_coords(warp, h1, w1, h2, w2):
    """Convert a normalized warp (x1, y1, x2, y2) to pixel coordinates.

    The first two channels are scaled with image 1's size (h1, w1), the
    remaining two with image 2's size (h2, w2).
    """
    source, target = warp[..., :2], warp[..., 2:]
    source_px = torch.stack(
        (w1 * (source[..., 0] + 1) / 2, h1 * (source[..., 1] + 1) / 2),
        dim=-1,
    )
    target_px = torch.stack(
        (w2 * (target[..., 0] + 1) / 2, h2 * (target[..., 1] + 1) / 2),
        dim=-1,
    )
    return torch.cat((source_px, target_px), dim=-1)
699
+
700
+
701
def to_homogeneous(x):
    """Append a trailing coordinate of ones along the last dimension."""
    unit_column = torch.ones_like(x[...,-1:])
    return torch.cat((x, unit_column), dim = -1)
704
+
705
def from_homogeneous(xh, eps = 1e-12):
    """Divide by the last coordinate (plus ``eps``) and drop it."""
    scale = xh[...,-1:]+eps
    return xh[...,:-1] / scale
707
+
708
def homog_transform(Homog, x):
    """Apply the homography ``Homog`` to points ``x`` given in inhomogeneous coordinates."""
    # Lift to homogeneous coordinates.
    xh = torch.cat((x, torch.ones_like(x[...,-1:])), dim = -1)
    yh = (Homog @ xh.mT).mT
    # Project back; the 1e-12 guards against division by zero.
    return yh[...,:-1] / (yh[...,-1:]+1e-12)
713
+
714
def get_homog_warp(Homog, H, W, device = "cuda"):
    """Dense warp induced by a homography on an H x W normalized grid.

    Returns:
        (warp, mask): ``warp`` concatenates the source grid and its image
        under ``Homog``; ``mask`` is 1.0 where the warped point lies
        strictly inside (-1, 1) in both coordinates.
    """
    rows = torch.linspace(-1+1/H,1-1/H,H, device = device)
    cols = torch.linspace(-1+1/W,1-1/W,W, device = device)
    grid_y, grid_x = torch.meshgrid(rows, cols)

    x_A = torch.stack((grid_x, grid_y), dim = -1)[None]
    x_A_to_B = homog_transform(Homog, x_A)
    inside = (x_A_to_B > -1) * (x_A_to_B < 1)
    mask = inside.prod(dim=-1).float()
    warp = torch.cat((x_A.expand(*x_A_to_B.shape), x_A_to_B),dim=-1)
    return warp, mask
721
+
722
def dual_log_softmax_matcher(desc_A: tuple['B','N','C'], desc_B: tuple['B','M','C'], inv_temperature = 1, normalize = False):
    """Log of the dual-softmax matching matrix between two descriptor sets.

    The correlation is the batched inner product scaled by
    ``inv_temperature``; descriptors are L2-normalized first when
    ``normalize`` is true. Inputs must be 3-dimensional.
    """
    B, N, C = desc_A.shape
    if normalize:
        desc_A = desc_A/desc_A.norm(dim=-1,keepdim=True)
        desc_B = desc_B/desc_B.norm(dim=-1,keepdim=True)
    corr = torch.einsum("b n c, b m c -> b n m", desc_A, desc_B) * inv_temperature
    log_over_A = corr.log_softmax(dim = -2)
    log_over_B = corr.log_softmax(dim = -1)
    return log_over_A + log_over_B
732
+
733
def dual_softmax_matcher(desc_A: tuple['B','N','C'], desc_B: tuple['B','M','C'], inv_temperature = 1, normalize = False):
    """Dual-softmax matching matrix between two descriptor sets.

    Inputs without a batch dimension get one added. The result is the
    product of the softmax over A and the softmax over B of the scaled
    correlation matrix, shape (B, N, M).
    """
    if len(desc_A.shape) < 3:
        desc_A, desc_B = desc_A[None], desc_B[None]
    B, N, C = desc_A.shape
    if normalize:
        desc_A = desc_A/desc_A.norm(dim=-1,keepdim=True)
        desc_B = desc_B/desc_B.norm(dim=-1,keepdim=True)
    corr = torch.einsum("b n c, b m c -> b n m", desc_A, desc_B) * inv_temperature
    prob_over_A = corr.softmax(dim = -2)
    prob_over_B = corr.softmax(dim = -1)
    return prob_over_A * prob_over_B
745
+
746
def conditional_softmax_matcher(desc_A: tuple['B','N','C'], desc_B: tuple['B','M','C'], inv_temperature = 1, normalize = False):
    """Conditional matching distributions between two descriptor sets.

    Inputs without a batch dimension get one added.

    Returns:
        (P_A_cond_B, P_B_cond_A): softmax of the scaled correlation over the
        A axis (dim -2) and over the B axis (dim -1), respectively.
    """
    if len(desc_A.shape) < 3:
        desc_A, desc_B = desc_A[None], desc_B[None]
    B, N, C = desc_A.shape
    if normalize:
        desc_A = desc_A/desc_A.norm(dim=-1,keepdim=True)
        desc_B = desc_B/desc_B.norm(dim=-1,keepdim=True)
    corr = torch.einsum("b n c, b m c -> b n m", desc_A, desc_B) * inv_temperature
    P_B_cond_A = corr.softmax(dim = -1)
    P_A_cond_B = corr.softmax(dim = -2)
    return P_A_cond_B, P_B_cond_A
third_party/DeDoDe/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Johan Edstedt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
third_party/DeDoDe/README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <h1 align="center"><ins>DeDoDe</ins> 🎶<br>Detect, Don't Describe, Describe, Don't Detect, <br> for Local Feature Matching</h1>
3
+ <p align="center">
4
+ <a href="https://scholar.google.com/citations?user=Ul-vMR0AAAAJ">Johan Edstedt</a>
5
+ ·
6
+ <a href="https://scholar.google.com/citations?user=FUE3Wd0AAAAJ">Georg Bökman</a>
7
+ ·
8
+ <a href="https://scholar.google.com/citations?user=6WRQpCQAAAAJ">Mårten Wadenbäck</a>
9
+ ·
10
+ <a href="https://scholar.google.com/citations?user=lkWfR08AAAAJ">Michael Felsberg</a>
11
+ ·
12
+ </p>
13
+ <h2 align="center"><p>
14
+ <a href="TODO" align="center">Paper (TODO)</a> |
15
+ <a href="TODO" align="center">Project Page (TODO)</a>
16
+ </p></h2>
17
+ <div align="center"></div>
18
+ </p>
19
+ <p align="center">
20
+ <img src="assets/matches.jpg" alt="example" width=80%>
21
+ <br>
22
+ <em>The DeDoDe detector learns to detect 3D consistent repeatable keypoints, which the DeDoDe descriptor learns to match. The result is a powerful decoupled local feature matcher.</em>
23
+ <br>
24
+ <img src="assets/teaser.png" alt="example" width=40%>
25
+ <img src="assets/dedode_roma.png" alt="example" width=40%>
26
+ <br>
27
+ <em>
28
+ We experimentally find that DeDoDe significantly closes the performance gap between detector + descriptor models and fully-fledged matchers. The potential of DeDoDe is not limited to local feature matching; in fact, we find that we can improve state-of-the-art matchers by incorporating DeDoDe keypoints.
29
+ </em>
30
+ </p>
31
+
32
+ ## How to Use DeDoDe?
33
+ Below we show how DeDoDe can be run; you can also check out the [demos](demo).
34
+ ```python
35
+ from DeDoDe import dedode_detector_L, dedode_descriptor_B
36
+ from DeDoDe.matchers.dual_softmax_matcher import DualSoftMaxMatcher
37
+
38
+ detector = dedode_detector_L(weights = torch.load("dedode_detector_L.pth"))
39
+ descriptor = dedode_descriptor_B(weights = torch.load("dedode_descriptor_B.pth"))
40
+ matcher = DualSoftMaxMatcher()
41
+
42
+ im_A_path = "assets/im_A.jpg"
43
+ im_B_path = "assets/im_B.jpg"
44
+ im_A = Image.open(im_A_path)
45
+ im_B = Image.open(im_B_path)
46
+ W_A, H_A = im_A.size
47
+ W_B, H_B = im_B.size
48
+
49
+
50
+ detections_A = detector.detect_from_path(im_A_path, num_keypoints = 10_000)
51
+ keypoints_A, P_A = detections_A["keypoints"], detections_A["confidence"]
52
+
53
+ detections_B = detector.detect_from_path(im_B_path, num_keypoints = 10_000)
54
+ keypoints_B, P_B = detections_B["keypoints"], detections_B["confidence"]
55
+
56
+ description_A = descriptor.describe_keypoints_from_path(im_A_path, keypoints_A)["descriptions"]
57
+ description_B = descriptor.describe_keypoints_from_path(im_B_path, keypoints_B)["descriptions"]
58
+
59
+ matches_A, matches_B, batch_ids = matcher.match(keypoints_A, description_A,
60
+ keypoints_B, description_B,
61
+ P_A = P_A, P_B = P_B,
62
+ normalize = True, inv_temp=20, threshold = 0.1)#Increasing threshold -> fewer matches, fewer outliers
63
+
64
+ matches_A, matches_B = matcher.to_pixel_coords(matches_A, matches_B, H_A, W_A, H_B, W_B)
65
+
66
+ ```
67
+ ## Pretrained Models
68
+
69
+ Right now you can find them here: https://github.com/Parskatt/DeDoDe/releases/tag/dedode_pretrained_models
70
+ Probably we'll add some autoloading in the near future.
71
+
72
+ ## BibTeX
73
+
74
+ Coming Soon ;)
third_party/DeDoDe/assets/dedode_roma.png ADDED

Git LFS Details

  • SHA256: 99b5ec4e94a39c0c02410c75a282187a67965356d299414a0d90cb1399efaaf1
  • Pointer size: 130 Bytes
  • Size of remote file: 61.1 kB
third_party/DeDoDe/assets/im_A.jpg ADDED

Git LFS Details

  • SHA256: d98d1767dfcf55774bf63de5c2a16b04e2027314196f3f27eee93e94d0be3842
  • Pointer size: 131 Bytes
  • Size of remote file: 289 kB
third_party/DeDoDe/assets/im_B.jpg ADDED

Git LFS Details

  • SHA256: 31862353454661b73afea6d2a49d5e4f15fa6d504b2bb6b2fbf2a9a24d96c4c7
  • Pointer size: 132 Bytes
  • Size of remote file: 1.14 MB
third_party/DeDoDe/assets/matches.jpg ADDED

Git LFS Details

  • SHA256: 7ec583517a1e6ba4a7bc0e3cf89ade4125131426a00aed7bc7bbc1df0e4d884e
  • Pointer size: 131 Bytes
  • Size of remote file: 729 kB
third_party/DeDoDe/assets/teaser.png ADDED

Git LFS Details

  • SHA256: 2dfed4aa08c7b2d9612e5d425ce3102d4c598839c9214a3aa5add9decf5cf0ee
  • Pointer size: 131 Bytes
  • Size of remote file: 124 kB
third_party/DeDoDe/data_prep/prep_keypoints.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ import imagesize
4
+
5
+ import numpy as np
6
+
7
+ import os
8
+
9
+
10
# Extract, for every MegaDepth scene, the 2D detections listed in the scene's
# images.txt and store them as one .npy file per scene.
base_path = "data/megadepth"
# Remove the trailing / if need be.
if base_path[-1] in ['/', '\\']:
    base_path = base_path[: - 1]


base_depth_path = os.path.join(
    base_path, 'phoenix/S6/zl548/MegaDepth_v1'
)
base_undistorted_sfm_path = os.path.join(
    base_path, 'Undistorted_SfM'
)

# np.save does not create missing parent directories, so make sure the output
# folder exists before the first scene is written.
detections_dir = f"{base_path}/prep_scene_info/detections"
os.makedirs(detections_dir, exist_ok=True)

scene_ids = os.listdir(base_undistorted_sfm_path)
for scene_id in scene_ids:
    detections_path = f"{detections_dir}/detections_{scene_id}.npy"
    if os.path.exists(detections_path):
        print(f"skipping {scene_id} as it exists")
        continue
    undistorted_sparse_path = os.path.join(
        base_undistorted_sfm_path, scene_id, 'sparse-txt'
    )
    if not os.path.exists(undistorted_sparse_path):
        print("sparse path doesnt exist")
        continue

    depths_path = os.path.join(
        base_depth_path, scene_id, 'dense0', 'depths'
    )
    if not os.path.exists(depths_path):
        print("depths doesnt exist")
        continue

    images_path = os.path.join(
        base_undistorted_sfm_path, scene_id, 'images'
    )
    if not os.path.exists(images_path):
        print("images path doesnt exist")
        continue

    # Scenes without a cameras.txt are skipped. The file's contents are not
    # needed for the detections, only its presence is checked.
    if not os.path.exists(os.path.join(undistorted_sparse_path, 'cameras.txt')):
        print("no cameras")
        continue

    # Process images.txt
    with open(os.path.join(undistorted_sparse_path, 'images.txt'), 'r') as f:
        raw = f.readlines()[4 :]  # skip the header

    # After the header the file alternates between an image line and a points
    # line; only the points lines (every second line) carry the detections.
    id_to_detections = {}
    for idx, points in enumerate(raw[1 :: 2]):
        # split() without arguments drops the trailing newline and yields an
        # empty list for an image that has no points at all.
        tokens = points.split()
        # presumably (x, y, point3D_id) triplets as in COLMAP's images.txt
        # -- TODO confirm
        points_np = np.array(tokens).astype(np.float32).reshape(len(tokens) // 3, 3)
        # Keep only rows whose third column is not -1.
        visible_points = points_np[points_np[:, 2] != -1]
        id_to_detections[idx] = visible_points
    np.save(detections_path, id_to_detections)
    print(f"{scene_id} done")
third_party/DeDoDe/demo/demo_kpts.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+ from DeDoDe import dedode_detector_L
6
+
7
def draw_kpts(im, kpts):
    """Draw keypoints on top of an image and return the rendered array.

    Args:
        im: image convertible with ``np.array`` (the demo passes a PIL image).
        kpts: tensor of (x, y) keypoint coordinates; it is moved to the CPU
            and converted to NumPy before drawing.

    Returns:
        The array produced by ``cv2.drawKeypoints``.
    """
    canvas = np.array(im)
    markers = []
    for x, y in kpts.cpu().numpy():
        # Every keypoint is drawn with a fixed size of 1.
        markers.append(cv2.KeyPoint(x, y, 1.))
    return cv2.drawKeypoints(canvas, markers, None)
12
+
13
# Detect keypoints on the sample image and save a visualisation of them.
# The checkpoint name uses a capital "L", matching the shipped weight file
# (dedode_detector_L.pth); the lowercase spelling fails on case-sensitive
# file systems.
detector = dedode_detector_L(weights = torch.load("dedode_detector_L.pth"))
im_path = "assets/im_A.jpg"
im = Image.open(im_path)
out = detector.detect_from_path(im_path, num_keypoints = 10_000)
W,H = im.size  # PIL reports (width, height)
kps = out["keypoints"]
# Convert the detector's keypoints to pixel coordinates of the original image.
kps = detector.to_pixel_coords(kps, H, W)
# kps[0]: first (and only) batch element.
Image.fromarray(draw_kpts(im, kps[0])).save("demo/keypoints.png")
third_party/DeDoDe/demo/demo_match.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from DeDoDe import dedode_detector_L, dedode_descriptor_B
3
+ from DeDoDe.matchers.dual_softmax_matcher import DualSoftMaxMatcher
4
+ from DeDoDe.utils import *
5
+ from PIL import Image
6
+ import cv2
7
+
8
def draw_matches(im_A, kpts_A, im_B, kpts_B):
    """Render index-aligned matches between two images.

    Args:
        im_A: first image, convertible with ``np.array``.
        kpts_A: tensor of (x, y) coordinates in image A.
        im_B: second image, convertible with ``np.array``.
        kpts_B: tensor of (x, y) coordinates in image B; row ``i`` is drawn
            as the match of row ``i`` of ``kpts_A``.

    Returns:
        The array produced by ``cv2.drawMatches``.
    """
    cv_kpts_A = [cv2.KeyPoint(x, y, 1.) for x, y in kpts_A.cpu().numpy()]
    cv_kpts_B = [cv2.KeyPoint(x, y, 1.) for x, y in kpts_B.cpu().numpy()]
    # Match i links keypoint i of A to keypoint i of B, with distance 0.
    pairings = []
    for idx in range(len(cv_kpts_A)):
        pairings.append(cv2.DMatch(idx, idx, 0.))
    canvas_A = np.array(im_A)
    canvas_B = np.array(im_B)
    return cv2.drawMatches(canvas_A, cv_kpts_A, canvas_B, cv_kpts_B, pairings, None)
16
+
17
import cv2
import numpy as np

# Build the detector, the descriptor and the matcher from local checkpoints.
detector = dedode_detector_L(weights=torch.load("dedode_detector_L.pth"))
descriptor = dedode_descriptor_B(weights=torch.load("dedode_descriptor_B.pth"))
matcher = DualSoftMaxMatcher()

# Sample image pair; PIL reports sizes as (width, height).
im_A_path = "assets/im_A.jpg"
im_B_path = "assets/im_B.jpg"
im_A = Image.open(im_A_path)
im_B = Image.open(im_B_path)
W_A, H_A = im_A.size
W_B, H_B = im_B.size

# Detect keypoints (with confidences) in each image.
detections_A = detector.detect_from_path(im_A_path, num_keypoints=10_000)
keypoints_A = detections_A["keypoints"]
P_A = detections_A["confidence"]
detections_B = detector.detect_from_path(im_B_path, num_keypoints=10_000)
keypoints_B = detections_B["keypoints"]
P_B = detections_B["confidence"]

# Describe the detected keypoints.
description_A = descriptor.describe_keypoints_from_path(im_A_path, keypoints_A)["descriptions"]
description_B = descriptor.describe_keypoints_from_path(im_B_path, keypoints_B)["descriptions"]

# Increasing threshold -> fewer matches, fewer outliers
matches_A, matches_B, batch_ids = matcher.match(
    keypoints_A, description_A,
    keypoints_B, description_B,
    P_A=P_A, P_B=P_B,
    normalize=True, inv_temp=20, threshold=0.1,
)

matches_A, matches_B = matcher.to_pixel_coords(matches_A, matches_B, H_A, W_A, H_B, W_B)

# Draw every fifth match to keep the visualisation readable.
rendered = draw_matches(im_A, matches_A[::5], im_B, matches_B[::5])
Image.fromarray(rendered).save("demo/matches.png")
third_party/DeDoDe/demo/demo_scoremap.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ import numpy as np
4
+
5
+ from DeDoDe import dedode_detector_L
6
+ from DeDoDe.utils import tensor_to_pil
7
+
8
# Visualise the detector's dense keypoint logits blended over the input image.
# The checkpoint name uses a capital "L", matching the shipped weight file
# (dedode_detector_L.pth); the lowercase spelling fails on case-sensitive
# file systems.
detector = dedode_detector_L(weights = torch.load("dedode_detector_L.pth"))
H, W = 768, 768
im_path = "assets/im_A.jpg"

out = detector.detect_from_path(im_path, dense = True, H = H, W = W)

logit_map = out["dense_keypoint_logits"].clone()
# Keep only the top 3 logit units below the maximum, then rescale to [0, 1].
# (Named min_logit so the builtin min() is not shadowed.)
min_logit = logit_map.max() - 3
logit_map[logit_map < min_logit] = min_logit
logit_map = (logit_map-min_logit)/(logit_map.max()-min_logit)
# assumes the first batch element is broadcastable to (3, H, W) -- TODO confirm
logit_map = logit_map.cpu()[0].expand(3,H,W)
im_A = torch.tensor(np.array(Image.open(im_path).resize((W,H)))/255.).permute(2,0,1)
# Bright where the score is high, dimmed image elsewhere.
tensor_to_pil(logit_map * logit_map + 0.15 * (1-logit_map) * im_A).save("demo/dense_logits.png")
third_party/DeDoDe/pretrained/dedode_descriptor_B.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eccfc270ec990ced60cd54a434411a47c4c504de13586f596d042e005b3022b
3
+ size 54257185
third_party/DeDoDe/pretrained/dedode_detector_L.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:663c972a7215538ef3170ccc3183bb1019610ffde4bc7c9da6c13b143388dd64
3
+ size 58488277
third_party/DeDoDe/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy
2
+ matplotlib
3
+ torch
4
+ torchvision
5
+ h5py
6
+ tqdm
7
+ pillow
8
+ einops
9
+ opencv-python
third_party/DeDoDe/setup.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+
3
+
4
# Read the requirements with a context manager so the file is closed, and drop
# blank lines and comments: splitting on "\n" would otherwise pass empty
# requirement strings to setuptools.
with open("requirements.txt", "r") as requirements_file:
    requirements = [
        line.strip()
        for line in requirements_file
        if line.strip() and not line.strip().startswith("#")
    ]

setup(
    name="DeDoDe",
    packages=find_packages(include= ["DeDoDe*"]),
    install_requires=requirements,
    version="0.0.1",
    author="Johan Edstedt",
)
third_party/SuperGluePretrainedNetwork/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.pyc
2
+ *.DS_Store
3
+ *.swp
third_party/SuperGluePretrainedNetwork/LICENSE ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SUPERGLUE: LEARNING FEATURE MATCHING WITH GRAPH NEURAL NETWORKS
2
+ SOFTWARE LICENSE AGREEMENT
3
+ ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
4
+
5
+ BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
6
+
7
+ This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Magic Leap, Inc. (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
8
+
9
+ RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
10
+ Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive, non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).
11
+
12
+ CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.
13
+
14
+ COPYRIGHT: The Software is owned by Licensor and is protected by United States copyright laws and applicable international treaties and/or conventions.
15
+
16
+ PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.
17
+
18
+ DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement.
19
+
20
+ BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies.
21
+
22
+ USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark "Magic Leap" or any renditions thereof without the prior written permission of Licensor.
23
+
24
+ You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software.
25
+
26
+ ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void.
27
+
28
+ TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below.
29
+
30
+ The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.
31
+
32
+ FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement.
33
+
34
+ DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.
35
+
36
+ SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
37
+
38
+ EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.
39
+
40
+ EXPORT REGULATION: Licensee agrees to comply with any and all applicable U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
41
+
42
+ SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.
43
+
44
+ NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor.
45
+
46
+ GOVERNING LAW: This Agreement shall be construed and enforced in accordance with the laws of the State of Florida without reference to conflict of laws principles. You consent to the personal jurisdiction of the courts of this County and waive their rights to venue outside of Broward County, Florida.
47
+
48
+ ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto.
third_party/SuperGluePretrainedNetwork/README.md ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <img src="assets/magicleap.png" width="240">
2
+
3
+ ### Research @ Magic Leap (CVPR 2020, Oral)
4
+
5
+ # SuperGlue Inference and Evaluation Demo Script
6
+
7
+ ## Introduction
8
+ SuperGlue is a CVPR 2020 research project done at Magic Leap. The SuperGlue network is a Graph Neural Network combined with an Optimal Matching layer that is trained to perform matching on two sets of sparse image features. This repo includes PyTorch code and pretrained weights for running the SuperGlue matching network on top of [SuperPoint](https://arxiv.org/abs/1712.07629) keypoints and descriptors. Given a pair of images, you can use this repo to extract matching features across the image pair.
9
+
10
+ <p align="center">
11
+ <img src="assets/teaser.png" width="500">
12
+ </p>
13
+
14
+ SuperGlue operates as a "middle-end," performing context aggregation, matching, and filtering in a single end-to-end architecture. For more details, please see:
15
+
16
+ * Full paper PDF: [SuperGlue: Learning Feature Matching with Graph Neural Networks](https://arxiv.org/abs/1911.11763).
17
+
18
+ * Authors: *Paul-Edouard Sarlin, Daniel DeTone, Tomasz Malisiewicz, Andrew Rabinovich*
19
+
20
+ * Website: [psarlin.com/superglue](https://psarlin.com/superglue) for videos, slides, recent updates, and more visualizations.
21
+
22
+ * `hloc`: a new toolbox for visual localization and SfM with SuperGlue, available at [cvg/Hierarchical-Localization](https://github.com/cvg/Hierarchical-Localization/). Winner of 3 CVPR 2020 competitions on localization and image matching!
23
+
24
+ We provide two pre-trained weights files: an indoor model trained on ScanNet data, and an outdoor model trained on MegaDepth data. Both models are inside the [weights directory](./models/weights). By default, the demo will run the **indoor** model.
25
+
26
+ ## Dependencies
27
+ * Python 3 >= 3.5
28
+ * PyTorch >= 1.1
29
+ * OpenCV >= 3.4 (4.1.2.30 recommended for best GUI keyboard interaction, see this [note](#additional-notes))
30
+ * Matplotlib >= 3.1
31
+ * NumPy >= 1.18
32
+
33
+ Simply run the following command: `pip3 install numpy opencv-python torch matplotlib`
34
+
35
+ ## Contents
36
+ There are two main top-level scripts in this repo:
37
+
38
+ 1. `demo_superglue.py` : runs a live demo on a webcam, IP camera, image directory or movie file
39
+ 2. `match_pairs.py`: reads image pairs from files and dumps matches to disk (also runs evaluation if ground truth relative poses are provided)
40
+
41
+ ## Live Matching Demo Script (`demo_superglue.py`)
42
+ This demo runs SuperPoint + SuperGlue feature matching on an anchor image and live image. You can update the anchor image by pressing the `n` key. The demo can read image streams from a USB or IP camera, a directory containing images, or a video file. You can pass all of these inputs using the `--input` flag.
43
+
44
+ ### Run the demo on a live webcam
45
+
46
+ Run the demo on the default USB webcam (ID #0), running on a CUDA GPU if one is found:
47
+
48
+ ```sh
49
+ ./demo_superglue.py
50
+ ```
51
+
52
+ Keyboard control:
53
+
54
+ * `n`: select the current frame as the anchor
55
+ * `e`/`r`: increase/decrease the keypoint confidence threshold
56
+ * `d`/`f`: increase/decrease the match filtering threshold
57
+ * `k`: toggle the visualization of keypoints
58
+ * `q`: quit
59
+
60
+ Run the demo on 320x240 images running on the CPU:
61
+
62
+ ```sh
63
+ ./demo_superglue.py --resize 320 240 --force_cpu
64
+ ```
65
+
66
+ The `--resize` flag can be used to resize the input image in three ways:
67
+
68
+ 1. `--resize` `width` `height` : will resize to exact `width` x `height` dimensions
69
+ 2. `--resize` `max_dimension` : will resize largest input image dimension to `max_dimension`
70
+ 3. `--resize` `-1` : will not resize (i.e. use original image dimensions)
71
+
72
+ The default will resize images to `640x480`.
73
+
74
+ ### Run the demo on a directory of images
75
+
76
+ The `--input` flag also accepts a path to a directory. We provide a directory of sample images from a sequence. To run the demo on the directory of images in `freiburg_sequence/` on a headless server (will not display to the screen) and write the output visualization images to `dump_demo_sequence/`:
77
+
78
+ ```sh
79
+ ./demo_superglue.py --input assets/freiburg_sequence/ --output_dir dump_demo_sequence --resize 320 240 --no_display
80
+ ```
81
+
82
+ You should see this output on the sample Freiburg-TUM RGBD sequence:
83
+
84
+ <img src="assets/freiburg_matches.gif" width="560">
85
+
86
+ The matches are colored by their predicted confidence in a jet colormap (Red: more confident, Blue: less confident).
87
+
88
+ ### Additional useful command line parameters
89
+ * Use `--image_glob` to change the image file extension (default: `*.png`, `*.jpg`, `*.jpeg`).
90
+ * Use `--skip` to skip intermediate frames (default: `1`).
91
+ * Use `--max_length` to cap the total number of frames processed (default: `1000000`).
92
+ * Use `--show_keypoints` to visualize the detected keypoints (default: `False`).
93
+
94
+ ## Run Matching+Evaluation (`match_pairs.py`)
95
+
96
+ This repo also contains a script `match_pairs.py` that runs the matching from a list of image pairs. With this script, you can:
97
+
98
+ * Run the matcher on a set of image pairs (no ground truth needed)
99
+ * Visualize the keypoints and matches, based on their confidence
100
+ * Evaluate and visualize the match correctness, if the ground truth relative poses and intrinsics are provided
101
+ * Save the keypoints, matches, and evaluation results for further processing
102
+ * Collate evaluation results over many pairs and generate result tables
103
+
104
+ ### Matches only mode
105
+
106
+ The simplest usage of this script will process the image pairs listed in a given text file and dump the keypoints and matches to compressed numpy `npz` files. We provide the challenging ScanNet pairs from the main paper in `assets/example_indoor_pairs/`. Running the following will run SuperPoint + SuperGlue on each image pair, and dump the results to `dump_match_pairs/`:
107
+
108
+ ```sh
109
+ ./match_pairs.py
110
+ ```
111
+
112
+ The resulting `.npz` files can be read from Python as follows:
113
+
114
+ ```python
115
+ >>> import numpy as np
116
+ >>> path = 'dump_match_pairs/scene0711_00_frame-001680_scene0711_00_frame-001995_matches.npz'
117
+ >>> npz = np.load(path)
118
+ >>> npz.files
119
+ ['keypoints0', 'keypoints1', 'matches', 'match_confidence']
120
+ >>> npz['keypoints0'].shape
121
+ (382, 2)
122
+ >>> npz['keypoints1'].shape
123
+ (391, 2)
124
+ >>> npz['matches'].shape
125
+ (382,)
126
+ >>> np.sum(npz['matches']>-1)
127
+ 115
128
+ >>> npz['match_confidence'].shape
129
+ (382,)
130
+ ```
131
+
132
+ For each keypoint in `keypoints0`, the `matches` array indicates the index of the matching keypoint in `keypoints1`, or `-1` if the keypoint is unmatched.
133
+
134
+ ### Visualization mode
135
+
136
+ You can add the flag `--viz` to dump image outputs which visualize the matches:
137
+
138
+ ```sh
139
+ ./match_pairs.py --viz
140
+ ```
141
+
142
+ You should see images like this inside of `dump_match_pairs/` (or something very close to it, see this [note](#a-note-on-reproducibility)):
143
+
144
+ <img src="assets/indoor_matches.png" width="560">
145
+
146
+ The matches are colored by their predicted confidence in a jet colormap (Red: more confident, Blue: less confident).
147
+
148
+ ### Evaluation mode
149
+
150
+ You can also estimate the pose using RANSAC + Essential Matrix decomposition and evaluate it if the ground truth relative poses and intrinsics are provided in the input `.txt` files. Each `.txt` file contains three key ground truth matrices: a 3x3 intrinsics matrix of image0: `K0`, a 3x3 intrinsics matrix of image1: `K1` , and a 4x4 matrix of the relative pose extrinsics `T_0to1`.
151
+
152
+ To run the evaluation on the sample set of images (by default reading `assets/scannet_sample_pairs_with_gt.txt`), you can run:
153
+
154
+ ```sh
155
+ ./match_pairs.py --eval
156
+ ```
157
+
158
+
159
+ Since you enabled `--eval`, you should see collated results printed to the terminal. For the example images provided, you should get the following numbers (or something very close to it, see this [note](#a-note-on-reproducibility)):
160
+
161
+ ```txt
162
+ Evaluation Results (mean over 15 pairs):
163
+ AUC@5 AUC@10 AUC@20 Prec MScore
164
+ 26.99 48.40 64.47 73.52 19.60
165
+ ```
166
+
167
+ The resulting `.npz` files in `dump_match_pairs/` will now contain scalar values related to the evaluation, computed on the sample images provided. Here is what you should find in one of the generated evaluation files:
168
+
169
+ ```python
170
+ >>> import numpy as np
171
+ >>> path = 'dump_match_pairs/scene0711_00_frame-001680_scene0711_00_frame-001995_evaluation.npz'
172
+ >>> npz = np.load(path)
173
+ >>> print(npz.files)
174
+ ['error_t', 'error_R', 'precision', 'matching_score', 'num_correct', 'epipolar_errors']
175
+ ```
176
+
177
+ You can also visualize the evaluation metrics by running the following command:
178
+
179
+ ```sh
180
+ ./match_pairs.py --eval --viz
181
+ ```
182
+
183
+ You should also now see additional images in `dump_match_pairs/` which visualize the evaluation numbers (or something very close to it, see this [note](#a-note-on-reproducibility)):
184
+
185
+ <img src="assets/indoor_evaluation.png" width="560">
186
+
187
+ The top left corner of the image shows the pose error and number of inliers, while the lines are colored by their epipolar error computed with the ground truth relative pose (red: higher error, green: lower error).
188
+
189
+ ### Running on sample outdoor pairs
190
+
191
+ <details>
192
+ <summary>[Click to expand]</summary>
193
+
194
+ In this repo, we also provide a few challenging Phototourism pairs, so that you can re-create some of the figures from the paper. Run this script to run matching and visualization (no ground truth is provided, see this [note](#reproducing-outdoor-evaluation-on-phototourism)) on the provided pairs:
195
+
196
+ ```sh
197
+ ./match_pairs.py --resize 1600 --superglue outdoor --max_keypoints 2048 --nms_radius 3 --resize_float --input_dir assets/phototourism_sample_images/ --input_pairs assets/phototourism_sample_pairs.txt --output_dir dump_match_pairs_outdoor --viz
198
+ ```
199
+
200
+ You should now see image pairs such as these in `dump_match_pairs_outdoor/` (or something very close to it, see this [note](#a-note-on-reproducibility)):
201
+
202
+ <img src="assets/outdoor_matches.png" width="560">
203
+
204
+ </details>
205
+
206
+ ### Recommended settings for indoor / outdoor
207
+
208
+ <details>
209
+ <summary>[Click to expand]</summary>
210
+
211
+ For **indoor** images, we recommend the following settings (these are the defaults):
212
+
213
+ ```sh
214
+ ./match_pairs.py --resize 640 --superglue indoor --max_keypoints 1024 --nms_radius 4
215
+ ```
216
+
217
+ For **outdoor** images, we recommend the following settings:
218
+
219
+ ```sh
220
+ ./match_pairs.py --resize 1600 --superglue outdoor --max_keypoints 2048 --nms_radius 3 --resize_float
221
+ ```
222
+
223
+ You can provide your own list of pairs `--input_pairs` for images contained in `--input_dir`. Images can be resized before network inference with `--resize`. If you are re-running the same evaluation many times, you can use the `--cache` flag to reuse old computation.
224
+ </details>
225
+
226
+ ### Test set pair file format explained
227
+
228
+ <details>
229
+ <summary>[Click to expand]</summary>
230
+
231
+ We provide the list of ScanNet test pairs in `assets/scannet_test_pairs_with_gt.txt` (with ground truth) and Phototourism test pairs `assets/phototourism_test_pairs.txt` (without ground truth) used to evaluate the matching from the paper. Each line corresponds to one pair and is structured as follows:
232
+
233
+ ```
234
+ path_image_A path_image_B exif_rotationA exif_rotationB [KA_0 ... KA_8] [KB_0 ... KB_8] [T_AB_0 ... T_AB_15]
235
+ ```
236
+
237
+ The `path_image_A` and `path_image_B` entries are paths to image A and B, respectively. The `exif_rotation` is an integer in the range [0, 3] that comes from the original EXIF metadata associated with the image, where, 0: no rotation, 1: 90 degree clockwise, 2: 180 degree clockwise, 3: 270 degree clockwise. If the EXIF data is not known, you can just provide a zero here and no rotation will be performed. `KA` and `KB` are the flattened `3x3` matrices of image A and image B intrinsics. `T_AB` is a flattened `4x4` matrix of the extrinsics between the pair.
238
+ </details>
239
+
240
+ ### Reproducing the indoor evaluation on ScanNet
241
+
242
+ <details>
243
+ <summary>[Click to expand]</summary>
244
+
245
+ We provide the groundtruth for ScanNet in our format in the file `assets/scannet_test_pairs_with_gt.txt` for convenience. In order to reproduce similar tables to what was in the paper, you will need to download the dataset (we do not provide the raw test images). To download the ScanNet dataset, do the following:
246
+
247
+ 1. Head to the [ScanNet](https://github.com/ScanNet/ScanNet) github repo to download the ScanNet test set (100 scenes).
248
+ 2. You will need to extract the raw sensor data from the 100 `.sens` files in each scene in the test set using the [SensReader](https://github.com/ScanNet/ScanNet/tree/master/SensReader) tool.
249
+
250
+ Once the ScanNet dataset is downloaded in `~/data/scannet`, you can run the following:
251
+
252
+ ```sh
253
+ ./match_pairs.py --input_dir ~/data/scannet --input_pairs assets/scannet_test_pairs_with_gt.txt --output_dir dump_scannet_test_results --eval
254
+ ```
255
+
256
+ You should get the following table for ScanNet (or something very close to it, see this [note](#a-note-on-reproducibility)):
257
+
258
+ ```txt
259
+ Evaluation Results (mean over 1500 pairs):
260
+ AUC@5 AUC@10 AUC@20 Prec MScore
261
+ 16.12 33.76 51.79 84.37 31.14
262
+ ```
263
+
264
+ </details>
265
+
266
+ ### Reproducing the outdoor evaluation on YFCC
267
+
268
+ <details>
269
+ <summary>[Click to expand]</summary>
270
+
271
+ We provide the groundtruth for YFCC in our format in the file `assets/yfcc_test_pairs_with_gt.txt` for convenience. In order to reproduce similar tables to what was in the paper, you will need to download the dataset (we do not provide the raw test images). To download the YFCC dataset, you can use the [OANet](https://github.com/zjhthu/OANet) repo:
272
+
273
+ ```sh
274
+ git clone https://github.com/zjhthu/OANet
275
+ cd OANet
276
+ bash download_data.sh raw_data raw_data_yfcc.tar.gz 0 8
277
+ tar -xvf raw_data_yfcc.tar.gz
278
+ mv raw_data/yfcc100m ~/data
279
+ ```
280
+
281
+ Once the YFCC dataset is downloaded in `~/data/yfcc100m`, you can run the following:
282
+
283
+ ```sh
284
+ ./match_pairs.py --input_dir ~/data/yfcc100m --input_pairs assets/yfcc_test_pairs_with_gt.txt --output_dir dump_yfcc_test_results --eval --resize 1600 --superglue outdoor --max_keypoints 2048 --nms_radius 3 --resize_float
285
+ ```
286
+
287
+ You should get the following table for YFCC (or something very close to it, see this [note](#a-note-on-reproducibility)):
288
+
289
+ ```txt
290
+ Evaluation Results (mean over 4000 pairs):
291
+ AUC@5 AUC@10 AUC@20 Prec MScore
292
+ 39.02 59.51 75.72 98.72 23.61
293
+ ```
294
+
295
+ </details>
296
+
297
+ ### Reproducing outdoor evaluation on Phototourism
298
+
299
+ <details>
300
+ <summary>[Click to expand]</summary>
301
+
302
+ The Phototourism results shown in the paper were produced using similar data as the test set from the [Image Matching Challenge 2020](https://vision.uvic.ca/image-matching-challenge/), which holds the ground truth data private for the test set. We list the pairs we used in `assets/phototourism_test_pairs.txt`. To reproduce similar numbers on this test set, please submit to the challenge benchmark. While the challenge is still live, we cannot share the test set publicly since we want to help maintain the integrity of the challenge.
303
+
304
+ </details>
305
+
306
+ ### Correcting EXIF rotation data in YFCC and Phototourism
307
+
308
+ <details>
309
+ <summary>[Click to expand]</summary>
310
+
311
+ In this repo, we provide manually corrected EXIF rotation data for the outdoor evaluations on YFCC and Phototourism. For the YFCC dataset we found 7 images with incorrect EXIF rotation flags, resulting in 148 pairs out of 4000 being corrected. For Phototourism, we found 36 images with incorrect EXIF rotation flags, resulting in 212 out of 2200 pairs being corrected.
312
+
313
+ The SuperGlue paper reports the results of SuperGlue **without** the corrected rotations, while the numbers in this README are reported **with** the corrected rotations. We found that our final conclusions from the evaluation still hold with or without the corrected rotations. For backwards compatibility, we included the original, uncorrected EXIF rotation data in `assets/phototourism_test_pairs_original.txt` and `assets/yfcc_test_pairs_with_gt_original.txt` respectively.
314
+
315
+ </details>
316
+
317
+ ### Outdoor training / validation scene splits of MegaDepth
318
+
319
+ <details>
320
+ <summary>[Click to expand]</summary>
321
+
322
+ For training and validation of the outdoor model, we used scenes from the [MegaDepth dataset](http://www.cs.cornell.edu/projects/megadepth/). We provide the list of scenes used to train the outdoor model in the `assets/` directory:
323
+
324
+ * Training set: `assets/megadepth_train_scenes.txt`
325
+ * Validation set: `assets/megadepth_validation_scenes.txt`
326
+
327
+ </details>
328
+
329
+ ### A note on reproducibility
330
+
331
+ <details>
332
+ <summary>[Click to expand]</summary>
333
+
334
+ After simplifying the model code and evaluation code and preparing it for release, we made some improvements and tweaks that result in slightly different numbers than what was reported in the paper. The numbers and figures reported in the README were produced using Ubuntu 16.04, OpenCV 3.4.5, and PyTorch 1.1.0. Even with matching the library versions, we observed some slight differences across Mac and Ubuntu, which we believe are due to differences in OpenCV's image resize function implementation and randomization of RANSAC.
335
+ </details>
336
+
337
+ ### Creating high-quality PDF visualizations and faster visualization with --fast_viz
338
+
339
+ <details>
340
+ <summary>[Click to expand]</summary>
341
+
342
+ When generating output images with `match_pairs.py`, the default `--viz` flag uses a Matplotlib renderer which allows for the generation of camera-ready PDF visualizations if you additionally use `--viz_extension pdf` instead of the default png extension.
343
+
344
+ ```
345
+ ./match_pairs.py --viz --viz_extension pdf
346
+ ```
347
+
348
+ Alternatively, you might want to save visualization images but have the generation be much faster. You can use the `--fast_viz` flag to use an OpenCV-based image renderer as follows:
349
+
350
+ ```
351
+ ./match_pairs.py --viz --fast_viz
352
+ ```
353
+
354
+ If you would also like an OpenCV display window to preview the results (you must use non-pdf output and use `--fast_viz`), simply run:
355
+
356
+ ```
357
+ ./match_pairs.py --viz --fast_viz --opencv_display
358
+ ```
359
+
360
+ </details>
361
+
362
+
363
+ ## BibTeX Citation
364
+ If you use any ideas from the paper or code from this repo, please consider citing:
365
+
366
+ ```txt
367
+ @inproceedings{sarlin20superglue,
368
+ author = {Paul-Edouard Sarlin and
369
+ Daniel DeTone and
370
+ Tomasz Malisiewicz and
371
+ Andrew Rabinovich},
372
+ title = {{SuperGlue}: Learning Feature Matching with Graph Neural Networks},
373
+ booktitle = {CVPR},
374
+ year = {2020},
375
+ url = {https://arxiv.org/abs/1911.11763}
376
+ }
377
+ ```
378
+
379
+ ## Additional Notes
380
+ * For the demo, we found that the keyboard interaction works well with OpenCV 4.1.2.30; older versions were less responsive and the newest version had an [OpenCV bug on Mac](https://stackoverflow.com/questions/60032540/opencv-cv2-imshow-is-not-working-because-of-the-qt).
381
+ * We generally do not recommend running SuperPoint+SuperGlue below 160x120 resolution (QQVGA) or above 2000x1500.
382
+ * We do not intend to release the SuperGlue training code.
383
+ * We do not intend to release the SIFT-based or homography SuperGlue models.
384
+
385
+ ## Legal Disclaimer
386
+ Magic Leap is proud to provide its latest samples, toolkits, and research projects on Github to foster development and gather feedback from the spatial computing community. Use of the resources within this repo is subject to (a) the license(s) included herein, or (b) if no license is included, Magic Leap's [Developer Agreement](https://id.magicleap.com/terms/developer), which is available on our [Developer Portal](https://developer.magicleap.com/).
387
+ If you need more, just ask on the [forums](https://forum.magicleap.com/hc/en-us/community/topics)!
388
+ We're thrilled to be part of a well-meaning, friendly and welcoming community of millions.
third_party/SuperGluePretrainedNetwork/assets/freiburg_matches.gif ADDED

Git LFS Details

  • SHA256: c7d9e458f625eccbf94e7171218beb6f8c392ca432df3de0ed0a8a2544e13fd6
  • Pointer size: 132 Bytes
  • Size of remote file: 1.43 MB
third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847980.722988.png ADDED

Git LFS Details

  • SHA256: a84f24b35cfa73f3180d63a6f6068cbae637ff5c3612c8ef05882ce3979607cc
  • Pointer size: 131 Bytes
  • Size of remote file: 475 kB
third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847981.726650.png ADDED

Git LFS Details

  • SHA256: 412444ac5b49e20c7645aa73971fd726c6b850aa5cddd26e5cc3c98be2fca9b5
  • Pointer size: 131 Bytes
  • Size of remote file: 474 kB
third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847982.730674.png ADDED

Git LFS Details

  • SHA256: 7503ce561dc0ed56f6729987d6ea0b20d0dbbaaab4bf94a0c5da0af8acfad6b8
  • Pointer size: 131 Bytes
  • Size of remote file: 471 kB
third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847983.738736.png ADDED

Git LFS Details

  • SHA256: d17f1787d5ebaa8bec78d11dc7ca495b13c4bcf90a2107535e1ca1dfa350d97a
  • Pointer size: 131 Bytes
  • Size of remote file: 471 kB
third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847984.743352.png ADDED

Git LFS Details

  • SHA256: a55c645df3d230382c221d4d369d6fcd4a584e4c2de121fefd4f3f95de3b25ea
  • Pointer size: 131 Bytes
  • Size of remote file: 466 kB
third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847985.746954.png ADDED

Git LFS Details

  • SHA256: 48d6756acce2e4017f3e00617e5160c0fd78bf0eadc21d597810b01604690f86
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847986.762616.png ADDED

Git LFS Details

  • SHA256: b90f0e5e645c314b1cb1990a061bb92022fdc9af6552de434ce4cf1db40a6eee
  • Pointer size: 131 Bytes
  • Size of remote file: 505 kB
third_party/SuperGluePretrainedNetwork/assets/freiburg_sequence/1341847987.758741.png ADDED

Git LFS Details

  • SHA256: 4e00ba6b00d5ef3784acd99c9fedb1ad2bee4567f9b29e538986a5f85979b1f6
  • Pointer size: 131 Bytes
  • Size of remote file: 502 kB