Realcat committed on
Commit
2eaeef9
1 Parent(s): 68a65da

update: add SIFT extractor and update LightGlue

Files changed (43)
  1. common/config.yaml +13 -1
  2. common/utils.py +13 -4
  3. hloc/extract_features.py +3 -2
  4. hloc/extractors/alike.py +2 -0
  5. hloc/extractors/d2net.py +4 -0
  6. hloc/extractors/darkfeat.py +2 -1
  7. hloc/extractors/dedode.py +2 -3
  8. hloc/extractors/example.py +1 -0
  9. hloc/extractors/lanet.py +2 -0
  10. hloc/extractors/r2d2.py +2 -0
  11. hloc/extractors/rekd.py +2 -0
  12. hloc/extractors/rord.py +4 -5
  13. hloc/extractors/sift.py +224 -0
  14. hloc/extractors/superpoint.py +2 -0
  15. hloc/match_dense.py +2 -5
  16. hloc/match_features.py +38 -14
  17. hloc/matchers/duster.py +36 -30
  18. hloc/matchers/lightglue.py +10 -0
  19. hloc/matchers/sgmnet.py +6 -2
  20. hloc/matchers/sold2.py +1 -0
  21. hloc/utils/viz.py +1 -0
  22. third_party/LightGlue/.flake8 +4 -0
  23. third_party/LightGlue/.github/workflows/code-quality.yml +24 -0
  24. third_party/LightGlue/.gitignore +162 -6
  25. third_party/LightGlue/LICENSE +1 -1
  26. third_party/LightGlue/README.md +71 -25
  27. third_party/LightGlue/assets/DSC_0410.JPG +0 -0
  28. third_party/LightGlue/assets/DSC_0411.JPG +0 -0
  29. third_party/LightGlue/assets/benchmark.png +3 -0
  30. third_party/LightGlue/assets/benchmark_cpu.png +3 -0
  31. third_party/LightGlue/benchmark.py +255 -0
  32. third_party/LightGlue/demo.ipynb +29 -22
  33. third_party/LightGlue/lightglue/__init__.py +7 -4
  34. third_party/LightGlue/lightglue/aliked.py +758 -0
  35. third_party/LightGlue/lightglue/disk.py +10 -24
  36. third_party/LightGlue/lightglue/dog_hardnet.py +41 -0
  37. third_party/LightGlue/lightglue/lightglue.py +331 -146
  38. third_party/LightGlue/lightglue/sift.py +216 -0
  39. third_party/LightGlue/lightglue/superpoint.py +21 -36
  40. third_party/LightGlue/lightglue/utils.py +25 -10
  41. third_party/LightGlue/lightglue/viz2d.py +1 -1
  42. third_party/LightGlue/pyproject.toml +30 -0
  43. third_party/LightGlue/setup.py +0 -27
common/config.yaml CHANGED
@@ -25,7 +25,7 @@ matcher_zoo:
       source: "CVPR 2024"
       github: https://github.com/Vincentqyw/omniglue-onnx
       paper: https://arxiv.org/abs/2405.12979
-      project: https://hwjiang1510.github.io/OmniGlue/
+      project: https://hwjiang1510.github.io/OmniGlue
       display: true
   DUSt3R:
     # TODO: duster is under development
@@ -40,6 +40,7 @@ matcher_zoo:
       project: https://dust3r.europe.naverlabs.com
       display: true
   GIM(dkm):
+    enable: false
     matcher: gim(dkm)
     dense: true
     info:
@@ -197,6 +198,17 @@ matcher_zoo:
       paper: https://arxiv.org/abs/1712.07629
       project: null
       display: false
+  sift+lightglue:
+    matcher: sift-lightglue
+    feature: sift
+    dense: false
+    info:
+      name: LightGlue #dispaly name
+      source: "ICCV 2023"
+      github: https://github.com/cvg/LightGlue
+      paper: https://arxiv.org/pdf/2306.13643
+      project: null
+      display: true
   disk+lightglue:
     matcher: disk-lightglue
     feature: disk
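
The new `sift+lightglue` entry only registers the pairing in the web UI; the actual extractor and matcher configs live in hloc. A minimal sketch of reading the entry back out of `common/config.yaml` (assuming `matcher_zoo` is a top-level key, as the hunk headers suggest):

```python
# Illustrative only; not part of the commit.
import yaml

with open("common/config.yaml") as f:
    config = yaml.safe_load(f)

entry = config["matcher_zoo"]["sift+lightglue"]
print(entry["feature"])  # "sift"           -> extract_features.confs["sift"]
print(entry["matcher"])  # "sift-lightglue" -> match_features.confs["sift-lightglue"]
print(entry["dense"])    # False            -> sparse two-stage pipeline
```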
common/utils.py CHANGED
@@ -7,6 +7,7 @@ import psutil
 import shutil
 import numpy as np
 import gradio as gr
+from PIL import Image
 from pathlib import Path
 import poselib
 from itertools import combinations
@@ -231,10 +232,10 @@ def gen_examples():
         return [pairs[i] for i in selected]

     # rotated examples
-    def gen_rot_image_pairs(count: int = 5):
+    def gen_rot_image_pairs(count: int = 10):
         path = ROOT / "datasets/sacre_coeur/mapping"
         path_rot = ROOT / "datasets/sacre_coeur/mapping_rot"
-        rot_list = [45, 90, 135, 180, 225, 270]
+        rot_list = [45, 180, 90, 225, 270]
         pairs = []
         for file in os.listdir(path):
             if file.lower().endswith((".jpg", ".jpeg", ".png")):
@@ -274,6 +275,7 @@ def gen_examples():
     # image pair path
     pairs = gen_images_pairs()
     pairs += gen_rot_image_pairs()
+    pairs += gen_scale_image_pairs()
     pairs += gen_image_pairs_wxbs()

     match_setting_threshold = DEFAULT_SETTING_THRESHOLD
@@ -1015,8 +1017,15 @@ ransac_zoo = {


 def rotate_image(input_path, degrees, output_path):
-    from PIL import Image
-
     img = Image.open(input_path)
     img_rotated = img.rotate(-degrees)
     img_rotated.save(output_path)
+
+
+def scale_image(input_path, scale_factor, output_path):
+    img = Image.open(input_path)
+    width, height = img.size
+    new_width = int(width * scale_factor)
+    new_height = int(height * scale_factor)
+    img_resized = img.resize((new_width, new_height))
+    img_resized.save(output_path)
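
`gen_examples()` now also calls `gen_scale_image_pairs()`, which this hunk does not show. A hypothetical sketch of what such a helper could look like, mirroring the existing `gen_rot_image_pairs()` and using the new `scale_image()` helper (directory name and scale factors are assumptions, not taken from the commit):

```python
# Hypothetical helper, nested inside gen_examples() like gen_rot_image_pairs().
def gen_scale_image_pairs(count: int = 5):
    path = ROOT / "datasets/sacre_coeur/mapping"
    path_scale = ROOT / "datasets/sacre_coeur/mapping_scale"  # assumed output dir
    scale_list = [0.3, 0.5, 0.7]  # assumed scale factors
    path_scale.mkdir(parents=True, exist_ok=True)
    pairs = []
    for file in os.listdir(path):
        if file.lower().endswith((".jpg", ".jpeg", ".png")):
            stem, ext = os.path.splitext(file)
            for scale in scale_list:
                scaled_path = path_scale / f"{stem}_scale{scale}{ext}"
                if not scaled_path.exists():
                    scale_image(str(path / file), scale, str(scaled_path))
                pairs.append([str(path / file), str(scaled_path)])
    return pairs[:count]
```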
hloc/extract_features.py CHANGED
@@ -131,6 +131,7 @@ confs = {
         "output": "feats-rootsift-n5000-r1600",
         "model": {
             "name": "dog",
+            "descriptor": "rootsift",
             "max_keypoints": 5000,
         },
         "preprocessing": {
@@ -145,8 +146,8 @@ confs = {
     "sift": {
         "output": "feats-sift-n5000-r1600",
         "model": {
-            "name": "dog",
-            "descriptor": "sift",
+            "name": "sift",
+            "rootsift": True,
             "max_keypoints": 5000,
         },
         "preprocessing": {
hloc/extractors/alike.py CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
3
  import torch
4
 
5
  from ..utils.base_model import BaseModel
 
6
 
7
  alike_path = Path(__file__).parent / "../../third_party/ALIKE"
8
  sys.path.append(str(alike_path))
@@ -33,6 +34,7 @@ class Alike(BaseModel):
33
  scores_th=conf["detection_threshold"],
34
  n_limit=conf["max_keypoints"],
35
  )
 
36
 
37
  def _forward(self, data):
38
  image = data["image"]
 
3
  import torch
4
 
5
  from ..utils.base_model import BaseModel
6
+ from hloc import logger
7
 
8
  alike_path = Path(__file__).parent / "../../third_party/ALIKE"
9
  sys.path.append(str(alike_path))
 
34
  scores_th=conf["detection_threshold"],
35
  n_limit=conf["max_keypoints"],
36
  )
37
+ logger.info(f"Load Alike model done.")
38
 
39
  def _forward(self, data):
40
  image = data["image"]
hloc/extractors/d2net.py CHANGED
@@ -4,13 +4,16 @@ import subprocess
4
  import torch
5
 
6
  from ..utils.base_model import BaseModel
 
7
 
8
  d2net_path = Path(__file__).parent / "../../third_party"
9
  sys.path.append(str(d2net_path))
10
  from d2net.lib.model_test import D2Net as _D2Net
11
  from d2net.lib.pyramid import process_multiscale
 
12
  d2net_path = Path(__file__).parent / "../../third_party/d2net"
13
 
 
14
  class D2Net(BaseModel):
15
  default_conf = {
16
  "model_name": "d2_tf.pth",
@@ -36,6 +39,7 @@ class D2Net(BaseModel):
36
  self.net = _D2Net(
37
  model_file=model_file, use_relu=conf["use_relu"], use_cuda=False
38
  )
 
39
 
40
  def _forward(self, data):
41
  image = data["image"]
 
4
  import torch
5
 
6
  from ..utils.base_model import BaseModel
7
+ from hloc import logger
8
 
9
  d2net_path = Path(__file__).parent / "../../third_party"
10
  sys.path.append(str(d2net_path))
11
  from d2net.lib.model_test import D2Net as _D2Net
12
  from d2net.lib.pyramid import process_multiscale
13
+
14
  d2net_path = Path(__file__).parent / "../../third_party/d2net"
15
 
16
+
17
  class D2Net(BaseModel):
18
  default_conf = {
19
  "model_name": "d2_tf.pth",
 
39
  self.net = _D2Net(
40
  model_file=model_file, use_relu=conf["use_relu"], use_cuda=False
41
  )
42
+ logger.info(f"Load D2Net model done.")
43
 
44
  def _forward(self, data):
45
  image = data["image"]
hloc/extractors/darkfeat.py CHANGED
@@ -2,7 +2,7 @@ import sys
2
  from pathlib import Path
3
  import subprocess
4
  from ..utils.base_model import BaseModel
5
- from .. import logger
6
 
7
  darkfeat_path = Path(__file__).parent / "../../third_party/DarkFeat"
8
  sys.path.append(str(darkfeat_path))
@@ -43,6 +43,7 @@ class DarkFeat(BaseModel):
43
  raise e
44
 
45
  self.net = DarkFeat_(model_path)
 
46
 
47
  def _forward(self, data):
48
  pred = self.net({"image": data["image"]})
 
2
  from pathlib import Path
3
  import subprocess
4
  from ..utils.base_model import BaseModel
5
+ from hloc import logger
6
 
7
  darkfeat_path = Path(__file__).parent / "../../third_party/DarkFeat"
8
  sys.path.append(str(darkfeat_path))
 
43
  raise e
44
 
45
  self.net = DarkFeat_(model_path)
46
+ logger.info(f"Load DarkFeat model done.")
47
 
48
  def _forward(self, data):
49
  pred = self.net({"image": data["image"]})
hloc/extractors/dedode.py CHANGED
@@ -4,7 +4,7 @@ import subprocess
4
  import torch
5
  from PIL import Image
6
  from ..utils.base_model import BaseModel
7
- from .. import logger
8
  import torchvision.transforms as transforms
9
 
10
  dedode_path = Path(__file__).parent / "../../third_party/DeDoDe"
@@ -15,6 +15,7 @@ from DeDoDe.utils import to_pixel_coords
15
 
16
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
 
 
18
  class DeDoDe(BaseModel):
19
  default_conf = {
20
  "name": "dedode",
@@ -61,8 +62,6 @@ class DeDoDe(BaseModel):
61
  )
62
  subprocess.run(cmd, check=True)
63
 
64
- logger.info(f"Loading DeDoDe model...")
65
-
66
  # load the model
67
  weights_detector = torch.load(model_detector_path, map_location="cpu")
68
  weights_descriptor = torch.load(
 
4
  import torch
5
  from PIL import Image
6
  from ..utils.base_model import BaseModel
7
+ from hloc import logger
8
  import torchvision.transforms as transforms
9
 
10
  dedode_path = Path(__file__).parent / "../../third_party/DeDoDe"
 
15
 
16
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
 
18
+
19
  class DeDoDe(BaseModel):
20
  default_conf = {
21
  "name": "dedode",
 
62
  )
63
  subprocess.run(cmd, check=True)
64
 
 
 
65
  # load the model
66
  weights_detector = torch.load(model_detector_path, map_location="cpu")
67
  weights_descriptor = torch.load(
hloc/extractors/example.py CHANGED
@@ -13,6 +13,7 @@ sys.path.append(str(example_path))
13
 
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
 
 
16
  class Example(BaseModel):
17
  # change to your default configs
18
  default_conf = {
 
13
 
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
 
16
+
17
  class Example(BaseModel):
18
  # change to your default configs
19
  default_conf = {
hloc/extractors/lanet.py CHANGED
@@ -4,6 +4,7 @@ import subprocess
4
  import torch
5
 
6
  from ..utils.base_model import BaseModel
 
7
 
8
  lanet_path = Path(__file__).parent / "../../third_party/lanet"
9
  sys.path.append(str(lanet_path))
@@ -29,6 +30,7 @@ class LANet(BaseModel):
29
  self.net = PointModel(is_test=True)
30
  state_dict = torch.load(model_path, map_location="cpu")
31
  self.net.load_state_dict(state_dict["model_state"])
 
32
 
33
  def _forward(self, data):
34
  image = data["image"]
 
4
  import torch
5
 
6
  from ..utils.base_model import BaseModel
7
+ from hloc import logger
8
 
9
  lanet_path = Path(__file__).parent / "../../third_party/lanet"
10
  sys.path.append(str(lanet_path))
 
30
  self.net = PointModel(is_test=True)
31
  state_dict = torch.load(model_path, map_location="cpu")
32
  self.net.load_state_dict(state_dict["model_state"])
33
+ logger.info(f"Load LANet model done.")
34
 
35
  def _forward(self, data):
36
  image = data["image"]
hloc/extractors/r2d2.py CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
3
  import torchvision.transforms as tvf
4
 
5
  from ..utils.base_model import BaseModel
 
6
 
7
  base_path = Path(__file__).parent / "../../third_party"
8
  sys.path.append(str(base_path))
@@ -34,6 +35,7 @@ class R2D2(BaseModel):
34
  rel_thr=conf["reliability_threshold"],
35
  rep_thr=conf["repetability_threshold"],
36
  )
 
37
 
38
  def _forward(self, data):
39
  img = data["image"]
 
3
  import torchvision.transforms as tvf
4
 
5
  from ..utils.base_model import BaseModel
6
+ from hloc import logger
7
 
8
  base_path = Path(__file__).parent / "../../third_party"
9
  sys.path.append(str(base_path))
 
35
  rel_thr=conf["reliability_threshold"],
36
  rep_thr=conf["repetability_threshold"],
37
  )
38
+ logger.info(f"Load R2D2 model done.")
39
 
40
  def _forward(self, data):
41
  img = data["image"]
hloc/extractors/rekd.py CHANGED
@@ -4,6 +4,7 @@ import subprocess
4
  import torch
5
 
6
  from ..utils.base_model import BaseModel
 
7
 
8
  rekd_path = Path(__file__).parent / "../../third_party"
9
  sys.path.append(str(rekd_path))
@@ -28,6 +29,7 @@ class REKD(BaseModel):
28
  self.net = REKD_(is_test=True)
29
  state_dict = torch.load(model_path, map_location="cpu")
30
  self.net.load_state_dict(state_dict["model_state"])
 
31
 
32
  def _forward(self, data):
33
  image = data["image"]
 
4
  import torch
5
 
6
  from ..utils.base_model import BaseModel
7
+ from hloc import logger
8
 
9
  rekd_path = Path(__file__).parent / "../../third_party"
10
  sys.path.append(str(rekd_path))
 
29
  self.net = REKD_(is_test=True)
30
  state_dict = torch.load(model_path, map_location="cpu")
31
  self.net.load_state_dict(state_dict["model_state"])
32
+ logger.info(f"Load REKD model done.")
33
 
34
  def _forward(self, data):
35
  image = data["image"]
hloc/extractors/rord.py CHANGED
@@ -4,13 +4,14 @@ import subprocess
4
  import torch
5
 
6
  from ..utils.base_model import BaseModel
7
- from .. import logger
8
 
9
  rord_path = Path(__file__).parent / "../../third_party"
10
  sys.path.append(str(rord_path))
11
  from RoRD.lib.model_test import D2Net as _RoRD
12
  from RoRD.lib.pyramid import process_multiscale
13
 
 
14
  class RoRD(BaseModel):
15
  default_conf = {
16
  "model_name": "rord.pth",
@@ -32,9 +33,7 @@ class RoRD(BaseModel):
32
  model_path.parent.mkdir(exist_ok=True)
33
  cmd_wo_proxy = ["gdown", link, "-O", str(model_path)]
34
  cmd = ["gdown", link, "-O", str(model_path), "--proxy", self.proxy]
35
- logger.info(
36
- f"Downloading the RoRD model with `{cmd_wo_proxy}`."
37
- )
38
  try:
39
  subprocess.run(cmd_wo_proxy, check=True)
40
  except subprocess.CalledProcessError as e:
@@ -44,10 +43,10 @@ class RoRD(BaseModel):
44
  except subprocess.CalledProcessError as e:
45
  logger.error(f"Failed to download the RoRD model.")
46
  raise e
47
- logger.info("RoRD model loaded.")
48
  self.net = _RoRD(
49
  model_file=model_path, use_relu=conf["use_relu"], use_cuda=False
50
  )
 
51
 
52
  def _forward(self, data):
53
  image = data["image"]
 
4
  import torch
5
 
6
  from ..utils.base_model import BaseModel
7
+ from hloc import logger
8
 
9
  rord_path = Path(__file__).parent / "../../third_party"
10
  sys.path.append(str(rord_path))
11
  from RoRD.lib.model_test import D2Net as _RoRD
12
  from RoRD.lib.pyramid import process_multiscale
13
 
14
+
15
  class RoRD(BaseModel):
16
  default_conf = {
17
  "model_name": "rord.pth",
 
33
  model_path.parent.mkdir(exist_ok=True)
34
  cmd_wo_proxy = ["gdown", link, "-O", str(model_path)]
35
  cmd = ["gdown", link, "-O", str(model_path), "--proxy", self.proxy]
36
+ logger.info(f"Downloading the RoRD model with `{cmd_wo_proxy}`.")
 
 
37
  try:
38
  subprocess.run(cmd_wo_proxy, check=True)
39
  except subprocess.CalledProcessError as e:
 
43
  except subprocess.CalledProcessError as e:
44
  logger.error(f"Failed to download the RoRD model.")
45
  raise e
 
46
  self.net = _RoRD(
47
  model_file=model_path, use_relu=conf["use_relu"], use_cuda=False
48
  )
49
+ logger.info(f"Load RoRD model done.")
50
 
51
  def _forward(self, data):
52
  image = data["image"]
hloc/extractors/sift.py ADDED
@@ -0,0 +1,224 @@
+import warnings
+
+import cv2
+import numpy as np
+import torch
+from kornia.color import rgb_to_grayscale
+from packaging import version
+from omegaconf import OmegaConf
+
+try:
+    import pycolmap
+except ImportError:
+    pycolmap = None
+from hloc import logger
+from ..utils.base_model import BaseModel
+
+
+def filter_dog_point(
+    points, scales, angles, image_shape, nms_radius, scores=None
+):
+    h, w = image_shape
+    ij = np.round(points - 0.5).astype(int).T[::-1]
+
+    # Remove duplicate points (identical coordinates).
+    # Pick highest scale or score
+    s = scales if scores is None else scores
+    buffer = np.zeros((h, w))
+    np.maximum.at(buffer, tuple(ij), s)
+    keep = np.where(buffer[tuple(ij)] == s)[0]
+
+    # Pick lowest angle (arbitrary).
+    ij = ij[:, keep]
+    buffer[:] = np.inf
+    o_abs = np.abs(angles[keep])
+    np.minimum.at(buffer, tuple(ij), o_abs)
+    mask = buffer[tuple(ij)] == o_abs
+    ij = ij[:, mask]
+    keep = keep[mask]
+
+    if nms_radius > 0:
+        # Apply NMS on the remaining points
+        buffer[:] = 0
+        buffer[tuple(ij)] = s[keep]  # scores or scale
+
+        local_max = torch.nn.functional.max_pool2d(
+            torch.from_numpy(buffer).unsqueeze(0),
+            kernel_size=nms_radius * 2 + 1,
+            stride=1,
+            padding=nms_radius,
+        ).squeeze(0)
+        is_local_max = buffer == local_max.numpy()
+        keep = keep[is_local_max[tuple(ij)]]
+    return keep
+
+
+def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
+    x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
+    x.clip_(min=eps).sqrt_()
+    return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps)
+
+
+def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> np.ndarray:
+    """
+    Detect keypoints using OpenCV Detector.
+    Optionally, perform description.
+    Args:
+        features: OpenCV based keypoints detector and descriptor
+        image: Grayscale image of uint8 data type
+    Returns:
+        keypoints: 1D array of detected cv2.KeyPoint
+        scores: 1D array of responses
+        descriptors: 1D array of descriptors
+    """
+    detections, descriptors = features.detectAndCompute(image, None)
+    points = np.array([k.pt for k in detections], dtype=np.float32)
+    scores = np.array([k.response for k in detections], dtype=np.float32)
+    scales = np.array([k.size for k in detections], dtype=np.float32)
+    angles = np.deg2rad(
+        np.array([k.angle for k in detections], dtype=np.float32)
+    )
+    return points, scores, scales, angles, descriptors
+
+
+class SIFT(BaseModel):
+    default_conf = {
+        "rootsift": True,
+        "nms_radius": 0,  # None to disable filtering entirely.
+        "max_keypoints": 4096,
+        "backend": "opencv",  # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
+        "detection_threshold": 0.0066667,  # from COLMAP
+        "edge_threshold": 10,
+        "first_octave": -1,  # only used by pycolmap, the default of COLMAP
+        "num_octaves": 4,
+    }
+
+    required_data_keys = ["image"]
+
+    def _init(self, conf):
+        self.conf = OmegaConf.create(self.conf)
+        backend = self.conf.backend
+        if backend.startswith("pycolmap"):
+            if pycolmap is None:
+                raise ImportError(
+                    "Cannot find module pycolmap: install it with pip"
+                    "or use backend=opencv."
+                )
+            options = {
+                "peak_threshold": self.conf.detection_threshold,
+                "edge_threshold": self.conf.edge_threshold,
+                "first_octave": self.conf.first_octave,
+                "num_octaves": self.conf.num_octaves,
+                "normalization": pycolmap.Normalization.L2,  # L1_ROOT is buggy.
+            }
+            device = (
+                "auto"
+                if backend == "pycolmap"
+                else backend.replace("pycolmap_", "")
+            )
+            if (
+                backend == "pycolmap_cpu" or not pycolmap.has_cuda
+            ) and pycolmap.__version__ < "0.5.0":
+                warnings.warn(
+                    "The pycolmap CPU SIFT is buggy in version < 0.5.0, "
+                    "consider upgrading pycolmap or use the CUDA version.",
+                    stacklevel=1,
+                )
+            else:
+                options["max_num_features"] = self.conf.max_keypoints
+            self.sift = pycolmap.Sift(options=options, device=device)
+        elif backend == "opencv":
+            self.sift = cv2.SIFT_create(
+                contrastThreshold=self.conf.detection_threshold,
+                nfeatures=self.conf.max_keypoints,
+                edgeThreshold=self.conf.edge_threshold,
+                nOctaveLayers=self.conf.num_octaves,
+            )
+        else:
+            backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
+            raise ValueError(
+                f"Unknown backend: {backend} not in "
+                f"{{{','.join(backends)}}}."
+            )
+        logger.info(f"Load SIFT model done.")
+
+    def extract_single_image(self, image: torch.Tensor):
+        image_np = image.cpu().numpy().squeeze(0)
+
+        if self.conf.backend.startswith("pycolmap"):
+            if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
+                detections, descriptors = self.sift.extract(image_np)
+                scores = None  # Scores are not exposed by COLMAP anymore.
+            else:
+                detections, scores, descriptors = self.sift.extract(image_np)
+            keypoints = detections[:, :2]  # Keep only (x, y).
+            scales, angles = detections[:, -2:].T
+            if scores is not None and (
+                self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
+            ):
+                # Set the scores as a combination of abs. response and scale.
+                scores = np.abs(scores) * scales
+        elif self.conf.backend == "opencv":
+            # TODO: Check if opencv keypoints are already in corner convention
+            keypoints, scores, scales, angles, descriptors = run_opencv_sift(
+                self.sift, (image_np * 255.0).astype(np.uint8)
+            )
+        pred = {
+            "keypoints": keypoints,
+            "scales": scales,
+            "oris": angles,
+            "descriptors": descriptors,
+        }
+        if scores is not None:
+            pred["scores"] = scores
+
+        # sometimes pycolmap returns points outside the image. We remove them
+        if self.conf.backend.startswith("pycolmap"):
+            is_inside = (
+                pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
+            ).all(-1)
+            pred = {k: v[is_inside] for k, v in pred.items()}
+
+        if self.conf.nms_radius is not None:
+            keep = filter_dog_point(
+                pred["keypoints"],
+                pred["scales"],
+                pred["oris"],
+                image_np.shape,
+                self.conf.nms_radius,
+                scores=pred.get("scores"),
+            )
+            pred = {k: v[keep] for k, v in pred.items()}
+
+        pred = {k: torch.from_numpy(v) for k, v in pred.items()}
+        if scores is not None:
+            # Keep the k keypoints with highest score
+            num_points = self.conf.max_keypoints
+            if num_points is not None and len(pred["keypoints"]) > num_points:
+                indices = torch.topk(pred["scores"], num_points).indices
+                pred = {k: v[indices] for k, v in pred.items()}
+        return pred
+
+    def _forward(self, data: dict) -> dict:
+        image = data["image"]
+        if image.shape[1] == 3:
+            image = rgb_to_grayscale(image)
+        device = image.device
+        image = image.cpu()
+        pred = []
+        for k in range(len(image)):
+            img = image[k]
+            if "image_size" in data.keys():
+                # avoid extracting points in padded areas
+                w, h = data["image_size"][k]
+                img = img[:, :h, :w]
+            p = self.extract_single_image(img)
+            pred.append(p)
+        pred = {
+            k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]
+        }
+        if self.conf.rootsift:
+            pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
+        pred["descriptors"] = pred["descriptors"].permute(0, 2, 1)
+        pred["keypoint_scores"] = pred["scores"].clone()
+        return pred
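
A hedged usage sketch of the new wrapper on its own (assumes hloc's `BaseModel` call convention of passing a dict with an `"image"` tensor in `[0, 1]`, the OpenCV backend, and a placeholder image path):

```python
import cv2
import torch

from hloc.extractors.sift import SIFT

model = SIFT({"backend": "opencv", "max_keypoints": 1024}).eval()

# Any real photo works; the path below is a placeholder.
img = cv2.imread("datasets/sacre_coeur/mapping/example.jpg", cv2.IMREAD_GRAYSCALE)
image = torch.from_numpy(img).float()[None, None] / 255.0  # 1 x 1 x H x W in [0, 1]

with torch.no_grad():
    pred = model({"image": image})

print(pred["keypoints"].shape)    # (1, N, 2)
print(pred["descriptors"].shape)  # (1, 128, N) RootSIFT after the final permute
print(pred["scales"].shape, pred["oris"].shape)  # extras consumed by LightGlue
```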
hloc/extractors/superpoint.py CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
3
  import torch
4
 
5
  from ..utils.base_model import BaseModel
 
6
 
7
  sys.path.append(str(Path(__file__).parent / "../../third_party"))
8
  from SuperGluePretrainedNetwork.models import superpoint # noqa E402
@@ -42,6 +43,7 @@ class SuperPoint(BaseModel):
42
  if conf["fix_sampling"]:
43
  superpoint.sample_descriptors = sample_descriptors_fix_sampling
44
  self.net = superpoint.SuperPoint(conf)
 
45
 
46
  def _forward(self, data):
47
  return self.net(data, self.conf)
 
3
  import torch
4
 
5
  from ..utils.base_model import BaseModel
6
+ from hloc import logger
7
 
8
  sys.path.append(str(Path(__file__).parent / "../../third_party"))
9
  from SuperGluePretrainedNetwork.models import superpoint # noqa E402
 
43
  if conf["fix_sampling"]:
44
  superpoint.sample_descriptors = sample_descriptors_fix_sampling
45
  self.net = superpoint.SuperPoint(conf)
46
+ logger.info(f"Load SuperPoint model done.")
47
 
48
  def _forward(self, data):
49
  return self.net(data, self.conf)
hloc/match_dense.py CHANGED
@@ -138,11 +138,8 @@ confs = {
         },
         "preprocessing": {
             "grayscale": False,
-            "force_resize": True,
-            "resize_max": 1024,
-            "width": 512,
-            "height": 512,
-            "dfactor": 8,
+            "resize_max": 512,
+            "dfactor": 16,
         },
     },
     "xfeat_dense": {
hloc/match_features.py CHANGED
@@ -63,7 +63,7 @@ confs = {
         },
     },
     "disk-lightglue": {
-        "output": "matches-lightglue",
+        "output": "matches-disk-lightglue",
        "model": {
             "name": "lightglue",
             "match_threshold": 0.2,
@@ -79,6 +79,24 @@ confs = {
             "force_resize": False,
         },
     },
+    "sift-lightglue": {
+        "output": "matches-sift-lightglue",
+        "model": {
+            "name": "lightglue",
+            "match_threshold": 0.2,
+            "width_confidence": 0.99,  # for point pruning
+            "depth_confidence": 0.95,  # for early stopping,
+            "features": "sift",
+            "add_scale_ori": True,
+            "model_name": "sift_lightglue.pth",
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "resize_max": 1024,
+            "dfactor": 8,
+            "force_resize": False,
+        },
+    },
     "sgmnet": {
         "output": "matches-sgmnet",
         "model": {
@@ -339,19 +357,25 @@ def match_images(model, feat0, feat1):
         feat0["keypoints"] = feat0["keypoints"][0][None]
     if isinstance(feat1["keypoints"], list):
         feat1["keypoints"] = feat1["keypoints"][0][None]
-
-    pred = model(
-        {
-            "image0": feat0["image"],
-            "keypoints0": feat0["keypoints"],
-            "scores0": feat0["scores"][0].unsqueeze(0),
-            "descriptors0": desc0,
-            "image1": feat1["image"],
-            "keypoints1": feat1["keypoints"],
-            "scores1": feat1["scores"][0].unsqueeze(0),
-            "descriptors1": desc1,
-        }
-    )
+    input_dict = {
+        "image0": feat0["image"],
+        "keypoints0": feat0["keypoints"],
+        "scores0": feat0["scores"][0].unsqueeze(0),
+        "descriptors0": desc0,
+        "image1": feat1["image"],
+        "keypoints1": feat1["keypoints"],
+        "scores1": feat1["scores"][0].unsqueeze(0),
+        "descriptors1": desc1,
+    }
+    if "scales" in feat0:
+        input_dict = {**input_dict, "scales0": feat0["scales"]}
+    if "scales" in feat1:
+        input_dict = {**input_dict, "scales1": feat1["scales"]}
+    if "oris" in feat0:
+        input_dict = {**input_dict, "oris0": feat0["oris"]}
+    if "oris" in feat1:
+        input_dict = {**input_dict, "oris1": feat1["oris"]}
+    pred = model(input_dict)
     pred = {
         k: v.cpu().detach()[0] if isinstance(v, torch.Tensor) else v
         for k, v in pred.items()
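
Together with the extractor conf above, the new `"sift-lightglue"` conf completes a sparse pipeline. A hedged end-to-end sketch using hloc's standard entry points (paths and the pairs file are placeholders; `match_features.main` is assumed to take `(conf, pairs, features, export_dir)` as in upstream hloc):

```python
from pathlib import Path

from hloc import extract_features, match_features

images = Path("datasets/sacre_coeur/mapping")  # placeholder
outputs = Path("outputs/sift_lightglue_demo")  # placeholder
pairs = outputs / "pairs.txt"                  # one "name0 name1" pair per line

feature_conf = extract_features.confs["sift"]
matcher_conf = match_features.confs["sift-lightglue"]

features = extract_features.main(feature_conf, images, outputs)
matches = match_features.main(
    matcher_conf, pairs, feature_conf["output"], outputs
)
```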
hloc/matchers/duster.py CHANGED
@@ -13,7 +13,7 @@ duster_path = Path(__file__).parent / "../../third_party/dust3r"
13
  sys.path.append(str(duster_path))
14
 
15
  from dust3r.inference import inference
16
- from dust3r.model import load_model
17
  from dust3r.image_pairs import make_pairs
18
  from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
19
  from dust3r.utils.geometry import find_reciprocal_matches, xy_grid
@@ -33,7 +33,11 @@ class Duster(BaseModel):
33
  self.normalize = tfm.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
34
  self.model_path = self.conf["model_path"]
35
  self.download_weights()
36
- self.net = load_model(self.model_path, device)
 
 
 
 
37
  logger.info(f"Loaded Dust3r model")
38
 
39
  def download_weights(self):
@@ -68,8 +72,11 @@ class Duster(BaseModel):
68
 
69
  def _forward(self, data):
70
  img0, img1 = data["image0"], data["image1"]
71
- # img0 = self.preprocess(img0)
72
- # img1 = self.preprocess(img1)
 
 
 
73
 
74
  images = [
75
  {"img": img0, "idx": 0, "instance": 0},
@@ -79,22 +86,13 @@ class Duster(BaseModel):
79
  images, scene_graph="complete", prefilter=None, symmetrize=True
80
  )
81
  output = inference(pairs, self.net, device, batch_size=1)
82
-
83
  scene = global_aligner(
84
  output, device=device, mode=GlobalAlignerMode.PairViewer
85
  )
86
- batch_size = 1
87
- schedule = "cosine"
88
- lr = 0.01
89
- niter = 300
90
- loss = scene.compute_global_alignment(
91
- init="mst", niter=niter, schedule=schedule, lr=lr
92
- )
93
-
94
  # retrieve useful values from scene:
 
95
  confidence_masks = scene.get_masks()
96
  pts3d = scene.get_pts3d()
97
- imgs = scene.imgs
98
  pts2d_list, pts3d_list = [], []
99
  for i in range(2):
100
  conf_i = confidence_masks[i].cpu().numpy()
@@ -102,21 +100,29 @@ class Duster(BaseModel):
102
  xy_grid(*imgs[i].shape[:2][::-1])[conf_i]
103
  ) # imgs[i].shape[:2] = (H, W)
104
  pts3d_list.append(pts3d[i].detach().cpu().numpy()[conf_i])
105
- reciprocal_in_P2, nn2_in_P1, num_matches = find_reciprocal_matches(
106
- *pts3d_list
107
- )
108
- logger.info(f"Found {num_matches} matches")
109
- mkpts1 = pts2d_list[1][reciprocal_in_P2]
110
- mkpts0 = pts2d_list[0][nn2_in_P1][reciprocal_in_P2]
111
-
112
- top_k = self.conf["max_keypoints"]
113
- if top_k is not None and len(mkpts0) > top_k:
114
- keep = np.round(np.linspace(0, len(mkpts0) - 1, top_k)).astype(int)
115
- mkpts0 = mkpts0[keep]
116
- mkpts1 = mkpts1[keep]
117
- pred = {
118
- "keypoints0": torch.from_numpy(mkpts0),
119
- "keypoints1": torch.from_numpy(mkpts1),
120
- }
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  return pred
 
13
  sys.path.append(str(duster_path))
14
 
15
  from dust3r.inference import inference
16
+ from dust3r.model import load_model, AsymmetricCroCo3DStereo
17
  from dust3r.image_pairs import make_pairs
18
  from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
19
  from dust3r.utils.geometry import find_reciprocal_matches, xy_grid
 
33
  self.normalize = tfm.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
34
  self.model_path = self.conf["model_path"]
35
  self.download_weights()
36
+ # self.net = load_model(self.model_path, device)
37
+ self.net = AsymmetricCroCo3DStereo.from_pretrained(
38
+ self.model_path
39
+ # "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
40
+ ).to(device)
41
  logger.info(f"Loaded Dust3r model")
42
 
43
  def download_weights(self):
 
72
 
73
  def _forward(self, data):
74
  img0, img1 = data["image0"], data["image1"]
75
+ mean = torch.tensor([0.5, 0.5, 0.5]).to(device)
76
+ std = torch.tensor([0.5, 0.5, 0.5]).to(device)
77
+
78
+ img0 = (img0 - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1)
79
+ img1 = (img1 - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1)
80
 
81
  images = [
82
  {"img": img0, "idx": 0, "instance": 0},
 
86
  images, scene_graph="complete", prefilter=None, symmetrize=True
87
  )
88
  output = inference(pairs, self.net, device, batch_size=1)
 
89
  scene = global_aligner(
90
  output, device=device, mode=GlobalAlignerMode.PairViewer
91
  )
 
 
 
 
 
 
 
 
92
  # retrieve useful values from scene:
93
+ imgs = scene.imgs
94
  confidence_masks = scene.get_masks()
95
  pts3d = scene.get_pts3d()
 
96
  pts2d_list, pts3d_list = [], []
97
  for i in range(2):
98
  conf_i = confidence_masks[i].cpu().numpy()
 
100
  xy_grid(*imgs[i].shape[:2][::-1])[conf_i]
101
  ) # imgs[i].shape[:2] = (H, W)
102
  pts3d_list.append(pts3d[i].detach().cpu().numpy()[conf_i])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ if len(pts3d_list[1]) == 0:
105
+ pred = {
106
+ "keypoints0": torch.zeros([0, 2]),
107
+ "keypoints1": torch.zeros([0, 2]),
108
+ }
109
+ logger.warning(f"Matched {0} points")
110
+ else:
111
+ reciprocal_in_P2, nn2_in_P1, num_matches = find_reciprocal_matches(
112
+ *pts3d_list
113
+ )
114
+ logger.info(f"Found {num_matches} matches")
115
+ mkpts1 = pts2d_list[1][reciprocal_in_P2]
116
+ mkpts0 = pts2d_list[0][nn2_in_P1][reciprocal_in_P2]
117
+ top_k = self.conf["max_keypoints"]
118
+ if top_k is not None and len(mkpts0) > top_k:
119
+ keep = np.round(np.linspace(0, len(mkpts0) - 1, top_k)).astype(
120
+ int
121
+ )
122
+ mkpts0 = mkpts0[keep]
123
+ mkpts1 = mkpts1[keep]
124
+ pred = {
125
+ "keypoints0": torch.from_numpy(mkpts0),
126
+ "keypoints1": torch.from_numpy(mkpts1),
127
+ }
128
  return pred
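
The inline normalization added in `_forward` reproduces the `tfm.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))` transform already stored in `self.normalize`, i.e. it maps `[0, 1]` inputs to `[-1, 1]`. A small illustrative check:

```python
import torch
import torchvision.transforms as tfm

img = torch.rand(1, 3, 64, 64)  # dummy batch in [0, 1]
mean = torch.tensor([0.5, 0.5, 0.5])
std = torch.tensor([0.5, 0.5, 0.5])

manual = (img - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1)
reference = tfm.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(img)

assert torch.allclose(manual, reference)
assert manual.min() >= -1.0 and manual.max() <= 1.0  # [0, 1] -> [-1, 1]
```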
hloc/matchers/lightglue.py CHANGED
@@ -18,6 +18,7 @@ class LightGlue(BaseModel):
         "model_name": "superpoint_lightglue.pth",
         "flash": True,  # enable FlashAttention if available.
         "mp": False,  # enable mixed precision
+        "add_scale_ori": False,
     }
     required_inputs = [
         "image0",
@@ -44,9 +45,18 @@
             "keypoints": data["keypoints0"],
             "descriptors": data["descriptors0"].permute(0, 2, 1),
         }
+        if "scales0" in data:
+            input["image0"] = {**input["image0"], "scales": data["scales0"]}
+        if "oris0" in data:
+            input["image0"] = {**input["image0"], "oris": data["oris0"]}
+
         input["image1"] = {
             "image": data["image1"],
             "keypoints": data["keypoints1"],
             "descriptors": data["descriptors1"].permute(0, 2, 1),
         }
+        if "scales1" in data:
+            input["image1"] = {**input["image1"], "scales": data["scales1"]}
+        if "oris1" in data:
+            input["image1"] = {**input["image1"], "oris": data["oris1"]}
         return self.net(input)
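
With the wrapper now forwarding optional `scales*`/`oris*` keys, the expected input looks roughly as follows (a hedged shape sketch with dummy tensors; the extras matter when the loaded LightGlue checkpoint was trained with `add_scale_ori=True`, as the new `sift-lightglue` conf is):

```python
import torch

B, N = 1, 512  # batch size and keypoint count, illustrative
data = {
    "image0": torch.rand(B, 1, 480, 640),
    "image1": torch.rand(B, 1, 480, 640),
    "keypoints0": torch.rand(B, N, 2) * 640,
    "keypoints1": torch.rand(B, N, 2) * 640,
    "scores0": torch.rand(B, N),
    "scores1": torch.rand(B, N),
    "descriptors0": torch.rand(B, 128, N),  # B x D x N, permuted inside the wrapper
    "descriptors1": torch.rand(B, 128, N),
    # Optional extras, forwarded only when present:
    "scales0": torch.rand(B, N) * 4.0,
    "scales1": torch.rand(B, N) * 4.0,
    "oris0": torch.rand(B, N) * 3.14,
    "oris1": torch.rand(B, N) * 3.14,
}
# pred = matcher(data)  # matcher built from match_features.confs["sift-lightglue"]["model"]
```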
hloc/matchers/sgmnet.py CHANGED
@@ -99,8 +99,12 @@ class SGMNet(BaseModel):
         score2 = data["scores1"].reshape(-1, 1)
         desc1 = data["descriptors0"].permute(0, 2, 1)  # 1 x N x 128
         desc2 = data["descriptors1"].permute(0, 2, 1)
-        size1 = torch.tensor(data["image0"].shape[2:]).flip(0)  # W x H -> x & y
-        size2 = torch.tensor(data["image1"].shape[2:]).flip(0)  # W x H
+        size1 = (
+            torch.tensor(data["image0"].shape[2:]).flip(0).to(x1.device)
+        )  # W x H -> x & y
+        size2 = (
+            torch.tensor(data["image1"].shape[2:]).flip(0).to(x2.device)
+        )  # W x H
         norm_x1 = self.normalize_size(x1, size1)
         norm_x2 = self.normalize_size(x2, size2)

hloc/matchers/sold2.py CHANGED
@@ -34,6 +34,7 @@ class SOLD2(BaseModel):
34
  weight_urls = {
35
  "sold2_wireframe.tar": "https://www.polybox.ethz.ch/index.php/s/blOrW89gqSLoHOk/download",
36
  }
 
37
  # Initialize the line matcher
38
  def _init(self, conf):
39
  checkpoint_path = conf["checkpoint_dir"] / conf["weights"]
 
34
  weight_urls = {
35
  "sold2_wireframe.tar": "https://www.polybox.ethz.ch/index.php/s/blOrW89gqSLoHOk/download",
36
  }
37
+
38
  # Initialize the line matcher
39
  def _init(self, conf):
40
  checkpoint_path = conf["checkpoint_dir"] / conf["weights"]
hloc/utils/viz.py CHANGED
@@ -71,6 +71,7 @@ def plot_keypoints(kpts, colors="lime", ps=4):
71
  except IndexError as e:
72
  pass
73
 
 
74
  def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.0):
75
  """Plot matches for a pair of existing images.
76
  Args:
 
71
  except IndexError as e:
72
  pass
73
 
74
+
75
  def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.0):
76
  """Plot matches for a pair of existing images.
77
  Args:
third_party/LightGlue/.flake8 ADDED
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 88
+extend-ignore = E203
+exclude = .git,__pycache__,build,.venv/
third_party/LightGlue/.github/workflows/code-quality.yml ADDED
@@ -0,0 +1,24 @@
+name: Format and Lint Checks
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '*.py'
+  pull_request:
+    types: [ assigned, opened, synchronize, reopened ]
+jobs:
+  check:
+    name: Format and Lint Checks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+      - run: python -m pip install --upgrade pip
+      - run: python -m pip install .[dev]
+      - run: python -m flake8 .
+      - run: python -m isort . --check-only --diff
+      - run: python -m black . --check --diff
third_party/LightGlue/.gitignore CHANGED
@@ -1,10 +1,166 @@
1
- *.egg-info
2
- *.pyc
3
- /.idea/
4
  /data/
5
  /outputs/
6
- __pycache__
7
  /lightglue/weights/
8
- lightglue/_flash/
9
  *-checkpoint.ipynb
10
- *.pth

1
  /data/
2
  /outputs/
 
3
  /lightglue/weights/
 
4
  *-checkpoint.ipynb
5
+ *.pth
6
+
7
+ # Byte-compiled / optimized / DLL files
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+
12
+ # C extensions
13
+ *.so
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py,cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+ cover/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+ db.sqlite3-journal
69
+
70
+ # Flask stuff:
71
+ instance/
72
+ .webassets-cache
73
+
74
+ # Scrapy stuff:
75
+ .scrapy
76
+
77
+ # Sphinx documentation
78
+ docs/_build/
79
+
80
+ # PyBuilder
81
+ .pybuilder/
82
+ target/
83
+
84
+ # Jupyter Notebook
85
+ .ipynb_checkpoints
86
+
87
+ # IPython
88
+ profile_default/
89
+ ipython_config.py
90
+
91
+ # pyenv
92
+ # For a library or package, you might want to ignore these files since the code is
93
+ # intended to run in multiple environments; otherwise, check them in:
94
+ # .python-version
95
+
96
+ # pipenv
97
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
99
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
100
+ # install all needed dependencies.
101
+ #Pipfile.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/#use-with-ide
116
+ .pdm.toml
117
+
118
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
119
+ __pypackages__/
120
+
121
+ # Celery stuff
122
+ celerybeat-schedule
123
+ celerybeat.pid
124
+
125
+ # SageMath parsed files
126
+ *.sage.py
127
+
128
+ # Environments
129
+ .env
130
+ .venv
131
+ env/
132
+ venv/
133
+ ENV/
134
+ env.bak/
135
+ venv.bak/
136
+
137
+ # Spyder project settings
138
+ .spyderproject
139
+ .spyproject
140
+
141
+ # Rope project settings
142
+ .ropeproject
143
+
144
+ # mkdocs documentation
145
+ /site
146
+
147
+ # mypy
148
+ .mypy_cache/
149
+ .dmypy.json
150
+ dmypy.json
151
+
152
+ # Pyre type checker
153
+ .pyre/
154
+
155
+ # pytype static type analyzer
156
+ .pytype/
157
+
158
+ # Cython debug symbols
159
+ cython_debug/
160
+
161
+ # PyCharm
162
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
165
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
+ .idea/
third_party/LightGlue/LICENSE CHANGED
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.

-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2023 ETH Zurich

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
third_party/LightGlue/README.md CHANGED
@@ -1,5 +1,5 @@
1
  <p align="center">
2
- <h1 align="center"><ins>LightGlue ⚡️</ins><br>Local Feature Matching at Light Speed</h1>
3
  <p align="center">
4
  <a href="https://www.linkedin.com/in/philipplindenberger/">Philipp Lindenberger</a>
5
  ·
@@ -7,15 +7,14 @@
7
  ·
8
  <a href="https://www.microsoft.com/en-us/research/people/mapoll/">Marc&nbsp;Pollefeys</a>
9
  </p>
10
- <!-- <p align="center">
11
- <img src="assets/larchitecture.svg" alt="Logo" height="40">
12
- </p> -->
13
- <!-- <h2 align="center">PrePrint 2023</h2> -->
14
- <h2 align="center"><p>
15
  <a href="https://arxiv.org/pdf/2306.13643.pdf" align="center">Paper</a> |
16
- <a href="https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb" align="center">Colab</a>
17
- </p></h2>
18
- <div align="center"></div>
 
 
19
  </p>
20
  <p align="center">
21
  <a href="https://arxiv.org/abs/2306.13643"><img src="assets/easy_hard.jpg" alt="example" width=80%></a>
@@ -27,8 +26,8 @@
27
 
28
  This repository hosts the inference code of LightGlue, a lightweight feature matcher with high accuracy and blazing fast inference. It takes as input a set of keypoints and descriptors for each image and returns the indices of corresponding points. The architecture is based on adaptive pruning techniques, in both network width and depth - [check out the paper for more details](https://arxiv.org/pdf/2306.13643.pdf).
29
 
30
- We release pretrained weights of LightGlue with [SuperPoint](https://arxiv.org/abs/1712.07629) and [DISK](https://arxiv.org/abs/2006.13566) local features.
31
- The training end evaluation code will be released in July in a separate repo. To be notified, subscribe to [issue #6](https://github.com/cvg/LightGlue/issues/6).
32
 
33
  ## Installation and demo [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb)
34
 
@@ -44,14 +43,14 @@ We provide a [demo notebook](demo.ipynb) which shows how to perform feature extr
44
  Here is a minimal script to match two images:
45
 
46
  ```python
47
- from lightglue import LightGlue, SuperPoint, DISK
48
  from lightglue.utils import load_image, rbd
49
 
50
  # SuperPoint+LightGlue
51
  extractor = SuperPoint(max_num_keypoints=2048).eval().cuda() # load the extractor
52
  matcher = LightGlue(features='superpoint').eval().cuda() # load the matcher
53
 
54
- # or DISK+LightGlue
55
  extractor = DISK(max_num_keypoints=2048).eval().cuda() # load the extractor
56
  matcher = LightGlue(features='disk').eval().cuda() # load the matcher
57
 
@@ -88,6 +87,18 @@ feats0, feats1, matches01 = match_pair(extractor, matcher, image0, image1)
88
 
89
  ## Advanced configuration
90
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  The default values give a good trade-off between speed and accuracy. To maximize the accuracy, use all keypoints and disable the adaptive mechanisms:
92
  ```python
93
  extractor = SuperPoint(max_num_keypoints=None)
@@ -99,31 +110,62 @@ To increase the speed with a small drop of accuracy, decrease the number of keyp
99
  extractor = SuperPoint(max_num_keypoints=1024)
100
  matcher = LightGlue(features='superpoint', depth_confidence=0.9, width_confidence=0.95)
101
  ```
102
- The maximum speed is obtained with [FlashAttention](https://arxiv.org/abs/2205.14135), which is automatically used when ```torch >= 2.0``` or if it is [installed from source](https://github.com/HazyResearch/flash-attention#installation-and-features).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  <details>
105
- <summary>[Detail of all parameters - click to expand]</summary>
106
 
107
- - [```n_layers```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L261): Number of stacked self+cross attention layers. Reduce this value for faster inference at the cost of accuracy (continuous red line in the plot above). Default: 9 (all layers).
108
- - [```flash```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L263): Enable FlashAttention. Significantly increases the speed and reduces the memory consumption without any impact on accuracy. Default: True (LightGlue automatically detects if FlashAttention is available).
109
- - [```mp```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L264): Enable mixed precision inference. Default: False (off)
110
- - [```depth_confidence```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L265): Controls the early stopping. A lower values stops more often at earlier layers. Default: 0.95, disable with -1.
111
- - [```width_confidence```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L266): Controls the iterative point pruning. A lower value prunes more points earlier. Default: 0.99, disable with -1.
112
- - [```filter_threshold```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L267): Match confidence. Increase this value to obtain less, but stronger matches. Default: 0.1
113
 
114
  </details>
115
 
 
 
 
 
 
116
  ## Other links
117
  - [hloc - the visual localization toolbox](https://github.com/cvg/Hierarchical-Localization/): run LightGlue for Structure-from-Motion and visual localization.
118
- - [LightGlue-ONNX](https://github.com/fabio-sim/LightGlue-ONNX): export LightGlue to the Open Neural Network Exchange format.
119
  - [Image Matching WebUI](https://github.com/Vincentqyw/image-matching-webui): a web GUI to easily compare different matchers, including LightGlue.
120
- - [kornia](kornia.readthedocs.io/) now exposes LightGlue via the interfaces [`LightGlue`](https://kornia.readthedocs.io/en/latest/feature.html#kornia.feature.LightGlue) and [`LightGlueMatcher`](https://kornia.readthedocs.io/en/latest/feature.html#kornia.feature.LightGlueMatcher).
121
 
122
- ## BibTeX Citation
123
  If you use any ideas from the paper or code from this repo, please consider citing:
124
 
125
  ```txt
126
- @inproceedings{lindenberger23lightglue,
127
  author = {Philipp Lindenberger and
128
  Paul-Edouard Sarlin and
129
  Marc Pollefeys},
@@ -132,3 +174,7 @@ If you use any ideas from the paper or code from this repo, please consider citi
132
  year = {2023}
133
  }
134
  ```
 
 
 
 
 
1
  <p align="center">
2
+ <h1 align="center"><ins>LightGlue</ins> ⚡️<br>Local Feature Matching at Light Speed</h1>
3
  <p align="center">
4
  <a href="https://www.linkedin.com/in/philipplindenberger/">Philipp Lindenberger</a>
5
  ·
 
7
  ·
8
  <a href="https://www.microsoft.com/en-us/research/people/mapoll/">Marc&nbsp;Pollefeys</a>
9
  </p>
10
+ <h2 align="center">
11
+ <p>ICCV 2023</p>
 
 
 
12
  <a href="https://arxiv.org/pdf/2306.13643.pdf" align="center">Paper</a> |
13
+ <a href="https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb" align="center">Colab</a> |
14
+ <a href="https://psarlin.com/assets/LightGlue_ICCV2023_poster_compressed.pdf" align="center">Poster</a> |
15
+ <a href="https://github.com/cvg/glue-factory" align="center">Train your own!</a>
16
+ </h2>
17
+
18
  </p>
19
  <p align="center">
20
  <a href="https://arxiv.org/abs/2306.13643"><img src="assets/easy_hard.jpg" alt="example" width=80%></a>
 
26
 
27
  This repository hosts the inference code of LightGlue, a lightweight feature matcher with high accuracy and blazing fast inference. It takes as input a set of keypoints and descriptors for each image and returns the indices of corresponding points. The architecture is based on adaptive pruning techniques, in both network width and depth - [check out the paper for more details](https://arxiv.org/pdf/2306.13643.pdf).
28
 
29
+ We release pretrained weights of LightGlue with [SuperPoint](https://arxiv.org/abs/1712.07629), [DISK](https://arxiv.org/abs/2006.13566), [ALIKED](https://arxiv.org/abs/2304.03608) and [SIFT](https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf) local features.
30
+ The training and evaluation code can be found in our library [glue-factory](https://github.com/cvg/glue-factory/).
31
 
32
  ## Installation and demo [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb)
33
 
 
43
  Here is a minimal script to match two images:
44
 
45
  ```python
46
+ from lightglue import LightGlue, SuperPoint, DISK, SIFT, ALIKED, DoGHardNet
47
  from lightglue.utils import load_image, rbd
48
 
49
  # SuperPoint+LightGlue
50
  extractor = SuperPoint(max_num_keypoints=2048).eval().cuda() # load the extractor
51
  matcher = LightGlue(features='superpoint').eval().cuda() # load the matcher
52
 
53
+ # or DISK+LightGlue, ALIKED+LightGlue or SIFT+LightGlue
54
  extractor = DISK(max_num_keypoints=2048).eval().cuda() # load the extractor
55
  matcher = LightGlue(features='disk').eval().cuda() # load the matcher
56
 
 
87
 
88
  ## Advanced configuration
89
 
90
+ <details>
91
+ <summary>[Detail of all parameters - click to expand]</summary>
92
+
93
+ - ```n_layers```: Number of stacked self+cross attention layers. Reduce this value for faster inference at the cost of accuracy (continuous red line in the plot above). Default: 9 (all layers).
94
+ - ```flash```: Enable FlashAttention. Significantly increases the speed and reduces the memory consumption without any impact on accuracy. Default: True (LightGlue automatically detects if FlashAttention is available).
95
+ - ```mp```: Enable mixed precision inference. Default: False (off)
96
+ - ```depth_confidence```: Controls the early stopping. A lower values stops more often at earlier layers. Default: 0.95, disable with -1.
97
+ - ```width_confidence```: Controls the iterative point pruning. A lower value prunes more points earlier. Default: 0.99, disable with -1.
98
+ - ```filter_threshold```: Match confidence. Increase this value to obtain less, but stronger matches. Default: 0.1
99
+
100
+ </details>
101
+
102
  The default values give a good trade-off between speed and accuracy. To maximize the accuracy, use all keypoints and disable the adaptive mechanisms:
103
  ```python
104
  extractor = SuperPoint(max_num_keypoints=None)
 
110
  extractor = SuperPoint(max_num_keypoints=1024)
111
  matcher = LightGlue(features='superpoint', depth_confidence=0.9, width_confidence=0.95)
112
  ```
113
+
114
+ The maximum speed is obtained with a combination of:
115
+ - [FlashAttention](https://arxiv.org/abs/2205.14135): automatically used when ```torch >= 2.0``` or if [installed from source](https://github.com/HazyResearch/flash-attention#installation-and-features).
116
+ - PyTorch compilation, available when ```torch >= 2.0```:
117
+ ```python
118
+ matcher = matcher.eval().cuda()
119
+ matcher.compile(mode='reduce-overhead')
120
+ ```
121
+ For inputs with fewer than 1536 keypoints (determined experimentally), this compiles LightGlue but disables point pruning (large overhead). For larger input sizes, it automatically falls backs to eager mode with point pruning. Adaptive depths is supported for any input size.
122
+
123
+ ## Benchmark
124
+
125
+
126
+ <p align="center">
127
+ <a><img src="assets/benchmark.png" alt="Logo" width=80%></a>
128
+ <br>
129
+ <em>Benchmark results on GPU (RTX 3080). With compilation and adaptivity, LightGlue runs at 150 FPS @ 1024 keypoints and 50 FPS @ 4096 keypoints per image. This is a 4-10x speedup over SuperGlue. </em>
130
+ </p>
131
+
132
+ <p align="center">
133
+ <a><img src="assets/benchmark_cpu.png" alt="Logo" width=80%></a>
134
+ <br>
135
+ <em>Benchmark results on CPU (Intel i7 10700K). LightGlue runs at 20 FPS @ 512 keypoints. </em>
136
+ </p>
137
+
138
+ Obtain the same plots for your setup using our [benchmark script](benchmark.py):
139
+ ```
140
+ python benchmark.py [--device cuda] [--add_superglue] [--num_keypoints 512 1024 2048 4096] [--compile]
141
+ ```
142
 
143
  <details>
144
+ <summary>[Performance tip - click to expand]</summary>
145
 
146
+ Note: **Point pruning** introduces an overhead that sometimes outweighs its benefits.
147
+ Point pruning is thus enabled only when the there are more than N keypoints in an image, where N is hardware-dependent.
148
+ We provide defaults optimized for current hardware (RTX 30xx GPUs).
149
+ We suggest running the benchmark script and adjusting the thresholds for your hardware by updating `LightGlue.pruning_keypoint_thresholds['cuda']`.
 
 
150
 
151
  </details>
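A minimal sketch of such an adjustment (the value 1024 is illustrative; derive a threshold from your own benchmark runs):
```python
from lightglue import LightGlue

# Point pruning is only enabled above this keypoint count on CUDA devices;
# setting -1 would always enable it (cf. the benchmark script's --no_prune_thresholds flag).
LightGlue.pruning_keypoint_thresholds['cuda'] = 1024  # illustrative value
matcher = LightGlue(features='superpoint').eval().cuda()
```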
152
 
153
+ ## Training and evaluation
154
+
155
+ With [Glue Factory](https://github.com/cvg/glue-factory), you can train LightGlue with your own local features, on your own dataset!
156
+ You can also evaluate it and other baselines on standard benchmarks like HPatches and MegaDepth.
157
+
158
  ## Other links
159
  - [hloc - the visual localization toolbox](https://github.com/cvg/Hierarchical-Localization/): run LightGlue for Structure-from-Motion and visual localization.
160
+ - [LightGlue-ONNX](https://github.com/fabio-sim/LightGlue-ONNX): export LightGlue to the Open Neural Network Exchange (ONNX) format with support for TensorRT and OpenVINO.
161
  - [Image Matching WebUI](https://github.com/Vincentqyw/image-matching-webui): a web GUI to easily compare different matchers, including LightGlue.
162
+ - [kornia](https://kornia.readthedocs.io) now exposes LightGlue via the interfaces [`LightGlue`](https://kornia.readthedocs.io/en/latest/feature.html#kornia.feature.LightGlue) and [`LightGlueMatcher`](https://kornia.readthedocs.io/en/latest/feature.html#kornia.feature.LightGlueMatcher).
163
 
164
+ ## BibTeX citation
165
  If you use any ideas from the paper or code from this repo, please consider citing:
166
 
167
  ```txt
168
+ @inproceedings{lindenberger2023lightglue,
169
  author = {Philipp Lindenberger and
170
  Paul-Edouard Sarlin and
171
  Marc Pollefeys},
 
174
  year = {2023}
175
  }
176
  ```
177
+
178
+
179
+ ## License
180
+ The pre-trained weights of LightGlue and the code provided in this repository are released under the [Apache-2.0 license](./LICENSE). [DISK](https://github.com/cvlab-epfl/disk) follows this license as well but SuperPoint follows [a different, restrictive license](https://github.com/magicleap/SuperPointPretrainedNetwork/blob/master/LICENSE) (this includes its pre-trained weights and its [inference file](./lightglue/superpoint.py)). [ALIKED](https://github.com/Shiaoming/ALIKED) was published under a BSD-3-Clause license.
third_party/LightGlue/assets/DSC_0410.JPG CHANGED

Git LFS Details

  • SHA256: 1d6a86be44519faf4c86e9a869c5b298a5a7e1478f7479400c28aa2d018bd1b0
  • Pointer size: 131 Bytes
  • Size of remote file: 391 kB
third_party/LightGlue/assets/DSC_0411.JPG CHANGED

Git LFS Details

  • SHA256: 7211ee48ec2fbc082d2dabf8dbf503c853a473712375c2ad32a29d538a168a47
  • Pointer size: 131 Bytes
  • Size of remote file: 421 kB
third_party/LightGlue/assets/benchmark.png ADDED

Git LFS Details

  • SHA256: cae077138caeca75aa99cb8047b198a94bc995e488f00db245d94ad77498142f
  • Pointer size: 130 Bytes
  • Size of remote file: 70.3 kB
third_party/LightGlue/assets/benchmark_cpu.png ADDED

Git LFS Details

  • SHA256: 1801a05606f1d316173365a9692f85c95f2d8aa53570c35415dd4805ac1d075d
  • Pointer size: 130 Bytes
  • Size of remote file: 56.4 kB
third_party/LightGlue/benchmark.py ADDED
@@ -0,0 +1,255 @@
1
+ # Benchmark script for LightGlue on real images
2
+ import argparse
3
+ import time
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import torch
10
+ import torch._dynamo
11
+
12
+ from lightglue import LightGlue, SuperPoint
13
+ from lightglue.utils import load_image
14
+
15
+ torch.set_grad_enabled(False)
16
+
17
+
18
+ def measure(matcher, data, device="cuda", r=100):
19
+ timings = np.zeros((r, 1))
20
+ if device.type == "cuda":
21
+ starter = torch.cuda.Event(enable_timing=True)
22
+ ender = torch.cuda.Event(enable_timing=True)
23
+ # warmup
24
+ for _ in range(10):
25
+ _ = matcher(data)
26
+ # measurements
27
+ with torch.no_grad():
28
+ for rep in range(r):
29
+ if device.type == "cuda":
30
+ starter.record()
31
+ _ = matcher(data)
32
+ ender.record()
33
+ # sync gpu
34
+ torch.cuda.synchronize()
35
+ curr_time = starter.elapsed_time(ender)
36
+ else:
37
+ start = time.perf_counter()
38
+ _ = matcher(data)
39
+ curr_time = (time.perf_counter() - start) * 1e3
40
+ timings[rep] = curr_time
41
+ mean_syn = np.sum(timings) / r
42
+ std_syn = np.std(timings)
43
+ return {"mean": mean_syn, "std": std_syn}
44
+
45
+
46
+ def print_as_table(d, title, cnames):
47
+ print()
48
+ header = f"{title:30} " + " ".join([f"{x:>7}" for x in cnames])
49
+ print(header)
50
+ print("-" * len(header))
51
+ for k, l in d.items():
52
+ print(f"{k:30}", " ".join([f"{x:>7.1f}" for x in l]))
53
+
54
+
55
+ if __name__ == "__main__":
56
+ parser = argparse.ArgumentParser(description="Benchmark script for LightGlue")
57
+ parser.add_argument(
58
+ "--device",
59
+ choices=["auto", "cuda", "cpu", "mps"],
60
+ default="auto",
61
+ help="device to benchmark on",
62
+ )
63
+ parser.add_argument("--compile", action="store_true", help="Compile LightGlue runs")
64
+ parser.add_argument(
65
+ "--no_flash", action="store_true", help="disable FlashAttention"
66
+ )
67
+ parser.add_argument(
68
+ "--no_prune_thresholds",
69
+ action="store_true",
70
+ help="disable pruning thresholds (i.e. always do pruning)",
71
+ )
72
+ parser.add_argument(
73
+ "--add_superglue",
74
+ action="store_true",
75
+ help="add SuperGlue to the benchmark (requires hloc)",
76
+ )
77
+ parser.add_argument(
78
+ "--measure", default="time", choices=["time", "log-time", "throughput"]
79
+ )
80
+ parser.add_argument(
81
+ "--repeat", "--r", type=int, default=100, help="repetitions of measurements"
82
+ )
83
+ parser.add_argument(
84
+ "--num_keypoints",
85
+ nargs="+",
86
+ type=int,
87
+ default=[256, 512, 1024, 2048, 4096],
88
+ help="number of keypoints (list separated by spaces)",
89
+ )
90
+ parser.add_argument(
91
+ "--matmul_precision", default="highest", choices=["highest", "high", "medium"]
92
+ )
93
+ parser.add_argument(
94
+ "--save", default=None, type=str, help="path where figure should be saved"
95
+ )
96
+ args = parser.parse_intermixed_args()
97
+
98
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
99
+ if args.device != "auto":
100
+ device = torch.device(args.device)
101
+
102
+ print("Running benchmark on device:", device)
103
+
104
+ images = Path("assets")
105
+ inputs = {
106
+ "easy": (
107
+ load_image(images / "DSC_0411.JPG"),
108
+ load_image(images / "DSC_0410.JPG"),
109
+ ),
110
+ "difficult": (
111
+ load_image(images / "sacre_coeur1.jpg"),
112
+ load_image(images / "sacre_coeur2.jpg"),
113
+ ),
114
+ }
115
+
116
+ configs = {
117
+ "LightGlue-full": {
118
+ "depth_confidence": -1,
119
+ "width_confidence": -1,
120
+ },
121
+ # 'LG-prune': {
122
+ # 'width_confidence': -1,
123
+ # },
124
+ # 'LG-depth': {
125
+ # 'depth_confidence': -1,
126
+ # },
127
+ "LightGlue-adaptive": {},
128
+ }
129
+
130
+ if args.compile:
131
+ configs = {**configs, **{k + "-compile": v for k, v in configs.items()}}
132
+
133
+ sg_configs = {
134
+ # 'SuperGlue': {},
135
+ "SuperGlue-fast": {"sinkhorn_iterations": 5}
136
+ }
137
+
138
+ torch.set_float32_matmul_precision(args.matmul_precision)
139
+
140
+ results = {k: defaultdict(list) for k, v in inputs.items()}
141
+
142
+ extractor = SuperPoint(max_num_keypoints=None, detection_threshold=-1)
143
+ extractor = extractor.eval().to(device)
144
+ figsize = (len(inputs) * 4.5, 4.5)
145
+ fig, axes = plt.subplots(1, len(inputs), sharey=True, figsize=figsize)
146
+ axes = axes if len(inputs) > 1 else [axes]
147
+ fig.canvas.manager.set_window_title(f"LightGlue benchmark ({device.type})")
148
+
149
+ for title, ax in zip(inputs.keys(), axes):
150
+ ax.set_xscale("log", base=2)
151
+ bases = [2**x for x in range(7, 16)]
152
+ ax.set_xticks(bases, bases)
153
+ ax.grid(which="major")
154
+ if args.measure == "log-time":
155
+ ax.set_yscale("log")
156
+ yticks = [10**x for x in range(6)]
157
+ ax.set_yticks(yticks, yticks)
158
+ mpos = [10**x * i for x in range(6) for i in range(2, 10)]
159
+ mlabel = [
160
+ 10**x * i if i in [2, 5] else None
161
+ for x in range(6)
162
+ for i in range(2, 10)
163
+ ]
164
+ ax.set_yticks(mpos, mlabel, minor=True)
165
+ ax.grid(which="minor", linewidth=0.2)
166
+ ax.set_title(title)
167
+
168
+ ax.set_xlabel("# keypoints")
169
+ if args.measure == "throughput":
170
+ ax.set_ylabel("Throughput [pairs/s]")
171
+ else:
172
+ ax.set_ylabel("Latency [ms]")
173
+
174
+ for name, conf in configs.items():
175
+ print("Run benchmark for:", name)
176
+ torch.cuda.empty_cache()
177
+ matcher = LightGlue(features="superpoint", flash=not args.no_flash, **conf)
178
+ if args.no_prune_thresholds:
179
+ matcher.pruning_keypoint_thresholds = {
180
+ k: -1 for k in matcher.pruning_keypoint_thresholds
181
+ }
182
+ matcher = matcher.eval().to(device)
183
+ if name.endswith("compile"):
184
+ import torch._dynamo
185
+
186
+ torch._dynamo.reset() # avoid buffer overflow
187
+ matcher.compile()
188
+ for pair_name, ax in zip(inputs.keys(), axes):
189
+ image0, image1 = [x.to(device) for x in inputs[pair_name]]
190
+ runtimes = []
191
+ for num_kpts in args.num_keypoints:
192
+ extractor.conf.max_num_keypoints = num_kpts
193
+ feats0 = extractor.extract(image0)
194
+ feats1 = extractor.extract(image1)
195
+ runtime = measure(
196
+ matcher,
197
+ {"image0": feats0, "image1": feats1},
198
+ device=device,
199
+ r=args.repeat,
200
+ )["mean"]
201
+ results[pair_name][name].append(
202
+ 1000 / runtime if args.measure == "throughput" else runtime
203
+ )
204
+ ax.plot(
205
+ args.num_keypoints, results[pair_name][name], label=name, marker="o"
206
+ )
207
+ del matcher, feats0, feats1
208
+
209
+ if args.add_superglue:
210
+ from hloc.matchers.superglue import SuperGlue
211
+
212
+ for name, conf in sg_configs.items():
213
+ print("Run benchmark for:", name)
214
+ matcher = SuperGlue(conf)
215
+ matcher = matcher.eval().to(device)
216
+ for pair_name, ax in zip(inputs.keys(), axes):
217
+ image0, image1 = [x.to(device) for x in inputs[pair_name]]
218
+ runtimes = []
219
+ for num_kpts in args.num_keypoints:
220
+ extractor.conf.max_num_keypoints = num_kpts
221
+ feats0 = extractor.extract(image0)
222
+ feats1 = extractor.extract(image1)
223
+ data = {
224
+ "image0": image0[None],
225
+ "image1": image1[None],
226
+ **{k + "0": v for k, v in feats0.items()},
227
+ **{k + "1": v for k, v in feats1.items()},
228
+ }
229
+ data["scores0"] = data["keypoint_scores0"]
230
+ data["scores1"] = data["keypoint_scores1"]
231
+ data["descriptors0"] = (
232
+ data["descriptors0"].transpose(-1, -2).contiguous()
233
+ )
234
+ data["descriptors1"] = (
235
+ data["descriptors1"].transpose(-1, -2).contiguous()
236
+ )
237
+ runtime = measure(matcher, data, device=device, r=args.repeat)[
238
+ "mean"
239
+ ]
240
+ results[pair_name][name].append(
241
+ 1000 / runtime if args.measure == "throughput" else runtime
242
+ )
243
+ ax.plot(
244
+ args.num_keypoints, results[pair_name][name], label=name, marker="o"
245
+ )
246
+ del matcher, data, image0, image1, feats0, feats1
247
+
248
+ for name, runtimes in results.items():
249
+ print_as_table(runtimes, name, args.num_keypoints)
250
+
251
+ axes[0].legend()
252
+ fig.tight_layout()
253
+ if args.save:
254
+ plt.savefig(args.save, dpi=fig.dpi)
255
+ plt.show()
third_party/LightGlue/demo.ipynb CHANGED
@@ -16,16 +16,19 @@
16
  "source": [
17
  "# If we are on colab: this clones the repo and installs the dependencies\n",
18
  "from pathlib import Path\n",
19
- "if Path.cwd().name != 'LightGlue':\n",
20
- " !git clone --quiet https://github.com/cvg/LightGlue/\n",
21
- " %cd LightGlue\n",
22
- " !pip install --progress-bar off --quiet -e .\n",
23
- " \n",
 
24
  "from lightglue import LightGlue, SuperPoint, DISK\n",
25
  "from lightglue.utils import load_image, rbd\n",
26
  "from lightglue import viz2d\n",
27
  "import torch\n",
28
- "images = Path('assets')"
 
 
29
  ]
30
  },
31
  {
@@ -51,10 +54,10 @@
51
  }
52
  ],
53
  "source": [
54
- "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 'mps', 'cpu'\n",
55
  "\n",
56
  "extractor = SuperPoint(max_num_keypoints=2048).eval().to(device) # load the extractor\n",
57
- "matcher = LightGlue(features='superpoint').eval().to(device)"
58
  ]
59
  },
60
  {
@@ -92,22 +95,24 @@
92
  }
93
  ],
94
  "source": [
95
- "image0 = load_image(images / 'DSC_0411.JPG')\n",
96
- "image1 = load_image(images / 'DSC_0410.JPG')\n",
97
  "\n",
98
  "feats0 = extractor.extract(image0.to(device))\n",
99
  "feats1 = extractor.extract(image1.to(device))\n",
100
- "matches01 = matcher({'image0': feats0, 'image1': feats1})\n",
101
- "feats0, feats1, matches01 = [rbd(x) for x in [feats0, feats1, matches01]] # remove batch dimension\n",
 
 
102
  "\n",
103
- "kpts0, kpts1, matches = feats0['keypoints'], feats1['keypoints'], matches01['matches']\n",
104
  "m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]\n",
105
  "\n",
106
  "axes = viz2d.plot_images([image0, image1])\n",
107
- "viz2d.plot_matches(m_kpts0, m_kpts1, color='lime', lw=0.2)\n",
108
  "viz2d.add_text(0, f'Stop after {matches01[\"stop\"]} layers', fs=20)\n",
109
  "\n",
110
- "kpc0, kpc1 = viz2d.cm_prune(matches01['prune0']), viz2d.cm_prune(matches01['prune1'])\n",
111
  "viz2d.plot_images([image0, image1])\n",
112
  "viz2d.plot_keypoints([kpts0, kpts1], colors=[kpc0, kpc1], ps=10)"
113
  ]
@@ -147,22 +152,24 @@
147
  }
148
  ],
149
  "source": [
150
- "image0 = load_image(images / 'sacre_coeur1.jpg')\n",
151
- "image1 = load_image(images / 'sacre_coeur2.jpg')\n",
152
  "\n",
153
  "feats0 = extractor.extract(image0.to(device))\n",
154
  "feats1 = extractor.extract(image1.to(device))\n",
155
- "matches01 = matcher({'image0': feats0, 'image1': feats1})\n",
156
- "feats0, feats1, matches01 = [rbd(x) for x in [feats0, feats1, matches01]] # remove batch dimension\n",
 
 
157
  "\n",
158
- "kpts0, kpts1, matches = feats0['keypoints'], feats1['keypoints'], matches01['matches']\n",
159
  "m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]\n",
160
  "\n",
161
  "axes = viz2d.plot_images([image0, image1])\n",
162
- "viz2d.plot_matches(m_kpts0, m_kpts1, color='lime', lw=0.2)\n",
163
  "viz2d.add_text(0, f'Stop after {matches01[\"stop\"]} layers')\n",
164
  "\n",
165
- "kpc0, kpc1 = viz2d.cm_prune(matches01['prune0']), viz2d.cm_prune(matches01['prune1'])\n",
166
  "viz2d.plot_images([image0, image1])\n",
167
  "viz2d.plot_keypoints([kpts0, kpts1], colors=[kpc0, kpc1], ps=6)"
168
  ]
 
16
  "source": [
17
  "# If we are on colab: this clones the repo and installs the dependencies\n",
18
  "from pathlib import Path\n",
19
+ "\n",
20
+ "if Path.cwd().name != \"LightGlue\":\n",
21
+ " !git clone --quiet https://github.com/cvg/LightGlue/\n",
22
+ " %cd LightGlue\n",
23
+ " !pip install --progress-bar off --quiet -e .\n",
24
+ "\n",
25
  "from lightglue import LightGlue, SuperPoint, DISK\n",
26
  "from lightglue.utils import load_image, rbd\n",
27
  "from lightglue import viz2d\n",
28
  "import torch\n",
29
+ "\n",
30
+ "torch.set_grad_enabled(False)\n",
31
+ "images = Path(\"assets\")"
32
  ]
33
  },
34
  {
 
54
  }
55
  ],
56
  "source": [
57
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # 'mps', 'cpu'\n",
58
  "\n",
59
  "extractor = SuperPoint(max_num_keypoints=2048).eval().to(device) # load the extractor\n",
60
+ "matcher = LightGlue(features=\"superpoint\").eval().to(device)"
61
  ]
62
  },
63
  {
 
95
  }
96
  ],
97
  "source": [
98
+ "image0 = load_image(images / \"DSC_0411.JPG\")\n",
99
+ "image1 = load_image(images / \"DSC_0410.JPG\")\n",
100
  "\n",
101
  "feats0 = extractor.extract(image0.to(device))\n",
102
  "feats1 = extractor.extract(image1.to(device))\n",
103
+ "matches01 = matcher({\"image0\": feats0, \"image1\": feats1})\n",
104
+ "feats0, feats1, matches01 = [\n",
105
+ " rbd(x) for x in [feats0, feats1, matches01]\n",
106
+ "] # remove batch dimension\n",
107
  "\n",
108
+ "kpts0, kpts1, matches = feats0[\"keypoints\"], feats1[\"keypoints\"], matches01[\"matches\"]\n",
109
  "m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]\n",
110
  "\n",
111
  "axes = viz2d.plot_images([image0, image1])\n",
112
+ "viz2d.plot_matches(m_kpts0, m_kpts1, color=\"lime\", lw=0.2)\n",
113
  "viz2d.add_text(0, f'Stop after {matches01[\"stop\"]} layers', fs=20)\n",
114
  "\n",
115
+ "kpc0, kpc1 = viz2d.cm_prune(matches01[\"prune0\"]), viz2d.cm_prune(matches01[\"prune1\"])\n",
116
  "viz2d.plot_images([image0, image1])\n",
117
  "viz2d.plot_keypoints([kpts0, kpts1], colors=[kpc0, kpc1], ps=10)"
118
  ]
 
152
  }
153
  ],
154
  "source": [
155
+ "image0 = load_image(images / \"sacre_coeur1.jpg\")\n",
156
+ "image1 = load_image(images / \"sacre_coeur2.jpg\")\n",
157
  "\n",
158
  "feats0 = extractor.extract(image0.to(device))\n",
159
  "feats1 = extractor.extract(image1.to(device))\n",
160
+ "matches01 = matcher({\"image0\": feats0, \"image1\": feats1})\n",
161
+ "feats0, feats1, matches01 = [\n",
162
+ " rbd(x) for x in [feats0, feats1, matches01]\n",
163
+ "] # remove batch dimension\n",
164
  "\n",
165
+ "kpts0, kpts1, matches = feats0[\"keypoints\"], feats1[\"keypoints\"], matches01[\"matches\"]\n",
166
  "m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]\n",
167
  "\n",
168
  "axes = viz2d.plot_images([image0, image1])\n",
169
+ "viz2d.plot_matches(m_kpts0, m_kpts1, color=\"lime\", lw=0.2)\n",
170
  "viz2d.add_text(0, f'Stop after {matches01[\"stop\"]} layers')\n",
171
  "\n",
172
+ "kpc0, kpc1 = viz2d.cm_prune(matches01[\"prune0\"]), viz2d.cm_prune(matches01[\"prune1\"])\n",
173
  "viz2d.plot_images([image0, image1])\n",
174
  "viz2d.plot_keypoints([kpts0, kpts1], colors=[kpc0, kpc1], ps=6)"
175
  ]
third_party/LightGlue/lightglue/__init__.py CHANGED
@@ -1,4 +1,7 @@
1
- from .lightglue import LightGlue
2
- from .superpoint import SuperPoint
3
- from .disk import DISK
4
- from .utils import match_pair
 
 
 
 
1
+ from .aliked import ALIKED # noqa
2
+ from .disk import DISK # noqa
3
+ from .dog_hardnet import DoGHardNet # noqa
4
+ from .lightglue import LightGlue # noqa
5
+ from .sift import SIFT # noqa
6
+ from .superpoint import SuperPoint # noqa
7
+ from .utils import match_pair # noqa
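The expanded exports above make the new extractors importable alongside LightGlue. A minimal sketch pairing the new SIFT extractor with LightGlue, assuming it exposes the same `extract()` interface as SuperPoint/DISK and that `'sift'` is among the registered feature types (the `max_num_keypoints` argument and asset paths are taken from elsewhere in this diff):
```python
import torch
from lightglue import LightGlue, SIFT
from lightglue.utils import load_image, rbd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

extractor = SIFT(max_num_keypoints=2048).eval().to(device)  # assumed kwarg, mirroring SuperPoint
matcher = LightGlue(features="sift").eval().to(device)      # assumes 'sift' is a registered feature type

image0 = load_image("assets/DSC_0411.JPG")
image1 = load_image("assets/DSC_0410.JPG")
feats0 = extractor.extract(image0.to(device))
feats1 = extractor.extract(image1.to(device))
matches01 = matcher({"image0": feats0, "image1": feats1})
feats0, feats1, matches01 = [rbd(x) for x in [feats0, feats1, matches01]]  # remove batch dimension

matches = matches01["matches"]  # (S, 2) indices into the two keypoint sets
m_kpts0 = feats0["keypoints"][matches[..., 0]]
m_kpts1 = feats1["keypoints"][matches[..., 1]]
```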
third_party/LightGlue/lightglue/aliked.py ADDED
@@ -0,0 +1,758 @@
1
+ # BSD 3-Clause License
2
+
3
+ # Copyright (c) 2022, Zhao Xiaoming
4
+ # All rights reserved.
5
+
6
+ # Redistribution and use in source and binary forms, with or without
7
+ # modification, are permitted provided that the following conditions are met:
8
+
9
+ # 1. Redistributions of source code must retain the above copyright notice, this
10
+ # list of conditions and the following disclaimer.
11
+
12
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+
16
+ # 3. Neither the name of the copyright holder nor the names of its
17
+ # contributors may be used to endorse or promote products derived from
18
+ # this software without specific prior written permission.
19
+
20
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ # Authors:
32
+ # Xiaoming Zhao, Xingming Wu, Weihai Chen, Peter C.Y. Chen, Qingsong Xu, and Zhengguo Li
33
+ # Code from https://github.com/Shiaoming/ALIKED
34
+
35
+ from typing import Callable, Optional
36
+
37
+ import torch
38
+ import torch.nn.functional as F
39
+ import torchvision
40
+ from kornia.color import grayscale_to_rgb
41
+ from torch import nn
42
+ from torch.nn.modules.utils import _pair
43
+ from torchvision.models import resnet
44
+
45
+ from .utils import Extractor
46
+
47
+
48
+ def get_patches(
49
+ tensor: torch.Tensor, required_corners: torch.Tensor, ps: int
50
+ ) -> torch.Tensor:
51
+ c, h, w = tensor.shape
52
+ corner = (required_corners - ps / 2 + 1).long()
53
+ corner[:, 0] = corner[:, 0].clamp(min=0, max=w - 1 - ps)
54
+ corner[:, 1] = corner[:, 1].clamp(min=0, max=h - 1 - ps)
55
+ offset = torch.arange(0, ps)
56
+
57
+ kw = {"indexing": "ij"} if torch.__version__ >= "1.10" else {}
58
+ x, y = torch.meshgrid(offset, offset, **kw)
59
+ patches = torch.stack((x, y)).permute(2, 1, 0).unsqueeze(2)
60
+ patches = patches.to(corner) + corner[None, None]
61
+ pts = patches.reshape(-1, 2)
62
+ sampled = tensor.permute(1, 2, 0)[tuple(pts.T)[::-1]]
63
+ sampled = sampled.reshape(ps, ps, -1, c)
64
+ assert sampled.shape[:3] == patches.shape[:3]
65
+ return sampled.permute(2, 3, 0, 1)
66
+
67
+
68
+ def simple_nms(scores: torch.Tensor, nms_radius: int):
69
+ """Fast Non-maximum suppression to remove nearby points"""
70
+
71
+ zeros = torch.zeros_like(scores)
72
+ max_mask = scores == torch.nn.functional.max_pool2d(
73
+ scores, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius
74
+ )
75
+
76
+ for _ in range(2):
77
+ supp_mask = (
78
+ torch.nn.functional.max_pool2d(
79
+ max_mask.float(),
80
+ kernel_size=nms_radius * 2 + 1,
81
+ stride=1,
82
+ padding=nms_radius,
83
+ )
84
+ > 0
85
+ )
86
+ supp_scores = torch.where(supp_mask, zeros, scores)
87
+ new_max_mask = supp_scores == torch.nn.functional.max_pool2d(
88
+ supp_scores, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius
89
+ )
90
+ max_mask = max_mask | (new_max_mask & (~supp_mask))
91
+ return torch.where(max_mask, scores, zeros)
92
+
93
+
94
+ class DKD(nn.Module):
95
+ def __init__(
96
+ self,
97
+ radius: int = 2,
98
+ top_k: int = 0,
99
+ scores_th: float = 0.2,
100
+ n_limit: int = 20000,
101
+ ):
102
+ """
103
+ Args:
104
+ radius: soft detection radius, kernel size is (2 * radius + 1)
105
+ top_k: top_k > 0: return top k keypoints
106
+ scores_th: top_k <= 0 threshold mode:
107
+ scores_th > 0: return keypoints with scores>scores_th
108
+ else: return keypoints with scores > scores.mean()
109
+ n_limit: max number of keypoint in threshold mode
110
+ """
111
+ super().__init__()
112
+ self.radius = radius
113
+ self.top_k = top_k
114
+ self.scores_th = scores_th
115
+ self.n_limit = n_limit
116
+ self.kernel_size = 2 * self.radius + 1
117
+ self.temperature = 0.1 # tuned temperature
118
+ self.unfold = nn.Unfold(kernel_size=self.kernel_size, padding=self.radius)
119
+ # local xy grid
120
+ x = torch.linspace(-self.radius, self.radius, self.kernel_size)
121
+ # (kernel_size*kernel_size) x 2 : (w,h)
122
+ kw = {"indexing": "ij"} if torch.__version__ >= "1.10" else {}
123
+ self.hw_grid = (
124
+ torch.stack(torch.meshgrid([x, x], **kw)).view(2, -1).t()[:, [1, 0]]
125
+ )
126
+
127
+ def forward(
128
+ self,
129
+ scores_map: torch.Tensor,
130
+ sub_pixel: bool = True,
131
+ image_size: Optional[torch.Tensor] = None,
132
+ ):
133
+ """
134
+ :param scores_map: Bx1xHxW
135
+ :param descriptor_map: BxCxHxW
136
+ :param sub_pixel: whether to use sub-pixel keypoint detection
137
+ :return: kpts: list[Nx2,...]; kptscores: list[N,....] normalised position: -1~1
138
+ """
139
+ b, c, h, w = scores_map.shape
140
+ scores_nograd = scores_map.detach()
141
+ nms_scores = simple_nms(scores_nograd, self.radius)
142
+
143
+ # remove border
144
+ nms_scores[:, :, : self.radius, :] = 0
145
+ nms_scores[:, :, :, : self.radius] = 0
146
+ if image_size is not None:
147
+ for i in range(scores_map.shape[0]):
148
+ w, h = image_size[i].long()
149
+ nms_scores[i, :, h.item() - self.radius :, :] = 0
150
+ nms_scores[i, :, :, w.item() - self.radius :] = 0
151
+ else:
152
+ nms_scores[:, :, -self.radius :, :] = 0
153
+ nms_scores[:, :, :, -self.radius :] = 0
154
+
155
+ # detect keypoints without grad
156
+ if self.top_k > 0:
157
+ topk = torch.topk(nms_scores.view(b, -1), self.top_k)
158
+ indices_keypoints = [topk.indices[i] for i in range(b)] # B x top_k
159
+ else:
160
+ if self.scores_th > 0:
161
+ masks = nms_scores > self.scores_th
162
+ if masks.sum() == 0:
163
+ th = scores_nograd.reshape(b, -1).mean(dim=1) # th = self.scores_th
164
+ masks = nms_scores > th.reshape(b, 1, 1, 1)
165
+ else:
166
+ th = scores_nograd.reshape(b, -1).mean(dim=1) # th = self.scores_th
167
+ masks = nms_scores > th.reshape(b, 1, 1, 1)
168
+ masks = masks.reshape(b, -1)
169
+
170
+ indices_keypoints = [] # list, B x (any size)
171
+ scores_view = scores_nograd.reshape(b, -1)
172
+ for mask, scores in zip(masks, scores_view):
173
+ indices = mask.nonzero()[:, 0]
174
+ if len(indices) > self.n_limit:
175
+ kpts_sc = scores[indices]
176
+ sort_idx = kpts_sc.sort(descending=True)[1]
177
+ sel_idx = sort_idx[: self.n_limit]
178
+ indices = indices[sel_idx]
179
+ indices_keypoints.append(indices)
180
+
181
+ wh = torch.tensor([w - 1, h - 1], device=scores_nograd.device)
182
+
183
+ keypoints = []
184
+ scoredispersitys = []
185
+ kptscores = []
186
+ if sub_pixel:
187
+ # detect soft keypoints with grad backpropagation
188
+ patches = self.unfold(scores_map) # B x (kernel**2) x (H*W)
189
+ self.hw_grid = self.hw_grid.to(scores_map) # to device
190
+ for b_idx in range(b):
191
+ patch = patches[b_idx].t() # (H*W) x (kernel**2)
192
+ indices_kpt = indices_keypoints[
193
+ b_idx
194
+ ] # one dimension vector, say its size is M
195
+ patch_scores = patch[indices_kpt] # M x (kernel**2)
196
+ keypoints_xy_nms = torch.stack(
197
+ [indices_kpt % w, torch.div(indices_kpt, w, rounding_mode="trunc")],
198
+ dim=1,
199
+ ) # Mx2
200
+
201
+ # max is detached to prevent undesired backprop loops in the graph
202
+ max_v = patch_scores.max(dim=1).values.detach()[:, None]
203
+ x_exp = (
204
+ (patch_scores - max_v) / self.temperature
205
+ ).exp() # M * (kernel**2), in [0, 1]
206
+
207
+ # \frac{ \sum{(i,j) \times \exp(x/T)} }{ \sum{\exp(x/T)} }
208
+ xy_residual = (
209
+ x_exp @ self.hw_grid / x_exp.sum(dim=1)[:, None]
210
+ ) # Soft-argmax, Mx2
211
+
212
+ hw_grid_dist2 = (
213
+ torch.norm(
214
+ (self.hw_grid[None, :, :] - xy_residual[:, None, :])
215
+ / self.radius,
216
+ dim=-1,
217
+ )
218
+ ** 2
219
+ )
220
+ scoredispersity = (x_exp * hw_grid_dist2).sum(dim=1) / x_exp.sum(dim=1)
221
+
222
+ # compute result keypoints
223
+ keypoints_xy = keypoints_xy_nms + xy_residual
224
+ keypoints_xy = keypoints_xy / wh * 2 - 1 # (w,h) -> (-1~1,-1~1)
225
+
226
+ kptscore = torch.nn.functional.grid_sample(
227
+ scores_map[b_idx].unsqueeze(0),
228
+ keypoints_xy.view(1, 1, -1, 2),
229
+ mode="bilinear",
230
+ align_corners=True,
231
+ )[
232
+ 0, 0, 0, :
233
+ ] # CxN
234
+
235
+ keypoints.append(keypoints_xy)
236
+ scoredispersitys.append(scoredispersity)
237
+ kptscores.append(kptscore)
238
+ else:
239
+ for b_idx in range(b):
240
+ indices_kpt = indices_keypoints[
241
+ b_idx
242
+ ] # one dimension vector, say its size is M
243
+ # To avoid warning: UserWarning: __floordiv__ is deprecated
244
+ keypoints_xy_nms = torch.stack(
245
+ [indices_kpt % w, torch.div(indices_kpt, w, rounding_mode="trunc")],
246
+ dim=1,
247
+ ) # Mx2
248
+ keypoints_xy = keypoints_xy_nms / wh * 2 - 1 # (w,h) -> (-1~1,-1~1)
249
+ kptscore = torch.nn.functional.grid_sample(
250
+ scores_map[b_idx].unsqueeze(0),
251
+ keypoints_xy.view(1, 1, -1, 2),
252
+ mode="bilinear",
253
+ align_corners=True,
254
+ )[
255
+ 0, 0, 0, :
256
+ ] # CxN
257
+ keypoints.append(keypoints_xy)
258
+ scoredispersitys.append(kptscore) # for jit.script compatability
259
+ kptscores.append(kptscore)
260
+
261
+ return keypoints, scoredispersitys, kptscores
262
+
263
+
264
+ class InputPadder(object):
265
+ """Pads images such that dimensions are divisible by 8"""
266
+
267
+ def __init__(self, h: int, w: int, divis_by: int = 8):
268
+ self.ht = h
269
+ self.wd = w
270
+ pad_ht = (((self.ht // divis_by) + 1) * divis_by - self.ht) % divis_by
271
+ pad_wd = (((self.wd // divis_by) + 1) * divis_by - self.wd) % divis_by
272
+ self._pad = [
273
+ pad_wd // 2,
274
+ pad_wd - pad_wd // 2,
275
+ pad_ht // 2,
276
+ pad_ht - pad_ht // 2,
277
+ ]
278
+
279
+ def pad(self, x: torch.Tensor):
280
+ assert x.ndim == 4
281
+ return F.pad(x, self._pad, mode="replicate")
282
+
283
+ def unpad(self, x: torch.Tensor):
284
+ assert x.ndim == 4
285
+ ht = x.shape[-2]
286
+ wd = x.shape[-1]
287
+ c = [self._pad[2], ht - self._pad[3], self._pad[0], wd - self._pad[1]]
288
+ return x[..., c[0] : c[1], c[2] : c[3]]
289
+
290
+
291
+ class DeformableConv2d(nn.Module):
292
+ def __init__(
293
+ self,
294
+ in_channels,
295
+ out_channels,
296
+ kernel_size=3,
297
+ stride=1,
298
+ padding=1,
299
+ bias=False,
300
+ mask=False,
301
+ ):
302
+ super(DeformableConv2d, self).__init__()
303
+
304
+ self.padding = padding
305
+ self.mask = mask
306
+
307
+ self.channel_num = (
308
+ 3 * kernel_size * kernel_size if mask else 2 * kernel_size * kernel_size
309
+ )
310
+ self.offset_conv = nn.Conv2d(
311
+ in_channels,
312
+ self.channel_num,
313
+ kernel_size=kernel_size,
314
+ stride=stride,
315
+ padding=self.padding,
316
+ bias=True,
317
+ )
318
+
319
+ self.regular_conv = nn.Conv2d(
320
+ in_channels=in_channels,
321
+ out_channels=out_channels,
322
+ kernel_size=kernel_size,
323
+ stride=stride,
324
+ padding=self.padding,
325
+ bias=bias,
326
+ )
327
+
328
+ def forward(self, x):
329
+ h, w = x.shape[2:]
330
+ max_offset = max(h, w) / 4.0
331
+
332
+ out = self.offset_conv(x)
333
+ if self.mask:
334
+ o1, o2, mask = torch.chunk(out, 3, dim=1)
335
+ offset = torch.cat((o1, o2), dim=1)
336
+ mask = torch.sigmoid(mask)
337
+ else:
338
+ offset = out
339
+ mask = None
340
+ offset = offset.clamp(-max_offset, max_offset)
341
+ x = torchvision.ops.deform_conv2d(
342
+ input=x,
343
+ offset=offset,
344
+ weight=self.regular_conv.weight,
345
+ bias=self.regular_conv.bias,
346
+ padding=self.padding,
347
+ mask=mask,
348
+ )
349
+ return x
350
+
351
+
352
+ def get_conv(
353
+ inplanes,
354
+ planes,
355
+ kernel_size=3,
356
+ stride=1,
357
+ padding=1,
358
+ bias=False,
359
+ conv_type="conv",
360
+ mask=False,
361
+ ):
362
+ if conv_type == "conv":
363
+ conv = nn.Conv2d(
364
+ inplanes,
365
+ planes,
366
+ kernel_size=kernel_size,
367
+ stride=stride,
368
+ padding=padding,
369
+ bias=bias,
370
+ )
371
+ elif conv_type == "dcn":
372
+ conv = DeformableConv2d(
373
+ inplanes,
374
+ planes,
375
+ kernel_size=kernel_size,
376
+ stride=stride,
377
+ padding=_pair(padding),
378
+ bias=bias,
379
+ mask=mask,
380
+ )
381
+ else:
382
+ raise TypeError
383
+ return conv
384
+
385
+
386
+ class ConvBlock(nn.Module):
387
+ def __init__(
388
+ self,
389
+ in_channels,
390
+ out_channels,
391
+ gate: Optional[Callable[..., nn.Module]] = None,
392
+ norm_layer: Optional[Callable[..., nn.Module]] = None,
393
+ conv_type: str = "conv",
394
+ mask: bool = False,
395
+ ):
396
+ super().__init__()
397
+ if gate is None:
398
+ self.gate = nn.ReLU(inplace=True)
399
+ else:
400
+ self.gate = gate
401
+ if norm_layer is None:
402
+ norm_layer = nn.BatchNorm2d
403
+ self.conv1 = get_conv(
404
+ in_channels, out_channels, kernel_size=3, conv_type=conv_type, mask=mask
405
+ )
406
+ self.bn1 = norm_layer(out_channels)
407
+ self.conv2 = get_conv(
408
+ out_channels, out_channels, kernel_size=3, conv_type=conv_type, mask=mask
409
+ )
410
+ self.bn2 = norm_layer(out_channels)
411
+
412
+ def forward(self, x):
413
+ x = self.gate(self.bn1(self.conv1(x))) # B x in_channels x H x W
414
+ x = self.gate(self.bn2(self.conv2(x))) # B x out_channels x H x W
415
+ return x
416
+
417
+
418
+ # modified based on torchvision\models\resnet.py#27->BasicBlock
419
+ class ResBlock(nn.Module):
420
+ expansion: int = 1
421
+
422
+ def __init__(
423
+ self,
424
+ inplanes: int,
425
+ planes: int,
426
+ stride: int = 1,
427
+ downsample: Optional[nn.Module] = None,
428
+ groups: int = 1,
429
+ base_width: int = 64,
430
+ dilation: int = 1,
431
+ gate: Optional[Callable[..., nn.Module]] = None,
432
+ norm_layer: Optional[Callable[..., nn.Module]] = None,
433
+ conv_type: str = "conv",
434
+ mask: bool = False,
435
+ ) -> None:
436
+ super(ResBlock, self).__init__()
437
+ if gate is None:
438
+ self.gate = nn.ReLU(inplace=True)
439
+ else:
440
+ self.gate = gate
441
+ if norm_layer is None:
442
+ norm_layer = nn.BatchNorm2d
443
+ if groups != 1 or base_width != 64:
444
+ raise ValueError("ResBlock only supports groups=1 and base_width=64")
445
+ if dilation > 1:
446
+ raise NotImplementedError("Dilation > 1 not supported in ResBlock")
447
+ # Both self.conv1 and self.downsample layers
448
+ # downsample the input when stride != 1
449
+ self.conv1 = get_conv(
450
+ inplanes, planes, kernel_size=3, conv_type=conv_type, mask=mask
451
+ )
452
+ self.bn1 = norm_layer(planes)
453
+ self.conv2 = get_conv(
454
+ planes, planes, kernel_size=3, conv_type=conv_type, mask=mask
455
+ )
456
+ self.bn2 = norm_layer(planes)
457
+ self.downsample = downsample
458
+ self.stride = stride
459
+
460
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
461
+ identity = x
462
+
463
+ out = self.conv1(x)
464
+ out = self.bn1(out)
465
+ out = self.gate(out)
466
+
467
+ out = self.conv2(out)
468
+ out = self.bn2(out)
469
+
470
+ if self.downsample is not None:
471
+ identity = self.downsample(x)
472
+
473
+ out += identity
474
+ out = self.gate(out)
475
+
476
+ return out
477
+
478
+
479
+ class SDDH(nn.Module):
480
+ def __init__(
481
+ self,
482
+ dims: int,
483
+ kernel_size: int = 3,
484
+ n_pos: int = 8,
485
+ gate=nn.ReLU(),
486
+ conv2D=False,
487
+ mask=False,
488
+ ):
489
+ super(SDDH, self).__init__()
490
+ self.kernel_size = kernel_size
491
+ self.n_pos = n_pos
492
+ self.conv2D = conv2D
493
+ self.mask = mask
494
+
495
+ self.get_patches_func = get_patches
496
+
497
+ # estimate offsets
498
+ self.channel_num = 3 * n_pos if mask else 2 * n_pos
499
+ self.offset_conv = nn.Sequential(
500
+ nn.Conv2d(
501
+ dims,
502
+ self.channel_num,
503
+ kernel_size=kernel_size,
504
+ stride=1,
505
+ padding=0,
506
+ bias=True,
507
+ ),
508
+ gate,
509
+ nn.Conv2d(
510
+ self.channel_num,
511
+ self.channel_num,
512
+ kernel_size=1,
513
+ stride=1,
514
+ padding=0,
515
+ bias=True,
516
+ ),
517
+ )
518
+
519
+ # sampled feature conv
520
+ self.sf_conv = nn.Conv2d(
521
+ dims, dims, kernel_size=1, stride=1, padding=0, bias=False
522
+ )
523
+
524
+ # convM
525
+ if not conv2D:
526
+ # deformable desc weights
527
+ agg_weights = torch.nn.Parameter(torch.rand(n_pos, dims, dims))
528
+ self.register_parameter("agg_weights", agg_weights)
529
+ else:
530
+ self.convM = nn.Conv2d(
531
+ dims * n_pos, dims, kernel_size=1, stride=1, padding=0, bias=False
532
+ )
533
+
534
+ def forward(self, x, keypoints):
535
+ # x: [B,C,H,W]
536
+ # keypoints: list, [[N_kpts,2], ...] (w,h)
537
+ b, c, h, w = x.shape
538
+ wh = torch.tensor([[w - 1, h - 1]], device=x.device)
539
+ max_offset = max(h, w) / 4.0
540
+
541
+ offsets = []
542
+ descriptors = []
543
+ # get offsets for each keypoint
544
+ for ib in range(b):
545
+ xi, kptsi = x[ib], keypoints[ib]
546
+ kptsi_wh = (kptsi / 2 + 0.5) * wh
547
+ N_kpts = len(kptsi)
548
+
549
+ if self.kernel_size > 1:
550
+ patch = self.get_patches_func(
551
+ xi, kptsi_wh.long(), self.kernel_size
552
+ ) # [N_kpts, C, K, K]
553
+ else:
554
+ kptsi_wh_long = kptsi_wh.long()
555
+ patch = (
556
+ xi[:, kptsi_wh_long[:, 1], kptsi_wh_long[:, 0]]
557
+ .permute(1, 0)
558
+ .reshape(N_kpts, c, 1, 1)
559
+ )
560
+
561
+ offset = self.offset_conv(patch).clamp(
562
+ -max_offset, max_offset
563
+ ) # [N_kpts, 2*n_pos, 1, 1]
564
+ if self.mask:
565
+ offset = (
566
+ offset[:, :, 0, 0].view(N_kpts, 3, self.n_pos).permute(0, 2, 1)
567
+ ) # [N_kpts, n_pos, 3]
568
+ offset = offset[:, :, :-1] # [N_kpts, n_pos, 2]
569
+ mask_weight = torch.sigmoid(offset[:, :, -1]) # [N_kpts, n_pos]
570
+ else:
571
+ offset = (
572
+ offset[:, :, 0, 0].view(N_kpts, 2, self.n_pos).permute(0, 2, 1)
573
+ ) # [N_kpts, n_pos, 2]
574
+ offsets.append(offset) # for visualization
575
+
576
+ # get sample positions
577
+ pos = kptsi_wh.unsqueeze(1) + offset # [N_kpts, n_pos, 2]
578
+ pos = 2.0 * pos / wh[None] - 1
579
+ pos = pos.reshape(1, N_kpts * self.n_pos, 1, 2)
580
+
581
+ # sample features
582
+ features = F.grid_sample(
583
+ xi.unsqueeze(0), pos, mode="bilinear", align_corners=True
584
+ ) # [1,C,(N_kpts*n_pos),1]
585
+ features = features.reshape(c, N_kpts, self.n_pos, 1).permute(
586
+ 1, 0, 2, 3
587
+ ) # [N_kpts, C, n_pos, 1]
588
+ if self.mask:
589
+ features = torch.einsum("ncpo,np->ncpo", features, mask_weight)
590
+
591
+ features = torch.selu_(self.sf_conv(features)).squeeze(
592
+ -1
593
+ ) # [N_kpts, C, n_pos]
594
+ # convM
595
+ if not self.conv2D:
596
+ descs = torch.einsum(
597
+ "ncp,pcd->nd", features, self.agg_weights
598
+ ) # [N_kpts, C]
599
+ else:
600
+ features = features.reshape(N_kpts, -1)[
601
+ :, :, None, None
602
+ ] # [N_kpts, C*n_pos, 1, 1]
603
+ descs = self.convM(features).squeeze() # [N_kpts, C]
604
+
605
+ # normalize
606
+ descs = F.normalize(descs, p=2.0, dim=1)
607
+ descriptors.append(descs)
608
+
609
+ return descriptors, offsets
610
+
611
+
612
+ class ALIKED(Extractor):
613
+ default_conf = {
614
+ "model_name": "aliked-n16",
615
+ "max_num_keypoints": -1,
616
+ "detection_threshold": 0.2,
617
+ "nms_radius": 2,
618
+ }
619
+
620
+ checkpoint_url = "https://github.com/Shiaoming/ALIKED/raw/main/models/{}.pth"
621
+
622
+ n_limit_max = 20000
623
+
624
+ # c1, c2, c3, c4, dim, K, M
625
+ cfgs = {
626
+ "aliked-t16": [8, 16, 32, 64, 64, 3, 16],
627
+ "aliked-n16": [16, 32, 64, 128, 128, 3, 16],
628
+ "aliked-n16rot": [16, 32, 64, 128, 128, 3, 16],
629
+ "aliked-n32": [16, 32, 64, 128, 128, 3, 32],
630
+ }
631
+ preprocess_conf = {
632
+ "resize": 1024,
633
+ }
634
+
635
+ required_data_keys = ["image"]
636
+
637
+ def __init__(self, **conf):
638
+ super().__init__(**conf) # Update with default configuration.
639
+ conf = self.conf
640
+ c1, c2, c3, c4, dim, K, M = self.cfgs[conf.model_name]
641
+ conv_types = ["conv", "conv", "dcn", "dcn"]
642
+ conv2D = False
643
+ mask = False
644
+
645
+ # build model
646
+ self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
647
+ self.pool4 = nn.AvgPool2d(kernel_size=4, stride=4)
648
+ self.norm = nn.BatchNorm2d
649
+ self.gate = nn.SELU(inplace=True)
650
+ self.block1 = ConvBlock(3, c1, self.gate, self.norm, conv_type=conv_types[0])
651
+ self.block2 = self.get_resblock(c1, c2, conv_types[1], mask)
652
+ self.block3 = self.get_resblock(c2, c3, conv_types[2], mask)
653
+ self.block4 = self.get_resblock(c3, c4, conv_types[3], mask)
654
+
655
+ self.conv1 = resnet.conv1x1(c1, dim // 4)
656
+ self.conv2 = resnet.conv1x1(c2, dim // 4)
657
+ self.conv3 = resnet.conv1x1(c3, dim // 4)
658
+ self.conv4 = resnet.conv1x1(dim, dim // 4)
659
+ self.upsample2 = nn.Upsample(
660
+ scale_factor=2, mode="bilinear", align_corners=True
661
+ )
662
+ self.upsample4 = nn.Upsample(
663
+ scale_factor=4, mode="bilinear", align_corners=True
664
+ )
665
+ self.upsample8 = nn.Upsample(
666
+ scale_factor=8, mode="bilinear", align_corners=True
667
+ )
668
+ self.upsample32 = nn.Upsample(
669
+ scale_factor=32, mode="bilinear", align_corners=True
670
+ )
671
+ self.score_head = nn.Sequential(
672
+ resnet.conv1x1(dim, 8),
673
+ self.gate,
674
+ resnet.conv3x3(8, 4),
675
+ self.gate,
676
+ resnet.conv3x3(4, 4),
677
+ self.gate,
678
+ resnet.conv3x3(4, 1),
679
+ )
680
+ self.desc_head = SDDH(dim, K, M, gate=self.gate, conv2D=conv2D, mask=mask)
681
+ self.dkd = DKD(
682
+ radius=conf.nms_radius,
683
+ top_k=-1 if conf.detection_threshold > 0 else conf.max_num_keypoints,
684
+ scores_th=conf.detection_threshold,
685
+ n_limit=conf.max_num_keypoints
686
+ if conf.max_num_keypoints > 0
687
+ else self.n_limit_max,
688
+ )
689
+
690
+ state_dict = torch.hub.load_state_dict_from_url(
691
+ self.checkpoint_url.format(conf.model_name), map_location="cpu"
692
+ )
693
+ self.load_state_dict(state_dict, strict=True)
694
+
695
+ def get_resblock(self, c_in, c_out, conv_type, mask):
696
+ return ResBlock(
697
+ c_in,
698
+ c_out,
699
+ 1,
700
+ nn.Conv2d(c_in, c_out, 1),
701
+ gate=self.gate,
702
+ norm_layer=self.norm,
703
+ conv_type=conv_type,
704
+ mask=mask,
705
+ )
706
+
707
+ def extract_dense_map(self, image):
708
+ # Pads images such that dimensions are divisible by
709
+ div_by = 2**5
710
+ padder = InputPadder(image.shape[-2], image.shape[-1], div_by)
711
+ image = padder.pad(image)
712
+
713
+ # ================================== feature encoder
714
+ x1 = self.block1(image) # B x c1 x H x W
715
+ x2 = self.pool2(x1)
716
+ x2 = self.block2(x2) # B x c2 x H/2 x W/2
717
+ x3 = self.pool4(x2)
718
+ x3 = self.block3(x3) # B x c3 x H/8 x W/8
719
+ x4 = self.pool4(x3)
720
+ x4 = self.block4(x4) # B x dim x H/32 x W/32
721
+ # ================================== feature aggregation
722
+ x1 = self.gate(self.conv1(x1)) # B x dim//4 x H x W
723
+ x2 = self.gate(self.conv2(x2)) # B x dim//4 x H//2 x W//2
724
+ x3 = self.gate(self.conv3(x3)) # B x dim//4 x H//8 x W//8
725
+ x4 = self.gate(self.conv4(x4)) # B x dim//4 x H//32 x W//32
726
+ x2_up = self.upsample2(x2) # B x dim//4 x H x W
727
+ x3_up = self.upsample8(x3) # B x dim//4 x H x W
728
+ x4_up = self.upsample32(x4) # B x dim//4 x H x W
729
+ x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1)
730
+ # ================================== score head
731
+ score_map = torch.sigmoid(self.score_head(x1234))
732
+ feature_map = torch.nn.functional.normalize(x1234, p=2, dim=1)
733
+
734
+ # Unpads images
735
+ feature_map = padder.unpad(feature_map)
736
+ score_map = padder.unpad(score_map)
737
+
738
+ return feature_map, score_map
739
+
740
+ def forward(self, data: dict) -> dict:
741
+ image = data["image"]
742
+ if image.shape[1] == 1:
743
+ image = grayscale_to_rgb(image)
744
+ feature_map, score_map = self.extract_dense_map(image)
745
+ keypoints, kptscores, scoredispersitys = self.dkd(
746
+ score_map, image_size=data.get("image_size")
747
+ )
748
+ descriptors, offsets = self.desc_head(feature_map, keypoints)
749
+
750
+ _, _, h, w = image.shape
751
+ wh = torch.tensor([w - 1, h - 1], device=image.device)
752
+ # no padding required
753
+ # we can set detection_threshold=-1 and conf.max_num_keypoints > 0
754
+ return {
755
+ "keypoints": wh * (torch.stack(keypoints) + 1) / 2.0, # B x N x 2
756
+ "descriptors": torch.stack(descriptors), # B x N x D
757
+ "keypoint_scores": torch.stack(kptscores), # B x N
758
+ }
third_party/LightGlue/lightglue/disk.py CHANGED
@@ -1,11 +1,10 @@
1
- import torch
2
- import torch.nn as nn
3
  import kornia
4
- from types import SimpleNamespace
5
- from .utils import ImagePreprocessor
6
 
 
7
 
8
- class DISK(nn.Module):
 
9
  default_conf = {
10
  "weights": "depth",
11
  "max_num_keypoints": None,
@@ -16,7 +15,6 @@ class DISK(nn.Module):
16
  }
17
 
18
  preprocess_conf = {
19
- **ImagePreprocessor.default_conf,
20
  "resize": 1024,
21
  "grayscale": False,
22
  }
@@ -24,9 +22,7 @@ class DISK(nn.Module):
24
  required_data_keys = ["image"]
25
 
26
  def __init__(self, **conf) -> None:
27
- super().__init__()
28
- self.conf = {**self.default_conf, **conf}
29
- self.conf = SimpleNamespace(**self.conf)
30
  self.model = kornia.feature.DISK.from_pretrained(self.conf.weights)
31
 
32
  def forward(self, data: dict) -> dict:
@@ -34,6 +30,8 @@ class DISK(nn.Module):
34
  for key in self.required_data_keys:
35
  assert key in data, f"Missing key {key} in data"
36
  image = data["image"]
 
 
37
  features = self.model(
38
  image,
39
  n=self.conf.max_num_keypoints,
@@ -51,19 +49,7 @@ class DISK(nn.Module):
51
  descriptors = torch.stack(descriptors, 0)
52
 
53
  return {
54
- "keypoints": keypoints.to(image),
55
- "keypoint_scores": scores.to(image),
56
- "descriptors": descriptors.to(image),
57
  }
58
-
59
- def extract(self, img: torch.Tensor, **conf) -> dict:
60
- """Perform extraction with online resizing"""
61
- if img.dim() == 3:
62
- img = img[None] # add batch dim
63
- assert img.dim() == 4 and img.shape[0] == 1
64
- shape = img.shape[-2:][::-1]
65
- img, scales = ImagePreprocessor(**{**self.preprocess_conf, **conf})(img)
66
- feats = self.forward({"image": img})
67
- feats["image_size"] = torch.tensor(shape)[None].to(img).float()
68
- feats["keypoints"] = (feats["keypoints"] + 0.5) / scales[None] - 0.5
69
- return feats
 
 
 
1
  import kornia
2
+ import torch
 
3
 
4
+ from .utils import Extractor
5
 
6
+
7
+ class DISK(Extractor):
8
  default_conf = {
9
  "weights": "depth",
10
  "max_num_keypoints": None,
 
15
  }
16
 
17
  preprocess_conf = {
 
18
  "resize": 1024,
19
  "grayscale": False,
20
  }
 
22
  required_data_keys = ["image"]
23
 
24
  def __init__(self, **conf) -> None:
25
+ super().__init__(**conf) # Update with default configuration.
 
 
26
  self.model = kornia.feature.DISK.from_pretrained(self.conf.weights)
27
 
28
  def forward(self, data: dict) -> dict:
 
30
  for key in self.required_data_keys:
31
  assert key in data, f"Missing key {key} in data"
32
  image = data["image"]
33
+ if image.shape[1] == 1:
34
+ image = kornia.color.grayscale_to_rgb(image)
35
  features = self.model(
36
  image,
37
  n=self.conf.max_num_keypoints,
 
49
  descriptors = torch.stack(descriptors, 0)
50
 
51
  return {
52
+ "keypoints": keypoints.to(image).contiguous(),
53
+ "keypoint_scores": scores.to(image).contiguous(),
54
+ "descriptors": descriptors.to(image).contiguous(),
55
  }
 
 
 
 
 
 
 
 
 
 
 
 
third_party/LightGlue/lightglue/dog_hardnet.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from kornia.color import rgb_to_grayscale
3
+ from kornia.feature import HardNet, LAFDescriptor, laf_from_center_scale_ori
4
+
5
+ from .sift import SIFT
6
+
7
+
8
+ class DoGHardNet(SIFT):
9
+ required_data_keys = ["image"]
10
+
11
+ def __init__(self, **conf):
12
+ super().__init__(**conf)
13
+ self.laf_desc = LAFDescriptor(HardNet(True)).eval()
14
+
15
+ def forward(self, data: dict) -> dict:
16
+ image = data["image"]
17
+ if image.shape[1] == 3:
18
+ image = rgb_to_grayscale(image)
19
+ device = image.device
20
+ self.laf_desc = self.laf_desc.to(device)
21
+ self.laf_desc.descriptor = self.laf_desc.descriptor.eval()
22
+ pred = []
23
+ if "image_size" in data.keys():
24
+ im_size = data.get("image_size").long()
25
+ else:
26
+ im_size = None
27
+ for k in range(len(image)):
28
+ img = image[k]
29
+ if im_size is not None:
30
+ w, h = data["image_size"][k]
31
+ img = img[:, : h.to(torch.int32), : w.to(torch.int32)]
32
+ p = self.extract_single_image(img)
33
+ lafs = laf_from_center_scale_ori(
34
+ p["keypoints"].reshape(1, -1, 2),
35
+ 6.0 * p["scales"].reshape(1, -1, 1, 1),
36
+ torch.rad2deg(p["oris"]).reshape(1, -1, 1),
37
+ ).to(device)
38
+ p["descriptors"] = self.laf_desc(img[None], lafs).reshape(-1, 128)
39
+ pred.append(p)
40
+ pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
41
+ return pred
third_party/LightGlue/lightglue/lightglue.py CHANGED
@@ -1,11 +1,12 @@
 
1
  from pathlib import Path
2
  from types import SimpleNamespace
3
- import warnings
 
4
  import numpy as np
5
  import torch
6
- from torch import nn
7
  import torch.nn.functional as F
8
- from typing import Optional, List, Callable
9
 
10
  try:
11
  from flash_attn.modules.mha import FlashCrossAttention
@@ -21,15 +22,32 @@ torch.backends.cudnn.deterministic = True
21
 
22
 
23
  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
24
- def normalize_keypoints(kpts: torch.Tensor, size: torch.Tensor) -> torch.Tensor:
25
- if isinstance(size, torch.Size):
26
- size = torch.tensor(size)[None]
27
- shift = size.float().to(kpts) / 2
28
- scale = size.max(1).values.float().to(kpts) / 2
29
- kpts = (kpts - shift[:, None]) / scale[:, None, None]
 
 
 
 
 
30
  return kpts
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def rotate_half(x: torch.Tensor) -> torch.Tensor:
34
  x = x.unflatten(-1, (-1, 2))
35
  x1, x2 = x.unbind(dim=-1)
@@ -64,8 +82,8 @@ class TokenConfidence(nn.Module):
64
  def forward(self, desc0: torch.Tensor, desc1: torch.Tensor):
65
  """get confidence tokens"""
66
  return (
67
- self.token(desc0.detach().float()).squeeze(-1),
68
- self.token(desc1.detach().float()).squeeze(-1),
69
  )
70
 
71
 
@@ -79,29 +97,40 @@ class Attention(nn.Module):
79
  stacklevel=2,
80
  )
81
  self.enable_flash = allow_flash and FLASH_AVAILABLE
 
82
  if allow_flash and FlashCrossAttention:
83
  self.flash_ = FlashCrossAttention()
 
 
84
 
85
- def forward(self, q, k, v) -> torch.Tensor:
 
 
86
  if self.enable_flash and q.device.type == "cuda":
87
- if FlashCrossAttention:
88
- q, k, v = [x.transpose(-2, -3) for x in [q, k, v]]
89
- m = self.flash_(q.half(), torch.stack([k, v], 2).half())
90
- return m.transpose(-2, -3).to(q.dtype)
91
- else: # use torch 2.0 scaled_dot_product_attention with flash
92
  args = [x.half().contiguous() for x in [q, k, v]]
93
- with torch.backends.cuda.sdp_kernel(enable_flash=True):
94
- return F.scaled_dot_product_attention(*args).to(q.dtype)
95
- elif hasattr(F, "scaled_dot_product_attention"):
 
 
 
 
 
96
  args = [x.contiguous() for x in [q, k, v]]
97
- return F.scaled_dot_product_attention(*args).to(q.dtype)
 
98
  else:
99
  s = q.shape[-1] ** -0.5
100
- attn = F.softmax(torch.einsum("...id,...jd->...ij", q, k) * s, -1)
 
 
 
101
  return torch.einsum("...ij,...jd->...id", attn, v)
102
 
103
 
104
- class Transformer(nn.Module):
105
  def __init__(
106
  self, embed_dim: int, num_heads: int, flash: bool = False, bias: bool = True
107
  ) -> None:
@@ -120,22 +149,23 @@ class Transformer(nn.Module):
120
  nn.Linear(2 * embed_dim, embed_dim),
121
  )
122
 
123
- def _forward(self, x: torch.Tensor, encoding: Optional[torch.Tensor] = None):
 
 
 
 
 
124
  qkv = self.Wqkv(x)
125
  qkv = qkv.unflatten(-1, (self.num_heads, -1, 3)).transpose(1, 2)
126
  q, k, v = qkv[..., 0], qkv[..., 1], qkv[..., 2]
127
- if encoding is not None:
128
- q = apply_cached_rotary_emb(encoding, q)
129
- k = apply_cached_rotary_emb(encoding, k)
130
- context = self.inner_attn(q, k, v)
131
  message = self.out_proj(context.transpose(1, 2).flatten(start_dim=-2))
132
  return x + self.ffn(torch.cat([x, message], -1))
133
 
134
- def forward(self, x0, x1, encoding0=None, encoding1=None):
135
- return self._forward(x0, encoding0), self._forward(x1, encoding1)
136
 
137
-
138
- class CrossTransformer(nn.Module):
139
  def __init__(
140
  self, embed_dim: int, num_heads: int, flash: bool = False, bias: bool = True
141
  ) -> None:
@@ -153,7 +183,6 @@ class CrossTransformer(nn.Module):
153
  nn.GELU(),
154
  nn.Linear(2 * embed_dim, embed_dim),
155
  )
156
-
157
  if flash and FLASH_AVAILABLE:
158
  self.flash = Attention(True)
159
  else:
@@ -162,23 +191,31 @@ class CrossTransformer(nn.Module):
162
  def map_(self, func: Callable, x0: torch.Tensor, x1: torch.Tensor):
163
  return func(x0), func(x1)
164
 
165
- def forward(self, x0: torch.Tensor, x1: torch.Tensor) -> List[torch.Tensor]:
 
 
166
  qk0, qk1 = self.map_(self.to_qk, x0, x1)
167
  v0, v1 = self.map_(self.to_v, x0, x1)
168
  qk0, qk1, v0, v1 = map(
169
  lambda t: t.unflatten(-1, (self.heads, -1)).transpose(1, 2),
170
  (qk0, qk1, v0, v1),
171
  )
172
- if self.flash is not None:
173
- m0 = self.flash(qk0, qk1, v1)
174
- m1 = self.flash(qk1, qk0, v0)
 
 
175
  else:
176
  qk0, qk1 = qk0 * self.scale**0.5, qk1 * self.scale**0.5
177
- sim = torch.einsum("b h i d, b h j d -> b h i j", qk0, qk1)
 
 
178
  attn01 = F.softmax(sim, dim=-1)
179
  attn10 = F.softmax(sim.transpose(-2, -1).contiguous(), dim=-1)
180
  m0 = torch.einsum("bhij, bhjd -> bhid", attn01, v1)
181
  m1 = torch.einsum("bhji, bhjd -> bhid", attn10.transpose(-2, -1), v0)
 
 
182
  m0, m1 = self.map_(lambda t: t.transpose(1, 2).flatten(start_dim=-2), m0, m1)
183
  m0, m1 = self.map_(self.to_out, m0, m1)
184
  x0 = x0 + self.ffn(torch.cat([x0, m0], -1))
@@ -186,6 +223,38 @@ class CrossTransformer(nn.Module):
186
  return x0, x1
187
 
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def sigmoid_log_double_softmax(
190
  sim: torch.Tensor, z0: torch.Tensor, z1: torch.Tensor
191
  ) -> torch.Tensor:
@@ -219,29 +288,26 @@ class MatchAssignment(nn.Module):
219
  scores = sigmoid_log_double_softmax(sim, z0, z1)
220
  return scores, sim
221
 
222
- def scores(self, desc0: torch.Tensor, desc1: torch.Tensor):
223
- m0 = torch.sigmoid(self.matchability(desc0)).squeeze(-1)
224
- m1 = torch.sigmoid(self.matchability(desc1)).squeeze(-1)
225
- return m0, m1
226
 
227
 
228
  def filter_matches(scores: torch.Tensor, th: float):
229
  """obtain matches from a log assignment matrix [Bx M+1 x N+1]"""
230
  max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1)
231
  m0, m1 = max0.indices, max1.indices
232
- mutual0 = torch.arange(m0.shape[1]).to(m0)[None] == m1.gather(1, m0)
233
- mutual1 = torch.arange(m1.shape[1]).to(m1)[None] == m0.gather(1, m1)
 
 
234
  max0_exp = max0.values.exp()
235
  zero = max0_exp.new_tensor(0)
236
  mscores0 = torch.where(mutual0, max0_exp, zero)
237
  mscores1 = torch.where(mutual1, mscores0.gather(1, m1), zero)
238
- if th is not None:
239
- valid0 = mutual0 & (mscores0 > th)
240
- else:
241
- valid0 = mutual0
242
  valid1 = mutual1 & valid0.gather(1, m1)
243
- m0 = torch.where(valid0, m0, m0.new_tensor(-1))
244
- m1 = torch.where(valid1, m1, m1.new_tensor(-1))
245
  return m0, m1, mscores0, mscores1
246
 
247
 
@@ -250,6 +316,7 @@ class LightGlue(nn.Module):
250
  "name": "lightglue", # just for interfacing
251
  "input_dim": 256, # input descriptor dimension (autoselected from weights)
252
  "descriptor_dim": 256,
 
253
  "n_layers": 9,
254
  "num_heads": 4,
255
  "flash": True, # enable FlashAttention if available.
@@ -260,23 +327,56 @@ class LightGlue(nn.Module):
260
  "weights": None,
261
  }
262
 
 
 
 
 
 
 
 
 
 
263
  required_data_keys = ["image0", "image1"]
264
 
265
  version = "v0.1_arxiv"
266
  url = "https://github.com/cvg/LightGlue/releases/download/{}/{}_lightglue.pth"
267
 
268
  features = {
269
- "superpoint": ("superpoint_lightglue", 256),
270
- "disk": ("disk_lightglue", 128),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  }
272
 
273
  def __init__(self, features="superpoint", **conf) -> None:
274
  super().__init__()
275
- self.conf = {**self.default_conf, **conf}
276
  if features is not None:
277
- assert features in list(self.features.keys())
278
- self.conf["weights"], self.conf["input_dim"] = self.features[features]
279
- self.conf = conf = SimpleNamespace(**self.conf)
 
 
 
 
280
 
281
  if conf.input_dim != conf.descriptor_dim:
282
  self.input_proj = nn.Linear(conf.input_dim, conf.descriptor_dim, bias=True)
@@ -284,22 +384,30 @@ class LightGlue(nn.Module):
284
  self.input_proj = nn.Identity()
285
 
286
  head_dim = conf.descriptor_dim // conf.num_heads
287
- self.posenc = LearnableFourierPositionalEncoding(2, head_dim, head_dim)
 
 
288
 
289
  h, n, d = conf.num_heads, conf.n_layers, conf.descriptor_dim
290
- self.self_attn = nn.ModuleList(
291
- [Transformer(d, h, conf.flash) for _ in range(n)]
292
- )
293
- self.cross_attn = nn.ModuleList(
294
- [CrossTransformer(d, h, conf.flash) for _ in range(n)]
295
  )
 
296
  self.log_assignment = nn.ModuleList([MatchAssignment(d) for _ in range(n)])
297
  self.token_confidence = nn.ModuleList(
298
  [TokenConfidence(d) for _ in range(n - 1)]
299
  )
 
 
 
 
 
 
300
 
 
301
  if features is not None:
302
- fname = f"{conf.weights}_{self.version}.pth".replace(".", "-")
303
  state_dict = torch.hub.load_state_dict_from_url(
304
  self.url.format(self.version, features), file_name=fname
305
  )
@@ -308,9 +416,35 @@ class LightGlue(nn.Module):
308
  path = Path(__file__).parent
309
  path = path / "weights/{}.pth".format(self.conf.weights)
310
  state_dict = torch.load(str(path), map_location="cpu")
 
 
 
 
 
 
 
 
311
  self.load_state_dict(state_dict, strict=False)
312
 
313
- print("Loaded LightGlue model")
314
 
315
  def forward(self, data: dict) -> dict:
316
  """
@@ -326,12 +460,15 @@ class LightGlue(nn.Module):
326
  descriptors: [B x N x D]
327
  image: [B x C x H x W] or image_size: [B x 2]
328
  Output (dict):
329
- log_assignment: [B x M+1 x N+1]
330
  matches0: [B x M]
331
  matching_scores0: [B x M]
332
  matches1: [B x N]
333
  matching_scores1: [B x N]
334
- matches: List[[Si x 2]], scores: List[[Si]]
 
 
 
 
335
  """
336
  with torch.autocast(enabled=self.conf.mp, device_type="cuda"):
337
  return self._forward(data)
@@ -340,20 +477,23 @@ class LightGlue(nn.Module):
340
  for key in self.required_data_keys:
341
  assert key in data, f"Missing key {key} in data"
342
  data0, data1 = data["image0"], data["image1"]
343
- kpts0_, kpts1_ = data0["keypoints"], data1["keypoints"]
344
- b, m, _ = kpts0_.shape
345
- b, n, _ = kpts1_.shape
 
346
  size0, size1 = data0.get("image_size"), data1.get("image_size")
347
- size0 = size0 if size0 is not None else data0["image"].shape[-2:][::-1]
348
- size1 = size1 if size1 is not None else data1["image"].shape[-2:][::-1]
349
- kpts0 = normalize_keypoints(kpts0_, size=size0)
350
- kpts1 = normalize_keypoints(kpts1_, size=size1)
351
 
352
- assert torch.all(kpts0 >= -1) and torch.all(kpts0 <= 1)
353
- assert torch.all(kpts1 >= -1) and torch.all(kpts1 <= 1)
354
-
355
- desc0 = data0["descriptors"].detach()
356
- desc1 = data1["descriptors"].detach()
 
 
 
 
357
 
358
  assert desc0.shape[-1] == self.conf.input_dim
359
  assert desc1.shape[-1] == self.conf.input_dim
@@ -362,109 +502,154 @@ class LightGlue(nn.Module):
362
  desc0 = desc0.half()
363
  desc1 = desc1.half()
364
365
  desc0 = self.input_proj(desc0)
366
  desc1 = self.input_proj(desc1)
367
-
368
  # cache positional embeddings
369
  encoding0 = self.posenc(kpts0)
370
  encoding1 = self.posenc(kpts1)
371
 
372
  # GNN + final_proj + assignment
373
- ind0 = torch.arange(0, m).to(device=kpts0.device)[None]
374
- ind1 = torch.arange(0, n).to(device=kpts0.device)[None]
375
- prune0 = torch.ones_like(ind0) # store layer where pruning is detected
376
- prune1 = torch.ones_like(ind1)
377
- dec, wic = self.conf.depth_confidence, self.conf.width_confidence
 
 
 
 
378
  token0, token1 = None, None
379
  for i in range(self.conf.n_layers):
380
- # self+cross attention
381
- desc0, desc1 = self.self_attn[i](desc0, desc1, encoding0, encoding1)
382
- desc0, desc1 = self.cross_attn[i](desc0, desc1)
 
 
383
  if i == self.conf.n_layers - 1:
384
  continue # no early stopping or adaptive width at last layer
385
- if dec > 0: # early stopping
 
386
  token0, token1 = self.token_confidence[i](desc0, desc1)
387
- if self.stop(token0, token1, self.conf_th(i), dec, m + n):
388
- break
389
- if wic > 0: # point pruning
390
- match0, match1 = self.log_assignment[i].scores(desc0, desc1)
391
- mask0 = self.get_mask(token0, match0, self.conf_th(i), 1 - wic)
392
- mask1 = self.get_mask(token1, match1, self.conf_th(i), 1 - wic)
393
- ind0, ind1 = ind0[mask0][None], ind1[mask1][None]
394
- desc0, desc1 = desc0[mask0][None], desc1[mask1][None]
395
- if desc0.shape[-2] == 0 or desc1.shape[-2] == 0:
396
  break
397
- encoding0 = encoding0[:, :, mask0][:, None]
398
- encoding1 = encoding1[:, :, mask1][:, None]
399
- prune0[:, ind0] += 1
400
- prune1[:, ind1] += 1
401
-
402
- if wic > 0: # scatter with indices after pruning
403
- scores_, _ = self.log_assignment[i](desc0, desc1)
404
- dt, dev = scores_.dtype, scores_.device
405
- scores = torch.zeros(b, m + 1, n + 1, dtype=dt, device=dev)
406
- scores[:, :-1, :-1] = -torch.inf
407
- scores[:, ind0[0], -1] = scores_[:, :-1, -1]
408
- scores[:, -1, ind1[0]] = scores_[:, -1, :-1]
409
- x, y = torch.meshgrid(ind0[0], ind1[0], indexing="ij")
410
- scores[:, x, y] = scores_[:, :-1, :-1]
411
- else:
412
- scores, _ = self.log_assignment[i](desc0, desc1)
413
-
414
  m0, m1, mscores0, mscores1 = filter_matches(scores, self.conf.filter_threshold)
415
-
416
  matches, mscores = [], []
417
  for k in range(b):
418
  valid = m0[k] > -1
419
- matches.append(torch.stack([torch.where(valid)[0], m0[k][valid]], -1))
 
 
 
 
 
420
  mscores.append(mscores0[k][valid])
421
422
  return {
423
- "log_assignment": scores,
424
  "matches0": m0,
425
  "matches1": m1,
426
  "matching_scores0": mscores0,
427
  "matching_scores1": mscores1,
428
  "stop": i + 1,
429
- "prune0": prune0,
430
- "prune1": prune1,
431
  "matches": matches,
432
  "scores": mscores,
 
 
433
  }
434
 
435
- def conf_th(self, i: int) -> float:
436
  """scaled confidence threshold"""
437
- return np.clip(0.8 + 0.1 * np.exp(-4.0 * i / self.conf.n_layers), 0, 1)
 
438
 
439
- def get_mask(
440
- self,
441
- confidence: torch.Tensor,
442
- match: torch.Tensor,
443
- conf_th: float,
444
- match_th: float,
445
  ) -> torch.Tensor:
446
  """mask points which should be removed"""
447
- if conf_th and confidence is not None:
448
- mask = (
449
- torch.where(confidence > conf_th, match, match.new_tensor(1.0))
450
- > match_th
451
- )
452
- else:
453
- mask = match > match_th
454
- return mask
455
 
456
- def stop(
457
  self,
458
- token0: torch.Tensor,
459
- token1: torch.Tensor,
460
- conf_th: float,
461
- inl_th: float,
462
- seql: int,
463
  ) -> torch.Tensor:
464
  """evaluate stopping condition"""
465
- tokens = torch.cat([token0, token1], -1)
466
- if conf_th:
467
- pos = 1.0 - (tokens < conf_th).float().sum() / seql
468
- return pos > inl_th
 
 
 
 
469
  else:
470
- return tokens.mean() > inl_th
 
1
+ import warnings
2
  from pathlib import Path
3
  from types import SimpleNamespace
4
+ from typing import Callable, List, Optional, Tuple
5
+
6
  import numpy as np
7
  import torch
 
8
  import torch.nn.functional as F
9
+ from torch import nn
10
 
11
  try:
12
  from flash_attn.modules.mha import FlashCrossAttention
 
22
 
23
 
24
  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
25
+ def normalize_keypoints(
26
+ kpts: torch.Tensor, size: Optional[torch.Tensor] = None
27
+ ) -> torch.Tensor:
28
+ if size is None:
29
+ size = 1 + kpts.max(-2).values - kpts.min(-2).values
30
+ elif not isinstance(size, torch.Tensor):
31
+ size = torch.tensor(size, device=kpts.device, dtype=kpts.dtype)
32
+ size = size.to(kpts)
33
+ shift = size / 2
34
+ scale = size.max(-1).values / 2
35
+ kpts = (kpts - shift[..., None, :]) / scale[..., None, None]
36
  return kpts
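
The normalization above shifts keypoints to the image centre and divides by half of the longer side, so coordinates land roughly in [-1, 1]. A standalone sketch of the same arithmetic for a single unbatched image (plain torch; the keypoint values and the 640x480 size are made up for illustration):

import torch

# Toy (x, y) keypoints in a hypothetical 640x480 image.
kpts = torch.tensor([[0.0, 0.0], [320.0, 240.0], [639.0, 479.0]])
size = torch.tensor([640.0, 480.0])   # (width, height)

shift = size / 2                      # image centre
scale = size.max() / 2                # half of the longer side
normed = (kpts - shift) / scale
print(normed)
# approx. [[-1.0000, -0.7500], [0.0000, 0.0000], [0.9969, 0.7469]]
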
37
 
38
 
39
+ def pad_to_length(x: torch.Tensor, length: int) -> Tuple[torch.Tensor]:
40
+ if length <= x.shape[-2]:
41
+ return x, torch.ones_like(x[..., :1], dtype=torch.bool)
42
+ pad = torch.ones(
43
+ *x.shape[:-2], length - x.shape[-2], x.shape[-1], device=x.device, dtype=x.dtype
44
+ )
45
+ y = torch.cat([x, pad], dim=-2)
46
+ mask = torch.zeros(*y.shape[:-1], 1, dtype=torch.bool, device=x.device)
47
+ mask[..., : x.shape[-2], :] = True
48
+ return y, mask
49
+
50
+
51
  def rotate_half(x: torch.Tensor) -> torch.Tensor:
52
  x = x.unflatten(-1, (-1, 2))
53
  x1, x2 = x.unbind(dim=-1)
 
82
  def forward(self, desc0: torch.Tensor, desc1: torch.Tensor):
83
  """get confidence tokens"""
84
  return (
85
+ self.token(desc0.detach()).squeeze(-1),
86
+ self.token(desc1.detach()).squeeze(-1),
87
  )
88
 
89
 
 
97
  stacklevel=2,
98
  )
99
  self.enable_flash = allow_flash and FLASH_AVAILABLE
100
+ self.has_sdp = hasattr(F, "scaled_dot_product_attention")
101
  if allow_flash and FlashCrossAttention:
102
  self.flash_ = FlashCrossAttention()
103
+ if self.has_sdp:
104
+ torch.backends.cuda.enable_flash_sdp(allow_flash)
105
 
106
+ def forward(self, q, k, v, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
107
+ if q.shape[-2] == 0 or k.shape[-2] == 0:
108
+ return q.new_zeros((*q.shape[:-1], v.shape[-1]))
109
  if self.enable_flash and q.device.type == "cuda":
110
+ # use torch 2.0 scaled_dot_product_attention with flash
111
+ if self.has_sdp:
 
 
 
112
  args = [x.half().contiguous() for x in [q, k, v]]
113
+ v = F.scaled_dot_product_attention(*args, attn_mask=mask).to(q.dtype)
114
+ return v if mask is None else v.nan_to_num()
115
+ else:
116
+ assert mask is None
117
+ q, k, v = [x.transpose(-2, -3).contiguous() for x in [q, k, v]]
118
+ m = self.flash_(q.half(), torch.stack([k, v], 2).half())
119
+ return m.transpose(-2, -3).to(q.dtype).clone()
120
+ elif self.has_sdp:
121
  args = [x.contiguous() for x in [q, k, v]]
122
+ v = F.scaled_dot_product_attention(*args, attn_mask=mask)
123
+ return v if mask is None else v.nan_to_num()
124
  else:
125
  s = q.shape[-1] ** -0.5
126
+ sim = torch.einsum("...id,...jd->...ij", q, k) * s
127
+ if mask is not None:
128
+ sim.masked_fill(~mask, -float("inf"))
129
+ attn = F.softmax(sim, -1)
130
  return torch.einsum("...ij,...jd->...id", attn, v)
131
 
132
 
133
+ class SelfBlock(nn.Module):
134
  def __init__(
135
  self, embed_dim: int, num_heads: int, flash: bool = False, bias: bool = True
136
  ) -> None:
 
149
  nn.Linear(2 * embed_dim, embed_dim),
150
  )
151
 
152
+ def forward(
153
+ self,
154
+ x: torch.Tensor,
155
+ encoding: torch.Tensor,
156
+ mask: Optional[torch.Tensor] = None,
157
+ ) -> torch.Tensor:
158
  qkv = self.Wqkv(x)
159
  qkv = qkv.unflatten(-1, (self.num_heads, -1, 3)).transpose(1, 2)
160
  q, k, v = qkv[..., 0], qkv[..., 1], qkv[..., 2]
161
+ q = apply_cached_rotary_emb(encoding, q)
162
+ k = apply_cached_rotary_emb(encoding, k)
163
+ context = self.inner_attn(q, k, v, mask=mask)
 
164
  message = self.out_proj(context.transpose(1, 2).flatten(start_dim=-2))
165
  return x + self.ffn(torch.cat([x, message], -1))
166
 
 
 
167
 
168
+ class CrossBlock(nn.Module):
 
169
  def __init__(
170
  self, embed_dim: int, num_heads: int, flash: bool = False, bias: bool = True
171
  ) -> None:
 
183
  nn.GELU(),
184
  nn.Linear(2 * embed_dim, embed_dim),
185
  )
 
186
  if flash and FLASH_AVAILABLE:
187
  self.flash = Attention(True)
188
  else:
 
191
  def map_(self, func: Callable, x0: torch.Tensor, x1: torch.Tensor):
192
  return func(x0), func(x1)
193
 
194
+ def forward(
195
+ self, x0: torch.Tensor, x1: torch.Tensor, mask: Optional[torch.Tensor] = None
196
+ ) -> List[torch.Tensor]:
197
  qk0, qk1 = self.map_(self.to_qk, x0, x1)
198
  v0, v1 = self.map_(self.to_v, x0, x1)
199
  qk0, qk1, v0, v1 = map(
200
  lambda t: t.unflatten(-1, (self.heads, -1)).transpose(1, 2),
201
  (qk0, qk1, v0, v1),
202
  )
203
+ if self.flash is not None and qk0.device.type == "cuda":
204
+ m0 = self.flash(qk0, qk1, v1, mask)
205
+ m1 = self.flash(
206
+ qk1, qk0, v0, mask.transpose(-1, -2) if mask is not None else None
207
+ )
208
  else:
209
  qk0, qk1 = qk0 * self.scale**0.5, qk1 * self.scale**0.5
210
+ sim = torch.einsum("bhid, bhjd -> bhij", qk0, qk1)
211
+ if mask is not None:
212
+ sim = sim.masked_fill(~mask, -float("inf"))
213
  attn01 = F.softmax(sim, dim=-1)
214
  attn10 = F.softmax(sim.transpose(-2, -1).contiguous(), dim=-1)
215
  m0 = torch.einsum("bhij, bhjd -> bhid", attn01, v1)
216
  m1 = torch.einsum("bhji, bhjd -> bhid", attn10.transpose(-2, -1), v0)
217
+ if mask is not None:
218
+ m0, m1 = m0.nan_to_num(), m1.nan_to_num()
219
  m0, m1 = self.map_(lambda t: t.transpose(1, 2).flatten(start_dim=-2), m0, m1)
220
  m0, m1 = self.map_(self.to_out, m0, m1)
221
  x0 = x0 + self.ffn(torch.cat([x0, m0], -1))
 
223
  return x0, x1
224
 
225
 
226
+ class TransformerLayer(nn.Module):
227
+ def __init__(self, *args, **kwargs):
228
+ super().__init__()
229
+ self.self_attn = SelfBlock(*args, **kwargs)
230
+ self.cross_attn = CrossBlock(*args, **kwargs)
231
+
232
+ def forward(
233
+ self,
234
+ desc0,
235
+ desc1,
236
+ encoding0,
237
+ encoding1,
238
+ mask0: Optional[torch.Tensor] = None,
239
+ mask1: Optional[torch.Tensor] = None,
240
+ ):
241
+ if mask0 is not None and mask1 is not None:
242
+ return self.masked_forward(desc0, desc1, encoding0, encoding1, mask0, mask1)
243
+ else:
244
+ desc0 = self.self_attn(desc0, encoding0)
245
+ desc1 = self.self_attn(desc1, encoding1)
246
+ return self.cross_attn(desc0, desc1)
247
+
248
+ # This part is compiled and allows padding inputs
249
+ def masked_forward(self, desc0, desc1, encoding0, encoding1, mask0, mask1):
250
+ mask = mask0 & mask1.transpose(-1, -2)
251
+ mask0 = mask0 & mask0.transpose(-1, -2)
252
+ mask1 = mask1 & mask1.transpose(-1, -2)
253
+ desc0 = self.self_attn(desc0, encoding0, mask0)
254
+ desc1 = self.self_attn(desc1, encoding1, mask1)
255
+ return self.cross_attn(desc0, desc1, mask)
256
+
257
+
258
  def sigmoid_log_double_softmax(
259
  sim: torch.Tensor, z0: torch.Tensor, z1: torch.Tensor
260
  ) -> torch.Tensor:
 
288
  scores = sigmoid_log_double_softmax(sim, z0, z1)
289
  return scores, sim
290
 
291
+ def get_matchability(self, desc: torch.Tensor):
292
+ return torch.sigmoid(self.matchability(desc)).squeeze(-1)
 
 
293
 
294
 
295
  def filter_matches(scores: torch.Tensor, th: float):
296
  """obtain matches from a log assignment matrix [Bx M+1 x N+1]"""
297
  max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1)
298
  m0, m1 = max0.indices, max1.indices
299
+ indices0 = torch.arange(m0.shape[1], device=m0.device)[None]
300
+ indices1 = torch.arange(m1.shape[1], device=m1.device)[None]
301
+ mutual0 = indices0 == m1.gather(1, m0)
302
+ mutual1 = indices1 == m0.gather(1, m1)
303
  max0_exp = max0.values.exp()
304
  zero = max0_exp.new_tensor(0)
305
  mscores0 = torch.where(mutual0, max0_exp, zero)
306
  mscores1 = torch.where(mutual1, mscores0.gather(1, m1), zero)
307
+ valid0 = mutual0 & (mscores0 > th)
 
 
 
308
  valid1 = mutual1 & valid0.gather(1, m1)
309
+ m0 = torch.where(valid0, m0, -1)
310
+ m1 = torch.where(valid1, m1, -1)
311
  return m0, m1, mscores0, mscores1
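
filter_matches keeps a pair only when the two keypoints pick each other as best candidate (mutual nearest neighbours) and the exponentiated score clears the threshold. A small self-contained sketch of just the mutual check on a toy 1x3x4 score matrix (no dustbin row/column, so it is an illustration rather than a drop-in replacement):

import torch

sim = torch.tensor([[[0.9, 0.1, 0.0, 0.0],
                     [0.2, 0.8, 0.7, 0.0],
                     [0.0, 0.6, 0.1, 0.3]]])   # B x M x N similarity scores

max0, max1 = sim.max(2), sim.max(1)            # best column per row / best row per column
m0, m1 = max0.indices, max1.indices
mutual0 = torch.arange(m0.shape[1])[None] == m1.gather(1, m0)

print(m0)       # tensor([[0, 1, 1]])  -> row 2 also prefers column 1 ...
print(mutual0)  # tensor([[ True,  True, False]])  ... but column 1 prefers row 1
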
312
 
313
 
 
316
  "name": "lightglue", # just for interfacing
317
  "input_dim": 256, # input descriptor dimension (autoselected from weights)
318
  "descriptor_dim": 256,
319
+ "add_scale_ori": False,
320
  "n_layers": 9,
321
  "num_heads": 4,
322
  "flash": True, # enable FlashAttention if available.
 
327
  "weights": None,
328
  }
329
 
330
+ # Point pruning involves an overhead (gather).
331
+ # Therefore, we only activate it if there are enough keypoints.
332
+ pruning_keypoint_thresholds = {
333
+ "cpu": -1,
334
+ "mps": -1,
335
+ "cuda": 1024,
336
+ "flash": 1536,
337
+ }
338
+
339
  required_data_keys = ["image0", "image1"]
340
 
341
  version = "v0.1_arxiv"
342
  url = "https://github.com/cvg/LightGlue/releases/download/{}/{}_lightglue.pth"
343
 
344
  features = {
345
+ "superpoint": {
346
+ "weights": "superpoint_lightglue",
347
+ "input_dim": 256,
348
+ },
349
+ "disk": {
350
+ "weights": "disk_lightglue",
351
+ "input_dim": 128,
352
+ },
353
+ "aliked": {
354
+ "weights": "aliked_lightglue",
355
+ "input_dim": 128,
356
+ },
357
+ "sift": {
358
+ "weights": "sift_lightglue",
359
+ "input_dim": 128,
360
+ "add_scale_ori": True,
361
+ },
362
+ "doghardnet": {
363
+ "weights": "doghardnet_lightglue",
364
+ "input_dim": 128,
365
+ "add_scale_ori": True,
366
+ },
367
  }
368
 
369
  def __init__(self, features="superpoint", **conf) -> None:
370
  super().__init__()
371
+ self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf})
372
  if features is not None:
373
+ if features not in self.features:
374
+ raise ValueError(
375
+ f"Unsupported features: {features} not in "
376
+ f"{{{','.join(self.features)}}}"
377
+ )
378
+ for k, v in self.features[features].items():
379
+ setattr(conf, k, v)
380
 
381
  if conf.input_dim != conf.descriptor_dim:
382
  self.input_proj = nn.Linear(conf.input_dim, conf.descriptor_dim, bias=True)
 
384
  self.input_proj = nn.Identity()
385
 
386
  head_dim = conf.descriptor_dim // conf.num_heads
387
+ self.posenc = LearnableFourierPositionalEncoding(
388
+ 2 + 2 * self.conf.add_scale_ori, head_dim, head_dim
389
+ )
390
 
391
  h, n, d = conf.num_heads, conf.n_layers, conf.descriptor_dim
392
+
393
+ self.transformers = nn.ModuleList(
394
+ [TransformerLayer(d, h, conf.flash) for _ in range(n)]
 
 
395
  )
396
+
397
  self.log_assignment = nn.ModuleList([MatchAssignment(d) for _ in range(n)])
398
  self.token_confidence = nn.ModuleList(
399
  [TokenConfidence(d) for _ in range(n - 1)]
400
  )
401
+ self.register_buffer(
402
+ "confidence_thresholds",
403
+ torch.Tensor(
404
+ [self.confidence_threshold(i) for i in range(self.conf.n_layers)]
405
+ ),
406
+ )
407
 
408
+ state_dict = None
409
  if features is not None:
410
+ fname = f"{conf.weights}_{self.version.replace('.', '-')}.pth"
411
  state_dict = torch.hub.load_state_dict_from_url(
412
  self.url.format(self.version, features), file_name=fname
413
  )
 
416
  path = Path(__file__).parent
417
  path = path / "weights/{}.pth".format(self.conf.weights)
418
  state_dict = torch.load(str(path), map_location="cpu")
419
+
420
+ if state_dict:
421
+ # rename old state dict entries
422
+ for i in range(self.conf.n_layers):
423
+ pattern = f"self_attn.{i}", f"transformers.{i}.self_attn"
424
+ state_dict = {k.replace(*pattern): v for k, v in state_dict.items()}
425
+ pattern = f"cross_attn.{i}", f"transformers.{i}.cross_attn"
426
+ state_dict = {k.replace(*pattern): v for k, v in state_dict.items()}
427
  self.load_state_dict(state_dict, strict=False)
428
 
429
+ # static lengths LightGlue is compiled for (only used with torch.compile)
430
+ self.static_lengths = None
431
+
432
+ def compile(
433
+ self, mode="reduce-overhead", static_lengths=[256, 512, 768, 1024, 1280, 1536]
434
+ ):
435
+ if self.conf.width_confidence != -1:
436
+ warnings.warn(
437
+ "Point pruning is partially disabled for compiled forward.",
438
+ stacklevel=2,
439
+ )
440
+
441
+ torch._inductor.cudagraph_mark_step_begin()
442
+ for i in range(self.conf.n_layers):
443
+ self.transformers[i].masked_forward = torch.compile(
444
+ self.transformers[i].masked_forward, mode=mode, fullgraph=True
445
+ )
446
+
447
+ self.static_lengths = static_lengths
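
With torch.compile enabled, the forward pass pads both keypoint sets up to the next entry of static_lengths so the compiled graphs only ever see a handful of fixed shapes; a boolean mask marks the real rows. A rough standalone sketch of the bucketing, mirroring the pad_to_length helper defined at the top of this file (the shapes are made up):

import torch

static_lengths = [256, 512, 768, 1024, 1280, 1536]
desc = torch.randn(1, 700, 256)               # 700 real keypoints

c = desc.shape[-2]
kn = min(k for k in static_lengths if k >= c) # -> 768, the next static bucket
pad = torch.ones(1, kn - c, desc.shape[-1])
padded = torch.cat([desc, pad], dim=-2)       # 1 x 768 x 256
mask = torch.zeros(1, kn, 1, dtype=torch.bool)
mask[:, :c] = True                            # real rows vs. padding
print(padded.shape, int(mask.sum()))          # torch.Size([1, 768, 256]) 700

Inputs longer than the largest static length simply fall back to the uncompiled, unpadded path.
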
448
 
449
  def forward(self, data: dict) -> dict:
450
  """
 
460
  descriptors: [B x N x D]
461
  image: [B x C x H x W] or image_size: [B x 2]
462
  Output (dict):
 
463
  matches0: [B x M]
464
  matching_scores0: [B x M]
465
  matches1: [B x N]
466
  matching_scores1: [B x N]
467
+ matches: List[[Si x 2]]
468
+ scores: List[[Si]]
469
+ stop: int
470
+ prune0: [B x M]
471
+ prune1: [B x N]
472
  """
473
  with torch.autocast(enabled=self.conf.mp, device_type="cuda"):
474
  return self._forward(data)
 
477
  for key in self.required_data_keys:
478
  assert key in data, f"Missing key {key} in data"
479
  data0, data1 = data["image0"], data["image1"]
480
+ kpts0, kpts1 = data0["keypoints"], data1["keypoints"]
481
+ b, m, _ = kpts0.shape
482
+ b, n, _ = kpts1.shape
483
+ device = kpts0.device
484
  size0, size1 = data0.get("image_size"), data1.get("image_size")
485
+ kpts0 = normalize_keypoints(kpts0, size0).clone()
486
+ kpts1 = normalize_keypoints(kpts1, size1).clone()
 
 
487
 
488
+ if self.conf.add_scale_ori:
489
+ kpts0 = torch.cat(
490
+ [kpts0] + [data0[k].unsqueeze(-1) for k in ("scales", "oris")], -1
491
+ )
492
+ kpts1 = torch.cat(
493
+ [kpts1] + [data1[k].unsqueeze(-1) for k in ("scales", "oris")], -1
494
+ )
495
+ desc0 = data0["descriptors"].detach().contiguous()
496
+ desc1 = data1["descriptors"].detach().contiguous()
497
 
498
  assert desc0.shape[-1] == self.conf.input_dim
499
  assert desc1.shape[-1] == self.conf.input_dim
 
502
  desc0 = desc0.half()
503
  desc1 = desc1.half()
504
 
505
+ mask0, mask1 = None, None
506
+ c = max(m, n)
507
+ do_compile = self.static_lengths and c <= max(self.static_lengths)
508
+ if do_compile:
509
+ kn = min([k for k in self.static_lengths if k >= c])
510
+ desc0, mask0 = pad_to_length(desc0, kn)
511
+ desc1, mask1 = pad_to_length(desc1, kn)
512
+ kpts0, _ = pad_to_length(kpts0, kn)
513
+ kpts1, _ = pad_to_length(kpts1, kn)
514
  desc0 = self.input_proj(desc0)
515
  desc1 = self.input_proj(desc1)
 
516
  # cache positional embeddings
517
  encoding0 = self.posenc(kpts0)
518
  encoding1 = self.posenc(kpts1)
519
 
520
  # GNN + final_proj + assignment
521
+ do_early_stop = self.conf.depth_confidence > 0
522
+ do_point_pruning = self.conf.width_confidence > 0 and not do_compile
523
+ pruning_th = self.pruning_min_kpts(device)
524
+ if do_point_pruning:
525
+ ind0 = torch.arange(0, m, device=device)[None]
526
+ ind1 = torch.arange(0, n, device=device)[None]
527
+ # We store the index of the layer at which pruning is detected.
528
+ prune0 = torch.ones_like(ind0)
529
+ prune1 = torch.ones_like(ind1)
530
  token0, token1 = None, None
531
  for i in range(self.conf.n_layers):
532
+ if desc0.shape[1] == 0 or desc1.shape[1] == 0: # no keypoints
533
+ break
534
+ desc0, desc1 = self.transformers[i](
535
+ desc0, desc1, encoding0, encoding1, mask0=mask0, mask1=mask1
536
+ )
537
  if i == self.conf.n_layers - 1:
538
  continue # no early stopping or adaptive width at last layer
539
+
540
+ if do_early_stop:
541
  token0, token1 = self.token_confidence[i](desc0, desc1)
542
+ if self.check_if_stop(token0[..., :m], token1[..., :n], i, m + n):
 
 
 
 
 
 
 
 
543
  break
544
+ if do_point_pruning and desc0.shape[-2] > pruning_th:
545
+ scores0 = self.log_assignment[i].get_matchability(desc0)
546
+ prunemask0 = self.get_pruning_mask(token0, scores0, i)
547
+ keep0 = torch.where(prunemask0)[1]
548
+ ind0 = ind0.index_select(1, keep0)
549
+ desc0 = desc0.index_select(1, keep0)
550
+ encoding0 = encoding0.index_select(-2, keep0)
551
+ prune0[:, ind0] += 1
552
+ if do_point_pruning and desc1.shape[-2] > pruning_th:
553
+ scores1 = self.log_assignment[i].get_matchability(desc1)
554
+ prunemask1 = self.get_pruning_mask(token1, scores1, i)
555
+ keep1 = torch.where(prunemask1)[1]
556
+ ind1 = ind1.index_select(1, keep1)
557
+ desc1 = desc1.index_select(1, keep1)
558
+ encoding1 = encoding1.index_select(-2, keep1)
559
+ prune1[:, ind1] += 1
560
+
561
+ if desc0.shape[1] == 0 or desc1.shape[1] == 0: # no keypoints
562
+ m0 = desc0.new_full((b, m), -1, dtype=torch.long)
563
+ m1 = desc1.new_full((b, n), -1, dtype=torch.long)
564
+ mscores0 = desc0.new_zeros((b, m))
565
+ mscores1 = desc1.new_zeros((b, n))
566
+ matches = desc0.new_empty((b, 0, 2), dtype=torch.long)
567
+ mscores = desc0.new_empty((b, 0))
568
+ if not do_point_pruning:
569
+ prune0 = torch.ones_like(mscores0) * self.conf.n_layers
570
+ prune1 = torch.ones_like(mscores1) * self.conf.n_layers
571
+ return {
572
+ "matches0": m0,
573
+ "matches1": m1,
574
+ "matching_scores0": mscores0,
575
+ "matching_scores1": mscores1,
576
+ "stop": i + 1,
577
+ "matches": matches,
578
+ "scores": mscores,
579
+ "prune0": prune0,
580
+ "prune1": prune1,
581
+ }
582
+
583
+ desc0, desc1 = desc0[..., :m, :], desc1[..., :n, :] # remove padding
584
+ scores, _ = self.log_assignment[i](desc0, desc1)
585
  m0, m1, mscores0, mscores1 = filter_matches(scores, self.conf.filter_threshold)
 
586
  matches, mscores = [], []
587
  for k in range(b):
588
  valid = m0[k] > -1
589
+ m_indices_0 = torch.where(valid)[0]
590
+ m_indices_1 = m0[k][valid]
591
+ if do_point_pruning:
592
+ m_indices_0 = ind0[k, m_indices_0]
593
+ m_indices_1 = ind1[k, m_indices_1]
594
+ matches.append(torch.stack([m_indices_0, m_indices_1], -1))
595
  mscores.append(mscores0[k][valid])
596
 
597
+ # TODO: Remove when hloc switches to the compact format.
598
+ if do_point_pruning:
599
+ m0_ = torch.full((b, m), -1, device=m0.device, dtype=m0.dtype)
600
+ m1_ = torch.full((b, n), -1, device=m1.device, dtype=m1.dtype)
601
+ m0_[:, ind0] = torch.where(m0 == -1, -1, ind1.gather(1, m0.clamp(min=0)))
602
+ m1_[:, ind1] = torch.where(m1 == -1, -1, ind0.gather(1, m1.clamp(min=0)))
603
+ mscores0_ = torch.zeros((b, m), device=mscores0.device)
604
+ mscores1_ = torch.zeros((b, n), device=mscores1.device)
605
+ mscores0_[:, ind0] = mscores0
606
+ mscores1_[:, ind1] = mscores1
607
+ m0, m1, mscores0, mscores1 = m0_, m1_, mscores0_, mscores1_
608
+ else:
609
+ prune0 = torch.ones_like(mscores0) * self.conf.n_layers
610
+ prune1 = torch.ones_like(mscores1) * self.conf.n_layers
611
+
612
  return {
 
613
  "matches0": m0,
614
  "matches1": m1,
615
  "matching_scores0": mscores0,
616
  "matching_scores1": mscores1,
617
  "stop": i + 1,
 
 
618
  "matches": matches,
619
  "scores": mscores,
620
+ "prune0": prune0,
621
+ "prune1": prune1,
622
  }
623
 
624
+ def confidence_threshold(self, layer_index: int) -> float:
625
  """scaled confidence threshold"""
626
+ threshold = 0.8 + 0.1 * np.exp(-4.0 * layer_index / self.conf.n_layers)
627
+ return np.clip(threshold, 0, 1)
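
For the default 9-layer model this threshold decays from 0.9 at the first layer towards roughly 0.8 at the last, i.e. early exits demand a larger share of confident tokens. A quick standalone check of the schedule:

import numpy as np

n_layers = 9
thresholds = [
    float(np.clip(0.8 + 0.1 * np.exp(-4.0 * i / n_layers), 0, 1))
    for i in range(n_layers)
]
print([round(t, 3) for t in thresholds])
# [0.9, 0.864, 0.841, 0.826, 0.817, 0.811, 0.807, 0.804, 0.803]
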
628
 
629
+ def get_pruning_mask(
630
+ self, confidences: torch.Tensor, scores: torch.Tensor, layer_index: int
 
 
 
 
631
  ) -> torch.Tensor:
632
  """mask points which should be removed"""
633
+ keep = scores > (1 - self.conf.width_confidence)
634
+ if confidences is not None: # Low-confidence points are never pruned.
635
+ keep |= confidences <= self.confidence_thresholds[layer_index]
636
+ return keep
 
 
 
 
637
 
638
+ def check_if_stop(
639
  self,
640
+ confidences0: torch.Tensor,
641
+ confidences1: torch.Tensor,
642
+ layer_index: int,
643
+ num_points: int,
 
644
  ) -> torch.Tensor:
645
  """evaluate stopping condition"""
646
+ confidences = torch.cat([confidences0, confidences1], -1)
647
+ threshold = self.confidence_thresholds[layer_index]
648
+ ratio_confident = 1.0 - (confidences < threshold).float().sum() / num_points
649
+ return ratio_confident > self.conf.depth_confidence
650
+
651
+ def pruning_min_kpts(self, device: torch.device):
652
+ if self.conf.flash and FLASH_AVAILABLE and device.type == "cuda":
653
+ return self.pruning_keypoint_thresholds["flash"]
654
  else:
655
+ return self.pruning_keypoint_thresholds[device.type]
third_party/LightGlue/lightglue/sift.py ADDED
@@ -0,0 +1,216 @@
1
+ import warnings
2
+
3
+ import cv2
4
+ import numpy as np
5
+ import torch
6
+ from kornia.color import rgb_to_grayscale
7
+ from packaging import version
8
+
9
+ try:
10
+ import pycolmap
11
+ except ImportError:
12
+ pycolmap = None
13
+
14
+ from .utils import Extractor
15
+
16
+
17
+ def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None):
18
+ h, w = image_shape
19
+ ij = np.round(points - 0.5).astype(int).T[::-1]
20
+
21
+ # Remove duplicate points (identical coordinates).
22
+ # Pick highest scale or score
23
+ s = scales if scores is None else scores
24
+ buffer = np.zeros((h, w))
25
+ np.maximum.at(buffer, tuple(ij), s)
26
+ keep = np.where(buffer[tuple(ij)] == s)[0]
27
+
28
+ # Pick lowest angle (arbitrary).
29
+ ij = ij[:, keep]
30
+ buffer[:] = np.inf
31
+ o_abs = np.abs(angles[keep])
32
+ np.minimum.at(buffer, tuple(ij), o_abs)
33
+ mask = buffer[tuple(ij)] == o_abs
34
+ ij = ij[:, mask]
35
+ keep = keep[mask]
36
+
37
+ if nms_radius > 0:
38
+ # Apply NMS on the remaining points
39
+ buffer[:] = 0
40
+ buffer[tuple(ij)] = s[keep] # scores or scale
41
+
42
+ local_max = torch.nn.functional.max_pool2d(
43
+ torch.from_numpy(buffer).unsqueeze(0),
44
+ kernel_size=nms_radius * 2 + 1,
45
+ stride=1,
46
+ padding=nms_radius,
47
+ ).squeeze(0)
48
+ is_local_max = buffer == local_max.numpy()
49
+ keep = keep[is_local_max[tuple(ij)]]
50
+ return keep
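
The helper above collapses DoG detections that land on the same pixel (keeping the one with the highest scale or score and, among ties, the smallest absolute orientation) and can optionally apply a grid NMS on top. A toy call, assuming the bundled package is importable (e.g. after pip install -e third_party/LightGlue); the coordinates are invented:

import numpy as np
from lightglue.sift import filter_dog_point   # import path is an assumption

points = np.array([[10.3, 5.2], [10.4, 5.1], [40.0, 20.0]])  # first two hit the same pixel
scales = np.array([1.0, 2.0, 1.5])
angles = np.array([0.1, -0.2, 0.3])

keep = filter_dog_point(points, scales, angles, image_shape=(64, 64), nms_radius=0)
print(keep)   # [1 2]: the larger-scale duplicate and the isolated point survive
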
51
+
52
+
53
+ def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
54
+ x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
55
+ x.clip_(min=eps).sqrt_()
56
+ return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps)
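
RootSIFT is a purely element-wise transform: L1-normalise the descriptor, take the square root, then L2-normalise, so that Euclidean distance on the result approximates the Hellinger kernel on the original histogram. A standalone sanity check that the output is unit length:

import torch
import torch.nn.functional as F

desc = torch.rand(4, 128)                       # toy non-negative SIFT-like descriptors
root = F.normalize(desc, p=1, dim=-1).clamp(min=1e-6).sqrt()
root = F.normalize(root, p=2, dim=-1)
print(root.norm(dim=-1))                        # all ones, up to float precision
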
57
+
58
+
59
+ def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> np.ndarray:
60
+ """
61
+ Detect keypoints using OpenCV Detector.
62
+ Optionally, perform description.
63
+ Args:
64
+ features: OpenCV based keypoints detector and descriptor
65
+ image: Grayscale image of uint8 data type
66
+ Returns:
67
+ keypoints: 1D array of detected cv2.KeyPoint
68
+ scores: 1D array of responses
69
+ descriptors: 1D array of descriptors
70
+ """
71
+ detections, descriptors = features.detectAndCompute(image, None)
72
+ points = np.array([k.pt for k in detections], dtype=np.float32)
73
+ scores = np.array([k.response for k in detections], dtype=np.float32)
74
+ scales = np.array([k.size for k in detections], dtype=np.float32)
75
+ angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32))
76
+ return points, scores, scales, angles, descriptors
77
+
78
+
79
+ class SIFT(Extractor):
80
+ default_conf = {
81
+ "rootsift": True,
82
+ "nms_radius": 0, # None to disable filtering entirely.
83
+ "max_num_keypoints": 4096,
84
+ "backend": "opencv", # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
85
+ "detection_threshold": 0.0066667, # from COLMAP
86
+ "edge_threshold": 10,
87
+ "first_octave": -1, # only used by pycolmap, the default of COLMAP
88
+ "num_octaves": 4,
89
+ }
90
+
91
+ preprocess_conf = {
92
+ "resize": 1024,
93
+ }
94
+
95
+ required_data_keys = ["image"]
96
+
97
+ def __init__(self, **conf):
98
+ super().__init__(**conf) # Update with default configuration.
99
+ backend = self.conf.backend
100
+ if backend.startswith("pycolmap"):
101
+ if pycolmap is None:
102
+ raise ImportError(
103
+ "Cannot find module pycolmap: install it with pip"
104
+ "or use backend=opencv."
105
+ )
106
+ options = {
107
+ "peak_threshold": self.conf.detection_threshold,
108
+ "edge_threshold": self.conf.edge_threshold,
109
+ "first_octave": self.conf.first_octave,
110
+ "num_octaves": self.conf.num_octaves,
111
+ "normalization": pycolmap.Normalization.L2, # L1_ROOT is buggy.
112
+ }
113
+ device = (
114
+ "auto" if backend == "pycolmap" else backend.replace("pycolmap_", "")
115
+ )
116
+ if (
117
+ backend == "pycolmap_cpu" or not pycolmap.has_cuda
118
+ ) and pycolmap.__version__ < "0.5.0":
119
+ warnings.warn(
120
+ "The pycolmap CPU SIFT is buggy in version < 0.5.0, "
121
+ "consider upgrading pycolmap or use the CUDA version.",
122
+ stacklevel=1,
123
+ )
124
+ else:
125
+ options["max_num_features"] = self.conf.max_num_keypoints
126
+ self.sift = pycolmap.Sift(options=options, device=device)
127
+ elif backend == "opencv":
128
+ self.sift = cv2.SIFT_create(
129
+ contrastThreshold=self.conf.detection_threshold,
130
+ nfeatures=self.conf.max_num_keypoints,
131
+ edgeThreshold=self.conf.edge_threshold,
132
+ nOctaveLayers=self.conf.num_octaves,
133
+ )
134
+ else:
135
+ backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
136
+ raise ValueError(
137
+ f"Unknown backend: {backend} not in " f"{{{','.join(backends)}}}."
138
+ )
139
+
140
+ def extract_single_image(self, image: torch.Tensor):
141
+ image_np = image.cpu().numpy().squeeze(0)
142
+
143
+ if self.conf.backend.startswith("pycolmap"):
144
+ if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
145
+ detections, descriptors = self.sift.extract(image_np)
146
+ scores = None # Scores are not exposed by COLMAP anymore.
147
+ else:
148
+ detections, scores, descriptors = self.sift.extract(image_np)
149
+ keypoints = detections[:, :2] # Keep only (x, y).
150
+ scales, angles = detections[:, -2:].T
151
+ if scores is not None and (
152
+ self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
153
+ ):
154
+ # Set the scores as a combination of abs. response and scale.
155
+ scores = np.abs(scores) * scales
156
+ elif self.conf.backend == "opencv":
157
+ # TODO: Check if opencv keypoints are already in corner convention
158
+ keypoints, scores, scales, angles, descriptors = run_opencv_sift(
159
+ self.sift, (image_np * 255.0).astype(np.uint8)
160
+ )
161
+ pred = {
162
+ "keypoints": keypoints,
163
+ "scales": scales,
164
+ "oris": angles,
165
+ "descriptors": descriptors,
166
+ }
167
+ if scores is not None:
168
+ pred["keypoint_scores"] = scores
169
+
170
+ # sometimes pycolmap returns points outside the image. We remove them
171
+ if self.conf.backend.startswith("pycolmap"):
172
+ is_inside = (
173
+ pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
174
+ ).all(-1)
175
+ pred = {k: v[is_inside] for k, v in pred.items()}
176
+
177
+ if self.conf.nms_radius is not None:
178
+ keep = filter_dog_point(
179
+ pred["keypoints"],
180
+ pred["scales"],
181
+ pred["oris"],
182
+ image_np.shape,
183
+ self.conf.nms_radius,
184
+ scores=pred.get("keypoint_scores"),
185
+ )
186
+ pred = {k: v[keep] for k, v in pred.items()}
187
+
188
+ pred = {k: torch.from_numpy(v) for k, v in pred.items()}
189
+ if scores is not None:
190
+ # Keep the k keypoints with highest score
191
+ num_points = self.conf.max_num_keypoints
192
+ if num_points is not None and len(pred["keypoints"]) > num_points:
193
+ indices = torch.topk(pred["keypoint_scores"], num_points).indices
194
+ pred = {k: v[indices] for k, v in pred.items()}
195
+
196
+ return pred
197
+
198
+ def forward(self, data: dict) -> dict:
199
+ image = data["image"]
200
+ if image.shape[1] == 3:
201
+ image = rgb_to_grayscale(image)
202
+ device = image.device
203
+ image = image.cpu()
204
+ pred = []
205
+ for k in range(len(image)):
206
+ img = image[k]
207
+ if "image_size" in data.keys():
208
+ # avoid extracting points in padded areas
209
+ w, h = data["image_size"][k]
210
+ img = img[:, :h, :w]
211
+ p = self.extract_single_image(img)
212
+ pred.append(p)
213
+ pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
214
+ if self.conf.rootsift:
215
+ pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
216
+ return pred
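
The extractor above plugs into LightGlue through the new "sift" entry in its features table (which also switches on add_scale_ori). A rough end-to-end usage sketch, assuming the bundled package is importable (e.g. pip install -e third_party/LightGlue) and that SIFT and LightGlue are exported from lightglue/__init__.py; the image paths are placeholders:

import torch
from lightglue import LightGlue, SIFT          # exports assumed from __init__.py
from lightglue.utils import load_image

device = "cuda" if torch.cuda.is_available() else "cpu"
extractor = SIFT(max_num_keypoints=2048).eval().to(device)
matcher = LightGlue(features="sift").eval().to(device)  # fetches the sift_lightglue weights

feats0 = extractor.extract(load_image("image0.jpg").to(device))
feats1 = extractor.extract(load_image("image1.jpg").to(device))
out = matcher({"image0": feats0, "image1": feats1})

matches = out["matches"][0]                    # (S, 2) indices into the two keypoint sets
pts0 = feats0["keypoints"][0][matches[:, 0]]
pts1 = feats1["keypoints"][0][matches[:, 1]]
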
third_party/LightGlue/lightglue/superpoint.py CHANGED
@@ -43,8 +43,10 @@
43
  # Adapted by Remi Pautrat, Philipp Lindenberger
44
 
45
  import torch
 
46
  from torch import nn
47
- from .utils import ImagePreprocessor
 
48
 
49
 
50
  def simple_nms(scores, nms_radius: int):
@@ -77,7 +79,9 @@ def sample_descriptors(keypoints, descriptors, s: int = 8):
77
  """Interpolate descriptors at keypoint locations"""
78
  b, c, h, w = descriptors.shape
79
  keypoints = keypoints - s / 2 + 0.5
80
- keypoints /= torch.tensor([(w * s - s / 2 - 0.5), (h * s - s / 2 - 0.5)],).to(
 
 
81
  keypoints
82
  )[None]
83
  keypoints = keypoints * 2 - 1 # normalize to (-1, 1)
@@ -91,7 +95,7 @@ def sample_descriptors(keypoints, descriptors, s: int = 8):
91
  return descriptors
92
 
93
 
94
- class SuperPoint(nn.Module):
95
  """SuperPoint Convolutional Detector and Descriptor
96
 
97
  SuperPoint: Self-Supervised Interest Point Detection and
@@ -109,17 +113,13 @@ class SuperPoint(nn.Module):
109
  }
110
 
111
  preprocess_conf = {
112
- **ImagePreprocessor.default_conf,
113
  "resize": 1024,
114
- "grayscale": True,
115
  }
116
 
117
  required_data_keys = ["image"]
118
 
119
  def __init__(self, **conf):
120
- super().__init__()
121
- self.conf = {**self.default_conf, **conf}
122
-
123
  self.relu = nn.ReLU(inplace=True)
124
  self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
125
  c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256
@@ -138,26 +138,23 @@ class SuperPoint(nn.Module):
138
 
139
  self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
140
  self.convDb = nn.Conv2d(
141
- c5, self.conf["descriptor_dim"], kernel_size=1, stride=1, padding=0
142
  )
143
 
144
- url = "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_v1.pth"
145
  self.load_state_dict(torch.hub.load_state_dict_from_url(url))
146
 
147
- mk = self.conf["max_num_keypoints"]
148
- if mk is not None and mk <= 0:
149
  raise ValueError("max_num_keypoints must be positive or None")
150
 
151
- print("Loaded SuperPoint model")
152
-
153
  def forward(self, data: dict) -> dict:
154
  """Compute keypoints, scores, descriptors for image"""
155
  for key in self.required_data_keys:
156
  assert key in data, f"Missing key {key} in data"
157
  image = data["image"]
158
- if image.shape[1] == 3: # RGB
159
- scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1)
160
- image = (image * scale).sum(1, keepdim=True)
161
  # Shared Encoder
162
  x = self.relu(self.conv1a(image))
163
  x = self.relu(self.conv1b(x))
@@ -178,18 +175,18 @@ class SuperPoint(nn.Module):
178
  b, _, h, w = scores.shape
179
  scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8)
180
  scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h * 8, w * 8)
181
- scores = simple_nms(scores, self.conf["nms_radius"])
182
 
183
  # Discard keypoints near the image borders
184
- if self.conf["remove_borders"]:
185
- pad = self.conf["remove_borders"]
186
  scores[:, :pad] = -1
187
  scores[:, :, :pad] = -1
188
  scores[:, -pad:] = -1
189
  scores[:, :, -pad:] = -1
190
 
191
  # Extract keypoints
192
- best_kp = torch.where(scores > self.conf["detection_threshold"])
193
  scores = scores[best_kp]
194
 
195
  # Separate into batches
@@ -199,11 +196,11 @@ class SuperPoint(nn.Module):
199
  scores = [scores[best_kp[0] == i] for i in range(b)]
200
 
201
  # Keep the k keypoints with highest score
202
- if self.conf["max_num_keypoints"] is not None:
203
  keypoints, scores = list(
204
  zip(
205
  *[
206
- top_k_keypoints(k, s, self.conf["max_num_keypoints"])
207
  for k, s in zip(keypoints, scores)
208
  ]
209
  )
@@ -226,17 +223,5 @@ class SuperPoint(nn.Module):
226
  return {
227
  "keypoints": torch.stack(keypoints, 0),
228
  "keypoint_scores": torch.stack(scores, 0),
229
- "descriptors": torch.stack(descriptors, 0).transpose(-1, -2),
230
  }
231
-
232
- def extract(self, img: torch.Tensor, **conf) -> dict:
233
- """Perform extraction with online resizing"""
234
- if img.dim() == 3:
235
- img = img[None] # add batch dim
236
- assert img.dim() == 4 and img.shape[0] == 1
237
- shape = img.shape[-2:][::-1]
238
- img, scales = ImagePreprocessor(**{**self.preprocess_conf, **conf})(img)
239
- feats = self.forward({"image": img})
240
- feats["image_size"] = torch.tensor(shape)[None].to(img).float()
241
- feats["keypoints"] = (feats["keypoints"] + 0.5) / scales[None] - 0.5
242
- return feats
 
43
  # Adapted by Remi Pautrat, Philipp Lindenberger
44
 
45
  import torch
46
+ from kornia.color import rgb_to_grayscale
47
  from torch import nn
48
+
49
+ from .utils import Extractor
50
 
51
 
52
  def simple_nms(scores, nms_radius: int):
 
79
  """Interpolate descriptors at keypoint locations"""
80
  b, c, h, w = descriptors.shape
81
  keypoints = keypoints - s / 2 + 0.5
82
+ keypoints /= torch.tensor(
83
+ [(w * s - s / 2 - 0.5), (h * s - s / 2 - 0.5)],
84
+ ).to(
85
  keypoints
86
  )[None]
87
  keypoints = keypoints * 2 - 1 # normalize to (-1, 1)
 
95
  return descriptors
96
 
97
 
98
+ class SuperPoint(Extractor):
99
  """SuperPoint Convolutional Detector and Descriptor
100
 
101
  SuperPoint: Self-Supervised Interest Point Detection and
 
113
  }
114
 
115
  preprocess_conf = {
 
116
  "resize": 1024,
 
117
  }
118
 
119
  required_data_keys = ["image"]
120
 
121
  def __init__(self, **conf):
122
+ super().__init__(**conf) # Update with default configuration.
 
 
123
  self.relu = nn.ReLU(inplace=True)
124
  self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
125
  c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256
 
138
 
139
  self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
140
  self.convDb = nn.Conv2d(
141
+ c5, self.conf.descriptor_dim, kernel_size=1, stride=1, padding=0
142
  )
143
 
144
+ url = "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_v1.pth" # noqa
145
  self.load_state_dict(torch.hub.load_state_dict_from_url(url))
146
 
147
+ if self.conf.max_num_keypoints is not None and self.conf.max_num_keypoints <= 0:
 
148
  raise ValueError("max_num_keypoints must be positive or None")
149
 
 
 
150
  def forward(self, data: dict) -> dict:
151
  """Compute keypoints, scores, descriptors for image"""
152
  for key in self.required_data_keys:
153
  assert key in data, f"Missing key {key} in data"
154
  image = data["image"]
155
+ if image.shape[1] == 3:
156
+ image = rgb_to_grayscale(image)
157
+
158
  # Shared Encoder
159
  x = self.relu(self.conv1a(image))
160
  x = self.relu(self.conv1b(x))
 
175
  b, _, h, w = scores.shape
176
  scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8)
177
  scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h * 8, w * 8)
178
+ scores = simple_nms(scores, self.conf.nms_radius)
179
 
180
  # Discard keypoints near the image borders
181
+ if self.conf.remove_borders:
182
+ pad = self.conf.remove_borders
183
  scores[:, :pad] = -1
184
  scores[:, :, :pad] = -1
185
  scores[:, -pad:] = -1
186
  scores[:, :, -pad:] = -1
187
 
188
  # Extract keypoints
189
+ best_kp = torch.where(scores > self.conf.detection_threshold)
190
  scores = scores[best_kp]
191
 
192
  # Separate into batches
 
196
  scores = [scores[best_kp[0] == i] for i in range(b)]
197
 
198
  # Keep the k keypoints with highest score
199
+ if self.conf.max_num_keypoints is not None:
200
  keypoints, scores = list(
201
  zip(
202
  *[
203
+ top_k_keypoints(k, s, self.conf.max_num_keypoints)
204
  for k, s in zip(keypoints, scores)
205
  ]
206
  )
 
223
  return {
224
  "keypoints": torch.stack(keypoints, 0),
225
  "keypoint_scores": torch.stack(scores, 0),
226
+ "descriptors": torch.stack(descriptors, 0).transpose(-1, -2).contiguous(),
227
  }
third_party/LightGlue/lightglue/utils.py CHANGED
@@ -1,11 +1,12 @@
 
1
  from pathlib import Path
2
- import torch
3
- import kornia
 
4
  import cv2
 
5
  import numpy as np
6
- from typing import Union, List, Optional, Callable, Tuple
7
- import collections.abc as collections
8
- from types import SimpleNamespace
9
 
10
 
11
  class ImagePreprocessor:
@@ -15,7 +16,6 @@ class ImagePreprocessor:
15
  "interpolation": "bilinear",
16
  "align_corners": None,
17
  "antialias": True,
18
- "grayscale": False, # convert rgb to grayscale
19
  }
20
 
21
  def __init__(self, **conf) -> None:
@@ -35,10 +35,6 @@ class ImagePreprocessor:
35
  align_corners=self.conf.align_corners,
36
  )
37
  scale = torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
38
- if self.conf.grayscale and img.shape[-3] == 3:
39
- img = kornia.color.rgb_to_grayscale(img)
40
- elif not self.conf.grayscale and img.shape[-3] == 1:
41
- img = kornia.color.grayscale_to_rgb(img)
42
  return img, scale
43
 
44
 
@@ -132,6 +128,25 @@ def load_image(path: Path, resize: int = None, **kwargs) -> torch.Tensor:
132
  return numpy_image_to_torch(image)
133
 
134
135
  def match_pair(
136
  extractor,
137
  matcher,
 
1
+ import collections.abc as collections
2
  from pathlib import Path
3
+ from types import SimpleNamespace
4
+ from typing import Callable, List, Optional, Tuple, Union
5
+
6
  import cv2
7
+ import kornia
8
  import numpy as np
9
+ import torch
 
 
10
 
11
 
12
  class ImagePreprocessor:
 
16
  "interpolation": "bilinear",
17
  "align_corners": None,
18
  "antialias": True,
 
19
  }
20
 
21
  def __init__(self, **conf) -> None:
 
35
  align_corners=self.conf.align_corners,
36
  )
37
  scale = torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
 
 
 
 
38
  return img, scale
39
 
40
 
 
128
  return numpy_image_to_torch(image)
129
 
130
 
131
+ class Extractor(torch.nn.Module):
132
+ def __init__(self, **conf):
133
+ super().__init__()
134
+ self.conf = SimpleNamespace(**{**self.default_conf, **conf})
135
+
136
+ @torch.no_grad()
137
+ def extract(self, img: torch.Tensor, **conf) -> dict:
138
+ """Perform extraction with online resizing"""
139
+ if img.dim() == 3:
140
+ img = img[None] # add batch dim
141
+ assert img.dim() == 4 and img.shape[0] == 1
142
+ shape = img.shape[-2:][::-1]
143
+ img, scales = ImagePreprocessor(**{**self.preprocess_conf, **conf})(img)
144
+ feats = self.forward({"image": img})
145
+ feats["image_size"] = torch.tensor(shape)[None].to(img).float()
146
+ feats["keypoints"] = (feats["keypoints"] + 0.5) / scales[None] - 0.5
147
+ return feats
148
+
149
+
150
  def match_pair(
151
  extractor,
152
  matcher,
third_party/LightGlue/lightglue/viz2d.py CHANGED
@@ -6,8 +6,8 @@
6
  """
7
 
8
  import matplotlib
9
- import matplotlib.pyplot as plt
10
  import matplotlib.patheffects as path_effects
 
11
  import numpy as np
12
  import torch
13
 
 
6
  """
7
 
8
  import matplotlib
 
9
  import matplotlib.patheffects as path_effects
10
+ import matplotlib.pyplot as plt
11
  import numpy as np
12
  import torch
13
 
third_party/LightGlue/pyproject.toml ADDED
@@ -0,0 +1,30 @@
1
+ [project]
2
+ name = "lightglue"
3
+ description = "LightGlue: Local Feature Matching at Light Speed"
4
+ version = "0.0"
5
+ authors = [
6
+ {name = "Philipp Lindenberger"},
7
+ {name = "Paul-Edouard Sarlin"},
8
+ ]
9
+ readme = "README.md"
10
+ requires-python = ">=3.6"
11
+ license = {file = "LICENSE"}
12
+ classifiers = [
13
+ "Programming Language :: Python :: 3",
14
+ "License :: OSI Approved :: Apache Software License",
15
+ "Operating System :: OS Independent",
16
+ ]
17
+ urls = {Repository = "https://github.com/cvg/LightGlue/"}
18
+ dynamic = ["dependencies"]
19
+
20
+ [project.optional-dependencies]
21
+ dev = ["black==23.12.1", "flake8", "isort"]
22
+
23
+ [tool.setuptools]
24
+ packages = ["lightglue"]
25
+
26
+ [tool.setuptools.dynamic]
27
+ dependencies = {file = ["requirements.txt"]}
28
+
29
+ [tool.isort]
30
+ profile = "black"
third_party/LightGlue/setup.py DELETED
@@ -1,27 +0,0 @@
1
- from pathlib import Path
2
- from setuptools import setup
3
-
4
- description = ["LightGlue"]
5
-
6
- with open(str(Path(__file__).parent / "README.md"), "r", encoding="utf-8") as f:
7
- readme = f.read()
8
- with open(str(Path(__file__).parent / "requirements.txt"), "r") as f:
9
- dependencies = f.read().split("\n")
10
-
11
- setup(
12
- name="lightglue",
13
- version="0.0",
14
- packages=["lightglue"],
15
- python_requires=">=3.6",
16
- install_requires=dependencies,
17
- author="Philipp Lindenberger, Paul-Edouard Sarlin",
18
- description=description,
19
- long_description=readme,
20
- long_description_content_type="text/markdown",
21
- url="https://github.com/cvg/LightGlue/",
22
- classifiers=[
23
- "Programming Language :: Python :: 3",
24
- "License :: OSI Approved :: Apache Software License",
25
- "Operating System :: OS Independent",
26
- ],
27
- )