Vincentqyw committed
Commit 2673dcd
1 Parent(s): 45354a0

add: lightglue

pre-requirements.txt CHANGED
@@ -1,4 +1,3 @@
- # python>=3.10.4
  torch>=1.12.1
  torchvision>=0.13.1
  torchmetrics>=0.6.0
@@ -9,5 +8,3 @@ einops>=0.3.0
  kornia>=0.6
  gradio
  gradio_client==0.2.7
- # datasets[vision]>=2.4.0
-
third_party/LightGlue/.gitattributes ADDED
@@ -0,0 +1 @@
+ *.ipynb linguist-documentation
third_party/LightGlue/.gitignore ADDED
@@ -0,0 +1,10 @@
+ *.egg-info
+ *.pyc
+ /.idea/
+ /data/
+ /outputs/
+ __pycache__
+ /lightglue/weights/
+ lightglue/_flash/
+ *-checkpoint.ipynb
+ *.pth
third_party/LightGlue/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
third_party/LightGlue/README.md ADDED
@@ -0,0 +1,134 @@
+ <p align="center">
+ <h1 align="center"><ins>LightGlue ⚡️</ins><br>Local Feature Matching at Light Speed</h1>
+ <p align="center">
+ <a href="https://www.linkedin.com/in/philipplindenberger/">Philipp Lindenberger</a>
+ ·
+ <a href="https://psarlin.com/">Paul-Edouard&nbsp;Sarlin</a>
+ ·
+ <a href="https://www.microsoft.com/en-us/research/people/mapoll/">Marc&nbsp;Pollefeys</a>
+ </p>
+ <!-- <p align="center">
+ <img src="assets/larchitecture.svg" alt="Logo" height="40">
+ </p> -->
+ <!-- <h2 align="center">PrePrint 2023</h2> -->
+ <h2 align="center"><p>
+ <a href="https://arxiv.org/pdf/2306.13643.pdf" align="center">Paper</a> |
+ <a href="https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb" align="center">Colab</a>
+ </p></h2>
+ <div align="center"></div>
+ </p>
+ <p align="center">
+ <a href="https://arxiv.org/abs/2306.13643"><img src="assets/easy_hard.jpg" alt="example" width=80%></a>
+ <br>
+ <em>LightGlue is a deep neural network that matches sparse local features across image pairs.<br>An adaptive mechanism makes it fast for easy pairs (top) and reduces the computational complexity for difficult ones (bottom).</em>
+ </p>
+
+ ##
+
+ This repository hosts the inference code of LightGlue, a lightweight feature matcher with high accuracy and blazing-fast inference. It takes as input a set of keypoints and descriptors for each image and returns the indices of corresponding points. The architecture is based on adaptive pruning techniques in both network width and depth - [check out the paper for more details](https://arxiv.org/pdf/2306.13643.pdf).
+
+ We release pretrained weights of LightGlue with [SuperPoint](https://arxiv.org/abs/1712.07629) and [DISK](https://arxiv.org/abs/2006.13566) local features.
+ The training and evaluation code will be released in July in a separate repo. To be notified, subscribe to [issue #6](https://github.com/cvg/LightGlue/issues/6).
+
+ ## Installation and demo [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb)
+
+ Install this repo using pip:
+
+ ```bash
+ git clone https://github.com/cvg/LightGlue.git && cd LightGlue
+ python -m pip install -e .
+ ```
+
+ We provide a [demo notebook](demo.ipynb) which shows how to perform feature extraction and matching on an image pair.
+
+ Here is a minimal script to match two images:
+
+ ```python
+ from lightglue import LightGlue, SuperPoint, DISK
+ from lightglue.utils import load_image, rbd
+
+ # SuperPoint+LightGlue
+ extractor = SuperPoint(max_num_keypoints=2048).eval().cuda() # load the extractor
+ matcher = LightGlue(features='superpoint').eval().cuda() # load the matcher
+
+ # or DISK+LightGlue
+ extractor = DISK(max_num_keypoints=2048).eval().cuda() # load the extractor
+ matcher = LightGlue(features='disk').eval().cuda() # load the matcher
+
+ # load each image as a torch.Tensor on GPU with shape (3,H,W), normalized in [0,1]
+ image0 = load_image('path/to/image_0.jpg').cuda()
+ image1 = load_image('path/to/image_1.jpg').cuda()
+
+ # extract local features
+ feats0 = extractor.extract(image0) # auto-resize the image, disable with resize=None
+ feats1 = extractor.extract(image1)
+
+ # match the features
+ matches01 = matcher({'image0': feats0, 'image1': feats1})
+ feats0, feats1, matches01 = [rbd(x) for x in [feats0, feats1, matches01]] # remove batch dimension
+ matches = matches01['matches'] # indices with shape (K,2)
+ points0 = feats0['keypoints'][matches[..., 0]] # coordinates in image #0, shape (K,2)
+ points1 = feats1['keypoints'][matches[..., 1]] # coordinates in image #1, shape (K,2)
+ ```
+
+ We also provide a convenience method to match a pair of images:
+
+ ```python
+ from lightglue import match_pair
+ feats0, feats1, matches01 = match_pair(extractor, matcher, image0, image1)
+ ```
+
+ ##
+
+ <p align="center">
+ <a href="https://arxiv.org/abs/2306.13643"><img src="assets/teaser.svg" alt="Logo" width=50%></a>
+ <br>
+ <em>LightGlue can adjust its depth (number of layers) and width (number of keypoints) per image pair, with a marginal impact on accuracy.</em>
+ </p>
+
+ ## Advanced configuration
+
+ The default values give a good trade-off between speed and accuracy. To maximize the accuracy, use all keypoints and disable the adaptive mechanisms:
+ ```python
+ extractor = SuperPoint(max_num_keypoints=None)
+ matcher = LightGlue(features='superpoint', depth_confidence=-1, width_confidence=-1)
+ ```
+
+ To increase the speed with a small drop in accuracy, decrease the number of keypoints and lower the adaptive thresholds:
+ ```python
+ extractor = SuperPoint(max_num_keypoints=1024)
+ matcher = LightGlue(features='superpoint', depth_confidence=0.9, width_confidence=0.95)
+ ```
+ The maximum speed is obtained with [FlashAttention](https://arxiv.org/abs/2205.14135), which is automatically used when ```torch >= 2.0``` or if it is [installed from source](https://github.com/HazyResearch/flash-attention#installation-and-features).
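A quick way to see which attention backend will be picked on a given machine is to reproduce the availability check from `lightglue/lightglue.py`; a minimal sketch that only inspects the environment:

```python
# Sketch: mirror the FLASH_AVAILABLE check from lightglue/lightglue.py.
import torch
import torch.nn.functional as F

try:
    from flash_attn.modules.mha import FlashCrossAttention  # optional dependency
except ModuleNotFoundError:
    FlashCrossAttention = None

# torch >= 2.0 ships F.scaled_dot_product_attention, which LightGlue uses
# as its FlashAttention path when flash-attn is not installed.
flash_available = bool(FlashCrossAttention) or hasattr(F, 'scaled_dot_product_attention')
print(f'torch {torch.__version__}, FlashAttention path available: {flash_available}')
```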
+
+ <details>
+ <summary>[Detail of all parameters - click to expand]</summary>
+
+ - [```n_layers```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L261): Number of stacked self+cross attention layers. Reduce this value for faster inference at the cost of accuracy (continuous red line in the plot above). Default: 9 (all layers).
+ - [```flash```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L263): Enable FlashAttention. Significantly increases the speed and reduces the memory consumption without any impact on accuracy. Default: True (LightGlue automatically detects if FlashAttention is available).
+ - [```mp```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L264): Enable mixed precision inference. Default: False (off).
+ - [```depth_confidence```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L265): Controls the early stopping. A lower value stops more often at earlier layers. Default: 0.95, disable with -1.
+ - [```width_confidence```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L266): Controls the iterative point pruning. A lower value prunes more points earlier. Default: 0.99, disable with -1.
+ - [```filter_threshold```](https://github.com/cvg/LightGlue/blob/main/lightglue/lightglue.py#L267): Match confidence. Increase this value to obtain fewer but stronger matches. Default: 0.1.
+
+ </details>
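Taken together, these knobs map one-to-one onto the `LightGlue` constructor. A minimal sketch that spells out every default explicitly (values copied from the list above, `features='superpoint'` assumed), as a starting point for tuning:

```python
# Sketch: all tuning parameters listed above, passed explicitly with their
# documented defaults; change individual values from here.
from lightglue import LightGlue

matcher = LightGlue(
    features='superpoint',   # or 'disk'
    n_layers=9,              # fewer layers -> faster, less accurate
    flash=True,              # use FlashAttention when available
    mp=False,                # mixed-precision inference
    depth_confidence=0.95,   # early stopping; -1 disables it
    width_confidence=0.99,   # point pruning; -1 disables it
    filter_threshold=0.1,    # keep matches with confidence above this
).eval()
```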
+
+ ## Other links
+ - [hloc - the visual localization toolbox](https://github.com/cvg/Hierarchical-Localization/): run LightGlue for Structure-from-Motion and visual localization.
+ - [LightGlue-ONNX](https://github.com/fabio-sim/LightGlue-ONNX): export LightGlue to the Open Neural Network Exchange format.
+ - [Image Matching WebUI](https://github.com/Vincentqyw/image-matching-webui): a web GUI to easily compare different matchers, including LightGlue.
+ - [kornia](https://kornia.readthedocs.io/) now exposes LightGlue via the interfaces [`LightGlue`](https://kornia.readthedocs.io/en/latest/feature.html#kornia.feature.LightGlue) and [`LightGlueMatcher`](https://kornia.readthedocs.io/en/latest/feature.html#kornia.feature.LightGlueMatcher).
+
+ ## BibTeX Citation
+ If you use any ideas from the paper or code from this repo, please consider citing:
+
+ ```txt
+ @inproceedings{lindenberger23lightglue,
+   author    = {Philipp Lindenberger and
+                Paul-Edouard Sarlin and
+                Marc Pollefeys},
+   title     = {{LightGlue: Local Feature Matching at Light Speed}},
+   booktitle = {ICCV},
+   year      = {2023}
+ }
+ ```
third_party/LightGlue/assets/DSC_0410.JPG ADDED
third_party/LightGlue/assets/DSC_0411.JPG ADDED
third_party/LightGlue/assets/architecture.svg ADDED
third_party/LightGlue/assets/easy_hard.jpg ADDED
third_party/LightGlue/assets/sacre_coeur1.jpg ADDED
third_party/LightGlue/assets/sacre_coeur2.jpg ADDED
third_party/LightGlue/assets/teaser.svg ADDED
third_party/LightGlue/demo.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
third_party/LightGlue/lightglue/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .lightglue import LightGlue
+ from .superpoint import SuperPoint
+ from .disk import DISK
+ from .utils import match_pair
third_party/LightGlue/lightglue/disk.py ADDED
@@ -0,0 +1,70 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import kornia
4
+ from types import SimpleNamespace
5
+ from .utils import ImagePreprocessor
6
+
7
+
8
+ class DISK(nn.Module):
9
+ default_conf = {
10
+ 'weights': 'depth',
11
+ 'max_num_keypoints': None,
12
+ 'desc_dim': 128,
13
+ 'nms_window_size': 5,
14
+ 'detection_threshold': 0.0,
15
+ 'pad_if_not_divisible': True,
16
+ }
17
+
18
+ preprocess_conf = {
19
+ **ImagePreprocessor.default_conf,
20
+ 'resize': 1024,
21
+ 'grayscale': False,
22
+ }
23
+
24
+ required_data_keys = ['image']
25
+
26
+ def __init__(self, **conf) -> None:
27
+ super().__init__()
28
+ self.conf = {**self.default_conf, **conf}
29
+ self.conf = SimpleNamespace(**self.conf)
30
+ self.model = kornia.feature.DISK.from_pretrained(self.conf.weights)
31
+
32
+ def forward(self, data: dict) -> dict:
33
+ """ Compute keypoints, scores, descriptors for image """
34
+ for key in self.required_data_keys:
35
+ assert key in data, f'Missing key {key} in data'
36
+ image = data['image']
37
+ features = self.model(
38
+ image,
39
+ n=self.conf.max_num_keypoints,
40
+ window_size=self.conf.nms_window_size,
41
+ score_threshold=self.conf.detection_threshold,
42
+ pad_if_not_divisible=self.conf.pad_if_not_divisible
43
+ )
44
+ keypoints = [f.keypoints for f in features]
45
+ scores = [f.detection_scores for f in features]
46
+ descriptors = [f.descriptors for f in features]
47
+ del features
48
+
49
+ keypoints = torch.stack(keypoints, 0)
50
+ scores = torch.stack(scores, 0)
51
+ descriptors = torch.stack(descriptors, 0)
52
+
53
+ return {
54
+ 'keypoints': keypoints.to(image),
55
+ 'keypoint_scores': scores.to(image),
56
+ 'descriptors': descriptors.to(image),
57
+ }
58
+
59
+ def extract(self, img: torch.Tensor, **conf) -> dict:
60
+ """ Perform extraction with online resizing"""
61
+ if img.dim() == 3:
62
+ img = img[None] # add batch dim
63
+ assert img.dim() == 4 and img.shape[0] == 1
64
+ shape = img.shape[-2:][::-1]
65
+ img, scales = ImagePreprocessor(
66
+ **{**self.preprocess_conf, **conf})(img)
67
+ feats = self.forward({'image': img})
68
+ feats['image_size'] = torch.tensor(shape)[None].to(img).float()
69
+ feats['keypoints'] = (feats['keypoints'] + .5) / scales[None] - .5
70
+ return feats
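A minimal sketch of how this DISK wrapper is meant to be driven, assuming it runs from the LightGlue repo root so the bundled sample image resolves; shapes follow the code above, with 128-dimensional descriptors:

```python
# Sketch: exercise DISK.extract as defined above on one of the bundled images.
import torch
from lightglue import DISK
from lightglue.utils import load_image

extractor = DISK(max_num_keypoints=2048).eval()
image = load_image('assets/sacre_coeur1.jpg')    # (3, H, W), values in [0, 1]

with torch.no_grad():
    feats = extractor.extract(image)             # resizes to 1024 px internally

print(feats['keypoints'].shape)        # (1, K, 2), original-image pixel coordinates
print(feats['keypoint_scores'].shape)  # (1, K)
print(feats['descriptors'].shape)      # (1, K, 128)
print(feats['image_size'])             # (1, 2), original (W, H)
```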
third_party/LightGlue/lightglue/lightglue.py ADDED
@@ -0,0 +1,466 @@
1
+ from pathlib import Path
2
+ from types import SimpleNamespace
3
+ import warnings
4
+ import numpy as np
5
+ import torch
6
+ from torch import nn
7
+ import torch.nn.functional as F
8
+ from typing import Optional, List, Callable
9
+
10
+ try:
11
+ from flash_attn.modules.mha import FlashCrossAttention
12
+ except ModuleNotFoundError:
13
+ FlashCrossAttention = None
14
+
15
+ if FlashCrossAttention or hasattr(F, 'scaled_dot_product_attention'):
16
+ FLASH_AVAILABLE = True
17
+ else:
18
+ FLASH_AVAILABLE = False
19
+
20
+ torch.backends.cudnn.deterministic = True
21
+
22
+
23
+ @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
24
+ def normalize_keypoints(
25
+ kpts: torch.Tensor,
26
+ size: torch.Tensor) -> torch.Tensor:
27
+ if isinstance(size, torch.Size):
28
+ size = torch.tensor(size)[None]
29
+ shift = size.float().to(kpts) / 2
30
+ scale = size.max(1).values.float().to(kpts) / 2
31
+ kpts = (kpts - shift[:, None]) / scale[:, None, None]
32
+ return kpts
33
+
34
+
35
+ def rotate_half(x: torch.Tensor) -> torch.Tensor:
36
+ x = x.unflatten(-1, (-1, 2))
37
+ x1, x2 = x.unbind(dim=-1)
38
+ return torch.stack((-x2, x1), dim=-1).flatten(start_dim=-2)
39
+
40
+
41
+ def apply_cached_rotary_emb(
42
+ freqs: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
43
+ return (t * freqs[0]) + (rotate_half(t) * freqs[1])
44
+
45
+
46
+ class LearnableFourierPositionalEncoding(nn.Module):
47
+ def __init__(self, M: int, dim: int, F_dim: int = None,
48
+ gamma: float = 1.0) -> None:
49
+ super().__init__()
50
+ F_dim = F_dim if F_dim is not None else dim
51
+ self.gamma = gamma
52
+ self.Wr = nn.Linear(M, F_dim // 2, bias=False)
53
+ nn.init.normal_(self.Wr.weight.data, mean=0, std=self.gamma ** -2)
54
+
55
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
56
+ """ encode position vector """
57
+ projected = self.Wr(x)
58
+ cosines, sines = torch.cos(projected), torch.sin(projected)
59
+ emb = torch.stack([cosines, sines], 0).unsqueeze(-3)
60
+ return emb.repeat_interleave(2, dim=-1)
61
+
62
+
63
+ class TokenConfidence(nn.Module):
64
+ def __init__(self, dim: int) -> None:
65
+ super().__init__()
66
+ self.token = nn.Sequential(
67
+ nn.Linear(dim, 1),
68
+ nn.Sigmoid()
69
+ )
70
+
71
+ def forward(self, desc0: torch.Tensor, desc1: torch.Tensor):
72
+ """ get confidence tokens """
73
+ return (
74
+ self.token(desc0.detach().float()).squeeze(-1),
75
+ self.token(desc1.detach().float()).squeeze(-1))
76
+
77
+
78
+ class Attention(nn.Module):
79
+ def __init__(self, allow_flash: bool) -> None:
80
+ super().__init__()
81
+ if allow_flash and not FLASH_AVAILABLE:
82
+ warnings.warn(
83
+ 'FlashAttention is not available. For optimal speed, '
84
+ 'consider installing torch >= 2.0 or flash-attn.',
85
+ stacklevel=2,
86
+ )
87
+ self.enable_flash = allow_flash and FLASH_AVAILABLE
88
+ if allow_flash and FlashCrossAttention:
89
+ self.flash_ = FlashCrossAttention()
90
+
91
+ def forward(self, q, k, v) -> torch.Tensor:
92
+ if self.enable_flash and q.device.type == 'cuda':
93
+ if FlashCrossAttention:
94
+ q, k, v = [x.transpose(-2, -3) for x in [q, k, v]]
95
+ m = self.flash_(q.half(), torch.stack([k, v], 2).half())
96
+ return m.transpose(-2, -3).to(q.dtype)
97
+ else: # use torch 2.0 scaled_dot_product_attention with flash
98
+ args = [x.half().contiguous() for x in [q, k, v]]
99
+ with torch.backends.cuda.sdp_kernel(enable_flash=True):
100
+ return F.scaled_dot_product_attention(*args).to(q.dtype)
101
+ elif hasattr(F, 'scaled_dot_product_attention'):
102
+ args = [x.contiguous() for x in [q, k, v]]
103
+ return F.scaled_dot_product_attention(*args).to(q.dtype)
104
+ else:
105
+ s = q.shape[-1] ** -0.5
106
+ attn = F.softmax(torch.einsum('...id,...jd->...ij', q, k) * s, -1)
107
+ return torch.einsum('...ij,...jd->...id', attn, v)
108
+
109
+
110
+ class Transformer(nn.Module):
111
+ def __init__(self, embed_dim: int, num_heads: int,
112
+ flash: bool = False, bias: bool = True) -> None:
113
+ super().__init__()
114
+ self.embed_dim = embed_dim
115
+ self.num_heads = num_heads
116
+ assert self.embed_dim % num_heads == 0
117
+ self.head_dim = self.embed_dim // num_heads
118
+ self.Wqkv = nn.Linear(embed_dim, 3*embed_dim, bias=bias)
119
+ self.inner_attn = Attention(flash)
120
+ self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
121
+ self.ffn = nn.Sequential(
122
+ nn.Linear(2*embed_dim, 2*embed_dim),
123
+ nn.LayerNorm(2*embed_dim, elementwise_affine=True),
124
+ nn.GELU(),
125
+ nn.Linear(2*embed_dim, embed_dim)
126
+ )
127
+
128
+ def _forward(self, x: torch.Tensor,
129
+ encoding: Optional[torch.Tensor] = None):
130
+ qkv = self.Wqkv(x)
131
+ qkv = qkv.unflatten(-1, (self.num_heads, -1, 3)).transpose(1, 2)
132
+ q, k, v = qkv[..., 0], qkv[..., 1], qkv[..., 2]
133
+ if encoding is not None:
134
+ q = apply_cached_rotary_emb(encoding, q)
135
+ k = apply_cached_rotary_emb(encoding, k)
136
+ context = self.inner_attn(q, k, v)
137
+ message = self.out_proj(
138
+ context.transpose(1, 2).flatten(start_dim=-2))
139
+ return x + self.ffn(torch.cat([x, message], -1))
140
+
141
+ def forward(self, x0, x1, encoding0=None, encoding1=None):
142
+ return self._forward(x0, encoding0), self._forward(x1, encoding1)
143
+
144
+
145
+ class CrossTransformer(nn.Module):
146
+ def __init__(self, embed_dim: int, num_heads: int,
147
+ flash: bool = False, bias: bool = True) -> None:
148
+ super().__init__()
149
+ self.heads = num_heads
150
+ dim_head = embed_dim // num_heads
151
+ self.scale = dim_head ** -0.5
152
+ inner_dim = dim_head * num_heads
153
+ self.to_qk = nn.Linear(embed_dim, inner_dim, bias=bias)
154
+ self.to_v = nn.Linear(embed_dim, inner_dim, bias=bias)
155
+ self.to_out = nn.Linear(inner_dim, embed_dim, bias=bias)
156
+ self.ffn = nn.Sequential(
157
+ nn.Linear(2*embed_dim, 2*embed_dim),
158
+ nn.LayerNorm(2*embed_dim, elementwise_affine=True),
159
+ nn.GELU(),
160
+ nn.Linear(2*embed_dim, embed_dim)
161
+ )
162
+
163
+ if flash and FLASH_AVAILABLE:
164
+ self.flash = Attention(True)
165
+ else:
166
+ self.flash = None
167
+
168
+ def map_(self, func: Callable, x0: torch.Tensor, x1: torch.Tensor):
169
+ return func(x0), func(x1)
170
+
171
+ def forward(self, x0: torch.Tensor, x1: torch.Tensor) -> List[torch.Tensor]:
172
+ qk0, qk1 = self.map_(self.to_qk, x0, x1)
173
+ v0, v1 = self.map_(self.to_v, x0, x1)
174
+ qk0, qk1, v0, v1 = map(
175
+ lambda t: t.unflatten(-1, (self.heads, -1)).transpose(1, 2),
176
+ (qk0, qk1, v0, v1))
177
+ if self.flash is not None:
178
+ m0 = self.flash(qk0, qk1, v1)
179
+ m1 = self.flash(qk1, qk0, v0)
180
+ else:
181
+ qk0, qk1 = qk0 * self.scale**0.5, qk1 * self.scale**0.5
182
+ sim = torch.einsum('b h i d, b h j d -> b h i j', qk0, qk1)
183
+ attn01 = F.softmax(sim, dim=-1)
184
+ attn10 = F.softmax(sim.transpose(-2, -1).contiguous(), dim=-1)
185
+ m0 = torch.einsum('bhij, bhjd -> bhid', attn01, v1)
186
+ m1 = torch.einsum('bhji, bhjd -> bhid', attn10.transpose(-2, -1), v0)
187
+ m0, m1 = self.map_(lambda t: t.transpose(1, 2).flatten(start_dim=-2),
188
+ m0, m1)
189
+ m0, m1 = self.map_(self.to_out, m0, m1)
190
+ x0 = x0 + self.ffn(torch.cat([x0, m0], -1))
191
+ x1 = x1 + self.ffn(torch.cat([x1, m1], -1))
192
+ return x0, x1
193
+
194
+
195
+ def sigmoid_log_double_softmax(
196
+ sim: torch.Tensor, z0: torch.Tensor, z1: torch.Tensor) -> torch.Tensor:
197
+ """ create the log assignment matrix from logits and similarity"""
198
+ b, m, n = sim.shape
199
+ certainties = F.logsigmoid(z0) + F.logsigmoid(z1).transpose(1, 2)
200
+ scores0 = F.log_softmax(sim, 2)
201
+ scores1 = F.log_softmax(
202
+ sim.transpose(-1, -2).contiguous(), 2).transpose(-1, -2)
203
+ scores = sim.new_full((b, m+1, n+1), 0)
204
+ scores[:, :m, :n] = (scores0 + scores1 + certainties)
205
+ scores[:, :-1, -1] = F.logsigmoid(-z0.squeeze(-1))
206
+ scores[:, -1, :-1] = F.logsigmoid(-z1.squeeze(-1))
207
+ return scores
208
+
209
+
210
+ class MatchAssignment(nn.Module):
211
+ def __init__(self, dim: int) -> None:
212
+ super().__init__()
213
+ self.dim = dim
214
+ self.matchability = nn.Linear(dim, 1, bias=True)
215
+ self.final_proj = nn.Linear(dim, dim, bias=True)
216
+
217
+ def forward(self, desc0: torch.Tensor, desc1: torch.Tensor):
218
+ """ build assignment matrix from descriptors """
219
+ mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1)
220
+ _, _, d = mdesc0.shape
221
+ mdesc0, mdesc1 = mdesc0 / d**.25, mdesc1 / d**.25
222
+ sim = torch.einsum('bmd,bnd->bmn', mdesc0, mdesc1)
223
+ z0 = self.matchability(desc0)
224
+ z1 = self.matchability(desc1)
225
+ scores = sigmoid_log_double_softmax(sim, z0, z1)
226
+ return scores, sim
227
+
228
+ def scores(self, desc0: torch.Tensor, desc1: torch.Tensor):
229
+ m0 = torch.sigmoid(self.matchability(desc0)).squeeze(-1)
230
+ m1 = torch.sigmoid(self.matchability(desc1)).squeeze(-1)
231
+ return m0, m1
232
+
233
+
234
+ def filter_matches(scores: torch.Tensor, th: float):
235
+ """ obtain matches from a log assignment matrix [Bx M+1 x N+1]"""
236
+ max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1)
237
+ m0, m1 = max0.indices, max1.indices
238
+ mutual0 = torch.arange(m0.shape[1]).to(m0)[None] == m1.gather(1, m0)
239
+ mutual1 = torch.arange(m1.shape[1]).to(m1)[None] == m0.gather(1, m1)
240
+ max0_exp = max0.values.exp()
241
+ zero = max0_exp.new_tensor(0)
242
+ mscores0 = torch.where(mutual0, max0_exp, zero)
243
+ mscores1 = torch.where(mutual1, mscores0.gather(1, m1), zero)
244
+ if th is not None:
245
+ valid0 = mutual0 & (mscores0 > th)
246
+ else:
247
+ valid0 = mutual0
248
+ valid1 = mutual1 & valid0.gather(1, m1)
249
+ m0 = torch.where(valid0, m0, m0.new_tensor(-1))
250
+ m1 = torch.where(valid1, m1, m1.new_tensor(-1))
251
+ return m0, m1, mscores0, mscores1
252
+
253
+
254
+ class LightGlue(nn.Module):
255
+ default_conf = {
256
+ 'name': 'lightglue', # just for interfacing
257
+ 'input_dim': 256, # input descriptor dimension (autoselected from weights)
258
+ 'descriptor_dim': 256,
259
+ 'n_layers': 9,
260
+ 'num_heads': 4,
261
+ 'flash': True, # enable FlashAttention if available.
262
+ 'mp': False, # enable mixed precision
263
+ 'depth_confidence': 0.95, # early stopping, disable with -1
264
+ 'width_confidence': 0.99, # point pruning, disable with -1
265
+ 'filter_threshold': 0.1, # match threshold
266
+ 'weights': None,
267
+ }
268
+
269
+ required_data_keys = [
270
+ 'image0', 'image1']
271
+
272
+ version = "v0.1_arxiv"
273
+ url = "https://github.com/cvg/LightGlue/releases/download/{}/{}_lightglue.pth"
274
+
275
+ features = {
276
+ 'superpoint': ('superpoint_lightglue', 256),
277
+ 'disk': ('disk_lightglue', 128)
278
+ }
279
+
280
+ def __init__(self, features='superpoint', **conf) -> None:
281
+ super().__init__()
282
+ self.conf = {**self.default_conf, **conf}
283
+ if features is not None:
284
+ assert (features in list(self.features.keys()))
285
+ self.conf['weights'], self.conf['input_dim'] = \
286
+ self.features[features]
287
+ self.conf = conf = SimpleNamespace(**self.conf)
288
+
289
+ if conf.input_dim != conf.descriptor_dim:
290
+ self.input_proj = nn.Linear(
291
+ conf.input_dim, conf.descriptor_dim, bias=True)
292
+ else:
293
+ self.input_proj = nn.Identity()
294
+
295
+ head_dim = conf.descriptor_dim // conf.num_heads
296
+ self.posenc = LearnableFourierPositionalEncoding(2, head_dim, head_dim)
297
+
298
+ h, n, d = conf.num_heads, conf.n_layers, conf.descriptor_dim
299
+ self.self_attn = nn.ModuleList(
300
+ [Transformer(d, h, conf.flash) for _ in range(n)])
301
+ self.cross_attn = nn.ModuleList(
302
+ [CrossTransformer(d, h, conf.flash) for _ in range(n)])
303
+ self.log_assignment = nn.ModuleList(
304
+ [MatchAssignment(d) for _ in range(n)])
305
+ self.token_confidence = nn.ModuleList([
306
+ TokenConfidence(d) for _ in range(n-1)])
307
+
308
+ if features is not None:
309
+ fname = f'{conf.weights}_{self.version}.pth'.replace('.', '-')
310
+ state_dict = torch.hub.load_state_dict_from_url(
311
+ self.url.format(self.version, features), file_name=fname)
312
+ self.load_state_dict(state_dict, strict=False)
313
+ elif conf.weights is not None:
314
+ path = Path(__file__).parent
315
+ path = path / 'weights/{}.pth'.format(self.conf.weights)
316
+ state_dict = torch.load(str(path), map_location='cpu')
317
+ self.load_state_dict(state_dict, strict=False)
318
+
319
+ print('Loaded LightGlue model')
320
+
321
+ def forward(self, data: dict) -> dict:
322
+ """
323
+ Match keypoints and descriptors between two images
324
+
325
+ Input (dict):
326
+ image0: dict
327
+ keypoints: [B x M x 2]
328
+ descriptors: [B x M x D]
329
+ image: [B x C x H x W] or image_size: [B x 2]
330
+ image1: dict
331
+ keypoints: [B x N x 2]
332
+ descriptors: [B x N x D]
333
+ image: [B x C x H x W] or image_size: [B x 2]
334
+ Output (dict):
335
+ log_assignment: [B x M+1 x N+1]
336
+ matches0: [B x M]
337
+ matching_scores0: [B x M]
338
+ matches1: [B x N]
339
+ matching_scores1: [B x N]
340
+ matches: List[[Si x 2]], scores: List[[Si]]
341
+ """
342
+ with torch.autocast(enabled=self.conf.mp, device_type='cuda'):
343
+ return self._forward(data)
344
+
345
+ def _forward(self, data: dict) -> dict:
346
+ for key in self.required_data_keys:
347
+ assert key in data, f'Missing key {key} in data'
348
+ data0, data1 = data['image0'], data['image1']
349
+ kpts0_, kpts1_ = data0['keypoints'], data1['keypoints']
350
+ b, m, _ = kpts0_.shape
351
+ b, n, _ = kpts1_.shape
352
+ size0, size1 = data0.get('image_size'), data1.get('image_size')
353
+ size0 = size0 if size0 is not None else data0['image'].shape[-2:][::-1]
354
+ size1 = size1 if size1 is not None else data1['image'].shape[-2:][::-1]
355
+ kpts0 = normalize_keypoints(kpts0_, size=size0)
356
+ kpts1 = normalize_keypoints(kpts1_, size=size1)
357
+
358
+ assert torch.all(kpts0 >= -1) and torch.all(kpts0 <= 1)
359
+ assert torch.all(kpts1 >= -1) and torch.all(kpts1 <= 1)
360
+
361
+ desc0 = data0['descriptors'].detach()
362
+ desc1 = data1['descriptors'].detach()
363
+
364
+ assert desc0.shape[-1] == self.conf.input_dim
365
+ assert desc1.shape[-1] == self.conf.input_dim
366
+
367
+ if torch.is_autocast_enabled():
368
+ desc0 = desc0.half()
369
+ desc1 = desc1.half()
370
+
371
+ desc0 = self.input_proj(desc0)
372
+ desc1 = self.input_proj(desc1)
373
+
374
+ # cache positional embeddings
375
+ encoding0 = self.posenc(kpts0)
376
+ encoding1 = self.posenc(kpts1)
377
+
378
+ # GNN + final_proj + assignment
379
+ ind0 = torch.arange(0, m).to(device=kpts0.device)[None]
380
+ ind1 = torch.arange(0, n).to(device=kpts0.device)[None]
381
+ prune0 = torch.ones_like(ind0) # store layer where pruning is detected
382
+ prune1 = torch.ones_like(ind1)
383
+ dec, wic = self.conf.depth_confidence, self.conf.width_confidence
384
+ token0, token1 = None, None
385
+ for i in range(self.conf.n_layers):
386
+ # self+cross attention
387
+ desc0, desc1 = self.self_attn[i](
388
+ desc0, desc1, encoding0, encoding1)
389
+ desc0, desc1 = self.cross_attn[i](desc0, desc1)
390
+ if i == self.conf.n_layers - 1:
391
+ continue # no early stopping or adaptive width at last layer
392
+ if dec > 0: # early stopping
393
+ token0, token1 = self.token_confidence[i](desc0, desc1)
394
+ if self.stop(token0, token1, self.conf_th(i), dec, m+n):
395
+ break
396
+ if wic > 0: # point pruning
397
+ match0, match1 = self.log_assignment[i].scores(desc0, desc1)
398
+ mask0 = self.get_mask(token0, match0, self.conf_th(i), 1-wic)
399
+ mask1 = self.get_mask(token1, match1, self.conf_th(i), 1-wic)
400
+ ind0, ind1 = ind0[mask0][None], ind1[mask1][None]
401
+ desc0, desc1 = desc0[mask0][None], desc1[mask1][None]
402
+ if desc0.shape[-2] == 0 or desc1.shape[-2] == 0:
403
+ break
404
+ encoding0 = encoding0[:, :, mask0][:, None]
405
+ encoding1 = encoding1[:, :, mask1][:, None]
406
+ prune0[:, ind0] += 1
407
+ prune1[:, ind1] += 1
408
+
409
+ if wic > 0: # scatter with indices after pruning
410
+ scores_, _ = self.log_assignment[i](desc0, desc1)
411
+ dt, dev = scores_.dtype, scores_.device
412
+ scores = torch.zeros(b, m+1, n+1, dtype=dt, device=dev)
413
+ scores[:, :-1, :-1] = -torch.inf
414
+ scores[:, ind0[0], -1] = scores_[:, :-1, -1]
415
+ scores[:, -1, ind1[0]] = scores_[:, -1, :-1]
416
+ x, y = torch.meshgrid(ind0[0], ind1[0], indexing='ij')
417
+ scores[:, x, y] = scores_[:, :-1, :-1]
418
+ else:
419
+ scores, _ = self.log_assignment[i](desc0, desc1)
420
+
421
+ m0, m1, mscores0, mscores1 = filter_matches(
422
+ scores, self.conf.filter_threshold)
423
+
424
+ matches, mscores = [], []
425
+ for k in range(b):
426
+ valid = m0[k] > -1
427
+ matches.append(torch.stack([torch.where(valid)[0], m0[k][valid]], -1))
428
+ mscores.append(mscores0[k][valid])
429
+
430
+ return {
431
+ 'log_assignment': scores,
432
+ 'matches0': m0,
433
+ 'matches1': m1,
434
+ 'matching_scores0': mscores0,
435
+ 'matching_scores1': mscores1,
436
+ 'stop': i+1,
437
+ 'prune0': prune0,
438
+ 'prune1': prune1,
439
+ 'matches': matches,
440
+ 'scores': mscores,
441
+ }
442
+
443
+ def conf_th(self, i: int) -> float:
444
+ """ scaled confidence threshold """
445
+ return np.clip(
446
+ 0.8 + 0.1 * np.exp(-4.0 * i / self.conf.n_layers), 0, 1)
447
+
448
+ def get_mask(self, confidence: torch.Tensor, match: torch.Tensor,
449
+ conf_th: float, match_th: float) -> torch.Tensor:
450
+ """ mask points which should be removed """
451
+ if conf_th and confidence is not None:
452
+ mask = torch.where(confidence > conf_th, match,
453
+ match.new_tensor(1.0)) > match_th
454
+ else:
455
+ mask = match > match_th
456
+ return mask
457
+
458
+ def stop(self, token0: torch.Tensor, token1: torch.Tensor,
459
+ conf_th: float, inl_th: float, seql: int) -> torch.Tensor:
460
+ """ evaluate stopping condition"""
461
+ tokens = torch.cat([token0, token1], -1)
462
+ if conf_th:
463
+ pos = 1.0 - (tokens < conf_th).float().sum() / seql
464
+ return pos > inl_th
465
+ else:
466
+ return tokens.mean() > inl_th
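For readers skimming the matcher above, a small sketch of how its output dictionary is consumed; it mirrors the matches/mscores construction at the end of `_forward`. It assumes `matcher`, `feats0` and `feats1` have been set up as in the README's minimal script:

```python
# Sketch: interpret LightGlue's output for a batch of size 1.
import torch

out = matcher({'image0': feats0, 'image1': feats1})

m0 = out['matches0'][0]               # (M,) index into image1 keypoints, -1 = unmatched
scores0 = out['matching_scores0'][0]  # (M,) confidence of each proposed match

valid = m0 > -1
pairs = torch.stack([torch.where(valid)[0], m0[valid]], -1)  # (K, 2) keypoint indices
print(f'{valid.sum().item()} matches, '
      f'stopped after {out["stop"]} of {matcher.conf.n_layers} layers')
```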
third_party/LightGlue/lightglue/superpoint.py ADDED
@@ -0,0 +1,230 @@
1
+ # %BANNER_BEGIN%
2
+ # ---------------------------------------------------------------------
3
+ # %COPYRIGHT_BEGIN%
4
+ #
5
+ # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
6
+ #
7
+ # Unpublished Copyright (c) 2020
8
+ # Magic Leap, Inc., All Rights Reserved.
9
+ #
10
+ # NOTICE: All information contained herein is, and remains the property
11
+ # of COMPANY. The intellectual and technical concepts contained herein
12
+ # are proprietary to COMPANY and may be covered by U.S. and Foreign
13
+ # Patents, patents in process, and are protected by trade secret or
14
+ # copyright law. Dissemination of this information or reproduction of
15
+ # this material is strictly forbidden unless prior written permission is
16
+ # obtained from COMPANY. Access to the source code contained herein is
17
+ # hereby forbidden to anyone except current COMPANY employees, managers
18
+ # or contractors who have executed Confidentiality and Non-disclosure
19
+ # agreements explicitly covering such access.
20
+ #
21
+ # The copyright notice above does not evidence any actual or intended
22
+ # publication or disclosure of this source code, which includes
23
+ # information that is confidential and/or proprietary, and is a trade
24
+ # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
25
+ # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
26
+ # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
27
+ # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
28
+ # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
29
+ # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
30
+ # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
31
+ # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
32
+ #
33
+ # %COPYRIGHT_END%
34
+ # ----------------------------------------------------------------------
35
+ # %AUTHORS_BEGIN%
36
+ #
37
+ # Originating Authors: Paul-Edouard Sarlin
38
+ #
39
+ # %AUTHORS_END%
40
+ # --------------------------------------------------------------------*/
41
+ # %BANNER_END%
42
+
43
+ # Adapted by Remi Pautrat, Philipp Lindenberger
44
+
45
+ import torch
46
+ from torch import nn
47
+ from .utils import ImagePreprocessor
48
+
49
+
50
+ def simple_nms(scores, nms_radius: int):
51
+ """ Fast Non-maximum suppression to remove nearby points """
52
+ assert (nms_radius >= 0)
53
+
54
+ def max_pool(x):
55
+ return torch.nn.functional.max_pool2d(
56
+ x, kernel_size=nms_radius*2+1, stride=1, padding=nms_radius)
57
+
58
+ zeros = torch.zeros_like(scores)
59
+ max_mask = scores == max_pool(scores)
60
+ for _ in range(2):
61
+ supp_mask = max_pool(max_mask.float()) > 0
62
+ supp_scores = torch.where(supp_mask, zeros, scores)
63
+ new_max_mask = supp_scores == max_pool(supp_scores)
64
+ max_mask = max_mask | (new_max_mask & (~supp_mask))
65
+ return torch.where(max_mask, scores, zeros)
66
+
67
+
68
+ def top_k_keypoints(keypoints, scores, k):
69
+ if k >= len(keypoints):
70
+ return keypoints, scores
71
+ scores, indices = torch.topk(scores, k, dim=0, sorted=True)
72
+ return keypoints[indices], scores
73
+
74
+
75
+ def sample_descriptors(keypoints, descriptors, s: int = 8):
76
+ """ Interpolate descriptors at keypoint locations """
77
+ b, c, h, w = descriptors.shape
78
+ keypoints = keypoints - s / 2 + 0.5
79
+ keypoints /= torch.tensor([(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)],
80
+ ).to(keypoints)[None]
81
+ keypoints = keypoints*2 - 1 # normalize to (-1, 1)
82
+ args = {'align_corners': True} if torch.__version__ >= '1.3' else {}
83
+ descriptors = torch.nn.functional.grid_sample(
84
+ descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', **args)
85
+ descriptors = torch.nn.functional.normalize(
86
+ descriptors.reshape(b, c, -1), p=2, dim=1)
87
+ return descriptors
88
+
89
+
90
+ class SuperPoint(nn.Module):
91
+ """SuperPoint Convolutional Detector and Descriptor
92
+
93
+ SuperPoint: Self-Supervised Interest Point Detection and
94
+ Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew
95
+ Rabinovich. In CVPRW, 2019. https://arxiv.org/abs/1712.07629
96
+
97
+ """
98
+ default_conf = {
99
+ 'descriptor_dim': 256,
100
+ 'nms_radius': 4,
101
+ 'max_num_keypoints': None,
102
+ 'detection_threshold': 0.0005,
103
+ 'remove_borders': 4,
104
+ }
105
+
106
+ preprocess_conf = {
107
+ **ImagePreprocessor.default_conf,
108
+ 'resize': 1024,
109
+ 'grayscale': True,
110
+ }
111
+
112
+ required_data_keys = ['image']
113
+
114
+ def __init__(self, **conf):
115
+ super().__init__()
116
+ self.conf = {**self.default_conf, **conf}
117
+
118
+ self.relu = nn.ReLU(inplace=True)
119
+ self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
120
+ c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256
121
+
122
+ self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1)
123
+ self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1)
124
+ self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1)
125
+ self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1)
126
+ self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1)
127
+ self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1)
128
+ self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1)
129
+ self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1)
130
+
131
+ self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
132
+ self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0)
133
+
134
+ self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
135
+ self.convDb = nn.Conv2d(
136
+ c5, self.conf['descriptor_dim'],
137
+ kernel_size=1, stride=1, padding=0)
138
+
139
+ url = "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_v1.pth"
140
+ self.load_state_dict(torch.hub.load_state_dict_from_url(url))
141
+
142
+ mk = self.conf['max_num_keypoints']
143
+ if mk is not None and mk <= 0:
144
+ raise ValueError('max_num_keypoints must be positive or None')
145
+
146
+ print('Loaded SuperPoint model')
147
+
148
+ def forward(self, data: dict) -> dict:
149
+ """ Compute keypoints, scores, descriptors for image """
150
+ for key in self.required_data_keys:
151
+ assert key in data, f'Missing key {key} in data'
152
+ image = data['image']
153
+ if image.shape[1] == 3: # RGB
154
+ scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1)
155
+ image = (image*scale).sum(1, keepdim=True)
156
+ # Shared Encoder
157
+ x = self.relu(self.conv1a(image))
158
+ x = self.relu(self.conv1b(x))
159
+ x = self.pool(x)
160
+ x = self.relu(self.conv2a(x))
161
+ x = self.relu(self.conv2b(x))
162
+ x = self.pool(x)
163
+ x = self.relu(self.conv3a(x))
164
+ x = self.relu(self.conv3b(x))
165
+ x = self.pool(x)
166
+ x = self.relu(self.conv4a(x))
167
+ x = self.relu(self.conv4b(x))
168
+
169
+ # Compute the dense keypoint scores
170
+ cPa = self.relu(self.convPa(x))
171
+ scores = self.convPb(cPa)
172
+ scores = torch.nn.functional.softmax(scores, 1)[:, :-1]
173
+ b, _, h, w = scores.shape
174
+ scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8)
175
+ scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8)
176
+ scores = simple_nms(scores, self.conf['nms_radius'])
177
+
178
+ # Discard keypoints near the image borders
179
+ if self.conf['remove_borders']:
180
+ pad = self.conf['remove_borders']
181
+ scores[:, :pad] = -1
182
+ scores[:, :, :pad] = -1
183
+ scores[:, -pad:] = -1
184
+ scores[:, :, -pad:] = -1
185
+
186
+ # Extract keypoints
187
+ best_kp = torch.where(scores > self.conf['detection_threshold'])
188
+ scores = scores[best_kp]
189
+
190
+ # Separate into batches
191
+ keypoints = [torch.stack(best_kp[1:3], dim=-1)[best_kp[0] == i]
192
+ for i in range(b)]
193
+ scores = [scores[best_kp[0] == i] for i in range(b)]
194
+
195
+ # Keep the k keypoints with highest score
196
+ if self.conf['max_num_keypoints'] is not None:
197
+ keypoints, scores = list(zip(*[
198
+ top_k_keypoints(k, s, self.conf['max_num_keypoints'])
199
+ for k, s in zip(keypoints, scores)]))
200
+
201
+ # Convert (h, w) to (x, y)
202
+ keypoints = [torch.flip(k, [1]).float() for k in keypoints]
203
+
204
+ # Compute the dense descriptors
205
+ cDa = self.relu(self.convDa(x))
206
+ descriptors = self.convDb(cDa)
207
+ descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1)
208
+
209
+ # Extract descriptors
210
+ descriptors = [sample_descriptors(k[None], d[None], 8)[0]
211
+ for k, d in zip(keypoints, descriptors)]
212
+
213
+ return {
214
+ 'keypoints': torch.stack(keypoints, 0),
215
+ 'keypoint_scores': torch.stack(scores, 0),
216
+ 'descriptors': torch.stack(descriptors, 0).transpose(-1, -2),
217
+ }
218
+
219
+ def extract(self, img: torch.Tensor, **conf) -> dict:
220
+ """ Perform extraction with online resizing"""
221
+ if img.dim() == 3:
222
+ img = img[None] # add batch dim
223
+ assert img.dim() == 4 and img.shape[0] == 1
224
+ shape = img.shape[-2:][::-1]
225
+ img, scales = ImagePreprocessor(
226
+ **{**self.preprocess_conf, **conf})(img)
227
+ feats = self.forward({'image': img})
228
+ feats['image_size'] = torch.tensor(shape)[None].to(img).float()
229
+ feats['keypoints'] = (feats['keypoints'] + .5) / scales[None] - .5
230
+ return feats
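A companion sketch for the SuperPoint wrapper, analogous to the DISK one above (the image path again assumes the LightGlue repo root; descriptors are 256-dimensional here):

```python
# Sketch: run SuperPoint.extract as defined above; RGB input is converted
# to grayscale inside forward(), and keypoints come back as (x, y) pixels.
import torch
from lightglue import SuperPoint
from lightglue.utils import load_image

extractor = SuperPoint(max_num_keypoints=1024, detection_threshold=0.0005).eval()
image = load_image('assets/sacre_coeur2.jpg')   # (3, H, W) in [0, 1]

with torch.no_grad():
    feats = extractor.extract(image)

print(feats['keypoints'].shape)    # (1, K, 2), K <= 1024, original-image pixels
print(feats['descriptors'].shape)  # (1, K, 256), L2-normalized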
third_party/LightGlue/lightglue/utils.py ADDED
@@ -0,0 +1,135 @@
1
+ from pathlib import Path
2
+ import torch
3
+ import kornia
4
+ import cv2
5
+ import numpy as np
6
+ from typing import Union, List, Optional, Callable, Tuple
7
+ import collections.abc as collections
8
+ from types import SimpleNamespace
9
+
10
+
11
+ class ImagePreprocessor:
12
+ default_conf = {
13
+         'resize': None,  # target edge length, None for no resizing
+         'side': 'long',
+         'interpolation': 'bilinear',
+         'align_corners': None,
+         'antialias': True,
+         'grayscale': False,  # convert rgb to grayscale
+     }
+
+     def __init__(self, **conf) -> None:
+         super().__init__()
+         self.conf = {**self.default_conf, **conf}
+         self.conf = SimpleNamespace(**self.conf)
+
+     def __call__(self, img: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+         """Resize and preprocess an image, return image and resize scale"""
+         h, w = img.shape[-2:]
+         if self.conf.resize is not None:
+             img = kornia.geometry.transform.resize(
+                 img, self.conf.resize, side=self.conf.side,
+                 antialias=self.conf.antialias,
+                 align_corners=self.conf.align_corners)
+         scale = torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
+         if self.conf.grayscale and img.shape[-3] == 3:
+             img = kornia.color.rgb_to_grayscale(img)
+         elif not self.conf.grayscale and img.shape[-3] == 1:
+             img = kornia.color.grayscale_to_rgb(img)
+         return img, scale
+
+
+ def map_tensor(input_, func: Callable):
+     string_classes = (str, bytes)
+     if isinstance(input_, string_classes):
+         return input_
+     elif isinstance(input_, collections.Mapping):
+         return {k: map_tensor(sample, func) for k, sample in input_.items()}
+     elif isinstance(input_, collections.Sequence):
+         return [map_tensor(sample, func) for sample in input_]
+     elif isinstance(input_, torch.Tensor):
+         return func(input_)
+     else:
+         return input_
+
+
+ def batch_to_device(batch: dict, device: str = 'cpu',
+                     non_blocking: bool = True):
+     """Move batch (dict) to device"""
+     def _func(tensor):
+         return tensor.to(device=device, non_blocking=non_blocking).detach()
+     return map_tensor(batch, _func)
+
+
+ def rbd(data: dict) -> dict:
+     """Remove batch dimension from elements in data"""
+     return {k: v[0] if isinstance(v, (torch.Tensor, np.ndarray, list)) else v
+             for k, v in data.items()}
+
+
+ def read_image(path: Path, grayscale: bool = False) -> np.ndarray:
+     """Read an image from path as RGB or grayscale"""
+     if not Path(path).exists():
+         raise FileNotFoundError(f'No image at path {path}.')
+     mode = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR
+     image = cv2.imread(str(path), mode)
+     if image is None:
+         raise IOError(f'Could not read image at {path}.')
+     if not grayscale:
+         image = image[..., ::-1]  # BGR to RGB
+     return image
+
+
+ def numpy_image_to_torch(image: np.ndarray) -> torch.Tensor:
+     """Normalize the image tensor and reorder the dimensions."""
+     if image.ndim == 3:
+         image = image.transpose((2, 0, 1))  # HxWxC to CxHxW
+     elif image.ndim == 2:
+         image = image[None]  # add channel axis
+     else:
+         raise ValueError(f'Not an image: {image.shape}')
+     return torch.tensor(image / 255., dtype=torch.float)
+
+
+ def resize_image(image: np.ndarray, size: Union[List[int], int],
+                  fn: str = 'max', interp: Optional[str] = 'area',
+                  ) -> Tuple[np.ndarray, Tuple[float, float]]:
+     """Resize an image to a fixed size, or according to max or min edge."""
+     h, w = image.shape[:2]
+
+     fn = {'max': max, 'min': min}[fn]
+     if isinstance(size, int):
+         scale = size / fn(h, w)
+         h_new, w_new = int(round(h*scale)), int(round(w*scale))
+         scale = (w_new / w, h_new / h)
+     elif isinstance(size, (tuple, list)):
+         h_new, w_new = size
+         scale = (w_new / w, h_new / h)
+     else:
+         raise ValueError(f'Incorrect new size: {size}')
+     mode = {
+         'linear': cv2.INTER_LINEAR,
+         'cubic': cv2.INTER_CUBIC,
+         'nearest': cv2.INTER_NEAREST,
+         'area': cv2.INTER_AREA}[interp]
+     return cv2.resize(image, (w_new, h_new), interpolation=mode), scale
+
+
+ def load_image(path: Path, resize: Optional[int] = None, **kwargs) -> torch.Tensor:
+     """Read an image from path and convert it to a normalized torch tensor."""
+     image = read_image(path)
+     if resize is not None:
+         image, _ = resize_image(image, resize, **kwargs)
+     return numpy_image_to_torch(image)
+
+
+ def match_pair(extractor, matcher,
+                image0: torch.Tensor, image1: torch.Tensor,
+                device: str = 'cpu', **preprocess):
+     """Match a pair of images (image0, image1) with an extractor and matcher"""
+     feats0 = extractor.extract(image0, **preprocess)
+     feats1 = extractor.extract(image1, **preprocess)
+     matches01 = matcher({'image0': feats0, 'image1': feats1})
+     data = [feats0, feats1, matches01]
+     # remove batch dim and move to target device
+     feats0, feats1, matches01 = [batch_to_device(rbd(x), device) for x in data]
+     return feats0, feats1, matches01
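
The helpers above compose into a full matching pipeline. A minimal usage sketch follows; it assumes the `SuperPoint` extractor and `LightGlue` matcher classes shipped elsewhere in this package, and the image paths are placeholders.

    import torch
    from lightglue import LightGlue, SuperPoint
    from lightglue.utils import load_image, match_pair

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    extractor = SuperPoint(max_num_keypoints=2048).eval().to(device)  # assumed extractor class
    matcher = LightGlue(features='superpoint').eval().to(device)      # assumed matcher class

    image0 = load_image('path/to/image0.jpg', resize=1024).to(device)  # placeholder paths
    image1 = load_image('path/to/image1.jpg', resize=1024).to(device)

    # extract, match, strip the batch dimension, and move everything to `device`
    feats0, feats1, matches01 = match_pair(extractor, matcher, image0, image1, device=device)
    matches = matches01['matches']  # (M, 2) indices into the two keypoint sets
    points0 = feats0['keypoints'][matches[:, 0]]
    points1 = feats1['keypoints'][matches[:, 1]]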
third_party/LightGlue/lightglue/viz2d.py ADDED
@@ -0,0 +1,161 @@
+ """
+ 2D visualization primitives based on Matplotlib.
+ 1) Plot images with `plot_images`.
+ 2) Call `plot_keypoints` or `plot_matches` any number of times.
+ 3) Optionally: save a .png or .pdf plot (nice in papers!) with `save_plot`.
+ """
+
+ import matplotlib
+ import matplotlib.pyplot as plt
+ import matplotlib.patheffects as path_effects
+ import numpy as np
+ import torch
+
+
+ def cm_RdGn(x):
+     """Custom colormap: red (0) -> yellow (0.5) -> green (1)."""
+     x = np.clip(x, 0, 1)[..., None]*2
+     c = x*np.array([[0, 1., 0]]) + (2-x)*np.array([[1., 0, 0]])
+     return np.clip(c, 0, 1)
+
+
+ def cm_BlRdGn(x_):
+     """Custom colormap: blue (-1) -> red (0.0) -> green (1)."""
+     x = np.clip(x_, 0, 1)[..., None]*2
+     c = x*np.array([[0, 1., 0, 1.]]) + (2-x)*np.array([[1., 0, 0, 1.]])
+
+     xn = -np.clip(x_, -1, 0)[..., None]*2
+     cn = xn*np.array([[0, 0.1, 1, 1.]]) + (2-xn)*np.array([[1., 0, 0, 1.]])
+     out = np.clip(np.where(x_[..., None] < 0, cn, c), 0, 1)
+     return out
+
+
+ def cm_prune(x_):
+     """Custom colormap to visualize pruning."""
+     if isinstance(x_, torch.Tensor):
+         x_ = x_.cpu().numpy()
+     max_i = max(x_)
+     norm_x = np.where(x_ == max_i, -1, (x_-1) / 9)
+     return cm_BlRdGn(norm_x)
+
+
+ def plot_images(imgs, titles=None, cmaps='gray', dpi=100, pad=.5,
+                 adaptive=True):
+     """Plot a set of images horizontally.
+     Args:
+         imgs: list of NumPy RGB (H, W, 3) or PyTorch RGB (3, H, W) or mono (H, W).
+         titles: a list of strings, as titles for each image.
+         cmaps: colormaps for monochrome images.
+         adaptive: whether the figure size should fit the image aspect ratios.
+     """
+     # conversion to (H, W, 3) for torch.Tensor
+     imgs = [img.permute(1, 2, 0).cpu().numpy()
+             if (isinstance(img, torch.Tensor) and img.dim() == 3) else img
+             for img in imgs]
+
+     n = len(imgs)
+     if not isinstance(cmaps, (list, tuple)):
+         cmaps = [cmaps] * n
+
+     if adaptive:
+         ratios = [i.shape[1] / i.shape[0] for i in imgs]  # W / H
+     else:
+         ratios = [4/3] * n
+     figsize = [sum(ratios)*4.5, 4.5]
+     fig, ax = plt.subplots(
+         1, n, figsize=figsize, dpi=dpi, gridspec_kw={'width_ratios': ratios})
+     if n == 1:
+         ax = [ax]
+     for i in range(n):
+         ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
+         ax[i].get_yaxis().set_ticks([])
+         ax[i].get_xaxis().set_ticks([])
+         ax[i].set_axis_off()
+         for spine in ax[i].spines.values():  # remove frame
+             spine.set_visible(False)
+         if titles:
+             ax[i].set_title(titles[i])
+     fig.tight_layout(pad=pad)
+
+
+ def plot_keypoints(kpts, colors='lime', ps=4, axes=None, a=1.0):
+     """Plot keypoints for existing images.
+     Args:
+         kpts: list of ndarrays of size (N, 2).
+         colors: a single color, or a list of colors (one per image).
+         ps: size of the keypoints as float.
+     """
+     if not isinstance(colors, list):
+         colors = [colors] * len(kpts)
+     if not isinstance(a, list):
+         a = [a] * len(kpts)
+     if axes is None:
+         axes = plt.gcf().axes
+     for ax, k, c, alpha in zip(axes, kpts, colors, a):
+         if isinstance(k, torch.Tensor):
+             k = k.cpu().numpy()
+         ax.scatter(k[:, 0], k[:, 1], c=c, s=ps, linewidths=0, alpha=alpha)
+
+
+ def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, a=1., labels=None,
+                  axes=None):
+     """Plot matches for a pair of existing images.
+     Args:
+         kpts0, kpts1: corresponding keypoints of size (N, 2).
+         color: color of each match, string or RGB tuple. Random if not given.
+         lw: width of the lines.
+         ps: size of the end points (no endpoint if ps=0).
+         axes: pair of axes to draw the matches on (defaults to the first two).
+         a: alpha opacity of the match lines.
+     """
+     fig = plt.gcf()
+     if axes is None:
+         ax = fig.axes
+         ax0, ax1 = ax[0], ax[1]
+     else:
+         ax0, ax1 = axes
+     if isinstance(kpts0, torch.Tensor):
+         kpts0 = kpts0.cpu().numpy()
+     if isinstance(kpts1, torch.Tensor):
+         kpts1 = kpts1.cpu().numpy()
+     assert len(kpts0) == len(kpts1)
+     if color is None:
+         color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist()
+     elif len(color) > 0 and not isinstance(color[0], (tuple, list)):
+         color = [color] * len(kpts0)
+
+     if lw > 0:
+         for i in range(len(kpts0)):
+             line = matplotlib.patches.ConnectionPatch(
+                 xyA=(kpts0[i, 0], kpts0[i, 1]), xyB=(kpts1[i, 0], kpts1[i, 1]),
+                 coordsA=ax0.transData, coordsB=ax1.transData,
+                 axesA=ax0, axesB=ax1,
+                 zorder=1, color=color[i], linewidth=lw, clip_on=True,
+                 alpha=a, label=None if labels is None else labels[i],
+                 picker=5.0)
+             line.set_annotation_clip(True)
+             fig.add_artist(line)
+
+     # freeze the axes to prevent the transforms from changing
+     ax0.autoscale(enable=False)
+     ax1.autoscale(enable=False)
+
+     if ps > 0:
+         ax0.scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
+         ax1.scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
+
+
+ def add_text(idx, text, pos=(0.01, 0.99), fs=15, color='w',
+              lcolor='k', lwidth=2, ha='left', va='top'):
+     """Add text to the axis at index `idx`, with an optional outline."""
+     ax = plt.gcf().axes[idx]
+     t = ax.text(*pos, text, fontsize=fs, ha=ha, va=va,
+                 color=color, transform=ax.transAxes)
+     if lcolor is not None:
+         t.set_path_effects([
+             path_effects.Stroke(linewidth=lwidth, foreground=lcolor),
+             path_effects.Normal()])
+
+
+ def save_plot(path, **kw):
+     """Save the current figure without any white margin."""
+     plt.savefig(path, bbox_inches='tight', pad_inches=0, **kw)
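
Following the three-step recipe in the module docstring, a short sketch of how these primitives chain together (reusing image0/image1 and the matched points0/points1 from the earlier example; the output file name is a placeholder):

    from lightglue import viz2d

    viz2d.plot_images([image0, image1])                          # 1) draw the pair side by side
    viz2d.plot_matches(points0, points1, color='lime', lw=0.2)   # 2) overlay the matches
    viz2d.add_text(0, f'{len(points0)} matches')
    viz2d.save_plot('matches.png')                               # 3) save without white margins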
third_party/LightGlue/requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch>=1.9.1
+ torchvision>=0.3
+ numpy
+ opencv-python
+ matplotlib
+ kornia>=0.6.11
third_party/LightGlue/setup.py ADDED
@@ -0,0 +1,27 @@
+ from pathlib import Path
+ from setuptools import setup
+
+ description = 'LightGlue'
+
+ with open(str(Path(__file__).parent / 'README.md'), 'r', encoding='utf-8') as f:
+     readme = f.read()
+ with open(str(Path(__file__).parent / 'requirements.txt'), 'r') as f:
+     dependencies = f.read().split('\n')
+
+ setup(
+     name='lightglue',
+     version='0.0',
+     packages=['lightglue'],
+     python_requires='>=3.6',
+     install_requires=dependencies,
+     author='Philipp Lindenberger, Paul-Edouard Sarlin',
+     description=description,
+     long_description=readme,
+     long_description_content_type="text/markdown",
+     url='https://github.com/cvg/LightGlue/',
+     classifiers=[
+         "Programming Language :: Python :: 3",
+         "License :: OSI Approved :: Apache Software License",
+         "Operating System :: OS Independent",
+     ],
+ )
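
With this setup.py in place, the vendored copy can presumably be installed in editable mode (e.g. `pip install -e third_party/LightGlue`) so that `import lightglue` resolves to this checkout rather than a separately installed package.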