import logging
import math
import os
import time
from copy import deepcopy

import cv2
import torch
import torch.nn.functional as F

from alnet import ALNet
from soft_detect import DKD

configs = {
    "alike-t": {
        "c1": 8,
        "c2": 16,
        "c3": 32,
        "c4": 64,
        "dim": 64,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-t.pth"),
    },
    "alike-s": {
        "c1": 8,
        "c2": 16,
        "c3": 48,
        "c4": 96,
        "dim": 96,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-s.pth"),
    },
    "alike-n": {
        "c1": 16,
        "c2": 32,
        "c3": 64,
        "c4": 128,
        "dim": 128,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-n.pth"),
    },
    "alike-l": {
        "c1": 32,
        "c2": 64,
        "c3": 128,
        "c4": 128,
        "dim": 128,
        "single_head": False,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-l.pth"),
    },
}
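
# Illustrative usage of the presets above (a sketch, not part of the original
# file): a config dict unpacks straight into the constructor. The "cuda"
# device string and top_k value here are just examples; any torch device
# string and any DKD settings work.
#
#     model = ALike(**configs["alike-t"], device="cuda", top_k=500)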

class ALike(ALNet):
    def __init__(
        self,
        # ALNet backbone hyper-parameters
        c1: int = 32,
        c2: int = 64,
        c3: int = 128,
        c4: int = 128,
        dim: int = 128,
        single_head: bool = False,
        # DKD keypoint detection hyper-parameters
        radius: int = 2,
        top_k: int = 500,
        scores_th: float = 0.5,
        n_limit: int = 5000,
        device: str = "cpu",
        model_path: str = "",
    ):
        super().__init__(c1, c2, c3, c4, dim, single_head)
        self.radius = radius
        self.top_k = top_k
        self.n_limit = n_limit
        self.scores_th = scores_th
        self.dkd = DKD(
            radius=self.radius,
            top_k=self.top_k,
            scores_th=self.scores_th,
            n_limit=self.n_limit,
        )
        self.device = device

        # Load pretrained weights if a checkpoint path is given.
        if model_path != "":
            state_dict = torch.load(model_path, map_location=self.device)
            self.load_state_dict(state_dict)
            self.to(self.device)
            self.eval()
            logging.info(f"Loaded model parameters from {model_path}")
            n_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
            logging.info(f"Number of model parameters: {n_params / 1e3:.1f}K")

    def extract_dense_map(self, image, ret_dict=False):
        """
        :param image: torch.Tensor Bx3xHxW, values in [0, 1]
        :param ret_dict: return the results as a dict instead of a tuple
        :return: descriptor_map (B x dim x H x W, L2-normalized) and
                 scores_map (B x 1 x H x W)
        """
        # Pad the bottom/right with zeros so that H and W are integer
        # multiples of 32, as the backbone expects.
        device = image.device
        b, c, h, w = image.shape
        h_ = math.ceil(h / 32) * 32 if h % 32 != 0 else h
        w_ = math.ceil(w / 32) * 32 if w % 32 != 0 else w
        if h_ != h:
            h_padding = torch.zeros(b, c, h_ - h, w, device=device)
            image = torch.cat([image, h_padding], dim=2)
        if w_ != w:
            w_padding = torch.zeros(b, c, h_, w_ - w, device=device)
            image = torch.cat([image, w_padding], dim=3)

        scores_map, descriptor_map = super().forward(image)

        # Crop the zero padding back off.
        if h_ != h or w_ != w:
            descriptor_map = descriptor_map[:, :, :h, :w]
            scores_map = scores_map[:, :, :h, :w]

        # L2-normalize descriptors along the channel dimension.
        descriptor_map = F.normalize(descriptor_map, p=2, dim=1)

        if ret_dict:
            return {
                "descriptor_map": descriptor_map,
                "scores_map": scores_map,
            }
        else:
            return descriptor_map, scores_map
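
    # Illustrative use of extract_dense_map (a sketch; `model` is an assumed
    # ALike instance): the input is a Bx3xHxW float tensor in [0, 1].
    #
    #     x = torch.rand(1, 3, 480, 640)
    #     desc_map, score_map = model.extract_dense_map(x)
    #     # desc_map: 1 x dim x 480 x 640, L2-normalized along dim 1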

    def forward(self, img, image_size_max=99999, sort=False, sub_pixel=False):
        """
        :param img: np.array HxWx3, RGB
        :param image_size_max: maximum image size; larger images are resized
        :param sort: whether to sort keypoints by score, descending
        :param sub_pixel: whether to refine keypoints to sub-pixel accuracy
        :return: a dict with 'keypoints', 'descriptors', 'scores',
                 'scores_map', and 'time'
        """
        H, W, three = img.shape
        assert three == 3, "input image shape should be [HxWx3]"

        # Resize so that the longer side does not exceed image_size_max.
        image = deepcopy(img)
        max_hw = max(H, W)
        if max_hw > image_size_max:
            ratio = float(image_size_max / max_hw)
            image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio)

        # HxWx3 image -> 1x3xHxW float tensor in [0, 1]
        image = (
            torch.from_numpy(image)
            .to(self.device)
            .to(torch.float32)
            .permute(2, 0, 1)[None]
            / 255.0
        )

        start = time.time()

        with torch.no_grad():
            descriptor_map, scores_map = self.extract_dense_map(image)
            keypoints, descriptors, scores, _ = self.dkd(
                scores_map, descriptor_map, sub_pixel=sub_pixel
            )
            keypoints, descriptors, scores = keypoints[0], descriptors[0], scores[0]
            # DKD returns keypoints in normalized [-1, 1] coordinates; map them
            # back to pixel coordinates of the original (pre-resize) image.
            keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W - 1, H - 1]])

            if sort:
                indices = torch.argsort(scores, descending=True)
                keypoints = keypoints[indices]
                descriptors = descriptors[indices]
                scores = scores[indices]

        end = time.time()

        return {
            "keypoints": keypoints.cpu().numpy(),
            "descriptors": descriptors.cpu().numpy(),
            "scores": scores.cpu().numpy(),
            "scores_map": scores_map.cpu().numpy(),
            "time": end - start,
        }
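
# Illustrative sketch (not part of the original file): matching the outputs of
# two forward passes with mutual nearest neighbors. `model`, `img0`, and `img1`
# are assumed names; the images are HxWx3 RGB arrays.
#
#     res0, res1 = model(img0), model(img1)
#     d0 = torch.from_numpy(res0["descriptors"])  # N0 x dim, L2-normalized
#     d1 = torch.from_numpy(res1["descriptors"])  # N1 x dim
#     sim = d0 @ d1.t()                           # cosine similarity matrix
#     nn01 = sim.argmax(dim=1)                    # best match 0 -> 1
#     nn10 = sim.argmax(dim=0)                    # best match 1 -> 0
#     ids0 = torch.arange(d0.shape[0])
#     mutual = nn10[nn01] == ids0                 # keep mutual agreements only
#     matches = torch.stack([ids0[mutual], nn01[mutual]], dim=1)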

if __name__ == "__main__":
    import numpy as np
    from thop import profile

    net = ALike(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False)

    # Profile FLOPs and parameter count on a dummy 640x480 (HxW) image; the
    # extra inputs mirror forward's (img, image_size_max, sort) arguments.
    image = np.random.random((640, 480, 3)).astype(np.float32)
    flops, params = profile(net, inputs=(image, 9999, False), verbose=False)
    print("{:<30} {:<8.3f} GFLOPs".format("Computational complexity: ", flops / 1e9))
    print("{:<30} {:<8.1f} K".format("Number of parameters: ", params / 1e3))
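
    # Illustrative follow-up (assumes the corresponding checkpoint exists under
    # models/): run actual keypoint extraction on a uint8 RGB image.
    #
    #     model = ALike(**configs["alike-t"], device="cpu")
    #     img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
    #     pred = model(img, sort=True)
    #     print(pred["keypoints"].shape, pred["descriptors"].shape, pred["time"])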
|