# Copyright 2019-present NAVER Corp.
# CC BY-NC-SA 3.0
# Available only for non-commercial use

import pdb

import torch
import torch.nn as nn
import torch.nn.functional as F


class BaseNet(nn.Module):
    """Takes a list of images as input, and returns for each image:
    - a pixelwise descriptor
    - a pixelwise confidence
    """

    def softmax(self, ux):
        """Squash an unnormalized confidence map into [0, 1].

        Args:
            ux: tensor of shape (B, C, H, W) with C in {1, 2}.

        Returns:
            A (B, 1, H, W) tensor of values in [0, 1].

        Raises:
            ValueError: if ``ux`` has neither 1 nor 2 channels.
        """
        if ux.shape[1] == 1:
            x = F.softplus(ux)
            return x / (1 + x)  # for sure in [0,1], much less plateaus than softmax
        elif ux.shape[1] == 2:
            # keep only the probability of the positive class
            return F.softmax(ux, dim=1)[:, 1:2]
        # The original code fell through and silently returned None; fail loudly.
        raise ValueError(f"expected 1 or 2 channels, got {ux.shape[1]}")

    def normalize(self, x, ureliability, urepeatability):
        """L2-normalize the descriptors and squash both confidence maps."""
        return dict(
            descriptors=F.normalize(x, p=2, dim=1),
            repeatability=self.softmax(urepeatability),
            reliability=self.softmax(ureliability),
        )

    def forward_one(self, x):
        # Subclasses implement the per-image forward pass.
        raise NotImplementedError()

    def forward(self, imgs, **kw):
        """Run ``forward_one`` on each image and merge the per-image dicts.

        Returns a dict mapping each output key to the list of per-image values,
        plus the original ``imgs`` and any extra keyword arguments.
        """
        res = [self.forward_one(img) for img in imgs]
        # merge all dictionaries into one
        res = {k: [r[k] for r in res if k in r] for k in {k for r in res for k in r}}
        return dict(res, imgs=imgs, **kw)


class PatchNet(BaseNet):
    """Helper class to construct a fully-convolutional network that
    extract a l2-normalized patch descriptor.
    """

    def __init__(self, inchan=3, dilated=True, dilation=1, bn=True, bn_affine=False):
        BaseNet.__init__(self)
        self.inchan = inchan
        self.curchan = inchan  # channel count of the last layer added so far
        self.dilated = dilated  # if True, strides are emulated with dilation
        self.dilation = dilation  # accumulated dilation factor
        self.bn = bn
        self.bn_affine = bn_affine
        self.ops = nn.ModuleList([])

    def _make_bn(self, outd):
        return nn.BatchNorm2d(outd, affine=self.bn_affine)

    def _add_conv(
        self,
        outd,
        k=3,
        stride=1,
        dilation=1,
        bn=True,
        relu=True,
        k_pool=1,
        pool_type="max",
    ):
        """Append a conv layer (plus optional BN, ReLU and pooling) to ``self.ops``.

        Args:
            outd: number of output channels.
            k: kernel size.
            stride: stride (emulated via dilation when ``self.dilated``).
            dilation: extra dilation factor for this layer.
            bn: add a BatchNorm layer (only if ``self.bn`` is also True).
            relu: add a ReLU activation.
            k_pool: pooling kernel size; > 1 appends a pooling layer.
            pool_type: "avg" or "max".

        Raises:
            ValueError: if ``pool_type`` is neither "avg" nor "max".
        """
        # as in the original implementation, dilation is applied at the end of
        # the layer, so it will have impact only from next layer
        d = self.dilation * dilation
        if self.dilated:
            # emulate the stride with dilation: spatial resolution is preserved
            conv_params = dict(padding=((k - 1) * d) // 2, dilation=d, stride=1)
            self.dilation *= stride
        else:
            conv_params = dict(padding=((k - 1) * d) // 2, dilation=d, stride=stride)
        self.ops.append(nn.Conv2d(self.curchan, outd, kernel_size=k, **conv_params))
        if bn and self.bn:
            self.ops.append(self._make_bn(outd))
        if relu:
            self.ops.append(nn.ReLU(inplace=True))
        self.curchan = outd
        if k_pool > 1:
            if pool_type == "avg":
                self.ops.append(torch.nn.AvgPool2d(kernel_size=k_pool))
            elif pool_type == "max":
                self.ops.append(torch.nn.MaxPool2d(kernel_size=k_pool))
            else:
                # was a bare print() in the original, which silently skipped
                # the pooling layer and corrupted the architecture
                raise ValueError(f"Error, unknown pooling type {pool_type}...")

    def forward_one(self, x):
        assert self.ops, "You need to add convolutions first"
        for op in self.ops:
            x = op(x)
        # NOTE(review): BaseNet.normalize() requires the two confidence maps,
        # but only ``x`` is passed here — so this base forward_one raises a
        # TypeError at runtime unless a subclass overrides normalize() or
        # forward_one() (the *_ConfCFS classes below do the latter).
        return self.normalize(x)


class L2_Net(PatchNet):
    """Compute a 128D descriptor for all overlapping 32x32 patches.
    From the L2Net paper (CVPR'17).
    """

    def __init__(self, dim=128, **kw):
        PatchNet.__init__(self, **kw)
        # scale all channel counts proportionally to the requested dim
        add_conv = lambda n, **kw: self._add_conv((n * dim) // 128, **kw)
        add_conv(32)
        add_conv(32)
        add_conv(64, stride=2)
        add_conv(64)
        add_conv(128, stride=2)
        add_conv(128)
        add_conv(128, k=7, stride=8, bn=False, relu=False)
        self.out_dim = dim


class Quad_L2Net(PatchNet):
    """Same than L2_Net, but replace the final 8x8 conv by 3 successive 2x2 convs."""

    def __init__(self, dim=128, mchan=4, relu22=False, **kw):
        PatchNet.__init__(self, **kw)
        self._add_conv(8 * mchan)
        self._add_conv(8 * mchan)
        self._add_conv(16 * mchan, stride=2)
        self._add_conv(16 * mchan)
        self._add_conv(32 * mchan, stride=2)
        self._add_conv(32 * mchan)
        # replace last 8x8 convolution with 3 2x2 convolutions
        self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(dim, k=2, stride=2, bn=False, relu=False)
        self.out_dim = dim


class Quad_L2Net_ConfCFS(Quad_L2Net):
    """Same than Quad_L2Net, with 2 confidence maps for repeatability and reliability."""

    def __init__(self, **kw):
        Quad_L2Net.__init__(self, **kw)
        # reliability classifier
        self.clf = nn.Conv2d(self.out_dim, 2, kernel_size=1)
        # repeatability classifier: for some reasons it's a softplus, not a softmax!
        # Why? I guess it's a mistake that was left unnoticed in the code for a long time...
        self.sal = nn.Conv2d(self.out_dim, 1, kernel_size=1)

    def forward_one(self, x):
        assert self.ops, "You need to add convolutions first"
        for op in self.ops:
            x = op(x)
        # compute the confidence maps from the squared descriptor activations
        ureliability = self.clf(x**2)
        urepeatability = self.sal(x**2)
        return self.normalize(x, ureliability, urepeatability)


class Fast_Quad_L2Net(PatchNet):
    """Faster version of Quad l2 net, replacing one dilated conv with one pooling to diminish image resolution thus increase inference time
    Dilation factors and pooling:
        1,1,1, pool2, 1,1, 2,2, 4, 8, upsample2
    """

    def __init__(self, dim=128, mchan=4, relu22=False, downsample_factor=2, **kw):
        PatchNet.__init__(self, **kw)
        self._add_conv(8 * mchan)
        self._add_conv(8 * mchan)
        self._add_conv(
            16 * mchan, k_pool=downsample_factor
        )  # added avg pooling to decrease img resolution
        self._add_conv(16 * mchan)
        self._add_conv(32 * mchan, stride=2)
        self._add_conv(32 * mchan)
        # replace last 8x8 convolution with 3 2x2 convolutions
        self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(dim, k=2, stride=2, bn=False, relu=False)
        # Go back to initial image resolution with upsampling
        self.ops.append(
            torch.nn.Upsample(
                scale_factor=downsample_factor, mode="bilinear", align_corners=False
            )
        )
        self.out_dim = dim


class Fast_Quad_L2Net_ConfCFS(Fast_Quad_L2Net):
    """Fast r2d2 architecture"""

    def __init__(self, **kw):
        Fast_Quad_L2Net.__init__(self, **kw)
        # reliability classifier
        self.clf = nn.Conv2d(self.out_dim, 2, kernel_size=1)
        # repeatability classifier: for some reasons it's a softplus, not a softmax!
        # Why? I guess it's a mistake that was left unnoticed in the code for a long time...
        self.sal = nn.Conv2d(self.out_dim, 1, kernel_size=1)

    def forward_one(self, x):
        assert self.ops, "You need to add convolutions first"
        for op in self.ops:
            x = op(x)
        # compute the confidence maps from the squared descriptor activations
        ureliability = self.clf(x**2)
        urepeatability = self.sal(x**2)
        return self.normalize(x, ureliability, urepeatability)