gim-online

Running

File size: 7,241 Bytes

404d2af
 
 
 
 
 
 
 
 
 
8b973ee
 
 
 
404d2af
8b973ee
404d2af
 
 
 
 
8b973ee
404d2af
 
8b973ee
 
 
 
 
404d2af
 
 
 
 
 
 
8b973ee
404d2af
 
 
8b973ee
 
 
404d2af
8b973ee
404d2af
 
 
 
 
 
 
 
 
 
 
 
 
8b973ee
 
 
 
 
 
 
 
 
 
 
404d2af
 
8b973ee
 
404d2af
 
8b973ee
 
 
 
 
 
404d2af
8b973ee
404d2af
8b973ee
404d2af
8b973ee
404d2af
 
 
8b973ee
404d2af
 
8b973ee
404d2af
 
 
 
8b973ee
 
 
404d2af
8b973ee
 
404d2af
8b973ee
404d2af
 
 
 
 
 
 
 
 
 
8b973ee
 
 
 
404d2af
8b973ee
 
 
 
 
 
404d2af
8b973ee
 
404d2af
 
 
 
8b973ee
 
404d2af
8b973ee
404d2af
 
 
 
 
8b973ee
404d2af
 
 
 
 
 
 
 
 
 
 
8b973ee
 
404d2af
 
 
8b973ee
 
404d2af
 
8b973ee
 
 
 
 
 
 
 
 
404d2af
8b973ee
 
404d2af
8b973ee
404d2af
8b973ee
 
 
 
 
 
404d2af
8b973ee
 
 
 
 
 
404d2af
 
 
8b973ee
404d2af
 
8b973ee
 
404d2af
 
 
 
 
 
 
8b973ee

# Copyright 2019-present NAVER Corp.
# CC BY-NC-SA 3.0
# Available only for non-commercial use

import pdb
import torch
import torch.nn as nn
import torch.nn.functional as F


class BaseNet(nn.Module):
    """Takes a list of images as input, and returns for each image:
    - a pixelwise descriptor
    - a pixelwise confidence
    """

    def softmax(self, ux):
        if ux.shape[1] == 1:
            x = F.softplus(ux)
            return x / (1 + x)  # for sure in [0,1], much less plateaus than softmax
        elif ux.shape[1] == 2:
            return F.softmax(ux, dim=1)[:, 1:2]

    def normalize(self, x, ureliability, urepeatability):
        return dict(
            descriptors=F.normalize(x, p=2, dim=1),
            repeatability=self.softmax(urepeatability),
            reliability=self.softmax(ureliability),
        )

    def forward_one(self, x):
        raise NotImplementedError()

    def forward(self, imgs, **kw):
        res = [self.forward_one(img) for img in imgs]
        # merge all dictionaries into one
        res = {k: [r[k] for r in res if k in r] for k in {k for r in res for k in r}}
        return dict(res, imgs=imgs, **kw)


class PatchNet(BaseNet):
    """Helper class to construct a fully-convolutional network that
    extract a l2-normalized patch descriptor.
    """

    def __init__(self, inchan=3, dilated=True, dilation=1, bn=True, bn_affine=False):
        BaseNet.__init__(self)
        self.inchan = inchan
        self.curchan = inchan
        self.dilated = dilated
        self.dilation = dilation
        self.bn = bn
        self.bn_affine = bn_affine
        self.ops = nn.ModuleList([])

    def _make_bn(self, outd):
        return nn.BatchNorm2d(outd, affine=self.bn_affine)

    def _add_conv(
        self,
        outd,
        k=3,
        stride=1,
        dilation=1,
        bn=True,
        relu=True,
        k_pool=1,
        pool_type="max",
    ):
        # as in the original implementation, dilation is applied at the end of layer, so it will have impact only from next layer
        d = self.dilation * dilation
        if self.dilated:
            conv_params = dict(padding=((k - 1) * d) // 2, dilation=d, stride=1)
            self.dilation *= stride
        else:
            conv_params = dict(padding=((k - 1) * d) // 2, dilation=d, stride=stride)
        self.ops.append(nn.Conv2d(self.curchan, outd, kernel_size=k, **conv_params))
        if bn and self.bn:
            self.ops.append(self._make_bn(outd))
        if relu:
            self.ops.append(nn.ReLU(inplace=True))
        self.curchan = outd

        if k_pool > 1:
            if pool_type == "avg":
                self.ops.append(torch.nn.AvgPool2d(kernel_size=k_pool))
            elif pool_type == "max":
                self.ops.append(torch.nn.MaxPool2d(kernel_size=k_pool))
            else:
                print(f"Error, unknown pooling type {pool_type}...")

    def forward_one(self, x):
        assert self.ops, "You need to add convolutions first"
        for n, op in enumerate(self.ops):
            x = op(x)
        return self.normalize(x)


class L2_Net(PatchNet):
    """Compute a 128D descriptor for all overlapping 32x32 patches.
    From the L2Net paper (CVPR'17).
    """

    def __init__(self, dim=128, **kw):
        PatchNet.__init__(self, **kw)
        add_conv = lambda n, **kw: self._add_conv((n * dim) // 128, **kw)
        add_conv(32)
        add_conv(32)
        add_conv(64, stride=2)
        add_conv(64)
        add_conv(128, stride=2)
        add_conv(128)
        add_conv(128, k=7, stride=8, bn=False, relu=False)
        self.out_dim = dim


class Quad_L2Net(PatchNet):
    """Same than L2_Net, but replace the final 8x8 conv by 3 successive 2x2 convs."""

    def __init__(self, dim=128, mchan=4, relu22=False, **kw):
        PatchNet.__init__(self, **kw)
        self._add_conv(8 * mchan)
        self._add_conv(8 * mchan)
        self._add_conv(16 * mchan, stride=2)
        self._add_conv(16 * mchan)
        self._add_conv(32 * mchan, stride=2)
        self._add_conv(32 * mchan)
        # replace last 8x8 convolution with 3 2x2 convolutions
        self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(dim, k=2, stride=2, bn=False, relu=False)
        self.out_dim = dim


class Quad_L2Net_ConfCFS(Quad_L2Net):
    """Same than Quad_L2Net, with 2 confidence maps for repeatability and reliability."""

    def __init__(self, **kw):
        Quad_L2Net.__init__(self, **kw)
        # reliability classifier
        self.clf = nn.Conv2d(self.out_dim, 2, kernel_size=1)
        # repeatability classifier: for some reasons it's a softplus, not a softmax!
        # Why? I guess it's a mistake that was left unnoticed in the code for a long time...
        self.sal = nn.Conv2d(self.out_dim, 1, kernel_size=1)

    def forward_one(self, x):
        assert self.ops, "You need to add convolutions first"
        for op in self.ops:
            x = op(x)
        # compute the confidence maps
        ureliability = self.clf(x**2)
        urepeatability = self.sal(x**2)
        return self.normalize(x, ureliability, urepeatability)


class Fast_Quad_L2Net(PatchNet):
    """Faster version of Quad l2 net, replacing one dilated conv with one pooling to diminish image resolution thus increase inference time
    Dilation  factors and pooling:
        1,1,1, pool2, 1,1, 2,2, 4, 8, upsample2
    """

    def __init__(self, dim=128, mchan=4, relu22=False, downsample_factor=2, **kw):

        PatchNet.__init__(self, **kw)
        self._add_conv(8 * mchan)
        self._add_conv(8 * mchan)
        self._add_conv(
            16 * mchan, k_pool=downsample_factor
        )  # added avg pooling to decrease img resolution
        self._add_conv(16 * mchan)
        self._add_conv(32 * mchan, stride=2)
        self._add_conv(32 * mchan)

        # replace last 8x8 convolution with 3 2x2 convolutions
        self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(dim, k=2, stride=2, bn=False, relu=False)

        # Go back to initial image resolution with upsampling
        self.ops.append(
            torch.nn.Upsample(
                scale_factor=downsample_factor, mode="bilinear", align_corners=False
            )
        )

        self.out_dim = dim


class Fast_Quad_L2Net_ConfCFS(Fast_Quad_L2Net):
    """Fast r2d2 architecture"""

    def __init__(self, **kw):
        Fast_Quad_L2Net.__init__(self, **kw)
        # reliability classifier
        self.clf = nn.Conv2d(self.out_dim, 2, kernel_size=1)

        # repeatability classifier: for some reasons it's a softplus, not a softmax!
        # Why? I guess it's a mistake that was left unnoticed in the code for a long time...
        self.sal = nn.Conv2d(self.out_dim, 1, kernel_size=1)

    def forward_one(self, x):
        assert self.ops, "You need to add convolutions first"
        for op in self.ops:
            x = op(x)
        # compute the confidence maps
        ureliability = self.clf(x**2)
        urepeatability = self.sal(x**2)
        return self.normalize(x, ureliability, urepeatability)