Spaces:

Realcat
/

image-matching-webui

Running

image-matching-webui / hloc /extractors /netvlad.py

Vincentqyw

update: rord

49a0323 12 months ago

5.87 kB

	from pathlib import Path
	import subprocess
	import numpy as np
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import torchvision.models as models
	from scipy.io import loadmat

	from ..utils.base_model import BaseModel
	from .. import logger

	# Tolerance allowed when asserting that input image values lie in [0, 1].
	EPS = 1e-6


	class NetVLADLayer(nn.Module):
	    """Aggregate local descriptors into a single VLAD vector.

	    Each local descriptor is softly assigned to ``K`` learned centers via a
	    1x1 convolution followed by a softmax; the assignment-weighted residuals
	    to every center are summed over all locations and L2-normalized.

	    Args:
	        input_dim: Channel dimension ``D`` of the incoming local descriptors.
	        K: Number of cluster centers.
	        score_bias: Whether the assignment convolution has a bias term.
	        intranorm: If true, L2-normalize each center's residual vector
	            before the final global normalization.

	    Attributes:
	        output_dim: Length of the flattened descriptor, ``input_dim * K``.
	    """

	    def __init__(self, input_dim=512, K=64, score_bias=False, intranorm=True):
	        super().__init__()
	        # One assignment logit per center for every local descriptor.
	        self.score_proj = nn.Conv1d(input_dim, K, kernel_size=1, bias=score_bias)
	        # Learnable centers stored as a (input_dim, K) matrix.
	        self.centers = nn.Parameter(
	            nn.init.xavier_uniform_(torch.empty(input_dim, K))
	        )
	        self.intranorm = intranorm
	        self.output_dim = input_dim * K

	    def forward(self, x):
	        """Return the normalized VLAD descriptor of ``x``.

	        Args:
	            x: Local descriptors laid out as (batch, input_dim, N).

	        Returns:
	            Tensor of shape (batch, input_dim * K) with unit L2 norm per row.
	        """
	        batch_size = x.shape[0]
	        # Soft-assignment weights over the K centers: (batch, K, N).
	        soft_assign = F.softmax(self.score_proj(x), dim=1)
	        # Residual of every descriptor to every center: (batch, D, K, N).
	        residuals = x[:, :, None] - self.centers[None, :, :, None]
	        # Weighted sum over the N locations: (batch, D, K).
	        vlad = (soft_assign[:, None] * residuals).sum(dim=-1)
	        if self.intranorm:
	            # From the official MATLAB implementation.
	            vlad = F.normalize(vlad, dim=1)
	        return F.normalize(vlad.view(batch_size, -1), dim=1)


	class NetVLAD(BaseModel):
	    """Global image descriptor: VGG16 backbone + NetVLAD layer (+ whitening).

	    The weights are read from a MATLAB ``.mat`` checkpoint that is downloaded
	    on first use into ``<torch hub dir>/netvlad/``.

	    Configuration keys:
	        model_name: Key of ``dir_models`` selecting the checkpoint to load.
	        whiten: If true, append the 4096-d whitening layer from the checkpoint.
	    """

	    default_conf = {"model_name": "VGG16-NetVLAD-Pitts30K", "whiten": True}
	    required_inputs = ["image"]

	    # Models exported using
	    # https://github.com/uzh-rpg/netvlad_tf_open/blob/master/matlab/net_class2struct.m.
	    dir_models = {
	        "VGG16-NetVLAD-Pitts30K": "https://cvg-data.inf.ethz.ch/hloc/netvlad/Pitts30K_struct.mat",
	        "VGG16-NetVLAD-TokyoTM": "https://cvg-data.inf.ethz.ch/hloc/netvlad/TokyoTM_struct.mat",
	    }

	    def _init(self, conf):
	        """Build the network and load the MATLAB weights named by ``conf``.

	        Args:
	            conf: Mapping with the keys ``"model_name"`` and ``"whiten"``.

	        Raises:
	            AssertionError: If ``conf["model_name"]`` is not in ``dir_models``.
	        """
	        assert conf["model_name"] in self.dir_models, (
	            f"Unknown NetVLAD model `{conf['model_name']}`; "
	            f"expected one of {list(self.dir_models)}."
	        )

	        # Download the checkpoint.
	        checkpoint = Path(
	            torch.hub.get_dir(), "netvlad", conf["model_name"] + ".mat"
	        )
	        if not checkpoint.exists():
	            checkpoint.parent.mkdir(exist_ok=True, parents=True)
	            link = self.dir_models[conf["model_name"]]
	            logger.info(f"Downloading the NetVLAD model from `{link}`.")
	            # Use torch's downloader rather than shelling out to `wget`: it
	            # needs no external binary and writes to a temporary file that is
	            # moved into place only after the transfer completes, so a failed
	            # download cannot leave a truncated checkpoint that would pass the
	            # `exists()` check on the next run.
	            torch.hub.download_url_to_file(link, str(checkpoint))

	        # Create the network.
	        # Remove classification head.
	        backbone = list(models.vgg16().children())[0]
	        # Remove last ReLU + MaxPool2d.
	        self.backbone = nn.Sequential(*list(backbone.children())[:-2])

	        self.netvlad = NetVLADLayer()

	        if conf["whiten"]:
	            self.whiten = nn.Linear(self.netvlad.output_dim, 4096)

	        # Parse MATLAB weights using https://github.com/uzh-rpg/netvlad_tf_open
	        mat = loadmat(checkpoint, struct_as_record=False, squeeze_me=True)
	        mat_layers = mat["net"].layers

	        # CNN weights. The backbone modules and the MATLAB layers are walked
	        # in lockstep; only convolutions carry weights to copy.
	        for layer, mat_layer in zip(self.backbone.children(), mat_layers):
	            if isinstance(layer, nn.Conv2d):
	                w = mat_layer.weights[0]  # Shape: S x S x IN x OUT
	                b = mat_layer.weights[1]  # Shape: OUT
	                # Prepare for PyTorch - enforce float32 and right shape.
	                # w should have shape: OUT x IN x S x S
	                # b should have shape: OUT
	                w = torch.tensor(w).float().permute([3, 2, 0, 1])
	                b = torch.tensor(b).float()
	                # Update layer weights.
	                layer.weight = nn.Parameter(w)
	                layer.bias = nn.Parameter(b)

	        # NetVLAD weights (MATLAB layer 30).
	        score_w = mat_layers[30].weights[0]  # D x K
	        # centers are stored as opposite in official MATLAB code
	        center_w = -mat_layers[30].weights[1]  # D x K
	        # Prepare for PyTorch - make sure it is float32 and has right shape.
	        # score_w should have shape K x D x 1
	        # center_w should have shape D x K
	        score_w = torch.tensor(score_w).float().permute([1, 0]).unsqueeze(-1)
	        center_w = torch.tensor(center_w).float()
	        # Update layer weights.
	        self.netvlad.score_proj.weight = nn.Parameter(score_w)
	        self.netvlad.centers = nn.Parameter(center_w)

	        # Whitening weights (MATLAB layer 33).
	        if conf["whiten"]:
	            w = mat_layers[33].weights[0]  # Shape: 1 x 1 x IN x OUT
	            b = mat_layers[33].weights[1]  # Shape: OUT
	            # Prepare for PyTorch - make sure it is float32 and has right shape
	            w = torch.tensor(w).float().squeeze().permute([1, 0])  # OUT x IN
	            b = torch.tensor(b.squeeze()).float()  # Shape: OUT
	            # Update layer weights.
	            self.whiten.weight = nn.Parameter(w)
	            self.whiten.bias = nn.Parameter(b)

	        # Preprocessing parameters: per-channel mean taken from the checkpoint
	        # metadata, and a unit std so that only the mean is removed.
	        self.preprocess = {
	            "mean": mat["net"].meta.normalization.averageImage[0, 0],
	            "std": np.array([1, 1, 1], dtype=np.float32),
	        }

	    def _forward(self, data):
	        """Compute the global descriptor of a batch of images.

	        Args:
	            data: Mapping whose ``"image"`` entry is a (B, 3, H, W) tensor
	                with values in [0, 1] (up to ``EPS``).

	        Returns:
	            ``{"global_descriptor": desc}`` where ``desc`` has one
	            L2-normalized row per image.

	        Raises:
	            AssertionError: If the image does not have 3 channels or its
	                values fall outside [0, 1].
	        """
	        image = data["image"]
	        assert image.shape[1] == 3, "NetVLAD expects 3-channel images."
	        assert image.min() >= -EPS and image.max() <= 1 + EPS, (
	            "NetVLAD expects image values in [0, 1]."
	        )
	        image = torch.clamp(image * 255, 0.0, 255.0)  # Input should be 0-255.
	        mean = self.preprocess["mean"]
	        std = self.preprocess["std"]
	        image = image - image.new_tensor(mean).view(1, -1, 1, 1)
	        image = image / image.new_tensor(std).view(1, -1, 1, 1)

	        # Feature extraction: flatten the spatial grid into a list of
	        # local descriptors, (B, C, H', W') -> (B, C, H' * W').
	        descriptors = self.backbone(image)
	        b, c, _, _ = descriptors.size()
	        descriptors = descriptors.view(b, c, -1)

	        # NetVLAD layer.
	        descriptors = F.normalize(descriptors, dim=1)  # Pre-normalization.
	        desc = self.netvlad(descriptors)

	        # Whiten if needed. The layer only exists when conf["whiten"] was set.
	        if hasattr(self, "whiten"):
	            desc = self.whiten(desc)
	            desc = F.normalize(desc, dim=1)  # Final L2 normalization.

	        return {"global_descriptor": desc}