Spaces:

Realcat
/

image-matching-webui

Running

image-matching-webui / third_party / d2net / extract_features.py

Vincentqyw

fix: roma

c74a070 11 months ago

No virus

4.07 kB

	import argparse

	import imageio
	import numpy as np
	import scipy
	import scipy.io
	import scipy.misc
	import scipy.ndimage
	import torch
	from tqdm import tqdm

	from lib.model_test import D2Net
	from lib.utils import preprocess_image
	from lib.pyramid import process_multiscale

	# Device selection: run on the first CUDA GPU when PyTorch reports that
	# CUDA is available, otherwise stay on the CPU.
	use_cuda = torch.cuda.is_available()
	if use_cuda:
		device = torch.device("cuda:0")
	else:
		device = torch.device("cpu")

	# Argument parsing
	parser = argparse.ArgumentParser(description="Feature extraction script")

	# Input: a text file listing one image path per line.
	parser.add_argument(
		"--image_list_file",
		type=str,
		required=True,
		help="path to a file containing a list of images to process",
	)

	# Preprocessing mode and model weights.
	parser.add_argument(
		"--preprocessing",
		type=str,
		default="caffe",
		help="image preprocessing (caffe or torch)",
	)
	parser.add_argument(
		"--model_file", type=str, default="models/d2_tf.pth", help="path to the full model"
	)

	# Size limits applied to every image before it is fed to the network.
	parser.add_argument(
		"--max_edge", type=int, default=1600, help="maximum image size at network input"
	)
	parser.add_argument(
		"--max_sum_edges",
		type=int,
		default=2800,
		help="maximum sum of image sizes at network input",
	)

	# Output naming and container format.
	parser.add_argument(
		"--output_extension", type=str, default=".d2-net", help="extension for the output"
	)
	# Restricting the choices makes argparse reject an unsupported type right
	# away, instead of failing only once the first image has been processed.
	parser.add_argument(
		"--output_type",
		type=str,
		default="npz",
		choices=("npz", "mat"),
		help="output file type (npz or mat)",
	)

	# Boolean switches; "store_true" defaults to False and "store_false"
	# defaults to True, so no explicit set_defaults call is required.
	parser.add_argument(
		"--multiscale",
		action="store_true",
		help="extract multiscale features",
	)
	parser.add_argument(
		"--no-relu",
		dest="use_relu",
		action="store_false",
		help="remove ReLU after the dense feature extraction module",
	)

	args = parser.parse_args()

	# Echo the effective configuration so it ends up in the run log.
	print(args)

	# Creating CNN model: instantiate D2Net with the weights file and ReLU
	# setting from the command line, and the CUDA flag detected above.
	model = D2Net(
		model_file=args.model_file,
		use_relu=args.use_relu,
		use_cuda=use_cuda,
	)

	# Process the file
	def _resize_image(image, scale):
		"""Return ``image`` rescaled by ``scale`` along its first two axes.

		Stands in for ``scipy.misc.imresize``, which is no longer shipped with
		SciPy. Uses order-1 (linear) interpolation, leaves the third axis
		untouched and returns a float array.

		Args:
			image: 3-D array whose first two axes are resized.
			scale: multiplicative factor applied to both of those axes.
		"""
		height, width = image.shape[:2]
		# Truncate the target size to whole pixels so the result never exceeds
		# the requested limit; clamp to 1 to avoid an empty output.
		new_height = max(int(height * scale), 1)
		new_width = max(int(width * scale), 1)
		zoom_factors = (new_height / height, new_width / width, 1)
		return scipy.ndimage.zoom(image, zoom_factors, order=1).astype("float")


	with open(args.image_list_file, "r") as f:
		lines = f.readlines()

	for line in tqdm(lines, total=len(lines)):
		path = line.strip()
		if not path:
			# Blank lines (e.g. a trailing newline) are not image paths.
			continue

		image = imageio.imread(path)
		if len(image.shape) == 2:
			# Single-channel image: replicate it into three identical channels.
			image = image[:, :, np.newaxis]
			image = np.repeat(image, 3, -1)

		# Shrink the image until both size limits are satisfied. Only the two
		# spatial axes are inspected, never the channel axis.
		resized_image = image
		longest_edge = max(resized_image.shape[:2])
		if longest_edge > args.max_edge:
			resized_image = _resize_image(resized_image, args.max_edge / longest_edge)
		edge_sum = sum(resized_image.shape[:2])
		if edge_sum > args.max_sum_edges:
			resized_image = _resize_image(resized_image, args.max_sum_edges / edge_sum)

		# Ratios used to map keypoints back to original image coordinates.
		fact_i = image.shape[0] / resized_image.shape[0]
		fact_j = image.shape[1] / resized_image.shape[1]

		input_image = preprocess_image(resized_image, preprocessing=args.preprocessing)

		# Single-scale extraction pins ``scales`` to [1]; multiscale leaves the
		# argument out so process_multiscale falls back to its own default.
		scale_kwargs = {} if args.multiscale else {"scales": [1]}
		with torch.no_grad():
			keypoints, scores, descriptors = process_multiscale(
				torch.tensor(
					input_image[np.newaxis, :, :, :].astype(np.float32), device=device
				),
				model,
				**scale_kwargs
			)

		# Input image coordinates
		keypoints[:, 0] *= fact_i
		keypoints[:, 1] *= fact_j
		# i, j -> u, v (swap the first two columns, keep the third)
		keypoints = keypoints[:, [1, 0, 2]]

		# Results are written next to the image as <path><output_extension>.
		if args.output_type == "npz":
			with open(path + args.output_extension, "wb") as output_file:
				np.savez(
					output_file, keypoints=keypoints, scores=scores, descriptors=descriptors
				)
		elif args.output_type == "mat":
			with open(path + args.output_extension, "wb") as output_file:
				scipy.io.savemat(
					output_file,
					{"keypoints": keypoints, "scores": scores, "descriptors": descriptors},
				)
		else:
			raise ValueError("Unknown output type.")