#!/usr/bin/env python
"""Extract D2-Net features for every image listed in a text file.

For each image path listed (one per line) in ``--image_list_file``, the
script computes D2-Net keypoints, scores and descriptors and writes them
next to the image as ``<path><output_extension>`` in ``npz`` or ``mat``
format.
"""

import argparse

import imageio
import numpy as np
import scipy.io
import scipy.ndimage
import torch
from tqdm import tqdm

from lib.model_test import D2Net
from lib.pyramid import process_multiscale
from lib.utils import preprocess_image

# CUDA: run on the first GPU when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")


def _parse_args():
    """Build and parse the command-line interface.

    Returns:
        argparse.Namespace with all extraction options.
    """
    parser = argparse.ArgumentParser(description='Feature extraction script')
    parser.add_argument(
        '--image_list_file', type=str, required=True,
        help='path to a file containing a list of images to process'
    )
    parser.add_argument(
        '--preprocessing', type=str, default='caffe',
        help='image preprocessing (caffe or torch)'
    )
    parser.add_argument(
        '--model_file', type=str, default='models/d2_tf.pth',
        help='path to the full model'
    )
    parser.add_argument(
        '--max_edge', type=int, default=1600,
        help='maximum image size at network input'
    )
    parser.add_argument(
        '--max_sum_edges', type=int, default=2800,
        help='maximum sum of image sizes at network input'
    )
    parser.add_argument(
        '--output_extension', type=str, default='.d2-net',
        help='extension for the output'
    )
    parser.add_argument(
        '--output_type', type=str, default='npz',
        help='output file type (npz or mat)'
    )
    parser.add_argument(
        '--multiscale', dest='multiscale', action='store_true',
        help='extract multiscale features'
    )
    parser.set_defaults(multiscale=False)
    parser.add_argument(
        '--no-relu', dest='use_relu', action='store_false',
        help='remove ReLU after the dense feature extraction module'
    )
    parser.set_defaults(use_relu=True)
    return parser.parse_args()


def _load_image(path):
    """Read an image from *path* as an H x W x 3 uint-like array.

    Grayscale images are replicated to three channels; an alpha channel,
    if present, is dropped.
    """
    image = imageio.imread(path)
    if len(image.shape) == 2:
        # Grayscale: replicate the single channel three times.
        image = image[:, :, np.newaxis]
        image = np.repeat(image, 3, -1)
    elif image.shape[2] == 4:
        # RGBA: discard the alpha channel.
        image = image[:, :, : 3]
    return image


def _resize_to_limits(image, max_edge, max_sum_edges):
    """Downscale *image* so its longest side is <= *max_edge* and
    height + width is <= *max_sum_edges*.

    Uses bilinear interpolation via ``scipy.ndimage.zoom`` —
    ``scipy.misc.imresize`` was removed in SciPy 1.3. The image is never
    upscaled. Returns a float array.
    """
    resized = image
    # NOTE: limits are computed over the two spatial dimensions only,
    # never the channel dimension.
    if max(resized.shape[: 2]) > max_edge:
        factor = max_edge / max(resized.shape[: 2])
        resized = scipy.ndimage.zoom(resized, (factor, factor, 1), order=1)
    if sum(resized.shape[: 2]) > max_sum_edges:
        factor = max_sum_edges / sum(resized.shape[: 2])
        resized = scipy.ndimage.zoom(resized, (factor, factor, 1), order=1)
    return resized.astype('float')


def _save_features(output_path, output_type, keypoints, scores, descriptors):
    """Write the extracted features to *output_path*.

    Raises:
        ValueError: if *output_type* is neither ``npz`` nor ``mat``.
    """
    if output_type == 'npz':
        with open(output_path, 'wb') as output_file:
            np.savez(
                output_file,
                keypoints=keypoints,
                scores=scores,
                descriptors=descriptors
            )
    elif output_type == 'mat':
        with open(output_path, 'wb') as output_file:
            scipy.io.savemat(
                output_file,
                {
                    'keypoints': keypoints,
                    'scores': scores,
                    'descriptors': descriptors
                }
            )
    else:
        raise ValueError('Unknown output type.')


def main():
    """Run the full extraction pipeline over the image list."""
    args = _parse_args()
    print(args)

    # Creating CNN model
    model = D2Net(
        model_file=args.model_file,
        use_relu=args.use_relu,
        use_cuda=use_cuda
    )

    # Process the file
    with open(args.image_list_file, 'r') as f:
        lines = f.readlines()

    for line in tqdm(lines, total=len(lines)):
        path = line.strip()

        image = _load_image(path)
        resized_image = _resize_to_limits(
            image, args.max_edge, args.max_sum_edges
        )

        # Factors mapping resized-image coordinates back to the original.
        fact_i = image.shape[0] / resized_image.shape[0]
        fact_j = image.shape[1] / resized_image.shape[1]

        input_image = preprocess_image(
            resized_image,
            preprocessing=args.preprocessing
        )
        with torch.no_grad():
            batch = torch.tensor(
                input_image[np.newaxis, :, :, :].astype(np.float32),
                device=device
            )
            if args.multiscale:
                keypoints, scores, descriptors = process_multiscale(
                    batch, model
                )
            else:
                keypoints, scores, descriptors = process_multiscale(
                    batch, model, scales=[1]
                )

        # Input image coordinates
        keypoints[:, 0] *= fact_i
        keypoints[:, 1] *= fact_j
        # i, j -> u, v (row/column order to x/y order; scale stays last).
        keypoints = keypoints[:, [1, 0, 2]]

        _save_features(
            path + args.output_extension, args.output_type,
            keypoints, scores, descriptors
        )


if __name__ == '__main__':
    main()