# -*- coding: utf-8 -*-
# @Author : xuelun
import cv2
import torch
import argparse
import warnings
import numpy as np
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F

from os.path import join

from dkm.models.model_zoo.DKMv3 import DKMv3
from gluefactory.superpoint import SuperPoint
from gluefactory.models.matchers.lightglue import LightGlue

DEFAULT_MIN_NUM_MATCHES = 4
DEFAULT_RANSAC_MAX_ITER = 10000
DEFAULT_RANSAC_CONFIDENCE = 0.999
DEFAULT_RANSAC_REPROJ_THRESHOLD = 8
DEFAULT_RANSAC_METHOD = "USAC_MAGSAC"

RANSAC_ZOO = {
    "RANSAC": cv2.RANSAC,
    "USAC_FAST": cv2.USAC_FAST,
    "USAC_MAGSAC": cv2.USAC_MAGSAC,
    "USAC_PROSAC": cv2.USAC_PROSAC,
    "USAC_DEFAULT": cv2.USAC_DEFAULT,
    "USAC_FM_8PTS": cv2.USAC_FM_8PTS,
    "USAC_ACCURATE": cv2.USAC_ACCURATE,
    "USAC_PARALLEL": cv2.USAC_PARALLEL,
}
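# Usage sketch: these names map directly to OpenCV's robust-estimation flags
# and are looked up when fitting geometry in compute_geom below, e.g.
#   flag = RANSAC_ZOO[DEFAULT_RANSAC_METHOD]  # == cv2.USAC_MAGSAC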


def read_image(path, grayscale=False):
    if grayscale:
        mode = cv2.IMREAD_GRAYSCALE
    else:
        mode = cv2.IMREAD_COLOR
    image = cv2.imread(str(path), mode)
    if image is None:
        raise ValueError(f'Cannot read image {path}.')
    if not grayscale and len(image.shape) == 3:
        image = image[:, :, ::-1]  # BGR to RGB
    return image
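# Usage sketch (hypothetical paths): unlike raw cv2.imread, read_image returns
# RGB channel order, so downstream tensors and plots agree on channels.
#   rgb = read_image('assets/demo/a1.png')                    # (H, W, 3) uint8, RGB
#   gray = read_image('assets/demo/a1.png', grayscale=True)   # (H, W) uint8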


def resize_image(image, size, interp):
    if interp.startswith('cv2_'):
        interp = getattr(cv2, 'INTER_' + interp[len('cv2_'):].upper())
        h, w = image.shape[:2]
        if interp == cv2.INTER_AREA and (w < size[0] or h < size[1]):
            # INTER_AREA is meant for shrinking; fall back to bilinear when upscaling
            interp = cv2.INTER_LINEAR
        resized = cv2.resize(image, size, interpolation=interp)
    else:
        raise ValueError(f'Unknown interpolation {interp}.')
    return resized
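# Usage sketch: `size` is (width, height) and `interp` follows the 'cv2_*'
# naming convention, e.g.
#   small = resize_image(img, (640, 480), 'cv2_area')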


def fast_make_matching_figure(data, b_id):
    color0 = (data['color0'][b_id].permute(1, 2, 0).cpu().detach().numpy() * 255).round().astype(np.uint8)  # (rH, rW, 3)
    color1 = (data['color1'][b_id].permute(1, 2, 0).cpu().detach().numpy() * 255).round().astype(np.uint8)  # (rH, rW, 3)
    kpts0 = data['mkpts0_f'].cpu().detach().numpy()
    kpts1 = data['mkpts1_f'].cpu().detach().numpy()
    inliers = data['inliers']

    rows = 2
    margin = 2
    (h0, w0), (h1, w1) = data['hw0_i'], data['hw1_i']
    h = max(h0, h1)
    H, W = margin * (rows + 1) + h * rows, margin * 3 + w0 + w1

    # white canvas: two rows, each showing image0 and image1 side by side
    out = 255 * np.ones((H, W), np.uint8)
    wx = [margin, margin + w0, margin + w0 + margin, margin + w0 + margin + w1]
    hx = lambda row: margin * row + h * (row - 1)
    out = np.stack([out] * 3, -1)

    # row 1: raw image pair
    sh = hx(row=1)
    out[sh: sh + h0, wx[0]: wx[1]] = color0
    out[sh: sh + h1, wx[2]: wx[3]] = color1

    # row 2: image pair with inlier keypoints drawn on top
    sh = hx(row=2)
    out[sh: sh + h0, wx[0]: wx[1]] = color0
    out[sh: sh + h1, wx[2]: wx[3]] = color1

    mkpts0, mkpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int)
    for (x0, y0), (x1, y1) in zip(mkpts0[inliers], mkpts1[inliers]):
        c = (0, 255, 0)
        cv2.circle(out, (x0, y0 + sh), 3, c, -1, lineType=cv2.LINE_AA)
        cv2.circle(out, (x1 + margin + w0, y1 + sh), 3, c, -1, lineType=cv2.LINE_AA)

    return out


def fast_make_matching_overlay(data, b_id):
    color0 = (data['color0'][b_id].permute(1, 2, 0).cpu().detach().numpy() * 255).round().astype(np.uint8)  # (rH, rW, 3)
    color1 = (data['color1'][b_id].permute(1, 2, 0).cpu().detach().numpy() * 255).round().astype(np.uint8)  # (rH, rW, 3)
    kpts0 = data['mkpts0_f'].cpu().detach().numpy()
    kpts1 = data['mkpts1_f'].cpu().detach().numpy()
    inliers = data['inliers']

    rows = 2
    margin = 2
    (h0, w0), (h1, w1) = data['hw0_i'], data['hw1_i']
    h = max(h0, h1)
    H, W = margin * (rows + 1) + h * rows, margin * 3 + w0 + w1

    # white canvas: same two-row layout as fast_make_matching_figure
    out = 255 * np.ones((H, W), np.uint8)
    wx = [margin, margin + w0, margin + w0 + margin, margin + w0 + margin + w1]
    hx = lambda row: margin * row + h * (row - 1)
    out = np.stack([out] * 3, -1)

    # row 1: raw image pair
    sh = hx(row=1)
    out[sh: sh + h0, wx[0]: wx[1]] = color0
    out[sh: sh + h1, wx[2]: wx[3]] = color1

    # row 2: inlier matches drawn as lines connecting the two images
    sh = hx(row=2)
    out[sh: sh + h0, wx[0]: wx[1]] = color0
    out[sh: sh + h1, wx[2]: wx[3]] = color1

    mkpts0, mkpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int)
    for (x0, y0), (x1, y1) in zip(mkpts0[inliers], mkpts1[inliers]):
        c = (0, 255, 0)
        cv2.line(out, (x0, y0 + sh), (x1 + margin + w0, y1 + sh), color=c, thickness=1, lineType=cv2.LINE_AA)
        cv2.circle(out, (x0, y0 + sh), 3, c, -1, lineType=cv2.LINE_AA)
        cv2.circle(out, (x1 + margin + w0, y1 + sh), 3, c, -1, lineType=cv2.LINE_AA)

    return out
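# Usage sketch: the two helpers share one canvas layout, so their outputs can
# be alpha-blended (as done in __main__ below) to overlay match lines softly.
#   canvas = fast_make_matching_figure(data, b_id=0)
#   overlay = fast_make_matching_overlay(data, b_id=0)
#   blend = cv2.addWeighted(canvas, 0.5, overlay, 0.5, 0)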


def preprocess(image: np.ndarray, grayscale: bool = False, resize_max: int = None,
               dfactor: int = 8):
    image = image.astype(np.float32, copy=False)
    size = image.shape[:2][::-1]  # (w, h) of the original image
    scale = np.array([1.0, 1.0])

    if resize_max:
        scale = resize_max / max(size)
        if scale < 1.0:
            size_new = tuple(int(round(x * scale)) for x in size)
            image = resize_image(image, size_new, 'cv2_area')
            scale = np.array(size) / np.array(size_new)

    if grayscale:
        assert image.ndim == 2, image.shape
        image = image[None]
    else:
        image = image.transpose((2, 0, 1))  # HxWxC to CxHxW
    image = torch.from_numpy(image / 255.0).float()

    # ensure that the size is divisible by dfactor
    size_new = tuple(map(
        lambda x: int(x // dfactor * dfactor),
        image.shape[-2:]))
    image = F.resize(image, size=size_new)
    scale = np.array(size) / np.array(size_new)[::-1]  # (w, h) ratio back to the original
    return image, scale
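# Usage sketch (hypothetical values): preprocess returns a CxHxW tensor in
# [0, 1] whose spatial size is a multiple of `dfactor`, plus a (w, h) scale
# that maps coordinates in the resized image back to the original resolution
# (this is what 'scale0'/'scale1' are used for in __main__ below).
#   img = read_image('assets/demo/a1.png')
#   tensor, scale = preprocess(img, resize_max=1024)
#   kpts_orig = kpts_resized * scale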


def compute_geom(data,
                 ransac_method=DEFAULT_RANSAC_METHOD,
                 ransac_reproj_threshold=DEFAULT_RANSAC_REPROJ_THRESHOLD,
                 ransac_confidence=DEFAULT_RANSAC_CONFIDENCE,
                 ransac_max_iter=DEFAULT_RANSAC_MAX_ITER,
                 ) -> dict:
    mkpts0 = data["mkpts0_f"].cpu().detach().numpy()
    mkpts1 = data["mkpts1_f"].cpu().detach().numpy()

    if len(mkpts0) < 2 * DEFAULT_MIN_NUM_MATCHES:
        return {}

    h0, w0 = data["hw0_i"]
    geo_info = {}

    # renamed from F to avoid shadowing torchvision.transforms.functional
    F_mat, _ = cv2.findFundamentalMat(
        mkpts0,
        mkpts1,
        method=RANSAC_ZOO[ransac_method],
        ransacReprojThreshold=ransac_reproj_threshold,
        confidence=ransac_confidence,
        maxIters=ransac_max_iter,
    )
    if F_mat is not None:
        geo_info["Fundamental"] = F_mat.tolist()

    H, _ = cv2.findHomography(
        mkpts1,
        mkpts0,
        method=RANSAC_ZOO[ransac_method],
        ransacReprojThreshold=ransac_reproj_threshold,
        confidence=ransac_confidence,
        maxIters=ransac_max_iter,
    )
    if H is not None:
        geo_info["Homography"] = H.tolist()

    if F_mat is not None:
        # uncalibrated rectification derived from the fundamental matrix
        _, H1, H2 = cv2.stereoRectifyUncalibrated(
            mkpts0.reshape(-1, 2),
            mkpts1.reshape(-1, 2),
            F_mat,
            imgSize=(w0, h0),
        )
        geo_info["H1"] = H1.tolist()
        geo_info["H2"] = H2.tolist()

    return geo_info
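# Sanity-check sketch (hypothetical): an inlier correspondence (x0, x1)
# should approximately satisfy the epipolar constraint x1^T F x0 = 0.
#   F_mat = np.array(geo_info["Fundamental"])
#   x0 = np.append(mkpts0[0], 1.0)  # homogeneous pixel in image 0
#   x1 = np.append(mkpts1[0], 1.0)  # homogeneous pixel in image 1
#   residual = x1 @ F_mat @ x0      # near zero for true inliers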


def wrap_images(img0, img1, geo_info, geom_type):
    img0 = img0[0].permute((1, 2, 0)).cpu().detach().numpy()[..., ::-1]
    img1 = img1[0].permute((1, 2, 0)).cpu().detach().numpy()[..., ::-1]
    h0, w0, _ = img0.shape
    h1, w1, _ = img1.shape

    rectified_image0 = img0
    rectified_image1 = None
    if geom_type == "Homography":
        H = np.array(geo_info["Homography"])
        rectified_image1 = cv2.warpPerspective(img1, H, (w0, h0))
        title = ["Image 0", "Image 1 - warped"]
    elif geom_type == "Fundamental":
        H1, H2 = np.array(geo_info["H1"]), np.array(geo_info["H2"])
        rectified_image0 = cv2.warpPerspective(img0, H1, (w0, h0))
        rectified_image1 = cv2.warpPerspective(img1, H2, (w1, h1))
        title = ["Image 0 - warped", "Image 1 - warped"]
    else:
        raise ValueError(f'Unknown geometry type {geom_type}.')

    fig = plot_images(
        [rectified_image0.squeeze(), rectified_image1.squeeze()],
        title,
        dpi=300,
    )
    img = fig2im(fig)
    plt.close(fig)
    return img


def plot_images(imgs, titles=None, cmaps="gray", dpi=100, size=5, pad=0.5):
    """Plot a set of images horizontally.
    Args:
        imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W).
        titles: a list of strings, as titles for each image.
        cmaps: colormaps for monochrome images.
        dpi: figure resolution in dots per inch.
        size: horizontal size (in inches) allotted to each image.
        pad: padding passed to fig.tight_layout.
    """
    n = len(imgs)
    if not isinstance(cmaps, (list, tuple)):
        cmaps = [cmaps] * n
    figsize = (size * n, size * 6 / 5) if size is not None else None
    fig, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi)
    if n == 1:
        ax = [ax]
    for i in range(n):
        ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
        ax[i].get_yaxis().set_ticks([])
        ax[i].get_xaxis().set_ticks([])
        ax[i].set_axis_off()
        for spine in ax[i].spines.values():  # remove frame
            spine.set_visible(False)
        if titles:
            ax[i].set_title(titles[i])
    fig.tight_layout(pad=pad)
    return fig


def fig2im(fig):
    """Convert a Matplotlib figure to an (H, W, 3) uint8 RGB array."""
    fig.canvas.draw()
    w, h = fig.canvas.get_width_height()
    buf_ndarray = np.frombuffer(fig.canvas.tostring_rgb(), dtype="u1")
    im = buf_ndarray.reshape(h, w, 3)
    return im
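# Usage sketch: render a figure into an array that cv2.imwrite can save
# (cv2 expects BGR, hence the channel flip).
#   fig = plot_images([rgb0, rgb1], titles=['Image 0', 'Image 1'])
#   im = fig2im(fig)
#   plt.close(fig)
#   cv2.imwrite('pair.png', im[..., ::-1])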


if __name__ == '__main__':
    model_zoo = ['gim_dkm', 'gim_lightglue']

    # model
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='gim_dkm', choices=model_zoo)
    args = parser.parse_args()

    # device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # load model
    ckpt = None
    model = None
    detector = None
    if args.model == 'gim_dkm':
        ckpt = 'gim_dkm_100h.ckpt'
        model = DKMv3(weights=None, h=672, w=896)
    elif args.model == 'gim_lightglue':
        ckpt = 'gim_lightglue_100h.ckpt'
        detector = SuperPoint({
            'max_num_keypoints': 2048,
            'force_num_keypoints': True,
            'detection_threshold': 0.0,
            'nms_radius': 3,
            'trainable': False,
        })
        model = LightGlue({
            'filter_threshold': 0.1,
            'flash': False,
            'checkpointed': True,
        })

    # weights path
    checkpoints_path = join('weights', ckpt)

    # load state dict
    if args.model == 'gim_dkm':
        state_dict = torch.load(checkpoints_path, map_location='cpu')
        if 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']
        for k in list(state_dict.keys()):
            if 'encoder.net.fc' in k:
                # drop the classification head, which DKMv3 does not use;
                # checking this first also avoids popping a key twice
                state_dict.pop(k)
            elif k.startswith('model.'):
                state_dict[k.replace('model.', '', 1)] = state_dict.pop(k)
        model.load_state_dict(state_dict)
    elif args.model == 'gim_lightglue':
        # the checkpoint stores both networks; split it into detector and matcher
        state_dict = torch.load(checkpoints_path, map_location='cpu')
        if 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']
        for k in list(state_dict.keys()):
            if k.startswith('model.'):
                state_dict.pop(k)
            if k.startswith('superpoint.'):
                state_dict[k.replace('superpoint.', '', 1)] = state_dict.pop(k)
        detector.load_state_dict(state_dict)

        state_dict = torch.load(checkpoints_path, map_location='cpu')
        if 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']
        for k in list(state_dict.keys()):
            if k.startswith('superpoint.'):
                state_dict.pop(k)
            if k.startswith('model.'):
                state_dict[k.replace('model.', '', 1)] = state_dict.pop(k)
        model.load_state_dict(state_dict)

    # eval mode
    if detector is not None:
        detector = detector.eval().to(device)
    model = model.eval().to(device)

    # input images
    name0 = 'a1'
    name1 = 'a2'
    postfix = '.png'
    image_dir = join('assets', 'demo')
    img_path0 = join(image_dir, name0 + postfix)
    img_path1 = join(image_dir, name1 + postfix)

    image0 = read_image(img_path0)
    image1 = read_image(img_path1)
    image0, scale0 = preprocess(image0)
    image1, scale1 = preprocess(image1)

    image0 = image0.to(device)[None]
    image1 = image1.to(device)[None]
    data = dict(color0=image0, color1=image1, image0=image0, image1=image1)

    if args.model == 'gim_dkm':
        # dense matching: sample sparse correspondences from the dense warp
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            dense_matches, dense_certainty = model.match(image0, image1)
            sparse_matches, mconf = model.sample(dense_matches, dense_certainty, 5000)

        height0, width0 = image0.shape[-2:]
        height1, width1 = image1.shape[-2:]

        # matches are in normalized [-1, 1] coordinates; convert to pixels
        kpts0 = sparse_matches[:, :2]
        kpts0 = torch.stack((
            width0 * (kpts0[:, 0] + 1) / 2, height0 * (kpts0[:, 1] + 1) / 2), dim=-1,)
        kpts1 = sparse_matches[:, 2:]
        kpts1 = torch.stack((
            width1 * (kpts1[:, 0] + 1) / 2, height1 * (kpts1[:, 1] + 1) / 2), dim=-1,)

        b_ids = torch.where(mconf[None])[0]
    elif args.model == 'gim_lightglue':
        # sparse matching: detect keypoints first, then match descriptors
        gray0 = read_image(img_path0, grayscale=True)
        gray1 = read_image(img_path1, grayscale=True)
        gray0 = preprocess(gray0, grayscale=True)[0]
        gray1 = preprocess(gray1, grayscale=True)[0]
        gray0 = gray0.to(device)[None]
        gray1 = gray1.to(device)[None]
        scale0 = torch.tensor(scale0).to(device)[None]
        scale1 = torch.tensor(scale1).to(device)[None]

        data.update(dict(gray0=gray0, gray1=gray1))

        size0 = torch.tensor(data["gray0"].shape[-2:][::-1])[None]
        size1 = torch.tensor(data["gray1"].shape[-2:][::-1])[None]
        data.update(dict(size0=size0, size1=size1))
        data.update(dict(scale0=scale0, scale1=scale1))

        pred = {}
        pred.update({k + '0': v for k, v in detector({
            "image": data["gray0"],
            "image_size": data["size0"],
        }).items()})
        pred.update({k + '1': v for k, v in detector({
            "image": data["gray1"],
            "image_size": data["size1"],
        }).items()})
        pred.update(model({**pred, **data,
                           **{'resize0': data['size0'], 'resize1': data['size1']}}))

        # rescale keypoints to the original image resolution
        kpts0 = torch.cat([kp * s for kp, s in zip(pred['keypoints0'], data['scale0'][:, None])])
        kpts1 = torch.cat([kp * s for kp, s in zip(pred['keypoints1'], data['scale1'][:, None])])

        m_bids = torch.nonzero(pred['keypoints0'].sum(dim=2) > -1)[:, 0]
        matches = pred['matches']
        bs = data['image0'].size(0)
        kpts0 = torch.cat([kpts0[m_bids == b_id][matches[b_id][..., 0]] for b_id in range(bs)])
        kpts1 = torch.cat([kpts1[m_bids == b_id][matches[b_id][..., 1]] for b_id in range(bs)])
        b_ids = torch.cat([m_bids[m_bids == b_id][matches[b_id][..., 0]] for b_id in range(bs)])
        mconf = torch.cat(pred['scores'])

    # robust fitting: keep matches consistent with a single fundamental matrix
    _, mask = cv2.findFundamentalMat(kpts0.cpu().detach().numpy(),
                                     kpts1.cpu().detach().numpy(),
                                     cv2.USAC_MAGSAC, ransacReprojThreshold=1.0,
                                     confidence=0.999999, maxIters=10000)
    mask = mask.ravel() > 0

    data.update({
        'hw0_i': image0.shape[-2:],
        'hw1_i': image1.shape[-2:],
        'mkpts0_f': kpts0,
        'mkpts1_f': kpts1,
        'm_bids': b_ids,
        'mconf': mconf,
        'inliers': mask,
    })

    # save visualization
    alpha = 0.5
    out = fast_make_matching_figure(data, b_id=0)
    overlay = fast_make_matching_overlay(data, b_id=0)
    out = cv2.addWeighted(out, 1 - alpha, overlay, alpha, 0)
    cv2.imwrite(join(image_dir, f'{name0}_{name1}_{args.model}_match.png'), out[..., ::-1])  # RGB to BGR

    geom_info = compute_geom(data)
    wrapped_images = wrap_images(image0, image1, geom_info, "Homography")
    cv2.imwrite(join(image_dir, f'{name0}_{name1}_{args.model}_warp.png'), wrapped_images)
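
# Run sketch (assuming this file is saved as demo.py, with checkpoints under
# ./weights and the demo pair under assets/demo):
#   python demo.py --model gim_dkm
#   python demo.py --model gim_lightglue
# Outputs land in assets/demo: a1_a2_<model>_match.png (match visualization)
# and a1_a2_<model>_warp.png (homography warp).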