# xfeat / app.py
import cv2
import numpy as np
import gradio as gr
from modules.xfeat import XFeat
from utils import visualize_matches
HEADER = """
<div align="center">
<p>
<span style="font-size: 30px; vertical-align: bottom;"> XFeat: Accelerated Features for Lightweight Image Matching</span>
</p>
<p style="margin-top: -15px;">
<a href="https://arxiv.org/abs/2404.19174" target="_blank" style="color: grey;">ArXiv Paper</a>
&nbsp;
<a href="https://github.com/verlab/accelerated_features" target="_blank" style="color: grey;">GitHub Repository</a>
</p>
<p>
Upload two images 🖼️ of the same object or scene and identify matches between them 🚀
</p>
</div>
"""
ABSTRACT = """
We introduce a lightweight and accurate architecture for resource-efficient visual correspondence. Our method, dubbed XFeat (Accelerated Features), revisits fundamental design choices in convolutional neural networks for detecting, extracting, and matching local features. Our new model satisfies a critical need for fast and robust algorithms suitable to resource-limited devices. In particular, accurate image matching requires sufficiently large image resolutions -- for this reason, we keep the resolution as large as possible while limiting the number of channels in the network. Besides, our model is designed to offer the choice of matching at the sparse or semi-dense levels, each of which may be more suitable for different downstream applications, such as visual navigation and augmented reality. Our model is the first to offer semi-dense matching efficiently, leveraging a novel match refinement module that relies on coarse local descriptors. XFeat is versatile and hardware-independent, surpassing current deep learning-based local features in speed (up to 5x faster) with comparable or better accuracy, proven in pose estimation and visual localization. We showcase it running in real-time on an inexpensive laptop CPU without specialized hardware optimizations.
"""
def find_matches(image_0, image_1):
    # Gradio passes images as RGB arrays; convert to BGR, the channel order
    # OpenCV and the XFeat pipeline expect.
    image_0_bgr = cv2.cvtColor(image_0, cv2.COLOR_RGB2BGR)
    image_1_bgr = cv2.cvtColor(image_1, cv2.COLOR_RGB2BGR)

    xfeat = XFeat(weights="weights/xfeat.pt", top_k=4096)

    # Use the out-of-the-box function for extraction + MNN matching.
    match_kp0, match_kp1 = xfeat.match_xfeat(image_0_bgr, image_1_bgr, top_k=4096)

    # Reject outlier correspondences by robustly fitting a homography with
    # MAGSAC++. findHomography needs at least 4 correspondences and can
    # return a None mask when estimation fails.
    if len(match_kp0) < 4:
        raise gr.Error("Not enough matches found between the two images.")
    _, mask = cv2.findHomography(
        match_kp0, match_kp1, cv2.USAC_MAGSAC, 3.5, maxIters=1_000, confidence=0.999
    )
    if mask is None:
        raise gr.Error("Could not estimate a homography between the two images.")

    # Keep only the inliers of the estimated homography.
    keep = mask.flatten().astype(bool)
    match_kp0 = match_kp0[keep]
    match_kp1 = match_kp1[keep]
    num_filtered_matches = len(match_kp0)

    viz = visualize_matches(
        image_0,
        image_1,
        match_kp0,
        match_kp1,
        np.eye(num_filtered_matches),
        show_keypoints=True,
        highlight_unmatched=True,
        title=f"{num_filtered_matches} matches",
        line_width=2,
    )
    return viz
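
# For quick local testing without the UI, `find_matches` can be called
# directly. A minimal sketch, assuming the bundled example assets and that
# `visualize_matches` returns an RGB numpy array:
#
#   img_a = cv2.cvtColor(cv2.imread("assets/ref.png"), cv2.COLOR_BGR2RGB)
#   img_b = cv2.cvtColor(cv2.imread("assets/tgt.png"), cv2.COLOR_BGR2RGB)
#   canvas = find_matches(img_a, img_b)
#   cv2.imwrite("matches.png", cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR))
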
with gr.Blocks() as demo:
    gr.Markdown(HEADER)
    with gr.Accordion("Abstract (click to open)", open=False):
        gr.Image("assets/xfeat_arq.png")
        gr.Markdown(ABSTRACT)
    with gr.Row():
        image_1 = gr.Image()
        image_2 = gr.Image()
    with gr.Row():
        button = gr.Button(value="Find Matches")
        clear = gr.ClearButton(value="Clear")
    output = gr.Image()

    button.click(find_matches, [image_1, image_2], output)
    clear.add([image_1, image_2, output])

    gr.Examples(
        examples=[
            ["assets/ref.png", "assets/tgt.png"],
            ["assets/demo1.jpg", "assets/demo2.jpg"],
            ["assets/tower-1.webp", "assets/tower-2.jpeg"],
        ],
        inputs=[image_1, image_2],
        outputs=[output],
        fn=find_matches,
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.launch()