gim-online / app.py
xuelunshen's picture
Update
8b53ab6
import argparse
import gradio as gr
from common.utils import (
matcher_zoo,
ransac_zoo,
change_estimate_geom,
run_matching,
gen_examples,
DEFAULT_RANSAC_METHOD,
DEFAULT_SETTING_GEOMETRY,
DEFAULT_RANSAC_REPROJ_THRESHOLD,
DEFAULT_RANSAC_CONFIDENCE,
DEFAULT_RANSAC_MAX_ITER,
DEFAULT_MATCHING_THRESHOLD,
DEFAULT_SETTING_MAX_FEATURES,
DEFAULT_DEFAULT_KEYPOINT_THRESHOLD,
)
# Repository-style identifier (owner/name) for GIM. It is not referenced
# anywhere else in the visible part of this file.
model = "xuelunshen/gim"

# HTML header rendered at the top of the page through gr.Markdown: title,
# license notice, project/affiliation badges, and usage hints in English and
# Chinese. Each badge's alt text names the badge it belongs to so that screen
# readers and broken-image fallbacks describe the right link.
DESCRIPTION = """
<div style="display: flex; justify-content: center; align-items: center;">
<div style="width: 100%; text-align: center; font-size: 30px;">
<strong>GIM: Learning Generalizable Image Matcher From Internet Videos</strong>
</div>
</div>
<div style="display: flex; justify-content: center; padding: 10px; align-items: center;">
<div style="width: 75%; text-align: center; background-color: #FFFFE0;">
LICENSE: This repository is under the MIT License. This content/model is provided here for research purposes only. Any use beyond this is your sole responsibility and subject to your securing the necessary rights for your purpose.
</div>
</div>
<div style="display: flex; justify-content: center; align-items: flex-start; flex-wrap: wrap;">
<div><a href="https://iclr.cc/Conferences/2024"><img src="https://img.shields.io/badge/%F0%9F%8C%9F_ICLR'2024_Spotlight-37414c" alt='ICLR 2024 Spotlight'></a></div>&nbsp;
<div><a href="https://xuelunshen.com/gim"><img src="https://img.shields.io/badge/Project_Page-3A464E?logo=gumtree" alt='Project Page'></a></div>&nbsp;
<div><a href="https://github.com/xuelunshen/gim"><img src="https://img.shields.io/badge/Source_Code-black?logo=Github" alt='Github Source Code'></a></div>&nbsp;
<div><a href="https://arxiv.org/abs/2402.11095"><img src="https://img.shields.io/badge/arXiv-2402.11095-b31b1b?logo=arxiv" alt='arxiv'></a></div>&nbsp;
<div><a href="https://www.youtube.com/watch?v=FU_MJLD8LeY"><img src="https://img.shields.io/badge/Overview_Video-E33122?logo=Youtube" alt='Overview Video'></a></div>
</div>
<p></p>
<div style="display: flex; justify-content: center; align-items: flex-start; flex-wrap: wrap;">
<div><a href="https://en.xmu.edu.cn"><img src="https://img.shields.io/badge/Xiamen_University-183F9D?logo=Google%20Scholar&logoColor=white" alt='Xiamen University'></a></div>&nbsp;
<div><a href="https://www.intel.com"><img src="https://img.shields.io/badge/Labs-0071C5?logo=intel" alt='Intel'></a></div>&nbsp;
<div><a href="https://www.dji.com"><img src="https://img.shields.io/badge/DJI-131313?logo=DJI" alt='DJI'></a></div>
</div>
<div style="display: flex; justify-content: center; align-items: flex-start;">
<div style="width: 50%; padding: 10px;">
<p align="left">
<span style="color: black;">
You can click on the example images below or upload a pair of images. Running a match takes about 3.5 minutes (because the code is deployed on free CPU). Please wait patiently and keep the window in the foreground during operation
</span>
<br>
<span style="color: gray; font-size: smaller;">
Thanks to <a href="https://huggingface.co/spaces/Realcat/image-matching-webui" style="color: gray;">https://huggingface.co/spaces/Realcat/image-matching-webui</a> for providing the UI framework.
</span>
</p>
</div>
<div style="width: 50%; padding: 10px;">
<p align="left">
<span style="color: black;">
你可以点击下面的示例图片或者上传图片
</span>
<br>
<span style="color: black;">
运行一次匹配需要 3.5 分钟左右的时间 (因为代码部署在免费cpu上)
</span>
<br>
<span style="color: black;">
请你耐心等待, 运行期间保持窗口在最前面
</span>
<br>
<span style="color: gray; font-size: smaller;">
感谢 <a href="https://huggingface.co/spaces/Realcat/image-matching-webui" style="color: gray;">https://huggingface.co/spaces/Realcat/image-matching-webui</a> 提供的交互界面.
</span>
</p>
</div>
</div>
"""
def ui_change_imagebox(choice):
    """
    Build an update payload that clears an image box and switches its source.

    Args:
        choice: The source selection to apply. It is forwarded unchanged as
            the ``sources`` entry; callers in this file pass either the
            "Image Source" radio value or the literal ``"upload"``.

    Returns:
        dict: A payload with the keys ``value`` (always ``None``),
            ``sources`` (the given ``choice``) and ``__type__``
            (always ``"update"``).
    """
    # Start from the fields that describe the new component state ...
    payload = dict(value=None, sources=choice)
    # ... then tag the dictionary as an update rather than a plain value.
    payload["__type__"] = "update"
    return payload
def ui_reset_state(*args):
    """
    Produce the default value for every component the Reset button updates.

    Args:
        *args: Ignored. Accepted so the function can be registered as a
            callback that receives the current input values.

    Returns:
        tuple: 21 entries, positionally matching the ``reset_outputs`` list
            built in ``run``: the two cleared images, the matching defaults,
            the first matcher name, two image-box update payloads, the image
            source, the cleared result widgets, and the RANSAC / geometry
            defaults.
    """
    # The matcher selection falls back to whichever entry matcher_zoo lists first.
    first_matcher = list(matcher_zoo.keys())[0]

    cleared_images = (None, None)  # image0, image1
    matching_defaults = (
        DEFAULT_MATCHING_THRESHOLD,  # matching_threshold
        DEFAULT_SETTING_MAX_FEATURES,  # max_features
        DEFAULT_DEFAULT_KEYPOINT_THRESHOLD,  # keypoint_threshold
        first_matcher,  # matcher
    )
    image_source_defaults = (
        ui_change_imagebox("upload"),  # input image0
        ui_change_imagebox("upload"),  # input image1
        "upload",  # match_image_src
    )
    cleared_results = (
        None,  # keypoints
        None,  # raw matches
        None,  # ransac matches
        {},  # matches result info
        {},  # matcher config
        None,  # warped image
        {},  # geometry result
    )
    ransac_and_geometry_defaults = (
        DEFAULT_RANSAC_METHOD,  # ransac_method
        DEFAULT_RANSAC_REPROJ_THRESHOLD,  # ransac_reproj_threshold
        DEFAULT_RANSAC_CONFIDENCE,  # ransac_confidence
        DEFAULT_RANSAC_MAX_ITER,  # ransac_max_iter
        DEFAULT_SETTING_GEOMETRY,  # geometry
    )

    return (
        cleared_images
        + matching_defaults
        + image_source_defaults
        + cleared_results
        + ransac_and_geometry_defaults
    )
# Unused alternative CSS rule, kept for reference: "footer {visibility: hidden}"
def run(config):
    """
    Build the Gradio Blocks UI, wire its callbacks, and launch the app.

    The left column holds the inputs (matcher selection, the image pair, the
    advanced matching / RANSAC / geometry settings and the examples); the
    right column holds the result images and JSON panels.

    Args:
        config: Application configuration. It is not read anywhere in this
            function; the script entry point currently passes ``None``.

    Returns:
        None
    """
    with gr.Blocks(css="style.css") as app:
        gr.Markdown(DESCRIPTION)
        with gr.Row(equal_height=False):
            with gr.Column():
                with gr.Row():
                    # NOTE(review): Reset selects the first matcher_zoo key,
                    # which may differ from "gim" -- confirm they agree.
                    matcher_list = gr.Dropdown(
                        choices=list(matcher_zoo.keys()),
                        value="gim",
                        label="Matching Model",
                        interactive=True,
                    )
                    match_image_src = gr.Radio(
                        ["upload", "webcam"],
                        label="Image Source",
                        value="upload",
                    )
                with gr.Row():
                    input_image0 = gr.Image(
                        label="Image 0",
                        type="numpy",
                        image_mode="RGB",
                        height=300,
                        interactive=True,
                    )
                    input_image1 = gr.Image(
                        label="Image 1",
                        type="numpy",
                        image_mode="RGB",
                        height=300,
                        interactive=True,
                    )
                with gr.Row():
                    button_reset = gr.Button(value="Reset")
                    button_run = gr.Button(value="Run Match", variant="primary")
                # NOTE(review): the initial values below are literals, while
                # Reset applies the DEFAULT_* constants -- confirm they match.
                with gr.Accordion("Advanced Setting", open=False):
                    with gr.Accordion("Matching Setting", open=True):
                        with gr.Row():
                            match_setting_threshold = gr.Slider(
                                minimum=0.0,
                                maximum=1,
                                step=0.001,
                                label="Match thres.",
                                value=0.1,
                            )
                            match_setting_max_features = gr.Slider(
                                minimum=10,
                                maximum=10000,
                                step=10,
                                label="Max features",
                                value=1000,
                            )
                        # TODO: add line settings
                        with gr.Row():
                            detect_keypoints_threshold = gr.Slider(
                                minimum=0,
                                maximum=1,
                                step=0.001,
                                label="Keypoint thres.",
                                value=0.015,
                            )
                            # Displayed only: this slider is not part of
                            # `inputs`, so its value never reaches run_matching.
                            detect_line_threshold = gr.Slider(
                                minimum=0.1,
                                maximum=1,
                                step=0.01,
                                label="Line thres.",
                                value=0.2,
                            )
                        # matcher_lists = gr.Radio(
                        #     ["NN-mutual", "Dual-Softmax"],
                        #     label="Matcher mode",
                        #     value="NN-mutual",
                        # )
                    with gr.Accordion("RANSAC Setting", open=True):
                        with gr.Row(equal_height=False):
                            # enable_ransac = gr.Checkbox(label="Enable RANSAC")
                            # A concrete list (not a dict view) keeps this
                            # consistent with the matcher dropdown above.
                            ransac_method = gr.Dropdown(
                                choices=list(ransac_zoo.keys()),
                                value=DEFAULT_RANSAC_METHOD,
                                label="RANSAC Method",
                                interactive=True,
                            )
                        ransac_reproj_threshold = gr.Slider(
                            minimum=0.0,
                            maximum=12,
                            step=0.01,
                            label="Ransac Reproj threshold",
                            value=8.0,
                        )
                        ransac_confidence = gr.Slider(
                            minimum=0.0,
                            maximum=1,
                            step=0.00001,
                            label="Ransac Confidence",
                            value=0.99999,
                        )
                        ransac_max_iter = gr.Slider(
                            minimum=0.0,
                            maximum=100000,
                            step=100,
                            label="Ransac Iterations",
                            value=10000,
                        )
                    with gr.Accordion("Geometry Setting", open=False):
                        with gr.Row(equal_height=False):
                            # show_geom = gr.Checkbox(label="Show Geometry")
                            choice_estimate_geom = gr.Radio(
                                ["Fundamental", "Homography"],
                                label="Reconstruct Geometry",
                                value=DEFAULT_SETTING_GEOMETRY,
                            )
                # with gr.Column():
                # collect inputs (positional order is what run_matching receives)
                inputs = [
                    input_image0,
                    input_image1,
                    match_setting_threshold,
                    match_setting_max_features,
                    detect_keypoints_threshold,
                    matcher_list,
                    ransac_method,
                    ransac_reproj_threshold,
                    ransac_confidence,
                    ransac_max_iter,
                    choice_estimate_geom,
                ]
                # Add some examples
                with gr.Row():
                    # Example inputs
                    gr.Examples(
                        examples=gen_examples(),
                        inputs=inputs,
                        outputs=[],
                        fn=run_matching,
                        cache_examples=False,
                        label=(
                            "Examples (click one of the images below to Run"
                            " Match)"
                        ),
                    )
                with gr.Accordion("Open for More!", open=False):
                    gr.Markdown(
                        f"""
<h3>Supported Algorithms</h3>
{", ".join(matcher_zoo.keys())}
"""
                    )
            with gr.Column():
                output_keypoints = gr.Image(label="Keypoints", type="numpy")
                output_matches_raw = gr.Image(label="Raw Matches", type="numpy")
                output_matches_ransac = gr.Image(
                    label="Ransac Matches", type="numpy"
                )
                output_wrapped = gr.Image(
                    label="Wrapped Pair", type="numpy"
                )
                with gr.Accordion(
                    "Open for More: Matches Statistics", open=False
                ):
                    matches_result_info = gr.JSON(label="Matches Statistics")
                    matcher_info = gr.JSON(label="Match info")
                with gr.Accordion(
                    "Open for More: Geometry info", open=False
                ):
                    geometry_result = gr.JSON(
                        label="Reconstructed Geometry"
                    )
        # callbacks
        # Changing the image source clears both image boxes and switches
        # their source; one listener is registered per image box.
        match_image_src.change(
            fn=ui_change_imagebox,
            inputs=match_image_src,
            outputs=input_image0,
        )
        match_image_src.change(
            fn=ui_change_imagebox,
            inputs=match_image_src,
            outputs=input_image1,
        )
        # collect outputs (positional order is what run_matching must return)
        outputs = [
            output_keypoints,
            output_matches_raw,
            output_matches_ransac,
            matches_result_info,
            matcher_info,
            geometry_result,
            output_wrapped,
        ]
        # button callbacks
        button_run.click(fn=run_matching, inputs=inputs, outputs=outputs)
        # Reset images
        # Order must match the tuple returned by ui_reset_state. The two image
        # boxes appear twice: once for their value and once for the
        # source-switching update payload.
        reset_outputs = [
            input_image0,
            input_image1,
            match_setting_threshold,
            match_setting_max_features,
            detect_keypoints_threshold,
            matcher_list,
            input_image0,
            input_image1,
            match_image_src,
            output_keypoints,
            output_matches_raw,
            output_matches_ransac,
            matches_result_info,
            matcher_info,
            output_wrapped,
            geometry_result,
            ransac_method,
            ransac_reproj_threshold,
            ransac_confidence,
            ransac_max_iter,
            choice_estimate_geom,
        ]
        button_reset.click(
            fn=ui_reset_state, inputs=inputs, outputs=reset_outputs
        )
        # estimate geo
        # Switching the geometry type refreshes the warped pair and the
        # geometry panel through change_estimate_geom.
        choice_estimate_geom.change(
            fn=change_estimate_geom,
            inputs=[
                input_image0,
                input_image1,
                geometry_result,
                choice_estimate_geom,
            ],
            outputs=[output_wrapped, geometry_result],
        )
    # Function-scope import: datetime is only needed for this start-up log line.
    import datetime
    print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'app.queue().launch start')
    app.queue().launch(share=False)
if __name__ == "__main__":
    # Script entry point: read the command line, then start the UI.
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument(
        "--config_path", type=str, default="config.yaml", help="configuration file path"
    )
    args = cli_parser.parse_args()
    # --config_path is accepted but never loaded here; run() is started
    # without any configuration object.
    config = None
    run(config)