import os
import re
from types import SimpleNamespace
from typing import Any, Iterator

import gradio as gr
import numpy as np
from detectron2 import engine

from inference import main, setup_cfg

# internal settings
NUM_PROCESSES = 1
CROP = False
SCORE_THRESHOLD = 0.8
MAX_PARTS = 5
ARGS = SimpleNamespace(
    config_file="configs/coco/instance-segmentation/swin/opd_v1_real.yaml",
    model="...",
    input_format="RGB",
    output=".output",
    cpu=True,
)


def predict(rgb_image: str, depth_image: str, intrinsics: np.ndarray, num_samples: int) -> list[Any]:
    def find_gifs(path: str) -> Iterator[str]:
        """Scrape folders for all generated gif files."""
        for file in os.listdir(path):
            sub_path = os.path.join(path, file)
            if os.path.isdir(sub_path):
                for image_file in os.listdir(sub_path):
                    if re.match(r".*\.gif$", image_file):
                        yield os.path.join(sub_path, image_file)

    cfg = setup_cfg(ARGS)
    engine.launch(
        main,
        NUM_PROCESSES,
        args=(
            cfg,
            rgb_image,
            depth_image,
            intrinsics,
            num_samples,
            CROP,
            SCORE_THRESHOLD,
        ),
    )

    # process output
    # TODO: may want to select these in decreasing order of score
    pre_outputs = list(find_gifs(ARGS.output))

    outputs = []
    for idx in range(MAX_PARTS):  # hide unused components
        if idx < len(pre_outputs):
            outputs.append(gr.update(value=pre_outputs[idx], visible=True))
        else:
            outputs.append(gr.update(visible=False))
    return outputs


def variable_outputs(idx):
    # NOTE: currently unused stub
    idx = int(idx)


with gr.Blocks() as app:
    gr.Markdown(
        """
        # OPDMulti Demo
        Upload an RGB image and its corresponding depth image to see the predicted range of motion of the detected parts.
        """
    )
    # TODO: add gr.Examples
    with gr.Row():
        rgb_image = gr.Image(
            image_mode="RGB", source="upload", type="filepath", label="RGB Image", show_label=True, interactive=True
        )
        depth_image = gr.Image(
            image_mode="L", source="upload", type="filepath", label="Depth Image", show_label=True, interactive=True
        )

    intrinsics = gr.Dataframe(
        value=[
            [214.85935872395834, 0.0, 0.0],
            [0.0, 214.85935872395834, 0.0],
            [125.90160319010417, 95.13726399739583, 1.0],
        ],
        row_count=(3, "fixed"),
        col_count=(3, "fixed"),
        datatype="number",
        type="numpy",
        label="Intrinsics matrix",
        show_label=True,
        interactive=True,
    )
    num_samples = gr.Number(
        value=10,
        label="Number of samples",
        show_label=True,
        interactive=True,
        precision=0,
        minimum=3,
        maximum=20,
    )

    submit_btn = gr.Button("Run model")

    # TODO: do we want to set a maximum limit on how many parts we render?
    # We could also show the number of components identified.
    outputs = [gr.Image(type="filepath", label=f"Part {idx + 1}", visible=False) for idx in range(MAX_PARTS)]

    # TODO: maybe need to use a queue here so we don't overload the instance
    submit_btn.click(
        fn=predict, inputs=[rgb_image, depth_image, intrinsics, num_samples], outputs=outputs, api_name="run_model"
    )

app.launch()