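"""Gradio demo for PQ3D ("Unifying 3D Vision-Language Understanding via
Promptable Queries"): select a scene mesh from assets/mesh, enter a text
query, and view the predicted object's mask rendered as a 3D model.
"""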
import os

import gradio as gr

from pq3d.inference import inference

MESH_DIR = 'assets/mesh'
DEFAULT_SCENE = 'scene0050_00'

# Scene names are derived from the .glb meshes shipped in assets/mesh.
MESH_NAMES = sorted([os.path.splitext(fname)[0] for fname in os.listdir(MESH_DIR)])

# Track the displayed scene so text queries use the user's current selection.
current_scan_id = {'value': DEFAULT_SCENE}


def change_scene(dropdown_scene: str):
    # Remember the selection and load the corresponding mesh.
    current_scan_id['value'] = dropdown_scene
    return os.path.join(MESH_DIR, f'{dropdown_scene}.glb')


with gr.Blocks(title='PQ3D Demo') as demo:
    gr.HTML(value="<h1 align='center'>Unifying 3D Vision-Language Understanding via Promptable Queries</h1>")

    with gr.Row():
        with gr.Column(scale=5):
            dropdown_scene = gr.Dropdown(
                choices=MESH_NAMES,
                value=DEFAULT_SCENE,
                interactive=True,
                label='Select a 3D scene',
            )
            model_3d = gr.Model3D(
                value=os.path.join(MESH_DIR, f'{DEFAULT_SCENE}.glb'),
                clear_color=[0.0, 0.0, 0.0, 0.0],
                label='3D Scene',
                camera_position=(80, 100, 6),
                height=659,
            )
            gr.HTML(
                """<center><strong>
                👆 SCROLL and DRAG on the 3D Scene
                to zoom in/out and rotate. Press CTRL and DRAG to pan.
                </strong></center>
                """
            )

    # Reload the displayed mesh whenever a new scene is picked.
    dropdown_scene.change(
        fn=change_scene,
        inputs=[dropdown_scene],
        outputs=[model_3d],
        queue=False,
    )
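
    # Text-query interface: inference_wrapper bridges the UI and
    # pq3d.inference, returning the predicted object's mask mesh
    # (served from assets/mask/) together with the model's text response.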
    def inference_wrapper(text):
        # Use the tracked selection rather than model_3d.value, which holds
        # only the initial mesh and never reflects later dropdown changes.
        scan_id = current_scan_id['value']
        inst_id, response = inference(scan_id, text)
        return f"assets/mask/{scan_id}/{scan_id}_obj_{inst_id}.glb", response

    gr.Interface(
        fn=inference_wrapper,
        inputs=['text'],
        outputs=[
            gr.Model3D(
                clear_color=[0.0, 0.0, 0.0, 0.0],
                camera_position=(80, 100, 6),
                label='3D Model',
            ),
            'text',
        ],
        examples=[['armchair'], ['Sofa'], ['left computer on the desk']],
        title='Enter a text query; the predicted object mask is shown in red',
    )

demo.queue().launch(share=True, allowed_paths=['assets'])
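# Typical usage (assuming this script is the repo's app.py):
#   python app.py
# share=True asks Gradio to create a temporary public URL; allowed_paths
# lets the server serve the mesh and mask files under assets/.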