# GuideFlow3D demo: demos/run_gradio_demo.py
import os
import sys
import spaces
import base64
import copy
import tempfile
from omegaconf import OmegaConf
from typing import Optional, Union, Tuple
import gradio as gr
GUIDEFLOW_YELLOW = "#ccad57"
GUIDEFLOW_BLUE = "#2459c2"
GUIDEFLOW_GREEN = "#8edf9f"
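# Set these before any heavy imports: spconv/cumm fall back to prebuilt kernels instead of
# JIT compilation, and the HF tokenizers fork-parallelism warning is silenced.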
os.environ["CUMM_DISABLE_JIT"] = "1"
os.environ["SPCONV_DISABLE_JIT"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Add project root to Python path
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
sys.path.insert(0, project_root)
# --- START XVFB GLOBALLY ---
# Check if we are in a headless environment and DISPLAY is not set
if os.environ.get("DISPLAY") is None:
print("[INFO] Starting Xvfb for headless rendering...")
from pyvirtualdisplay import Display
# Start Xvfb. visible=0 means headless.
    # size=(1920, 1080) matches the xvfb-run settings used previously.
display = Display(visible=0, size=(1920, 1080))
display.start()
# Ensure DISPLAY env var is set for subprocesses
if os.environ.get("DISPLAY") is None:
# PyVirtualDisplay usually sets this, but fallback if needed
os.environ["DISPLAY"] = f":{display.display}"
print(f"[INFO] Xvfb started on {os.environ['DISPLAY']}")
# --- LOGO SETUP (BASE64) ---
def image_to_base64(image_path):
"""Encodes an image to a base64 string for direct HTML embedding."""
if not os.path.exists(image_path):
return ""
with open(image_path, "rb") as img_file:
encoded_string = base64.b64encode(img_file.read()).decode('utf-8')
return f"data:image/png;base64,{encoded_string}"
logo_rel_path = os.path.join("demos", "assets", "logo.png")
logo_abs_path = os.path.join(project_root, logo_rel_path)
logo_src = image_to_base64(logo_abs_path)
BLENDER_LINK = 'https://download.blender.org/release/Blender3.0/blender-3.0.1-linux-x64.tar.xz'
BLENDER_INSTALLATION_PATH = '/tmp'
BLENDER_PATH = f'{BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64/blender'
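# Download and unpack a local Blender 3.0.1 build (plus the X11 libraries it needs);
# skipped if the binary is already present under /tmp.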
def _install_blender():
if not os.path.exists(BLENDER_PATH):
os.system('sudo apt-get update')
os.system('sudo apt-get install -y libxrender1 libxi6 libxkbcommon-x11-0 libsm6')
os.system(f'wget {BLENDER_LINK} -P {BLENDER_INSTALLATION_PATH}')
os.system(f'tar -xvf {BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64.tar.xz -C {BLENDER_INSTALLATION_PATH}')
_install_blender()
# Attempt import, handle failure gracefully for the demo shell
try:
from demos.pipeline_fn import GuideFlow3dPipeline
except ImportError:
GuideFlow3dPipeline = None
pipe = None
cfg = None
# Initialize Pipeline
try:
cfg_path = os.path.join(project_root, 'config', 'default.yaml')
if os.path.exists(cfg_path):
cfg = OmegaConf.load(cfg_path)
if GuideFlow3dPipeline:
pipe = GuideFlow3dPipeline().from_pretrained(cfg)
except Exception as e:
    print(f"Error initializing pipeline: {e}")
output_dir = os.path.join(os.getcwd(), "all_outputs")
os.makedirs(output_dir, exist_ok=True)
# --- MAPPING HELPERS ---
# Dictionary mapping static thumbnail images to actual GLB files
THUMB_TO_GLB = {
# Structure Mesh Examples
"example_data/thumbs/example1_thumb.png": "example_data/example1.glb",
# Reference Appearance Mesh Examples
"example_data/thumbs/B07QC84LP1_thumb.png": "example_data/B07QC84LP1.glb"
}
# Create a lookup based on basename to be robust against Gradio temp paths
THUMB_BASENAME_TO_GLB = {os.path.basename(k): v for k, v in THUMB_TO_GLB.items()}
def load_mesh_from_thumb(thumb_path: str) -> Optional[str]:
"""Callback to return the GLB path associated with a thumbnail."""
if not thumb_path:
return None
basename = os.path.basename(thumb_path)
return THUMB_BASENAME_TO_GLB.get(basename, None)
def _ensure_media_path(result: Union[str, bytes, os.PathLike, None], suffix: str = ".glb") -> Optional[str]:
    """Normalize the values returned by the pipeline into a file path on disk."""
    if result is None:
        return None
    if isinstance(result, (str, os.PathLike)):
        path = os.fspath(result)
        if not os.path.exists(path):
            raise gr.Error("Returned output path does not exist.")
        return path
    if isinstance(result, (bytes, bytearray)):
        # Raw bytes: persist them to a temporary file with the requested suffix.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        tmp.write(result)
        tmp.flush()
        tmp.close()
        return tmp.name
    raise gr.Error(f"Unsupported result type: {type(result).__name__}")
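# Example (hypothetical input): _ensure_media_path(b"...", suffix=".mp4") writes the raw
# bytes to a temporary .mp4 and returns its path; existing file paths are passed through.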
# @spaces.GPU(duration=360)
def on_run(
guidance_mode_state: str,
app_struct_mesh: Optional[str],
app_ref_mesh: Optional[str],
app_ref_image: Optional[str],
sim_struct_mesh: Optional[str],
sim_ref_text: Optional[str],
sim_ref_image: Optional[str],
target_up_label: str,
reference_up_label: str,
cfg_strength: float,
num_steps: int,
learning_rate: float,
) -> Tuple[str, Optional[str]]:
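    """Run the selected guidance mode and return (mesh_path, video_path).

    The active tab ("Similarity" or "Appearance") determines which inputs are
    used and whether pipe.run_self_similarity or pipe.run_appearance is called;
    the advanced settings are written into the pipeline config beforehand.
    """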
current_mode = guidance_mode_state.lower()
if current_mode == "appearance":
target_mesh_path = app_struct_mesh
reference_mesh_path = app_ref_mesh
reference_image_path = app_ref_image
reference_text = None
else:
target_mesh_path = sim_struct_mesh
reference_text = sim_ref_text
reference_image_path = sim_ref_image
reference_mesh_path = None
if not target_mesh_path:
raise gr.Error(f"Target Structure mesh is required for {current_mode} mode.")
if pipe is None:
raise gr.Error("Pipeline not initialized. Check logs.")
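    # Assemble the full argument set; keys the selected mode does not accept are
    # popped below before calling the pipeline.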
args = {
"structure_mesh": target_mesh_path,
"output_dir": output_dir,
"convert_target_yup_to_zup": target_up_label == "Z-up",
"convert_appearance_yup_to_zup": reference_up_label == "Z-up",
"appearance_mesh": reference_mesh_path,
"appearance_image": reference_image_path,
"appearance_text": (reference_text or "").strip(),
}
fn = None
if current_mode == "appearance":
if not reference_mesh_path:
raise gr.Error("Appearance mode requires a reference mesh.")
fn = pipe.run_appearance
args.pop("appearance_text", None)
else: # similarity
if not reference_text:
raise gr.Error("Similarity mode requires a text prompt.")
fn = pipe.run_self_similarity
args.pop("appearance_mesh", None)
args.pop("appearance_image", None)
args.pop("convert_appearance_yup_to_zup", None)
    if cfg:
        # Work on a copy so the UI sliders do not mutate the shared global config.
        updated_cfg = copy.deepcopy(cfg)
        updated_cfg.cfg_strength = cfg_strength
        updated_cfg.steps = num_steps
        updated_cfg.learning_rate = learning_rate
        pipe.cfg = updated_cfg
    try:
        result_mesh, result_video = fn(**args)
        mesh_path = _ensure_media_path(result_mesh, suffix=".glb")
        video_path = _ensure_media_path(result_video, suffix=".mp4")  # .mp4 assumed only if raw bytes are returned
        return mesh_path, video_path
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"Generation failed: {e}")
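# Hypothetical programmatic call, equivalent to pressing "Generate 3D Asset" on the
# Self-Similarity tab with the default advanced settings (requires an initialized pipeline):
#   mesh_path, video_path = on_run(
#       "Similarity", None, None, None,
#       "example_data/example1.glb", "a wooden chair", None,
#       "Y-up", "Y-up", 5.0, 300, 5e-4,
#   )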
# --- UI Styling & Header ---
css = f"""
body, .gradio-container {{
background-color: #ffffff !important;
color: #1f2937 !important;
}}
.dark body, .dark .gradio-container {{
background-color: #ffffff !important;
color: #1f2937 !important;
}}
h1, h2, h3, span, p {{
font-family: 'Inter', 'Roboto', sans-serif;
}}
.guideflow-header {{
display: flex;
flex-direction: column;
align-items: center;
margin-bottom: 1rem;
}}
.logo-row {{
display: flex;
align-items: baseline;
gap: 0.2rem;
}}
.logo-img {{
height: 4rem;
width: auto;
transform: translateY(0.5rem);
}}
.gradient-title {{
font-size: 3.5rem;
font-weight: 800;
background: linear-gradient(90deg, {GUIDEFLOW_GREEN}, {GUIDEFLOW_BLUE}, {GUIDEFLOW_YELLOW});
-webkit-background-clip: text;
background-clip: text;
color: transparent;
line-height: 1.2;
}}
.subtitle {{
font-size: 1.5rem;
font-weight: 600;
color: {GUIDEFLOW_YELLOW};
margin-top: 0.5rem;
text-align: center;
}}
.authors {{
font-size: 1rem;
color: #334155;
margin-top: 0.5rem;
}}
.affiliations {{
font-size: 0.9rem;
color: #6b7280;
margin-top: 0.2rem;
}}
.venue {{
font-size: 1.1rem;
font-weight: 700;
color: #111827;
margin-top: 0.5rem;
}}
.links a {{
color: {GUIDEFLOW_BLUE};
text-decoration: none;
margin: 0 0.5rem;
font-weight: 500;
}}
.links a:hover {{
text-decoration: underline;
}}
.demo-credit {{
font-size: 0.9rem;
color: #64748b;
margin-top: 0.5rem;
}}
.instructions-container {{
max-width: 800px;
margin: 0 auto 2rem auto;
text-align: left;
padding: 0 1rem;
}}
.input-row {{ align-items: flex-start; margin-bottom: 1rem; }}
"""
HEADER_HTML = f"""
<div class="guideflow-header">
<div class="logo-row">
<img src="{logo_src}" class="logo-img" alt="GuideFlow3D Logo" />
<span class="gradient-title">uideFlow3D</span>
</div>
<div class="subtitle">Optimization-Guided Rectified Flow For Appearance Transfer</div>
<div class="authors">
<a href="https://sayands.github.io/" target="_blank">Sayan Deb Sarkar</a><sup>1</sup> &nbsp;&nbsp;
<a href="https://vevenom.github.io/" target="_blank">Sinisa Stekovic</a><sup>2</sup> &nbsp;&nbsp;
<a href="https://vincentlepetit.github.io/" target="_blank">Vincent Lepetit</a><sup>2</sup> &nbsp;&nbsp;
<a href="https://ir0.github.io/" target="_blank">Iro Armeni</a><sup>1</sup>
</div>
<div class="affiliations">
<sup>1</sup>Stanford University &nbsp;&nbsp; <sup>2</sup>ENPC, IP Paris
</div>
<div class="venue">NeurIPS 2025</div>
<div class="links" style="margin-top:10px;">
<a href="https://arxiv.org/abs/2510.16136" target="_blank">Paper</a> |
<a href="https://sayands.github.io/guideflow3d" target="_blank">Project Page</a> |
<a href="https://github.com/sayands/guideflow3d" target="_blank">GitHub</a>
</div>
<div class="demo-credit">
Demo made by <a href="https://suvadityamuk.com" target="_blank" style="color: inherit; text-decoration: underline;">Suvaditya Mukherjee</a>
</div>
</div>
"""
INSTRUCTIONS_MD = """
<div class="instructions-container">
<h3>Instructions</h3>
<ol>
<li><strong>Upload a Structure Mesh (.glb):</strong> This defines the shape of your 3D object.</li>
<li><strong>Choose Guidance Mode:</strong> Select "Self-Similarity" (Text) or "Appearance" (Mesh/Image) using the tabs.</li>
<li><strong>Provide Reference:</strong> Enter a text prompt or upload a reference image/mesh.</li>
<li><strong>Run:</strong> Click "Generate 3D Asset" to create the result.</li>
</ol>
</div>
"""
# Example Data
EX_STRUCT_THUMBS = [["example_data/thumbs/example1_thumb.png"]]
EX_MESH_THUMBS = [["example_data/thumbs/B07QC84LP1_thumb.png"]]
EX_IMG = ["example_data/B07QC84LP1_orig.png"]
EX_TEXT = ["a wooden chair", "a marble statue", "a golden trophy"]
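# The Structure/Mesh examples are static thumbnails fed into hidden gr.Image components;
# load_mesh_from_thumb resolves each thumbnail to its .glb so the 3D viewer loads the real mesh.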
with gr.Blocks(
    title="GuideFlow3D",
    css=css,
    theme=gr.themes.Default(
        primary_hue="sky",
        secondary_hue="lime",
    ).set(
        body_background_fill="white",
        background_fill_primary="white",
        block_background_fill="white",
        input_background_fill="#f9fafb",
    ),
) as demo:
gr.HTML(HEADER_HTML)
gr.HTML(INSTRUCTIONS_MD)
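    # Tracks which guidance tab is active; updated by the tab .select() handlers
    # below and read by on_run to pick the pipeline entry point.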
guidance_mode_state = gr.State(value="Similarity")
with gr.Tabs() as guidance_tabs:
# --- TAB 1: SELF-SIMILARITY (LEFT) ---
with gr.TabItem("Self-Similarity", id="tab_similarity") as tab_sim:
gr.Markdown("### Similarity Editing Inputs")
with gr.Row(elem_classes="input-row"):
with gr.Column(scale=3):
sim_struct_mesh = gr.Model3D(label="Structure Mesh (.glb)", interactive=True, height=300)
with gr.Column(scale=2):
sim_struct_hidden = gr.Image(type="filepath", visible=False)
# sim_struct_mesh_examples = gr.Examples(examples=EX_STRUCT_THUMBS, inputs=sim_struct_hidden, label="Structure Examples")
sim_struct_mesh_examples = gr.Examples(
examples=EX_STRUCT_THUMBS,
inputs=sim_struct_hidden,
outputs=sim_struct_mesh, # Target the 3D viewer directly
fn=load_mesh_from_thumb, # Run the conversion function
run_on_click=True, # Force execution on click
label="Structure Examples"
)
with gr.Row(elem_classes="input-row"):
with gr.Column(scale=3):
sim_ref_text = gr.Textbox(label="Reference Text Prompt", placeholder="Describe the appearance...", lines=2)
with gr.Column(scale=2):
gr.Examples(examples=EX_TEXT, inputs=sim_ref_text, label="Prompt Examples")
with gr.Row(elem_classes="input-row"):
with gr.Column(scale=3):
sim_ref_image = gr.Image(label="Reference Appearance Image (Optional)", type="filepath", height=250)
with gr.Column(scale=2):
gr.Examples(examples=EX_IMG, inputs=sim_ref_image, label="Image Examples")
# --- TAB 2: APPEARANCE (RIGHT) ---
with gr.TabItem("Appearance", id="tab_appearance") as tab_app:
gr.Markdown("### Appearance Transfer Inputs")
with gr.Row(elem_classes="input-row"):
with gr.Column(scale=3):
app_struct_mesh = gr.Model3D(label="Structure Mesh (.glb)", interactive=True, height=300)
with gr.Column(scale=2):
app_struct_hidden = gr.Image(type="filepath", visible=False)
# app_struct_mesh_examples = gr.Examples(examples=EX_STRUCT_THUMBS, inputs=app_struct_hidden, label="Structure Examples")
app_struct_mesh_examples = gr.Examples(
examples=EX_STRUCT_THUMBS,
inputs=app_struct_hidden,
outputs=app_struct_mesh, # Target the 3D viewer directly
fn=load_mesh_from_thumb, # Run the conversion function
run_on_click=True, # Force execution on click
label="Structure Examples"
)
with gr.Row(elem_classes="input-row"):
with gr.Column(scale=3):
app_ref_image = gr.Image(label="Reference Appearance Image", type="filepath", height=250)
with gr.Column(scale=2):
gr.Examples(examples=EX_IMG, inputs=app_ref_image, label="Image Examples")
with gr.Row(elem_classes="input-row"):
with gr.Column(scale=3):
app_ref_mesh = gr.Model3D(label="Reference Appearance Mesh (.glb)", interactive=True, height=300)
with gr.Column(scale=2):
app_ref_mesh_hidden = gr.Image(type="filepath", visible=False)
# app_ref_mesh_examples = gr.Examples(examples=EX_MESH_THUMBS, inputs=app_ref_mesh_hidden, label="Mesh Examples")
app_ref_mesh_examples = gr.Examples(
examples=EX_MESH_THUMBS,
inputs=app_ref_mesh_hidden,
outputs=app_ref_mesh, # Target the 3D viewer directly
fn=load_mesh_from_thumb, # Run the conversion function
run_on_click=True, # Force execution on click
label="Mesh Examples"
)
# --- ADVANCED SETTINGS ---
with gr.Accordion("Advanced Settings", open=False):
with gr.Row():
target_up = gr.Radio(["Y-up", "Z-up"], value="Y-up", label="Target Mesh Up-Axis")
reference_up = gr.Radio(["Y-up", "Z-up"], value="Y-up", label="Ref Mesh Up-Axis")
with gr.Row():
cfg_strength = gr.Slider(0.1, 10.0, value=5.0, step=0.1, label="CFG Strength")
num_steps = gr.Slider(50, 1000, value=300, step=50, label="Diffusion Steps")
learning_rate = gr.Number(value=5e-4, label="Learning Rate")
# --- RUN BUTTON ---
with gr.Row():
run_btn = gr.Button("Generate 3D Asset", variant="primary", size="lg")
# --- OUTPUTS ---
gr.Markdown("### Results")
with gr.Row():
with gr.Column():
output_model = gr.Model3D(label="Output Mesh", interactive=False, clear_color=[1.0, 1.0, 1.0, 0.0])
with gr.Column():
output_video = gr.Video(label="Output Video", autoplay=True, loop=True, interactive=False)
# --- EVENT BINDING ---
# sim_struct_hidden.change(fn=load_mesh_from_thumb, inputs=sim_struct_hidden, outputs=sim_struct_mesh)
# app_struct_hidden.change(fn=load_mesh_from_thumb, inputs=app_struct_hidden, outputs=app_struct_mesh)
# app_ref_mesh_hidden.change(fn=load_mesh_from_thumb, inputs=app_ref_mesh_hidden, outputs=app_ref_mesh)
tab_sim.select(lambda: "Similarity", outputs=guidance_mode_state)
tab_app.select(lambda: "Appearance", outputs=guidance_mode_state)
run_btn.click(
fn=on_run,
inputs=[
guidance_mode_state,
app_struct_mesh, app_ref_mesh, app_ref_image,
sim_struct_mesh, sim_ref_text, sim_ref_image,
target_up, reference_up, cfg_strength, num_steps, learning_rate
],
outputs=[output_model, output_video]
)
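    # Remove Gradio's "dark" class on page load so the forced-white styling always applies.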
demo.load(None, None, None, js="() => { document.body.classList.remove('dark'); }")
if __name__ == "__main__":
    # demo.queue().launch(share=True, allowed_paths=[project_root], mcp_server=True)  # Useful for Colab runs
    demo.queue().launch(
        allowed_paths=[project_root],
        mcp_server=True,
    )