# ObjectInsertion / app.py
# Leema Krishna Murali
# Initial commit
# f3d0a26
import gradio as gr
import numpy as np
from PIL import Image
from visualizer import draw_box_on_frame, create_comparison_strip
from preview import preview_trajectory
from pipeline_adapter import (
extract_first_frame,
load_all_frames,
run_pipeline_motion_edit,
run_pipeline_insertion # ← need to add this
)
def build_interface():
    """Build the Gradio Blocks UI for the TRACE prototype and return it.

    The function:

    1. Tries to load the Qwen-Image-Edit pipeline once (best effort; on
       failure the UI still starts and the preview handler reports the
       problem to the user).
    2. Lays out the "Object Insertion" tab. The "Motion Path Edit" tab and
       the "Run Full Insertion Pipeline" button are currently disabled and
       kept below as commented-out code.
    3. Wires the video-upload and first-frame-preview event handlers.

    Returns:
        The constructed ``gr.Blocks`` app. The caller is responsible for
        calling ``launch()`` on it.
    """
    # Load Qwen-Image-Edit once at startup (not per-click — model is ~20GB)
    _qwen_edit_pipe = None
    try:
        from frame_editor import load_qwen_image_edit
        _qwen_edit_pipe = load_qwen_image_edit(use_lightning=True, device="cuda")
        print("Qwen-Image-Edit ready.")
    except Exception as e:
        # Deliberately broad: any load failure leaves the pipe as None so the
        # UI can still start; on_preview_qwen surfaces the failure on click.
        print(f"Qwen-Image-Edit not available: {e}")

    with gr.Blocks(title="TRACE Prototype", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# TRACE Prototype — Object Motion Editing")

        with gr.Tabs():
            # ── Tab 1: Motion Edit (existing) ─────────────────────────
            # with gr.Tab("Motion Path Edit"):
            #     gr.Markdown(
            #         "Move an **existing object** in the video "
            #         "to a new trajectory."
            #     )
            #     with gr.Row():
            #         with gr.Column():
            #             video_input_edit = gr.Video(label="Input Video")
            #             video_info_edit = gr.Markdown("")
            #         with gr.Column():
            #             first_frame_edit = gr.Image(
            #                 label="First Frame + Trajectory Preview",
            #                 interactive=False
            #             )
            #     gr.Markdown("**Start Box** — draw around the object")
            #     with gr.Row():
            #         sx1 = gr.Number(label="x1", value=100, precision=0)
            #         sy1 = gr.Number(label="y1", value=100, precision=0)
            #         sx2 = gr.Number(label="x2", value=200, precision=0)
            #         sy2 = gr.Number(label="y2", value=200, precision=0)
            #     gr.Markdown("**End Box** — where you want it to go")
            #     with gr.Row():
            #         ex1 = gr.Number(label="x1", value=500, precision=0)
            #         ey1 = gr.Number(label="y1", value=200, precision=0)
            #         ex2 = gr.Number(label="x2", value=600, precision=0)
            #         ey2 = gr.Number(label="y2", value=300, precision=0)
            #     prompt_edit = gr.Textbox(
            #         label="Scene Description",
            #         placeholder="a dog running in a park..."
            #     )
            #     with gr.Row():
            #         stage1_method = gr.Radio(
            #             choices=["linear", "cotracker"],
            #             value="linear",
            #             label="Stage 1 Method"
            #         )
            #         use_vace_edit = gr.Checkbox(
            #             label="Use VACE",
            #             value=False
            #         )
            #     run_edit_btn = gr.Button("Run Motion Edit", variant="primary")
            #     with gr.Row():
            #         output_video_edit = gr.Video(label="Output Video")
            #         metrics_edit = gr.Markdown("")
            #     comparison_edit = gr.Image(label="Frame Comparison", interactive=False)

            # ── Tab 2: Object Insertion (NEW — uses Qwen) ─────────────
            with gr.Tab("Object Insertion"):
                gr.Markdown(
                    "Insert a **new object** into the video using "
                    "Qwen to edit the first frame, then propagate."
                )
                with gr.Row():
                    with gr.Column():
                        video_input_ins = gr.Video(label="Input Video")
                        video_info_ins = gr.Markdown("")
                    with gr.Column():
                        first_frame_ins = gr.Image(
                            label="First Frame Preview",
                            interactive=False
                        )

                gr.Markdown("**Insertion Box** — where to place the new object")
                with gr.Row():
                    ix1 = gr.Number(label="x1", value=40, precision=0)
                    iy1 = gr.Number(label="y1", value=40, precision=0)
                    ix2 = gr.Number(label="x2", value=300, precision=0)
                    iy2 = gr.Number(label="y2", value=300, precision=0)

                # The end-box and scene-prompt inputs are only consumed by
                # on_run_insertion, whose button is currently disabled.
                gr.Markdown("**End Box** — where the object should arrive")
                with gr.Row():
                    iex1 = gr.Number(label="x1", value=500, precision=0)
                    iey1 = gr.Number(label="y1", value=200, precision=0)
                    iex2 = gr.Number(label="x2", value=600, precision=0)
                    iey2 = gr.Number(label="y2", value=300, precision=0)

                # ── The Qwen-specific inputs ───────────────────────────
                gr.Markdown("**Object Description** — what Qwen will insert")
                with gr.Row():
                    with gr.Column():
                        object_description = gr.Textbox(
                            label="Object to Insert (Qwen prompt)",
                            placeholder="a red helium balloon with a white string",
                            info="Qwen uses this to paint the object into frame 1"
                        )
                        scene_prompt = gr.Textbox(
                            label="Full Scene Prompt (for video synthesis)",
                            placeholder="a peaceful park scene with a red balloon"
                        )
                    with gr.Column():
                        gr.Markdown("Using **Qwen-Image-Edit-2511** for object insertion")
                        # use_vace_ins = gr.Checkbox(
                        #     label="Use VACE",
                        #     value=False
                        # )

                # ── Qwen output preview before running video ───────────
                gr.Markdown("**Step 1 Preview** — see Qwen's edit before running video")
                preview_qwen_btn = gr.Button(
                    "Preview First Frame Edit",
                    variant="secondary"
                )
                edited_frame_preview = gr.Image(
                    label="Qwen-Edited First Frame",
                    interactive=False
                )
                qwen_status = gr.Markdown("")

                # gr.Markdown("---")
                # run_ins_btn = gr.Button(
                #     "Run Full Insertion Pipeline",
                #     variant="primary"
                # )
                # with gr.Row():
                #     output_video_ins = gr.Video(label="Output Video")
                #     metrics_ins = gr.Markdown("")
                # comparison_ins = gr.Image(
                #     label="Frame Comparison",
                #     interactive=False
                # )

        # ── Wire Up Tab 1 ─────────────────────────────────────────────
        # _state = {"frames": None, "first_frame": None}
        #
        # def on_video_upload_edit(video_path):
        #     if video_path is None:
        #         return None, "Upload a video."
        #     first_frame = extract_first_frame(video_path)
        #     _state["first_frame"] = first_frame
        #     return Image.fromarray(first_frame), "Video loaded."
        #
        # def on_boxes_changed_edit(sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2):
        #     if _state["first_frame"] is None:
        #         return None
        #     from preview import preview_trajectory
        #     preview = preview_trajectory(
        #         _state["first_frame"],
        #         [sx1, sy1, sx2, sy2],
        #         [ex1, ey1, ex2, ey2]
        #     )
        #     return Image.fromarray(preview)
        #
        # video_input_edit.change(
        #     fn=on_video_upload_edit,
        #     inputs=[video_input_edit],
        #     outputs=[first_frame_edit, video_info_edit]
        # )
        # for inp in [sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2]:
        #     inp.change(
        #         fn=on_boxes_changed_edit,
        #         inputs=[sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2],
        #         outputs=[first_frame_edit]
        #     )
        #
        # def on_run_edit(video_path, sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2,
        #                 prompt, stage1_method, use_vace, progress=gr.Progress()):
        #     if video_path is None:
        #         raise gr.Error("Please upload a video first.")
        #     if sx2 <= sx1 or sy2 <= sy1:
        #         raise gr.Error("Start box is invalid: x2 must be > x1, y2 must be > y1")
        #     if ex2 <= ex1 or ey2 <= ey1:
        #         raise gr.Error("End box is invalid: x2 must be > x1, y2 must be > y1")
        #     def prog(frac, msg):
        #         progress(frac, desc=msg)
        #     output_path, result_frames, pred_boxes, metrics = \
        #         run_pipeline_motion_edit(
        #             video_path=video_path,
        #             start_box=[sx1, sy1, sx2, sy2],
        #             end_box=[ex1, ey1, ex2, ey2],
        #             prompt=prompt,
        #             stage1_method=stage1_method,
        #             use_vace=use_vace,
        #             progress_callback=prog
        #         )
        #     if _state["frames"] is None:
        #         _state["frames"] = load_all_frames(video_path)
        #     comparison = create_comparison_strip(
        #         _state["frames"],
        #         result_frames,
        #         pred_boxes,
        #         sample_ts=[0, 20, 40, 60, 80]
        #     )
        #     return output_path, Image.fromarray(comparison), metrics
        #
        # run_edit_btn.click(
        #     fn=on_run_edit,
        #     inputs=[
        #         video_input_edit,
        #         sx1, sy1, sx2, sy2,
        #         ex1, ey1, ex2, ey2,
        #         prompt_edit, stage1_method, use_vace_edit
        #     ],
        #     outputs=[output_video_edit, comparison_edit, metrics_edit]
        # )

        # ── Wire Up Tab 2 (Qwen insertion) ────────────────────────────
        # NOTE(review): this dict is created once per build_interface() call
        # and closed over by the handlers below, so every browser session of
        # the app reads and writes the same cached frames. Consider gr.State
        # if per-user isolation is needed.
        _ins_state = {"first_frame": None, "edited_frame": None}

        def _require_valid_box(box, label):
            """Raise ``gr.Error`` unless ``box`` is a usable [x1, y1, x2, y2].

            A cleared ``gr.Number`` field delivers ``None``, so that is
            checked before the coordinates are compared.
            """
            if any(coord is None for coord in box):
                raise gr.Error(
                    f"{label} box is incomplete: fill in x1, y1, x2 and y2."
                )
            x1, y1, x2, y2 = box
            if x2 <= x1 or y2 <= y1:
                raise gr.Error(
                    f"{label} box is invalid: x2 must be > x1, y2 must be > y1"
                )

        def on_video_upload_ins(video_path):
            """Cache and display the first frame of a newly selected video.

            Returns a ``(first-frame image or None, status text)`` pair for
            the first-frame preview and the video-info Markdown.
            """
            # Any change of the video (new upload or clear) invalidates both
            # cached frames; otherwise a later preview/run could silently use
            # frames that belong to a previous video.
            _ins_state["first_frame"] = None
            _ins_state["edited_frame"] = None
            if video_path is None:
                return None, "Upload a video."
            first_frame = extract_first_frame(video_path)
            _ins_state["first_frame"] = first_frame
            return Image.fromarray(first_frame), "Video loaded."

        def on_preview_qwen(
            video_path,
            ix1, iy1, ix2, iy2,
            object_description,
            progress=gr.Progress()
        ):
            """Edit the cached first frame with Qwen and return a preview.

            ``video_path`` is part of the wired inputs but is not read here;
            the frame comes from the cache filled by on_video_upload_ins.

            Returns a ``(preview image, status text)`` pair.

            Raises:
                gr.Error: if no video is loaded, the description is empty,
                    the Qwen pipeline is unavailable, or the box is invalid.
            """
            if _ins_state["first_frame"] is None:
                raise gr.Error("Upload a video first.")
            # Guard against None as well as blank text before calling strip().
            if not (object_description or "").strip():
                raise gr.Error("Enter an object description.")
            if _qwen_edit_pipe is None:
                raise gr.Error("Qwen-Image-Edit failed to load at startup. Check logs.")
            insertion_box = [ix1, iy1, ix2, iy2]
            _require_valid_box(insertion_box, "Insertion")

            progress(0.3, "Editing first frame with Qwen-Image-Edit...")
            from frame_editor import insert_object_qwen_edit
            edited = insert_object_qwen_edit(
                first_frame=_ins_state["first_frame"],
                box=insertion_box,
                object_description=object_description,
                pipe=_qwen_edit_pipe,
            )
            # Cached so the full insertion pipeline can reuse this edit.
            _ins_state["edited_frame"] = edited
            preview = draw_box_on_frame(
                edited,
                insertion_box,
                color=(255, 220, 0),
                label="inserted here"
            )
            progress(1.0, "Done!")
            return (
                Image.fromarray(preview),
                "First frame edited."
            )

        def on_run_insertion(
            video_path,
            ix1, iy1, ix2, iy2,
            iex1, iey1, iex2, iey2,
            scene_prompt,
            use_vace_ins,
            progress=gr.Progress()
        ):
            """Run the full insertion pipeline using the cached edited frame.

            Not wired to any component at the moment (its button is
            commented out below).

            Returns an ``(output video path, comparison image, metrics)``
            triple.

            Raises:
                gr.Error: if no video is selected, the first-frame edit has
                    not been run yet, or either box is invalid.
            """
            if video_path is None:
                raise gr.Error("Please upload a video first.")
            if _ins_state["edited_frame"] is None:
                raise gr.Error(
                    "Run 'Preview First Frame Edit' first — "
                    "the edited frame is needed as appearance reference."
                )
            start_box = [ix1, iy1, ix2, iy2]
            end_box = [iex1, iey1, iex2, iey2]
            _require_valid_box(start_box, "Insertion")
            _require_valid_box(end_box, "End")

            output_path, result_frames, pred_boxes, metrics = \
                run_pipeline_insertion(
                    video_path=video_path,
                    edited_first_frame=_ins_state["edited_frame"],
                    start_box=start_box,
                    end_box=end_box,
                    prompt=scene_prompt,
                    use_vace=use_vace_ins,
                    progress_callback=lambda f, m: progress(f, desc=m)
                )
            frames = load_all_frames(video_path)
            comparison = create_comparison_strip(
                frames, result_frames, pred_boxes
            )
            return (
                output_path,
                Image.fromarray(comparison),
                metrics
            )

        # Event listeners must be registered inside the Blocks context.
        video_input_ins.change(
            fn=on_video_upload_ins,
            inputs=[video_input_ins],
            outputs=[first_frame_ins, video_info_ins]
        )
        preview_qwen_btn.click(
            fn=on_preview_qwen,
            inputs=[
                video_input_ins,
                ix1, iy1, ix2, iy2,
                object_description,
            ],
            outputs=[edited_frame_preview, qwen_status]
        )
        # run_ins_btn.click(
        #     fn=on_run_insertion,
        #     inputs=[
        #         video_input_ins,
        #         ix1, iy1, ix2, iy2,
        #         iex1, iey1, iex2, iey2,
        #         scene_prompt,
        #         use_vace_ins
        #     ],
        #     outputs=[output_video_ins, comparison_ins, metrics_ins]
        # )

    return demo
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then serve it.
    # share=True asks Gradio to also expose a public share link.
    demo = build_interface()
    demo.launch(share=True)