Spaces:
Runtime error
Runtime error
add handcrafted camera poses and add motionctrl+videocrafter2 cmcm
Browse files- app.py +213 -480
- gradio_utils/camera_utils.py +14 -4
- gradio_utils/page_control.py +580 -0
app.py
CHANGED
@@ -13,7 +13,7 @@ from omegaconf import OmegaConf
|
|
13 |
from PIL import Image
|
14 |
from pytorch_lightning import seed_everything
|
15 |
|
16 |
-
from gradio_utils.camera_utils import CAMERA_MOTION_MODE, process_camera
|
17 |
from gradio_utils.traj_utils import (OBJECT_MOTION_MODE, get_provided_traj,
|
18 |
process_points, process_traj)
|
19 |
from gradio_utils.utils import vis_camera
|
@@ -23,17 +23,46 @@ from main.evaluation.motionctrl_inference import (DEFAULT_NEGATIVE_PROMPT,
|
|
23 |
post_prompt)
|
24 |
from utils.utils import instantiate_from_config
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
#### Description ####
|
30 |
title = r"""<h1 align="center">MotionCtrl: A Unified and Flexible Motion Controller for Video Generation</h1>"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
description = r"""
|
33 |
<b>Official Gradio demo</b> for <a href='https://github.com/TencentARC/MotionCtrl' target='_blank'><b>MotionCtrl: A Unified and Flexible Motion Controller for Video Generation</b></a>.<br>
|
34 |
🔥 MotionCtrl is capable of independently and flexibly controlling the camera motion and object motion of a generated video, with only a unified model.<br>
|
35 |
🤗 Try to control the motion of the generated videos yourself!<br>
|
36 |
-
❗❗❗
|
|
|
|
|
|
|
37 |
"""
|
38 |
article = r"""
|
39 |
If MotionCtrl is helpful, please help to ⭐ the <a href='https://github.com/TencentARC/MotionCtrl' target='_blank'>GitHub Repo</a>. Thanks!
|
@@ -78,12 +107,12 @@ button {border-radius: 8px !important;}
|
|
78 |
|
79 |
|
80 |
T_base = [
|
81 |
-
[1.,0.,0.], ## W2C
|
82 |
-
[-1.,0.,0.], ## W2C
|
83 |
-
[0., 1., 0.], ## W2C
|
84 |
-
[0.,-1.,0.], ## W2C
|
85 |
-
[0.,0.,1.], ## W2C
|
86 |
-
[0.,0.,-1.], ## W2C
|
87 |
]
|
88 |
radius = 1
|
89 |
n = 16
|
@@ -99,6 +128,7 @@ res = []
|
|
99 |
res_forsave = []
|
100 |
T_range = 1.8
|
101 |
|
|
|
102 |
|
103 |
|
104 |
for i in range(0, 16):
|
@@ -111,34 +141,14 @@ for i in range(0, 16):
|
|
111 |
|
112 |
fig = vis_camera(res)
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
|
119 |
-
|
120 |
-
|
121 |
-
"motion":[],
|
122 |
-
"mode": "Customized Mode 1: First A then B", # "First A then B", "Both A and B", "Custom"
|
123 |
-
"speed": 1.0,
|
124 |
-
"complex": None
|
125 |
-
}
|
126 |
|
127 |
-
|
128 |
-
global camera_dict
|
129 |
-
RT = process_camera(camera_dict) # [t, 3, 4]
|
130 |
-
if camera_dict['complex'] is not None:
|
131 |
-
# rescale T to [-2,2]
|
132 |
-
for i in range(3):
|
133 |
-
min_T = np.min(RT[:,i,-1])
|
134 |
-
max_T = np.max(RT[:,i,-1])
|
135 |
-
if min_T < -2 or max_T > 2:
|
136 |
-
RT[:,i,-1] = RT[:,i,-1] - min_T
|
137 |
-
RT[:,i,-1] = RT[:,i,-1] / (np.max(RT[:,:,-1]) + 1e-6)
|
138 |
-
RT[:,i,-1] = RT[:,i,-1] * 4
|
139 |
-
RT[:,i,-1] = RT[:,i,-1] - 2
|
140 |
-
|
141 |
-
fig = vis_camera(RT)
|
142 |
|
143 |
if info_mode == MODE[0]:
|
144 |
vis_step3_prompt_generate = True
|
@@ -174,6 +184,7 @@ def fn_vis_camera(info_mode):
|
|
174 |
|
175 |
def fn_vis_traj():
|
176 |
global traj_list
|
|
|
177 |
xy_range = 1024
|
178 |
points = process_points(traj_list)
|
179 |
imgs = []
|
@@ -194,7 +205,22 @@ def fn_vis_traj():
|
|
194 |
|
195 |
# size = (512, 512)
|
196 |
fps = 10
|
197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
writer = imageio.get_writer(path, format='mp4', mode='I', fps=fps)
|
199 |
for img in imgs:
|
200 |
writer.append_data(img)
|
@@ -214,114 +240,53 @@ def fn_vis_traj():
|
|
214 |
gr.update(visible=vis_start), \
|
215 |
gr.update(visible=vis_gen_video, value=None)
|
216 |
|
217 |
-
def display_camera_info(camera_dict, camera_mode=None):
|
218 |
-
if camera_dict['complex'] is not None:
|
219 |
-
res = f"complex : {camera_dict['complex']}. "
|
220 |
-
else:
|
221 |
-
res = ""
|
222 |
-
res += f"motion : {[_ for _ in camera_dict['motion']]}. "
|
223 |
-
res += f"speed : {camera_dict['speed']}. "
|
224 |
-
if camera_mode == CAMERA_MOTION_MODE[2]:
|
225 |
-
res += f"mode : {camera_dict['mode']}. "
|
226 |
-
return res
|
227 |
-
|
228 |
-
def add_traj_point(evt: gr.SelectData, ):
|
229 |
-
global traj_list
|
230 |
-
traj_list.append(evt.index)
|
231 |
-
traj_str = [f"{traj}" for traj in traj_list]
|
232 |
-
return ", ".join(traj_str)
|
233 |
|
234 |
-
def add_provided_traj(traj_name):
|
235 |
-
global traj_list
|
236 |
-
traj_list = get_provided_traj(traj_name)
|
237 |
-
traj_str = [f"{traj}" for traj in traj_list]
|
238 |
-
return ", ".join(traj_str)
|
239 |
|
240 |
-
def add_camera_motion(camera_motion, camera_mode):
|
241 |
-
global camera_dict
|
242 |
-
if camera_dict['complex'] is not None:
|
243 |
-
camera_dict['complex'] = None
|
244 |
-
if camera_mode == CAMERA_MOTION_MODE[2] and len(camera_dict['motion']) <2:
|
245 |
-
camera_dict['motion'].append(camera_motion)
|
246 |
-
else:
|
247 |
-
camera_dict['motion']=[camera_motion]
|
248 |
-
|
249 |
-
return display_camera_info(camera_dict, camera_mode)
|
250 |
-
|
251 |
-
def add_complex_camera_motion(camera_motion):
|
252 |
-
global camera_dict
|
253 |
-
camera_dict['complex']=camera_motion
|
254 |
-
return display_camera_info(camera_dict)
|
255 |
-
|
256 |
-
def change_camera_mode(combine_type, camera_mode):
|
257 |
-
global camera_dict
|
258 |
-
camera_dict['mode'] = combine_type
|
259 |
-
|
260 |
-
return display_camera_info(camera_dict, camera_mode)
|
261 |
-
|
262 |
-
def change_camera_speed(camera_speed):
|
263 |
-
global camera_dict
|
264 |
-
camera_dict['speed'] = camera_speed
|
265 |
-
return display_camera_info(camera_dict)
|
266 |
-
|
267 |
-
def reset_camera():
|
268 |
-
global camera_dict
|
269 |
-
camera_dict = {
|
270 |
-
"motion":[],
|
271 |
-
"mode": "Customized Mode 1: First A then B",
|
272 |
-
"speed": 1.0,
|
273 |
-
"complex": None
|
274 |
-
}
|
275 |
-
return display_camera_info(camera_dict)
|
276 |
-
|
277 |
-
|
278 |
-
def fn_traj_droplast():
|
279 |
-
global traj_list
|
280 |
-
|
281 |
-
if traj_list:
|
282 |
-
traj_list.pop()
|
283 |
-
|
284 |
-
if traj_list:
|
285 |
-
traj_str = [f"{traj}" for traj in traj_list]
|
286 |
-
return ", ".join(traj_str)
|
287 |
-
else:
|
288 |
-
return "Click to specify trajectory"
|
289 |
-
|
290 |
-
def fn_traj_reset():
|
291 |
-
global traj_list
|
292 |
-
traj_list = []
|
293 |
-
return "Click to specify trajectory"
|
294 |
|
295 |
###########################################
|
296 |
-
|
|
|
297 |
config_path='./configs/inference/config_both.yaml'
|
298 |
if not os.path.exists(model_path):
|
299 |
os.system(f'wget https://huggingface.co/TencentARC/MotionCtrl/resolve/main/motionctrl.pth?download=true -P .')
|
300 |
|
301 |
config = OmegaConf.load(config_path)
|
302 |
model_config = config.pop("model", OmegaConf.create())
|
303 |
-
|
304 |
if torch.cuda.is_available():
|
305 |
-
|
|
|
|
|
|
|
306 |
|
307 |
-
|
308 |
-
|
|
|
309 |
|
|
|
|
|
|
|
|
|
|
|
310 |
|
311 |
-
|
|
|
|
|
312 |
global traj_list
|
313 |
global camera_dict
|
314 |
|
315 |
-
RT = process_camera(camera_dict).reshape(-1,12)
|
316 |
traj_flow = process_traj(traj_list).transpose(3,0,1,2)
|
317 |
-
print(prompts)
|
318 |
-
print(RT.shape)
|
319 |
-
print(traj_flow.shape)
|
320 |
|
321 |
-
|
|
|
|
|
|
|
|
|
|
|
322 |
unconditional_guidance_scale = 7.5
|
323 |
unconditional_guidance_scale_temporal = None
|
324 |
-
|
325 |
ddim_steps= 50
|
326 |
ddim_eta=1.0
|
327 |
cond_T=800
|
@@ -415,15 +380,13 @@ def model_run(prompts, infer_mode, seed, n_samples):
|
|
415 |
batch_variants = torch.stack(batch_variants, dim=1)
|
416 |
batch_variants = batch_variants[0]
|
417 |
|
418 |
-
# file_path = save_results(batch_variants, "MotionCtrl", "gradio_temp", fps=10)
|
419 |
file_path = save_results(batch_variants, fps=10)
|
420 |
-
print(file_path)
|
421 |
|
422 |
return gr.update(value=file_path, width=256*n_samples, height=256)
|
423 |
|
424 |
-
# return
|
425 |
|
426 |
-
def save_results(video, fps=10):
|
427 |
|
428 |
# b,c,t,h,w
|
429 |
video = video.detach().cpu()
|
@@ -435,7 +398,10 @@ def save_results(video, fps=10):
|
|
435 |
grid = (grid + 1.0) / 2.0
|
436 |
grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1) # [t, h, w*n, 3]
|
437 |
|
438 |
-
|
|
|
|
|
|
|
439 |
|
440 |
writer = imageio.get_writer(path, format='mp4', mode='I', fps=fps)
|
441 |
for i in range(grid.shape[0]):
|
@@ -446,357 +412,35 @@ def save_results(video, fps=10):
|
|
446 |
|
447 |
return path
|
448 |
|
449 |
-
def visualized_step2(infer_mode):
|
450 |
-
|
451 |
-
# reset
|
452 |
-
reset_camera()
|
453 |
-
fn_traj_reset()
|
454 |
-
|
455 |
-
# camera motion control
|
456 |
-
vis_basic_camera_motion = False
|
457 |
-
vis_basic_camera_motion_des = False
|
458 |
-
vis_custom_camera_motion = False
|
459 |
-
vis_custom_run_status = False
|
460 |
-
vis_complex_camera_motion = False
|
461 |
-
vis_complex_camera_motion_des = False
|
462 |
-
vis_U = False
|
463 |
-
vis_D = False
|
464 |
-
vis_L = False
|
465 |
-
vis_R = False
|
466 |
-
vis_I = False
|
467 |
-
vis_O = False
|
468 |
-
vis_ACW = False
|
469 |
-
vis_CW = False
|
470 |
-
vis_combine1 = False
|
471 |
-
vis_combine2 = False
|
472 |
-
vis_speed = False
|
473 |
-
|
474 |
-
vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
|
475 |
-
vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
|
476 |
-
|
477 |
-
vis_camera_args = False
|
478 |
-
vis_camera_reset = False
|
479 |
-
vis_camera_vis = False
|
480 |
-
vis_vis_camera = False
|
481 |
-
|
482 |
-
# object motion control
|
483 |
-
vis_provided_traj = False
|
484 |
-
vis_provided_traj_des = False
|
485 |
-
vis_draw_yourself = False
|
486 |
-
vis_draw_run_status = False
|
487 |
-
|
488 |
-
vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = False, False, False, False
|
489 |
-
vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = False, False, False, False
|
490 |
-
|
491 |
-
traj_args = False
|
492 |
-
traj_droplast, traj_reset = False, False
|
493 |
-
traj_vis = False
|
494 |
-
traj_input, vis_traj = False, False
|
495 |
-
|
496 |
-
|
497 |
-
# generate video
|
498 |
-
vis_step3_prompt_generate = False
|
499 |
-
vis_prompt = False
|
500 |
-
vis_num_samples = False
|
501 |
-
vis_seed = False
|
502 |
-
vis_start = False
|
503 |
-
vis_gen_video = False
|
504 |
-
|
505 |
-
if infer_mode == MODE[0]:
|
506 |
-
vis_step2_camera_motion = True
|
507 |
-
vis_step2_camera_motion_des = True
|
508 |
-
vis_camera_mode = True
|
509 |
-
vis_camera_info = True
|
510 |
-
|
511 |
-
vis_step2_object_motion = False
|
512 |
-
vis_step2_object_motion_des = False
|
513 |
-
vis_traj_mode = False
|
514 |
-
vis_traj_info = False
|
515 |
-
|
516 |
-
step2_camera_object_motion = False
|
517 |
-
step2_camera_object_motion_des = False
|
518 |
-
|
519 |
-
elif infer_mode == MODE[1]:
|
520 |
-
vis_step2_camera_motion = False
|
521 |
-
vis_step2_camera_motion_des = False
|
522 |
-
vis_camera_mode = False
|
523 |
-
vis_camera_info = False
|
524 |
-
|
525 |
-
vis_step2_object_motion = True
|
526 |
-
vis_step2_object_motion_des = True
|
527 |
-
vis_traj_mode = True
|
528 |
-
vis_traj_info = True
|
529 |
-
|
530 |
-
step2_camera_object_motion = False
|
531 |
-
step2_camera_object_motion_des = False
|
532 |
-
else: #infer_mode == MODE[2]:
|
533 |
-
vis_step2_camera_motion = False
|
534 |
-
vis_step2_camera_motion_des = False
|
535 |
-
vis_camera_mode = False
|
536 |
-
vis_camera_info = False
|
537 |
-
|
538 |
-
vis_step2_object_motion = False
|
539 |
-
vis_step2_object_motion_des = False
|
540 |
-
vis_traj_mode = False
|
541 |
-
vis_traj_info = False
|
542 |
-
|
543 |
-
step2_camera_object_motion = True
|
544 |
-
step2_camera_object_motion_des = True
|
545 |
-
|
546 |
-
vis_basic_camera_motion = True
|
547 |
-
vis_basic_camera_motion_des = True
|
548 |
-
vis_U = True
|
549 |
-
vis_D = True
|
550 |
-
vis_L = True
|
551 |
-
vis_R = True
|
552 |
-
vis_I = True
|
553 |
-
vis_O = True
|
554 |
-
vis_ACW = True
|
555 |
-
vis_CW = True
|
556 |
-
vis_speed = True
|
557 |
-
|
558 |
-
vis_camera_args = True
|
559 |
-
vis_camera_reset = True
|
560 |
-
vis_camera_vis = True
|
561 |
-
vis_vis_camera = True
|
562 |
-
|
563 |
-
|
564 |
-
return gr.update(visible=vis_step2_camera_motion), \
|
565 |
-
gr.update(visible=vis_step2_camera_motion_des), \
|
566 |
-
gr.update(visible=vis_camera_mode), \
|
567 |
-
gr.update(visible=vis_camera_info), \
|
568 |
-
gr.update(visible=vis_basic_camera_motion), \
|
569 |
-
gr.update(visible=vis_basic_camera_motion_des), \
|
570 |
-
gr.update(visible=vis_custom_camera_motion), \
|
571 |
-
gr.update(visible=vis_custom_run_status), \
|
572 |
-
gr.update(visible=vis_complex_camera_motion), \
|
573 |
-
gr.update(visible=vis_complex_camera_motion_des), \
|
574 |
-
gr.update(visible=vis_U), gr.update(visible=vis_D), gr.update(visible=vis_L), gr.update(visible=vis_R), \
|
575 |
-
gr.update(visible=vis_I), gr.update(visible=vis_O), gr.update(visible=vis_ACW), gr.update(visible=vis_CW), \
|
576 |
-
gr.update(visible=vis_combine1), gr.update(visible=vis_combine2), \
|
577 |
-
gr.update(visible=vis_speed), \
|
578 |
-
gr.update(visible=vis_Pose_1), gr.update(visible=vis_Pose_2), gr.update(visible=vis_Pose_3), gr.update(visible=vis_Pose_4), \
|
579 |
-
gr.update(visible=vis_Pose_5), gr.update(visible=vis_Pose_6), gr.update(visible=vis_Pose_7), gr.update(visible=vis_Pose_8), \
|
580 |
-
gr.update(visible=vis_camera_args, value=None), \
|
581 |
-
gr.update(visible=vis_camera_reset), gr.update(visible=vis_camera_vis), \
|
582 |
-
gr.update(visible=vis_vis_camera, value=None), \
|
583 |
-
gr.update(visible=vis_step2_object_motion), \
|
584 |
-
gr.update(visible=vis_step2_object_motion_des), \
|
585 |
-
gr.update(visible=vis_traj_mode), \
|
586 |
-
gr.update(visible=vis_traj_info), \
|
587 |
-
gr.update(visible=vis_provided_traj), \
|
588 |
-
gr.update(visible=vis_provided_traj_des), \
|
589 |
-
gr.update(visible=vis_draw_yourself), \
|
590 |
-
gr.update(visible=vis_draw_run_status), \
|
591 |
-
gr.update(visible=vis_traj_1), gr.update(visible=vis_traj_2), gr.update(visible=vis_traj_3), gr.update(visible=vis_traj_4), \
|
592 |
-
gr.update(visible=vis_traj_5), gr.update(visible=vis_traj_6), gr.update(visible=vis_traj_7), gr.update(visible=vis_traj_8), \
|
593 |
-
gr.update(visible=traj_args), \
|
594 |
-
gr.update(visible=traj_droplast), gr.update(visible=traj_reset), \
|
595 |
-
gr.update(visible=traj_vis), \
|
596 |
-
gr.update(visible=traj_input), gr.update(visible=vis_traj, value=None), \
|
597 |
-
gr.update(visible=step2_camera_object_motion), \
|
598 |
-
gr.update(visible=step2_camera_object_motion_des), \
|
599 |
-
gr.update(visible=vis_step3_prompt_generate), \
|
600 |
-
gr.update(visible=vis_prompt), \
|
601 |
-
gr.update(visible=vis_num_samples), \
|
602 |
-
gr.update(visible=vis_seed), \
|
603 |
-
gr.update(visible=vis_start), \
|
604 |
-
gr.update(visible=vis_gen_video)
|
605 |
-
|
606 |
-
def visualized_camera_poses(step2_camera_motion):
|
607 |
-
reset_camera()
|
608 |
-
|
609 |
-
# generate video
|
610 |
-
vis_step3_prompt_generate = False
|
611 |
-
vis_prompt = False
|
612 |
-
vis_num_samples = False
|
613 |
-
vis_seed = False
|
614 |
-
vis_start = False
|
615 |
-
vis_gen_video = False
|
616 |
-
|
617 |
-
if step2_camera_motion == CAMERA_MOTION_MODE[0]:
|
618 |
-
vis_basic_camera_motion = True
|
619 |
-
vis_basic_camera_motion_des = True
|
620 |
-
vis_custom_camera_motion = False
|
621 |
-
vis_custom_run_status = False
|
622 |
-
vis_complex_camera_motion = False
|
623 |
-
vis_complex_camera_motion_des = False
|
624 |
-
vis_U = True
|
625 |
-
vis_D = True
|
626 |
-
vis_L = True
|
627 |
-
vis_R = True
|
628 |
-
vis_I = True
|
629 |
-
vis_O = True
|
630 |
-
vis_ACW = True
|
631 |
-
vis_CW = True
|
632 |
-
vis_combine1 = False
|
633 |
-
vis_combine2 = False
|
634 |
-
vis_speed = True
|
635 |
-
|
636 |
-
vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
|
637 |
-
vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
|
638 |
-
|
639 |
-
elif step2_camera_motion == CAMERA_MOTION_MODE[1]:
|
640 |
-
vis_basic_camera_motion = False
|
641 |
-
vis_basic_camera_motion_des = False
|
642 |
-
vis_custom_camera_motion = False
|
643 |
-
vis_custom_run_status = False
|
644 |
-
vis_complex_camera_motion = True
|
645 |
-
vis_complex_camera_motion_des = True
|
646 |
-
vis_U = False
|
647 |
-
vis_D = False
|
648 |
-
vis_L = False
|
649 |
-
vis_R = False
|
650 |
-
vis_I = False
|
651 |
-
vis_O = False
|
652 |
-
vis_ACW = False
|
653 |
-
vis_CW = False
|
654 |
-
vis_combine1 = False
|
655 |
-
vis_combine2 = False
|
656 |
-
vis_speed = False
|
657 |
-
|
658 |
-
vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = True, True, True, True
|
659 |
-
vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = True, True, True, True
|
660 |
-
|
661 |
-
else: # step2_camera_motion = CAMERA_MOTION_MODE[2]:
|
662 |
-
vis_basic_camera_motion = False
|
663 |
-
vis_basic_camera_motion_des = False
|
664 |
-
vis_custom_camera_motion = True
|
665 |
-
vis_custom_run_status = True
|
666 |
-
vis_complex_camera_motion = False
|
667 |
-
vis_complex_camera_motion_des = False
|
668 |
-
vis_U = True
|
669 |
-
vis_D = True
|
670 |
-
vis_L = True
|
671 |
-
vis_R = True
|
672 |
-
vis_I = True
|
673 |
-
vis_O = True
|
674 |
-
vis_ACW = True
|
675 |
-
vis_CW = True
|
676 |
-
vis_combine1 = True
|
677 |
-
vis_combine2 = True
|
678 |
-
vis_speed = True
|
679 |
-
|
680 |
-
vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
|
681 |
-
vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
|
682 |
-
|
683 |
-
vis_camera_args = True
|
684 |
-
vis_camera_reset = True
|
685 |
-
vis_camera_vis = True
|
686 |
-
vis_vis_camera = True
|
687 |
-
|
688 |
-
return gr.update(visible=vis_basic_camera_motion), \
|
689 |
-
gr.update(visible=vis_basic_camera_motion_des), \
|
690 |
-
gr.update(visible=vis_custom_camera_motion), \
|
691 |
-
gr.update(visible=vis_custom_run_status), \
|
692 |
-
gr.update(visible=vis_complex_camera_motion), \
|
693 |
-
gr.update(visible=vis_complex_camera_motion_des), \
|
694 |
-
gr.update(visible=vis_U), gr.update(visible=vis_D), gr.update(visible=vis_L), gr.update(visible=vis_R), \
|
695 |
-
gr.update(visible=vis_I), gr.update(visible=vis_O), gr.update(visible=vis_ACW), gr.update(visible=vis_CW), \
|
696 |
-
gr.update(visible=vis_combine1), gr.update(visible=vis_combine2), \
|
697 |
-
gr.update(visible=vis_speed), \
|
698 |
-
gr.update(visible=vis_Pose_1), gr.update(visible=vis_Pose_2), gr.update(visible=vis_Pose_3), gr.update(visible=vis_Pose_4), \
|
699 |
-
gr.update(visible=vis_Pose_5), gr.update(visible=vis_Pose_6), gr.update(visible=vis_Pose_7), gr.update(visible=vis_Pose_8), \
|
700 |
-
gr.update(visible=vis_camera_args, value=None), \
|
701 |
-
gr.update(visible=vis_camera_reset), gr.update(visible=vis_camera_vis), \
|
702 |
-
gr.update(visible=vis_vis_camera, value=None), \
|
703 |
-
gr.update(visible=vis_step3_prompt_generate), \
|
704 |
-
gr.update(visible=vis_prompt), \
|
705 |
-
gr.update(visible=vis_num_samples), \
|
706 |
-
gr.update(visible=vis_seed), \
|
707 |
-
gr.update(visible=vis_start), \
|
708 |
-
gr.update(visible=vis_gen_video)
|
709 |
-
|
710 |
-
def visualized_traj_poses(step2_object_motion):
|
711 |
-
|
712 |
-
fn_traj_reset()
|
713 |
-
|
714 |
-
# generate video
|
715 |
-
vis_step3_prompt_generate = False
|
716 |
-
vis_prompt = False
|
717 |
-
vis_num_samples = False
|
718 |
-
vis_seed = False
|
719 |
-
vis_start = False
|
720 |
-
vis_gen_video = False
|
721 |
-
|
722 |
-
if step2_object_motion == "Provided Trajectory":
|
723 |
-
vis_provided_traj = True
|
724 |
-
vis_provided_traj_des = True
|
725 |
-
vis_draw_yourself = False
|
726 |
-
vis_draw_run_status = False
|
727 |
-
|
728 |
-
vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = True, True, True, True
|
729 |
-
vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = True, True, True, True
|
730 |
-
|
731 |
-
traj_args = True
|
732 |
-
traj_droplast, traj_reset = False, True
|
733 |
-
traj_vis = True
|
734 |
-
traj_input, vis_traj = False, True
|
735 |
-
|
736 |
-
|
737 |
-
elif step2_object_motion == "Custom Trajectory":
|
738 |
-
vis_provided_traj = False
|
739 |
-
vis_provided_traj_des = False
|
740 |
-
vis_draw_yourself = True
|
741 |
-
vis_draw_run_status = True
|
742 |
-
|
743 |
-
vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = False, False, False, False
|
744 |
-
vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = False, False, False, False
|
745 |
-
|
746 |
-
traj_args = True
|
747 |
-
traj_droplast, traj_reset = True, True
|
748 |
-
traj_vis = True
|
749 |
-
traj_input, vis_traj = True, True
|
750 |
-
|
751 |
-
return gr.update(visible=vis_provided_traj), \
|
752 |
-
gr.update(visible=vis_provided_traj_des), \
|
753 |
-
gr.update(visible=vis_draw_yourself), \
|
754 |
-
gr.update(visible=vis_draw_run_status), \
|
755 |
-
gr.update(visible=vis_traj_1), gr.update(visible=vis_traj_2), gr.update(visible=vis_traj_3), gr.update(visible=vis_traj_4), \
|
756 |
-
gr.update(visible=vis_traj_5), gr.update(visible=vis_traj_6), gr.update(visible=vis_traj_7), gr.update(visible=vis_traj_8), \
|
757 |
-
gr.update(visible=traj_args), \
|
758 |
-
gr.update(visible=traj_droplast), gr.update(visible=traj_reset), \
|
759 |
-
gr.update(visible=traj_vis), \
|
760 |
-
gr.update(visible=traj_input), gr.update(visible=vis_traj, value=None), \
|
761 |
-
gr.update(visible=vis_step3_prompt_generate), \
|
762 |
-
gr.update(visible=vis_prompt), \
|
763 |
-
gr.update(visible=vis_num_samples), \
|
764 |
-
gr.update(visible=vis_seed), \
|
765 |
-
gr.update(visible=vis_start), \
|
766 |
-
gr.update(visible=vis_gen_video)
|
767 |
|
768 |
def main(args):
|
769 |
demo = gr.Blocks()
|
770 |
with demo:
|
771 |
|
772 |
gr.Markdown(title)
|
|
|
773 |
gr.Markdown(description)
|
774 |
|
775 |
-
# state = gr.State({
|
776 |
-
# "mode": "camera_only",
|
777 |
-
# "camera_input": [],
|
778 |
-
# "traj_input": [],
|
779 |
-
# })
|
780 |
|
781 |
with gr.Column():
|
782 |
-
'''
|
783 |
# step 0: select based model.
|
784 |
gr.Markdown("## Step0: Selecting the model", show_label=False)
|
785 |
gr.Markdown( f'- {BASE_MODEL[0]}: **MotionCtrl** deployed on {BASE_MODEL[0]}', show_label=False)
|
786 |
gr.Markdown( f'- {BASE_MODEL[1]}: **MotionCtrl** deployed on {BASE_MODEL[1]}', show_label=False)
|
787 |
-
gr.
|
788 |
-
|
789 |
-
gr.Radio(choices=BASE_MODEL, value=BASE_MODEL[0], label="Based Model", interactive=
|
790 |
-
|
791 |
|
792 |
# step 1: select motion control mode
|
793 |
-
gr.Markdown("## Step 1/3: Selecting the motion control mode", show_label=False)
|
794 |
-
gr.Markdown( f'- {MODE[0]}: Control the camera motion only
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
|
|
800 |
|
801 |
# step2 - camera + object motion control
|
802 |
step2_camera_object_motion = gr.Markdown("---\n## Step 2/3: Select the camera poses and trajectory", show_label=False, visible=False)
|
@@ -834,18 +478,40 @@ def main(args):
|
|
834 |
|
835 |
# step2.3 - camera motion control - custom
|
836 |
custom_camera_motion = gr.Markdown(f"---\n### {CAMERA_MOTION_MODE[2]}", show_label=False, visible=False)
|
837 |
-
custom_run_status = gr.Markdown(f"\n 1. Click two of the basic camera poses, such as `Pan Up` and `Pan Left`; \
|
838 |
-
|
839 |
-
|
840 |
-
|
841 |
-
|
842 |
-
|
843 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
844 |
show_label=False, visible=False)
|
845 |
|
|
|
|
|
|
|
846 |
gr.HighlightedText(value=[("",""), ("1. Select two of the basic camera poses; 2. Select Customized Mode 1 OR Customized Mode 2. 3. Visualized Camera to show the customized camera poses", "Normal")],
|
847 |
color_map={"Normal": "green", "Error": "red", "Clear clicks": "gray", "Add mask": "green", "Remove mask": "red"}, visible=False)
|
848 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
849 |
with gr.Row():
|
850 |
U = gr.Button(value="Pan Up", visible=False)
|
851 |
D = gr.Button(value="Pan Down", visible=False)
|
@@ -857,9 +523,9 @@ def main(args):
|
|
857 |
ACW = gr.Button(value="ACW", visible=False)
|
858 |
CW = gr.Button(value="CW", visible=False)
|
859 |
|
860 |
-
with gr.Row():
|
861 |
-
|
862 |
-
|
863 |
|
864 |
with gr.Row():
|
865 |
speed = gr.Slider(minimum=0, maximum=2, step=0.2, label="Motion Speed", value=1.0, visible=False)
|
@@ -941,12 +607,62 @@ def main(args):
|
|
941 |
with gr.Column():
|
942 |
step3_prompt_generate = gr.Markdown("---\n## Step 3/3: Add prompt and Generate videos", show_label=False, visible=False)
|
943 |
prompt = gr.Textbox(value="a dog sitting on grass", label="Prompt", interactive=True, visible=False)
|
944 |
-
n_samples = gr.Number(value=
|
945 |
seed = gr.Number(value=1234, precision=0, interactive=True, label="Seed", visible=False)
|
946 |
start = gr.Button(value="Start generation !", visible=False)
|
947 |
with gr.Column():
|
948 |
gen_video = gr.Video(value=None, label="Generate Video", visible=False)
|
949 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
950 |
mode_info.click(
|
951 |
fn=visualized_step2,
|
952 |
inputs=[infer_mode],
|
@@ -963,7 +679,7 @@ def main(args):
|
|
963 |
complex_camera_motion_des,
|
964 |
U, D, L, R,
|
965 |
I, O, ACW, CW,
|
966 |
-
combine1, combine2,
|
967 |
speed,
|
968 |
Pose_1, Pose_2, Pose_3, Pose_4,
|
969 |
Pose_5, Pose_6, Pose_7, Pose_8,
|
@@ -1006,7 +722,7 @@ def main(args):
|
|
1006 |
complex_camera_motion_des,
|
1007 |
U, D, L, R,
|
1008 |
I, O, ACW, CW,
|
1009 |
-
combine1, combine2,
|
1010 |
speed,
|
1011 |
Pose_1, Pose_2, Pose_3, Pose_4,
|
1012 |
Pose_5, Pose_6, Pose_7, Pose_8,
|
@@ -1044,10 +760,27 @@ def main(args):
|
|
1044 |
speed.change(fn=change_camera_speed, inputs=speed, outputs=camera_args)
|
1045 |
camera_reset.click(fn=reset_camera, inputs=None, outputs=[camera_args])
|
1046 |
|
1047 |
-
combine1.click(fn=change_camera_mode,
|
1048 |
-
|
1049 |
-
|
1050 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1051 |
|
1052 |
Pose_1.click(fn=add_complex_camera_motion, inputs=Pose_1, outputs=camera_args)
|
1053 |
Pose_2.click(fn=add_complex_camera_motion, inputs=Pose_2, outputs=camera_args)
|
@@ -1073,7 +806,7 @@ def main(args):
|
|
1073 |
traj_reset.click(fn=fn_traj_reset, inputs=None, outputs=traj_args)
|
1074 |
|
1075 |
|
1076 |
-
start.click(fn=model_run, inputs=[prompt, infer_mode, seed, n_samples], outputs=gen_video)
|
1077 |
|
1078 |
gr.Markdown(article)
|
1079 |
|
|
|
13 |
from PIL import Image
|
14 |
from pytorch_lightning import seed_everything
|
15 |
|
16 |
+
from gradio_utils.camera_utils import CAMERA_MOTION_MODE, process_camera, create_relative
|
17 |
from gradio_utils.traj_utils import (OBJECT_MOTION_MODE, get_provided_traj,
|
18 |
process_points, process_traj)
|
19 |
from gradio_utils.utils import vis_camera
|
|
|
23 |
post_prompt)
|
24 |
from utils.utils import instantiate_from_config
|
25 |
|
26 |
+
from gradio_utils.page_control import (MODE, BASE_MODEL, traj_list, camera_dict,
|
27 |
+
reset_camera,
|
28 |
+
visualized_step1, visualized_step2,
|
29 |
+
visualized_camera_poses, visualized_traj_poses,
|
30 |
+
add_camera_motion, add_complex_camera_motion,
|
31 |
+
input_raw_camera_pose,
|
32 |
+
change_camera_mode, change_camera_speed,
|
33 |
+
add_traj_point, add_provided_traj,
|
34 |
+
fn_traj_droplast, fn_traj_reset)
|
35 |
+
|
36 |
os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
37 |
+
SPACE_ID = os.environ.get('SPACE_ID', '')
|
38 |
+
|
39 |
+
DIY_MODE = ['Customized Mode 1: First A then B',
|
40 |
+
'Customized Mode 2: Both A and B',
|
41 |
+
'Customized Mode 3: RAW Camera Poses']
|
42 |
|
43 |
|
44 |
#### Description ####
|
45 |
title = r"""<h1 align="center">MotionCtrl: A Unified and Flexible Motion Controller for Video Generation</h1>"""
|
46 |
+
# subtitle = r"""<h2 align="center">Deployed on SVD Generation</h2>"""
|
47 |
+
important_link = r"""
|
48 |
+
<div align='center'>
|
49 |
+
<a href='https://huggingface.co/spaces/TencentARC/MotionCtrl_SVD'>[Demo MotionCtrl + SVD]</a>
|
50 |
+
  <a href='https://wzhouxiff.github.io/projects/MotionCtrl/assets/paper/MotionCtrl.pdf'>[Paper]</a>
|
51 |
+
  <a href='https://wzhouxiff.github.io/projects/MotionCtrl/'>[Project Page]</a>
|
52 |
+
  <a href='https://github.com/TencentARC/MotionCtrl'>[Code]</a>
|
53 |
+
  <a href='https://github.com/TencentARC/MotionCtrl/blob/svd/doc/showcase_svd.md'>[Showcases]</a>
|
54 |
+
  <a href='https://github.com/TencentARC/MotionCtrl/blob/svd/doc/tutorial.md'>[Tutorial]</a>
|
55 |
+
</div>
|
56 |
+
"""
|
57 |
|
58 |
description = r"""
|
59 |
<b>Official Gradio demo</b> for <a href='https://github.com/TencentARC/MotionCtrl' target='_blank'><b>MotionCtrl: A Unified and Flexible Motion Controller for Video Generation</b></a>.<br>
|
60 |
🔥 MotionCtrl is capable of independently and flexibly controlling the camera motion and object motion of a generated video, with only a unified model.<br>
|
61 |
🤗 Try to control the motion of the generated videos yourself!<br>
|
62 |
+
❗❗❗ This demo provides models of **MotionCtrl** deployed on **LVDM/VideoCrafter** and **VideoCrafter2**.
|
63 |
+
Deployments in **LVDM/VideoCrafter** include both Camera and Object Motion Control,
|
64 |
+
while deployments in **VideoCrafter2** only include Camera Motion Control.
|
65 |
+
<br>
|
66 |
"""
|
67 |
article = r"""
|
68 |
If MotionCtrl is helpful, please help to ⭐ the <a href='https://github.com/TencentARC/MotionCtrl' target='_blank'>GitHub Repo</a>. Thanks!
|
|
|
107 |
|
108 |
|
109 |
T_base = [
|
110 |
+
[1.,0.,0.], ## W2C left
|
111 |
+
[-1.,0.,0.], ## W2C right
|
112 |
+
[0., 1., 0.], ## W2C up
|
113 |
+
[0.,-1.,0.], ## W2C down
|
114 |
+
[0.,0.,1.], ## W2C zoom out
|
115 |
+
[0.,0.,-1.], ## W2C zoom in
|
116 |
]
|
117 |
radius = 1
|
118 |
n = 16
|
|
|
128 |
res_forsave = []
|
129 |
T_range = 1.8
|
130 |
|
131 |
+
exp_no = 0
|
132 |
|
133 |
|
134 |
for i in range(0, 16):
|
|
|
141 |
|
142 |
fig = vis_camera(res)
|
143 |
|
144 |
+
def fn_vis_camera(info_mode, camera_args=None):
|
145 |
+
global camera_dict
|
146 |
+
RT = process_camera(camera_dict, camera_args) # [t, 3, 4]
|
|
|
147 |
|
148 |
+
rescale_T = 1.0
|
149 |
+
rescale_T = max(rescale_T, np.max(np.abs(RT[:,:,-1])) / 1.9)
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
+
fig = vis_camera(create_relative(RT), rescale_T=rescale_T)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
if info_mode == MODE[0]:
|
154 |
vis_step3_prompt_generate = True
|
|
|
184 |
|
185 |
def fn_vis_traj():
|
186 |
global traj_list
|
187 |
+
global exp_no
|
188 |
xy_range = 1024
|
189 |
points = process_points(traj_list)
|
190 |
imgs = []
|
|
|
205 |
|
206 |
# size = (512, 512)
|
207 |
fps = 10
|
208 |
+
|
209 |
+
out_dir = f'./results_trajs/{exp_no}'
|
210 |
+
os.makedirs(out_dir, exist_ok=True)
|
211 |
+
exp_no += 1
|
212 |
+
|
213 |
+
traj_flow = process_traj(traj_list).transpose(3,0,1,2)
|
214 |
+
|
215 |
+
np.save(f'{out_dir}/traj_flow.npy', traj_flow)
|
216 |
+
with open(f'{out_dir}/traj_list.txt', 'w') as f:
|
217 |
+
for item in traj_list:
|
218 |
+
f.write(f"{item[0]}, {item[1]}\n")
|
219 |
+
|
220 |
+
if out_dir is None:
|
221 |
+
path = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name
|
222 |
+
else:
|
223 |
+
path = os.path.join(out_dir, 'traj.mp4')
|
224 |
writer = imageio.get_writer(path, format='mp4', mode='I', fps=fps)
|
225 |
for img in imgs:
|
226 |
writer.append_data(img)
|
|
|
240 |
gr.update(visible=vis_start), \
|
241 |
gr.update(visible=vis_gen_video, value=None)
|
242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
|
|
|
|
|
|
|
|
|
|
|
244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
###########################################
|
247 |
+
|
248 |
+
model_path='./checkpoints/motionctrl.pth'
|
249 |
config_path='./configs/inference/config_both.yaml'
|
250 |
if not os.path.exists(model_path):
|
251 |
os.system(f'wget https://huggingface.co/TencentARC/MotionCtrl/resolve/main/motionctrl.pth?download=true -P .')
|
252 |
|
253 |
config = OmegaConf.load(config_path)
|
254 |
model_config = config.pop("model", OmegaConf.create())
|
255 |
+
model_v1 = instantiate_from_config(model_config)
|
256 |
if torch.cuda.is_available():
|
257 |
+
model_v1 = model_v1.cuda()
|
258 |
+
|
259 |
+
model_v1 = load_model_checkpoint(model_v1, model_path)
|
260 |
+
model_v1.eval()
|
261 |
|
262 |
+
v2_model_path = './checkpoints/videocrafter2_motionctrl_cmcm.ckpt'
|
263 |
+
if not os.path.exists(v2_model_path):
|
264 |
+
os.system(f'wget https://huggingface.co/TencentARC/MotionCtrl/resolve/main/videocrafter2_motionctrl_cmcm.ckpt?download=true -P .')
|
265 |
|
266 |
+
model_v2 = instantiate_from_config(model_config)
|
267 |
+
model_v2 = load_model_checkpoint(model_v2, v2_model_path)
|
268 |
+
|
269 |
+
if torch.cuda.is_available():
|
270 |
+
model_v2 = model_v2.cuda()
|
271 |
|
272 |
+
model_v2.eval()
|
273 |
+
|
274 |
+
def model_run(prompts, choose_model, infer_mode, seed, n_samples, camera_args=None):
|
275 |
global traj_list
|
276 |
global camera_dict
|
277 |
|
278 |
+
RT = process_camera(camera_dict, camera_args).reshape(-1,12)
|
279 |
traj_flow = process_traj(traj_list).transpose(3,0,1,2)
|
|
|
|
|
|
|
280 |
|
281 |
+
if choose_model == BASE_MODEL[0]:
|
282 |
+
model = model_v1
|
283 |
+
noise_shape = [1, 4, 16, 32, 32]
|
284 |
+
else:
|
285 |
+
model = model_v2
|
286 |
+
noise_shape = [1, 4, 16, 40, 64]
|
287 |
unconditional_guidance_scale = 7.5
|
288 |
unconditional_guidance_scale_temporal = None
|
289 |
+
|
290 |
ddim_steps= 50
|
291 |
ddim_eta=1.0
|
292 |
cond_T=800
|
|
|
380 |
batch_variants = torch.stack(batch_variants, dim=1)
|
381 |
batch_variants = batch_variants[0]
|
382 |
|
|
|
383 |
file_path = save_results(batch_variants, fps=10)
|
|
|
384 |
|
385 |
return gr.update(value=file_path, width=256*n_samples, height=256)
|
386 |
|
387 |
+
# return
|
388 |
|
389 |
+
def save_results(video, fps=10, out_dir=None):
|
390 |
|
391 |
# b,c,t,h,w
|
392 |
video = video.detach().cpu()
|
|
|
398 |
grid = (grid + 1.0) / 2.0
|
399 |
grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1) # [t, h, w*n, 3]
|
400 |
|
401 |
+
if out_dir is None:
|
402 |
+
path = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name
|
403 |
+
else:
|
404 |
+
path = os.path.join(out_dir, 'motionctrl.mp4')
|
405 |
|
406 |
writer = imageio.get_writer(path, format='mp4', mode='I', fps=fps)
|
407 |
for i in range(grid.shape[0]):
|
|
|
412 |
|
413 |
return path
|
414 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
|
416 |
def main(args):
|
417 |
demo = gr.Blocks()
|
418 |
with demo:
|
419 |
|
420 |
gr.Markdown(title)
|
421 |
+
gr.Markdown(important_link)
|
422 |
gr.Markdown(description)
|
423 |
|
|
|
|
|
|
|
|
|
|
|
424 |
|
425 |
with gr.Column():
|
|
|
426 |
# step 0: select based model.
|
427 |
gr.Markdown("## Step0: Selecting the model", show_label=False)
|
428 |
gr.Markdown( f'- {BASE_MODEL[0]}: **MotionCtrl** deployed on {BASE_MODEL[0]}', show_label=False)
|
429 |
gr.Markdown( f'- {BASE_MODEL[1]}: **MotionCtrl** deployed on {BASE_MODEL[1]}', show_label=False)
|
430 |
+
# gr.HighlightedText(value=[("",""), (f'Choosing {BASE_MODEL[1]} requires time for loading new model. Please be patient.', "Normal")],
|
431 |
+
# color_map={"Normal": "green", "Error": "red", "Clear clicks": "gray", "Add mask": "green", "Remove mask": "red"}, visible=True)
|
432 |
+
choose_model = gr.Radio(choices=BASE_MODEL, value=BASE_MODEL[0], label="Based Model", interactive=True)
|
433 |
+
choose_model_button = gr.Button(value="Proceed")
|
434 |
|
435 |
# step 1: select motion control mode
|
436 |
+
step1 = gr.Markdown("## Step 1/3: Selecting the motion control mode", show_label=False, visible=False)
|
437 |
+
setp1_dec = gr.Markdown( f'\n - {MODE[0]}: Control the camera motion only \
|
438 |
+
\n- {MODE[1]}: Control the object motion only \
|
439 |
+
\n- {MODE[2]}: Control both the camera and object motion \
|
440 |
+
\n- Click `Proceed` to go into next step',
|
441 |
+
show_label=False, visible=False)
|
442 |
+
infer_mode = gr.Radio(choices=MODE, value=MODE[0], label="Motion Control Mode", interactive=True, visible=False)
|
443 |
+
mode_info = gr.Button(value="Proceed", visible=False)
|
444 |
|
445 |
# step2 - camera + object motion control
|
446 |
step2_camera_object_motion = gr.Markdown("---\n## Step 2/3: Select the camera poses and trajectory", show_label=False, visible=False)
|
|
|
478 |
|
479 |
# step2.3 - camera motion control - custom
|
480 |
custom_camera_motion = gr.Markdown(f"---\n### {CAMERA_MOTION_MODE[2]}", show_label=False, visible=False)
|
481 |
+
# custom_run_status = gr.Markdown(f"\n 1. Click two of the basic camera poses, such as `Pan Up` and `Pan Left`; \
|
482 |
+
# \n 2. Click `Customized Mode 1: First A then B` or `Customized Mode 1: First A then B` \
|
483 |
+
# \n - `Customized Mode 1: First A then B`: The camera first `Pan Up` and then `Pan Left`; \
|
484 |
+
# \n - `Customized Mode 2: Both A and B`: The camera move towards the upper left corner; \
|
485 |
+
# \n 3. Slide the `Motion speed` to get a speed value. The large the value, the fast the camera motion; \
|
486 |
+
# \n 4. Click `Visualize Camera and Proceed` to visualize the camera poses and go proceed; \
|
487 |
+
# \n 5. Click `Reset Camera` to reset the camera poses (If needed). ",
|
488 |
+
# show_label=False, visible=False)
|
489 |
+
custom_run_status = gr.Markdown(f"\n 1. Click `{DIY_MODE[0]}`, `{DIY_MODE[1]}`, or `{DIY_MODE[2]}` \
|
490 |
+
\n - `Customized Mode 1: First A then B`: For example, click `Pan Up` and `Pan Left`, the camera will first `Pan Up` and then `Pan Left`; \
|
491 |
+
\n - `Customized Mode 2: Both A and B`: For example, click `Pan Up` and `Pan Left`, the camera will move towards the upper left corner; \
|
492 |
+
\n - `{DIY_MODE[2]}`: Input the RAW RT matrix yourselves. \
|
493 |
+
\n 2. Slide the `Motion speed` to get a speed value. The large the value, the fast the camera motion; \
|
494 |
+
\n 3. Click `Visualize Camera and Proceed` to visualize the camera poses and go proceed; \
|
495 |
+
\n 4. Click `Reset Camera` to reset the camera poses (If needed). ",
|
496 |
show_label=False, visible=False)
|
497 |
|
498 |
+
# gr.HighlightedText(value=[("",""), ("1. Select two of the basic camera poses; 2. Select Customized Mode 1 OR Customized Mode 2. 3. Visualized Camera to show the customized camera poses", "Normal")],
|
499 |
+
# color_map={"Normal": "green", "Error": "red", "Clear clicks": "gray", "Add mask": "green", "Remove mask": "red"}, visible=False)
|
500 |
+
|
501 |
gr.HighlightedText(value=[("",""), ("1. Select two of the basic camera poses; 2. Select Customized Mode 1 OR Customized Mode 2. 3. Visualized Camera to show the customized camera poses", "Normal")],
|
502 |
color_map={"Normal": "green", "Error": "red", "Clear clicks": "gray", "Add mask": "green", "Remove mask": "red"}, visible=False)
|
503 |
|
504 |
+
with gr.Row():
|
505 |
+
combine1 = gr.Button(value=DIY_MODE[0], visible=False)
|
506 |
+
combine2 = gr.Button(value=DIY_MODE[1], visible=False)
|
507 |
+
combine3 = gr.Button(value=DIY_MODE[2], visible=False)
|
508 |
+
with gr.Row():
|
509 |
+
combine3_des = gr.Markdown(f"---\n#### Input your camera pose in the following textbox. \
|
510 |
+
A total of 14 lines and each line contains 12 float number, indicated \
|
511 |
+
the RT matrix in the shape of 1x12. \
|
512 |
+
The example is RT matrix of ZOOM IN.", show_label=False, visible=False)
|
513 |
+
|
514 |
+
|
515 |
with gr.Row():
|
516 |
U = gr.Button(value="Pan Up", visible=False)
|
517 |
D = gr.Button(value="Pan Down", visible=False)
|
|
|
523 |
ACW = gr.Button(value="ACW", visible=False)
|
524 |
CW = gr.Button(value="CW", visible=False)
|
525 |
|
526 |
+
# with gr.Row():
|
527 |
+
# combine1 = gr.Button(value="Customized Mode 1: First A then B", visible=False)
|
528 |
+
# combine2 = gr.Button(value="Customized Mode 2: Both A and B", visible=False)
|
529 |
|
530 |
with gr.Row():
|
531 |
speed = gr.Slider(minimum=0, maximum=2, step=0.2, label="Motion Speed", value=1.0, visible=False)
|
|
|
607 |
with gr.Column():
|
608 |
step3_prompt_generate = gr.Markdown("---\n## Step 3/3: Add prompt and Generate videos", show_label=False, visible=False)
|
609 |
prompt = gr.Textbox(value="a dog sitting on grass", label="Prompt", interactive=True, visible=False)
|
610 |
+
n_samples = gr.Number(value=2, precision=0, interactive=True, label="n_samples", visible=False)
|
611 |
seed = gr.Number(value=1234, precision=0, interactive=True, label="Seed", visible=False)
|
612 |
start = gr.Button(value="Start generation !", visible=False)
|
613 |
with gr.Column():
|
614 |
gen_video = gr.Video(value=None, label="Generate Video", visible=False)
|
615 |
|
616 |
+
choose_model_button.click(
|
617 |
+
fn=visualized_step1,
|
618 |
+
inputs=[choose_model],
|
619 |
+
outputs=[
|
620 |
+
step1, setp1_dec, infer_mode, mode_info,
|
621 |
+
step2_camera_motion,
|
622 |
+
step2_camera_motion_des,
|
623 |
+
camera_mode,
|
624 |
+
camera_info,
|
625 |
+
|
626 |
+
basic_camera_motion,
|
627 |
+
basic_camera_motion_des,
|
628 |
+
custom_camera_motion,
|
629 |
+
custom_run_status,
|
630 |
+
complex_camera_motion,
|
631 |
+
complex_camera_motion_des,
|
632 |
+
U, D, L, R,
|
633 |
+
I, O, ACW, CW,
|
634 |
+
combine1, combine2, combine3, combine3_des,
|
635 |
+
speed,
|
636 |
+
Pose_1, Pose_2, Pose_3, Pose_4,
|
637 |
+
Pose_5, Pose_6, Pose_7, Pose_8,
|
638 |
+
camera_args,
|
639 |
+
camera_reset, camera_vis,
|
640 |
+
vis_camera,
|
641 |
+
|
642 |
+
step2_object_motion,
|
643 |
+
step2_object_motion_des,
|
644 |
+
object_mode,
|
645 |
+
object_info,
|
646 |
+
|
647 |
+
provided_traj,
|
648 |
+
provided_traj_des,
|
649 |
+
draw_traj,
|
650 |
+
draw_run_status,
|
651 |
+
traj_1, traj_2, traj_3, traj_4,
|
652 |
+
traj_5, traj_6, traj_7, traj_8,
|
653 |
+
traj_args,
|
654 |
+
traj_droplast, traj_reset,
|
655 |
+
traj_vis,
|
656 |
+
traj_input, vis_traj,
|
657 |
+
|
658 |
+
step2_camera_object_motion,
|
659 |
+
step2_camera_object_motion_des,
|
660 |
+
|
661 |
+
step3_prompt_generate, prompt, n_samples, seed, start, gen_video,
|
662 |
+
|
663 |
+
],
|
664 |
+
)
|
665 |
+
|
666 |
mode_info.click(
|
667 |
fn=visualized_step2,
|
668 |
inputs=[infer_mode],
|
|
|
679 |
complex_camera_motion_des,
|
680 |
U, D, L, R,
|
681 |
I, O, ACW, CW,
|
682 |
+
combine1, combine2, combine3, combine3_des,
|
683 |
speed,
|
684 |
Pose_1, Pose_2, Pose_3, Pose_4,
|
685 |
Pose_5, Pose_6, Pose_7, Pose_8,
|
|
|
722 |
complex_camera_motion_des,
|
723 |
U, D, L, R,
|
724 |
I, O, ACW, CW,
|
725 |
+
combine1, combine2, combine3, combine3_des,
|
726 |
speed,
|
727 |
Pose_1, Pose_2, Pose_3, Pose_4,
|
728 |
Pose_5, Pose_6, Pose_7, Pose_8,
|
|
|
760 |
speed.change(fn=change_camera_speed, inputs=speed, outputs=camera_args)
|
761 |
camera_reset.click(fn=reset_camera, inputs=None, outputs=[camera_args])
|
762 |
|
763 |
+
combine1.click(fn=change_camera_mode,
|
764 |
+
inputs=[combine1, camera_mode],
|
765 |
+
outputs=[camera_args,
|
766 |
+
U, D, L, R,
|
767 |
+
I, O, ACW, CW, speed,
|
768 |
+
combine3_des])
|
769 |
+
combine2.click(fn=change_camera_mode,
|
770 |
+
inputs=[combine2, camera_mode],
|
771 |
+
outputs=[camera_args,
|
772 |
+
U, D, L, R,
|
773 |
+
I, O, ACW, CW, speed,
|
774 |
+
combine3_des])
|
775 |
+
combine3.click(fn=input_raw_camera_pose,
|
776 |
+
inputs=[combine3, camera_mode],
|
777 |
+
outputs=[camera_args,
|
778 |
+
U, D, L, R,
|
779 |
+
I, O, ACW, CW,
|
780 |
+
speed,
|
781 |
+
combine3_des])
|
782 |
+
|
783 |
+
camera_vis.click(fn=fn_vis_camera, inputs=[infer_mode, camera_args], outputs=[vis_camera, object_mode, object_info, step3_prompt_generate, prompt, n_samples, seed, start, gen_video])
|
784 |
|
785 |
Pose_1.click(fn=add_complex_camera_motion, inputs=Pose_1, outputs=camera_args)
|
786 |
Pose_2.click(fn=add_complex_camera_motion, inputs=Pose_2, outputs=camera_args)
|
|
|
806 |
traj_reset.click(fn=fn_traj_reset, inputs=None, outputs=traj_args)
|
807 |
|
808 |
|
809 |
+
start.click(fn=model_run, inputs=[prompt, choose_model, infer_mode, seed, n_samples, camera_args], outputs=gen_video)
|
810 |
|
811 |
gr.Markdown(article)
|
812 |
|
gradio_utils/camera_utils.py
CHANGED
@@ -95,7 +95,20 @@ def combine_camera_motion(RT_0, RT_1):
|
|
95 |
|
96 |
return np.concatenate([RT_0, RT_1], axis=0)
|
97 |
|
98 |
-
def process_camera(camera_dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
# "First A then B", "Both A and B", "Custom"
|
100 |
if camera_dict['complex'] is not None:
|
101 |
with open(COMPLEX_CAMERA[camera_dict['complex']]) as f:
|
@@ -105,9 +118,6 @@ def process_camera(camera_dict):
|
|
105 |
return RT
|
106 |
|
107 |
|
108 |
-
motion_list = camera_dict['motion']
|
109 |
-
mode = camera_dict['mode']
|
110 |
-
speed = camera_dict['speed']
|
111 |
print(len(motion_list))
|
112 |
if len(motion_list) == 0:
|
113 |
angle = np.array([0,0,0])
|
|
|
95 |
|
96 |
return np.concatenate([RT_0, RT_1], axis=0)
|
97 |
|
98 |
+
def process_camera(camera_dict, camera_args=None, num_frames=16):
|
99 |
+
speed = camera_dict['speed']
|
100 |
+
motion_list = camera_dict['motion']
|
101 |
+
mode = camera_dict['mode']
|
102 |
+
|
103 |
+
if mode == 'Customized Mode 3: RAW Camera Poses':
|
104 |
+
print(camera_args)
|
105 |
+
RT = camera_args.strip().split()
|
106 |
+
assert(len(RT) == num_frames*12), "The number of camera poses should be equal to the number of frames"
|
107 |
+
RT = [float(x) for x in RT]
|
108 |
+
RT = np.array(RT).reshape(-1, 3, 4)
|
109 |
+
RT[:, :, -1] = RT[:, :, -1] * np.array([1.5, 1, 1.3]) * speed
|
110 |
+
return RT
|
111 |
+
|
112 |
# "First A then B", "Both A and B", "Custom"
|
113 |
if camera_dict['complex'] is not None:
|
114 |
with open(COMPLEX_CAMERA[camera_dict['complex']]) as f:
|
|
|
118 |
return RT
|
119 |
|
120 |
|
|
|
|
|
|
|
121 |
print(len(motion_list))
|
122 |
if len(motion_list) == 0:
|
123 |
angle = np.array([0,0,0])
|
gradio_utils/page_control.py
ADDED
@@ -0,0 +1,580 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from gradio_utils.camera_utils import CAMERA_MOTION_MODE
|
3 |
+
from gradio_utils.traj_utils import get_provided_traj
|
4 |
+
|
5 |
+
# Motion-control modes selectable in the demo, in display order.
MODE = [
    "control camera poses",
    "control object trajectory",
    "control both camera and object motion",
]

# Base models selectable in the demo, in display order.
BASE_MODEL = [
    'LVDM/VideoCrafter',
    'VideoCrafter2',
]
|
8 |
+
|
9 |
+
def display_camera_info(camera_dict, camera_mode=None):
    """Render the camera settings as a one-line, human-readable summary.

    Args:
        camera_dict: Mapping with the keys ``complex``, ``motion``, ``speed``
            and ``mode``.
        camera_mode: Currently selected camera motion mode.  The ``mode``
            entry is appended only when this equals ``CAMERA_MOTION_MODE[2]``.

    Returns:
        The summary string; the ``complex`` part is omitted when that entry
        is ``None``.
    """
    pieces = []
    if camera_dict['complex'] is not None:
        pieces.append(f"complex : {camera_dict['complex']}. ")
    pieces.append(f"motion : {list(camera_dict['motion'])}. ")
    pieces.append(f"speed : {camera_dict['speed']}. ")
    if camera_mode == CAMERA_MOTION_MODE[2]:
        pieces.append(f"mode : {camera_dict['mode']}. ")
    return "".join(pieces)
|
19 |
+
|
20 |
+
# Module-level state: trajectory points collected so far.
traj_list = []

# Module-level state: current camera settings.
camera_dict = {
    "motion": [],
    # "First A then B", "Both A and B", "Custom"
    "mode": "Customized Mode 1: First A then B",
    "speed": 1.0,
    "complex": None,
}
|
27 |
+
|
28 |
+
def reset_camera():
    """Restore the shared camera settings to their defaults.

    The earlier version assigned the defaults to a function-local
    ``camera_dict`` (its ``global`` statement was commented out), so the
    module-level settings were left untouched while the returned text claimed
    they had been reset.  The shared dict is now updated in place, which also
    keeps it the same object for any module that imported it by name.

    Returns:
        The summary string produced by ``display_camera_info`` for the
        default settings.
    """
    camera_dict.update({
        "motion": [],
        "mode": "Customized Mode 1: First A then B",
        "speed": 1.0,
        "complex": None,
    })
    return display_camera_info(camera_dict)
|
37 |
+
|
38 |
+
def fn_traj_reset():
    """Discard every collected trajectory point.

    The earlier version bound an empty list to a function-local ``traj_list``
    (its ``global`` statement was commented out), which left the module-level
    list unchanged.  The shared list is now emptied in place so that every
    holder of a reference to it sees the reset.

    Returns:
        The placeholder text shown in the trajectory info box.
    """
    traj_list.clear()
    return "Click to specify trajectory"
|
42 |
+
|
43 |
+
def visualized_step1(model_name):
    """Show the step-1 widgets and hide everything that belongs to later steps.

    The result is a tuple of 69 ``gr.update`` objects.  Their order is
    significant: it has to line up with the ``outputs`` list of the event
    handler this function is registered on.

    Args:
        model_name: Selected base model.  The motion-mode radio is made
            interactive only when this equals ``BASE_MODEL[0]``.

    Returns:
        Tuple of ``gr.update`` results, one per output component.
    """
    # reset
    reset_camera()
    fn_traj_reset()

    def shown():
        return gr.update(visible=True)

    def hidden(count):
        return [gr.update(visible=False) for _ in range(count)]

    def hidden_and_cleared():
        return gr.update(visible=False, value=None)

    mode_selectable = bool(model_name == BASE_MODEL[0])

    updates = [
        shown(),  # step 1 heading
        shown(),  # step 1 description
        gr.update(visible=True, value=MODE[0], interactive=mode_selectable),  # motion mode radio
        shown(),  # step 1 proceed button
    ]

    # camera motion control
    updates += hidden(4)  # step 2 camera heading, description, camera mode, camera info
    updates += hidden(6)  # basic / custom / complex headings with their descriptions
    updates += hidden(8)  # U, D, L, R, I, O, ACW, CW
    updates += hidden(4)  # combine1, combine2, combine3, combine3 description
    updates += hidden(1)  # speed
    updates += hidden(8)  # Pose_1 .. Pose_8
    updates.append(hidden_and_cleared())  # camera args
    updates += hidden(2)  # camera reset, camera visualize
    updates.append(hidden_and_cleared())  # camera visualization

    # object motion control
    updates += hidden(4)  # step 2 object heading, description, trajectory mode, trajectory info
    updates += hidden(4)  # provided trajectory heading/description, draw heading/status
    updates += hidden(8)  # traj_1 .. traj_8
    updates += hidden(1)  # trajectory args
    updates += hidden(2)  # drop last point, reset trajectory
    updates += hidden(1)  # visualize trajectory
    updates += hidden(1)  # trajectory input
    updates.append(hidden_and_cleared())  # trajectory visualization

    updates += hidden(2)  # combined camera + object heading and description

    # generate video
    updates += hidden(6)  # step 3 heading, prompt, n_samples, seed, start, generated video

    return tuple(updates)
|
169 |
+
|
170 |
+
|
171 |
+
def visualized_step2(infer_mode):
    """Show the step-2 entry widgets that match the chosen motion-control mode.

    ``MODE[0]`` reveals the camera group, ``MODE[1]`` reveals the object
    group, and any other value reveals the camera group together with the
    combined camera + object heading.  Every other widget is hidden.  The
    result is a tuple of 65 ``gr.update`` objects whose order has to line up
    with the ``outputs`` list of the event handler this function is
    registered on.

    Args:
        infer_mode: The selected motion-control mode.

    Returns:
        Tuple of ``gr.update`` results, one per output component.
    """
    # reset
    reset_camera()
    fn_traj_reset()

    camera_only = bool(infer_mode == MODE[0])
    object_only = (not camera_only) and bool(infer_mode == MODE[1])
    show_camera = not object_only
    show_object = object_only
    show_both = not (camera_only or object_only)

    def toggled(flag, count):
        return [gr.update(visible=flag) for _ in range(count)]

    def hidden_and_cleared():
        return gr.update(visible=False, value=None)

    # camera motion control
    updates = toggled(show_camera, 4)  # step 2 camera heading, description, camera mode, camera info
    updates += toggled(False, 6)  # basic / custom / complex headings with their descriptions
    updates += toggled(False, 8)  # U, D, L, R, I, O, ACW, CW
    updates += toggled(False, 4)  # combine1, combine2, combine3, combine3 description
    updates += toggled(False, 1)  # speed
    updates += toggled(False, 8)  # Pose_1 .. Pose_8
    updates.append(hidden_and_cleared())  # camera args
    updates += toggled(False, 2)  # camera reset, camera visualize
    updates.append(hidden_and_cleared())  # camera visualization

    # object motion control
    updates += toggled(show_object, 4)  # step 2 object heading, description, trajectory mode, trajectory info
    updates += toggled(False, 4)  # provided trajectory heading/description, draw heading/status
    updates += toggled(False, 8)  # traj_1 .. traj_8
    updates += toggled(False, 1)  # trajectory args
    updates += toggled(False, 2)  # drop last point, reset trajectory
    updates += toggled(False, 1)  # visualize trajectory
    updates += toggled(False, 1)  # trajectory input
    updates.append(hidden_and_cleared())  # trajectory visualization

    updates += toggled(show_both, 2)  # combined camera + object heading and description

    # generate video
    updates += toggled(False, 6)  # step 3 heading, prompt, n_samples, seed, start, generated video

    return tuple(updates)
|
311 |
+
|
312 |
+
def visualized_camera_poses(step2_camera_motion):
    """Show the camera widgets that belong to the chosen camera motion mode.

    ``CAMERA_MOTION_MODE[0]`` reveals the basic heading, the eight basic pose
    buttons and the speed slider; ``CAMERA_MOTION_MODE[1]`` reveals the
    complex heading and the eight ``Pose_*`` buttons; any other value reveals
    the custom heading and the ``combine*`` widgets.  The camera args, reset,
    visualize and visualization widgets are always shown (args and
    visualization are also cleared), and the step-3 widgets are always
    hidden.  The result is a tuple of 37 ``gr.update`` objects in a fixed
    order.

    Args:
        step2_camera_motion: The selected camera motion mode.

    Returns:
        Tuple of ``gr.update`` results, one per output component.
    """
    reset_camera()

    is_basic = bool(step2_camera_motion == CAMERA_MOTION_MODE[0])
    is_complex = (not is_basic) and bool(step2_camera_motion == CAMERA_MOTION_MODE[1])
    is_custom = not (is_basic or is_complex)

    def toggled(flag, count):
        return [gr.update(visible=flag) for _ in range(count)]

    def shown_and_cleared():
        return gr.update(visible=True, value=None)

    updates = toggled(is_basic, 2)      # basic heading and description
    updates += toggled(is_custom, 2)    # custom heading and run status
    updates += toggled(is_complex, 2)   # complex heading and description
    updates += toggled(is_basic, 8)     # U, D, L, R, I, O, ACW, CW
    updates += toggled(is_custom, 4)    # combine1, combine2, combine3, combine3 description
    updates += toggled(is_basic, 1)     # speed
    updates += toggled(is_complex, 8)   # Pose_1 .. Pose_8
    updates.append(shown_and_cleared())  # camera args
    updates += toggled(True, 2)         # camera reset, camera visualize
    updates.append(shown_and_cleared())  # camera visualization

    # generate video
    updates += toggled(False, 6)        # step 3 heading, prompt, n_samples, seed, start, generated video

    return tuple(updates)
|
421 |
+
|
422 |
+
def visualized_traj_poses(step2_object_motion):
    """Build the visibility updates for the object-trajectory selection step.

    ``fn_traj_reset()`` is called first (presumably to discard the current
    trajectory -- confirm against its definition).  The function then returns
    a tuple of 24 ``gr.update`` objects whose order must match the ``outputs``
    list of the event this handler is bound to.

    Args:
        step2_object_motion: Either ``"Provided Trajectory"`` or
            ``"Custom Trajectory"``.

    Returns:
        A 24-tuple of ``gr.update`` objects.  The slot labels in the body are
        descriptive only; the real components are whatever the event wiring
        binds in this order.

    Raises:
        UnboundLocalError: If ``step2_object_motion`` is neither of the two
            supported mode strings.
    """
    fn_traj_reset()

    if step2_object_motion == "Provided Trajectory":
        use_provided = True
    elif step2_object_motion == "Custom Trajectory":
        use_provided = False
    # NOTE: any other value leaves ``use_provided`` unbound, so the next line
    # raises UnboundLocalError; only the two modes above are supported.
    use_custom = not use_provided

    def show(flag, **extra):
        # Tiny shorthand so the return tuple below stays readable.
        return gr.update(visible=flag, **extra)

    return (
        show(use_provided),                            # provided-trajectory section
        show(use_provided),                            # provided-trajectory description
        show(use_custom),                              # draw-yourself section
        show(use_custom),                              # draw-yourself run status
        *(show(use_provided) for _ in range(8)),       # preset trajectory buttons 1-8
        show(True),                                    # trajectory args
        show(use_custom),                              # drop-last button
        show(True),                                    # reset button
        show(True),                                    # visualize-trajectory button
        show(use_custom),                              # trajectory input canvas
        show(True, value=None),                        # trajectory preview, cleared
        # Step-3 widgets (prompt block, prompt, num_samples, seed, start,
        # generated video) are always hidden when the mode is (re)selected.
        *(show(False) for _ in range(6)),
    )
|
479 |
+
|
480 |
+
def add_camera_motion(camera_motion, camera_mode):
    """Record a basic camera motion in the module-level ``camera_dict``.

    Any previously selected complex motion is cleared.  When ``camera_mode``
    equals ``CAMERA_MOTION_MODE[2]`` and fewer than two motions are stored,
    the new motion is appended; in every other case the stored motions are
    replaced by a single-element list holding ``camera_motion``.

    Args:
        camera_motion: The motion to record.
        camera_mode: The currently selected camera-motion mode.

    Returns:
        Whatever ``display_camera_info(camera_dict, camera_mode)`` returns.
    """
    if camera_dict['complex'] is not None:
        camera_dict['complex'] = None

    can_stack = (
        camera_mode == CAMERA_MOTION_MODE[2]
        and len(camera_dict['motion']) < 2
    )
    if not can_stack:
        # Start over with just this motion.
        camera_dict['motion'] = [camera_motion]
    else:
        camera_dict['motion'].append(camera_motion)

    return display_camera_info(camera_dict, camera_mode)
|
490 |
+
|
491 |
+
def add_complex_camera_motion(camera_motion):
    """Store ``camera_motion`` as the complex motion in ``camera_dict``.

    Args:
        camera_motion: The complex camera motion to record.

    Returns:
        Whatever ``display_camera_info(camera_dict)`` returns.
    """
    camera_dict['complex'] = camera_motion
    summary = display_camera_info(camera_dict)
    return summary
|
495 |
+
|
496 |
+
def change_camera_mode(combine_type, camera_mode):
    """Store the combine type and reveal the basic-motion controls.

    Writes ``combine_type`` to ``camera_dict['mode']`` and returns an
    11-tuple: the camera info text followed by ten ``gr.update`` objects.
    The first nine updates set ``visible=True`` (in the original flag order:
    U, D, L, R, I, O, ACW, CW, speed) and the last sets ``visible=False``
    (the "combine3" description flag).

    Args:
        combine_type: Value stored under ``camera_dict['mode']``.
        camera_mode: Forwarded to ``display_camera_info``.

    Returns:
        ``(display_camera_info(...), <9 visible updates>, <1 hidden update>)``.
    """
    camera_dict['mode'] = combine_type

    info = display_camera_info(camera_dict, camera_mode)
    return (
        info,
        *(gr.update(visible=True) for _ in range(9)),
        gr.update(visible=False),
    )
|
523 |
+
|
524 |
+
def input_raw_camera_pose(combine_type, camera_mode):
    """Switch to raw-pose entry and pre-fill a default pose text.

    Writes ``combine_type`` to ``camera_dict['mode']`` and returns an
    11-tuple of ``gr.update`` objects: one that fills a text component with
    16 default pose lines (editable, ``max_lines=16``), eight that hide the
    basic-motion controls (U, D, L, R, I, O, ACW, CW in the original flag
    order), and two that set ``visible=True`` (speed and the "combine3"
    description flag).

    Args:
        combine_type: Value stored under ``camera_dict['mode']``.
        camera_mode: Accepted for signature compatibility; not used here.

    Returns:
        The 11-tuple of ``gr.update`` objects described above.
    """
    camera_dict['mode'] = combine_type

    # One line per frame, 12 numbers each (presumably a flattened 3x4 [R|t]
    # matrix -- confirm against the pose parser).  Only the last value changes.
    default_pose_text = (
        '1 0 0 0 0 1 0 0 0 0 1 0\n'
        '1 0 0 0 0 1 0 0 0 0 1 -0.225\n'
        '1 0 0 0 0 1 0 0 0 0 1 -0.45\n'
        '1 0 0 0 0 1 0 0 0 0 1 -0.675\n'
        '1 0 0 0 0 1 0 0 0 0 1 -0.9\n'
        '1 0 0 0 0 1 0 0 0 0 1 -1.125\n'
        '1 0 0 0 0 1 0 0 0 0 1 -1.35\n'
        '1 0 0 0 0 1 0 0 0 0 1 -1.575\n'
        '1 0 0 0 0 1 0 0 0 0 1 -1.8\n'
        '1 0 0 0 0 1 0 0 0 0 1 -2.025\n'
        '1 0 0 0 0 1 0 0 0 0 1 -2.25\n'
        '1 0 0 0 0 1 0 0 0 0 1 -2.475\n'
        '1 0 0 0 0 1 0 0 0 0 1 -2.7\n'
        '1 0 0 0 0 1 0 0 0 0 1 -2.925\n'
        '1 0 0 0 0 1 0 0 0 0 1 -3.15\n'
        '1 0 0 0 0 1 0 0 0 0 1 -3.375\n'
    )

    return (
        gr.update(value=default_pose_text, max_lines=16, interactive=True),
        *(gr.update(visible=False) for _ in range(8)),
        gr.update(visible=True),
        gr.update(visible=True),
    )
|
550 |
+
|
551 |
+
def change_camera_speed(camera_speed):
    """Store ``camera_speed`` under ``camera_dict['speed']``.

    Args:
        camera_speed: The new speed value.

    Returns:
        Whatever ``display_camera_info(camera_dict)`` returns.
    """
    camera_dict['speed'] = camera_speed
    summary = display_camera_info(camera_dict)
    return summary
|
555 |
+
|
556 |
+
def add_traj_point(evt: gr.SelectData):
    """Append the selected point to the module-level ``traj_list``.

    Args:
        evt: Gradio select event; its ``index`` attribute is stored as the
            new trajectory point.

    Returns:
        All points currently in ``traj_list``, each formatted with its
        default string form and joined by ``", "``.
    """
    traj_list.append(evt.index)
    return ", ".join(f"{point}" for point in traj_list)
|
561 |
+
|
562 |
+
def add_provided_traj(traj_name):
    """Load a provided trajectory into the module-level ``traj_list``.

    Args:
        traj_name: Name passed to ``get_provided_traj`` to look up the
            trajectory.

    Returns:
        The points of the loaded trajectory, each formatted with its default
        string form and joined by ``", "``.
    """
    # Slice-assign so the shared module-level list is updated in place.  A
    # plain ``traj_list = ...`` here only binds a function-local name, leaving
    # the trajectory used by add_traj_point / fn_traj_droplast untouched.
    # Mutating in place also keeps any other reference to the same list
    # object in sync.
    traj_list[:] = get_provided_traj(traj_name)
    return ", ".join(f"{point}" for point in traj_list)
|
567 |
+
|
568 |
+
|
569 |
+
def fn_traj_droplast():
    """Remove the most recent point from the module-level ``traj_list``.

    Returns:
        The remaining points joined by ``", "``, or the placeholder text
        ``"Click to specify trajectory"`` when no points are left (including
        when the list was already empty).
    """
    if traj_list:
        traj_list.pop()

    if not traj_list:
        return "Click to specify trajectory"

    return ", ".join(f"{point}" for point in traj_list)
|
580 |
+
|