app.py CHANGED
@@ -322,9 +322,8 @@ class ImageConductor:
             transforms.ToTensor(),
         ])
 
-        image_norm = lambda x: x
         image_paths = [first_frame_path]
-        controlnet_images = [image_norm(image_transforms(Image.open(path).convert("RGB"))) for path in image_paths]
+        controlnet_images = [(image_transforms(Image.open(path).convert("RGB"))) for path in image_paths]
         controlnet_images = torch.stack(controlnet_images).unsqueeze(0).to(device)
         controlnet_images = rearrange(controlnet_images, "b f c h w -> b c f h w")
         num_controlnet_images = controlnet_images.shape[2]
@@ -502,145 +501,144 @@ def delete_last_step(tracking_points, first_frame_path, drag_mode):
     return {tracking_points_var: tracking_points, input_image: trajectory_map}
 
 
-                        label="Seed: ", value=561793204,
-                        num_inference_steps = gr.Slider(
-                            label="Number of inference steps",
-                            minimum=1,
-                            maximum=50,
-                            step=1,
-                            value=25,
-                        )
-                with gr.Group():
-                    personalized = gr.Dropdown(label="Personalized", choices=['HelloObject', 'TUSUN', ""], value="")
-                    examples_type = gr.Textbox(label="Examples Type (Ignore) ", value="", visible=False)
-
-        with gr.Column(scale=7):
-            output_video = gr.Video(
-                label="Output Video",
-                width=384,
-                height=256)
-
-    with gr.Row():
-        def process_example(input_image, prompt, drag_mode, seed, personalized, examples_type):
-
-            return input_image, prompt, drag_mode, seed, personalized, examples_type
-
-        example = gr.Examples(
-            label="Input Example",
-            examples=image_examples,
-            inputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
-            outputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
-            fn=process_example,
-            run_on_click=True,
-            examples_per_page=10,
-            cache_examples=False,
-        )
-
-block.launch()
+block = gr.Blocks(
+        theme=gr.themes.Soft(
+            radius_size=gr.themes.sizes.radius_none,
+            text_size=gr.themes.sizes.text_md
+        )
+    )
+with block:
+    with gr.Row():
+        with gr.Column():
+            gr.HTML(head)
+
+            gr.Markdown(descriptions)
+
+    with gr.Accordion(label="🛠️ Instructions:", open=True, elem_id="accordion"):
+        with gr.Row(equal_height=True):
+            gr.Markdown(instructions)
+
+
+    # device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    device = torch.device("cuda")
+    unet_path = 'models/unet.ckpt'
+    image_controlnet_path = 'models/image_controlnet.ckpt'
+    flow_controlnet_path = 'models/flow_controlnet.ckpt'
+    ImageConductor_net = ImageConductor(device=device,
+                                        unet_path=unet_path,
+                                        image_controlnet_path=image_controlnet_path,
+                                        flow_controlnet_path=flow_controlnet_path,
+                                        height=256,
+                                        width=384,
+                                        model_length=16
+                                        )
+    first_frame_path_var = gr.State(value=None)
+    tracking_points_var = gr.State([])
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            image_upload_button = gr.UploadButton(label="Upload Image",file_types=["image"])
+            add_drag_button = gr.Button(value="Add Drag")
+            reset_button = gr.Button(value="Reset")
+            delete_last_drag_button = gr.Button(value="Delete last drag")
+            delete_last_step_button = gr.Button(value="Delete last step")
+
+
 
+        with gr.Column(scale=7):
+            with gr.Row():
+                with gr.Column(scale=6):
+                    input_image = gr.Image(label="Input Image",
+                                           interactive=True,
+                                           height=300,
+                                           width=384,)
+                with gr.Column(scale=6):
+                    output_image = gr.Image(label="Motion Path",
+                                            interactive=False,
+                                            height=256,
                                             width=384,)
+            with gr.Row():
+                with gr.Column(scale=1):
+                    prompt = gr.Textbox(value="a wonderful elf.", label="Prompt (highly-recommended)", interactive=True, visible=True)
+                    negative_prompt = gr.Text(
+                        label="Negative Prompt",
+                        max_lines=5,
+                        placeholder="Please input your negative prompt",
+                        value='worst quality, low quality, letterboxed',lines=1
+                    )
+                    drag_mode = gr.Radio(['camera', 'object'], label='Drag mode: ', value='object', scale=2)
+                    run_button = gr.Button(value="Run")
+
+                    with gr.Accordion("More input params", open=False, elem_id="accordion1"):
+                        with gr.Group():
+                            seed = gr.Textbox(
+                                label="Seed: ", value=561793204,
+                            )
+                            randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
+
+                        with gr.Group():
+                            with gr.Row():
+                                guidance_scale = gr.Slider(
+                                    label="Guidance scale",
+                                    minimum=1,
+                                    maximum=12,
+                                    step=0.1,
+                                    value=8.5,
                                 )
+                                num_inference_steps = gr.Slider(
+                                    label="Number of inference steps",
+                                    minimum=1,
+                                    maximum=50,
+                                    step=1,
+                                    value=25,
                                 )
+
+                        with gr.Group():
+                            personalized = gr.Dropdown(label="Personalized", choices=['HelloObject', 'TUSUN', ""], value="")
+                            examples_type = gr.Textbox(label="Examples Type (Ignore) ", value="", visible=False)
+
+                with gr.Column(scale=7):
+                    output_video = gr.Video(
+                        label="Output Video",
+                        width=384,
+                        height=256)
 
 
+    with gr.Row():
+        def process_example(input_image, prompt, drag_mode, seed, personalized, examples_type):
+
+            return input_image, prompt, drag_mode, seed, personalized, examples_type
+
+        example = gr.Examples(
+            label="Input Example",
+            examples=image_examples,
+            inputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
+            outputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
+            fn=process_example,
+            run_on_click=True,
+            examples_per_page=10,
+            cache_examples=False,
+        )
+
 
+    with gr.Row():
+        gr.Markdown(citation)
+
+
+    image_upload_button.upload(preprocess_image, image_upload_button, [input_image, first_frame_path_var, tracking_points_var])
 
+    add_drag_button.click(add_drag, [tracking_points_var], tracking_points_var)
 
+    delete_last_drag_button.click(delete_last_drag, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    delete_last_step_button.click(delete_last_step, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    reset_button.click(reset_states, [first_frame_path_var, tracking_points_var], [input_image, first_frame_path_var, tracking_points_var])
 
+    input_image.select(add_tracking_points, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    run_button.click(ImageConductor_net.run, [first_frame_path_var, tracking_points_var, prompt, drag_mode,
+                                              negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, personalized, examples_type],
+                                              [output_image, output_video])
 
+block.queue().launch()