app.py CHANGED
@@ -322,9 +322,8 @@ class ImageConductor:
             transforms.ToTensor(),
         ])
 
-        image_norm = lambda x: x
         image_paths = [first_frame_path]
-        controlnet_images = [image_norm(image_transforms(Image.open(path).convert("RGB"))) for path in image_paths]
+        controlnet_images = [(image_transforms(Image.open(path).convert("RGB"))) for path in image_paths]
         controlnet_images = torch.stack(controlnet_images).unsqueeze(0).to(device)
         controlnet_images = rearrange(controlnet_images, "b f c h w -> b c f h w")
         num_controlnet_images = controlnet_images.shape[2]
@@ -502,145 +501,144 @@ def delete_last_step(tracking_points, first_frame_path, drag_mode):
     return {tracking_points_var: tracking_points, input_image: trajectory_map}
 
 
-                        label="Seed: ", value=561793204,
-                        num_inference_steps = gr.Slider(
-                            label="Number of inference steps",
-                            minimum=1,
-                            maximum=50,
-                            step=1,
-                            value=25,
-                        )
-                with gr.Group():
-                    personalized = gr.Dropdown(label="Personalized", choices=['HelloObject', 'TUSUN', ""], value="")
-                    examples_type = gr.Textbox(label="Examples Type (Ignore) ", value="", visible=False)
-
-        with gr.Column(scale=7):
-            output_video = gr.Video(
-                label="Output Video",
-                width=384,
-                height=256)
-
-    with gr.Row():
-        def process_example(input_image, prompt, drag_mode, seed, personalized, examples_type):
-
-            return input_image, prompt, drag_mode, seed, personalized, examples_type
-
-        example = gr.Examples(
-            label="Input Example",
-            examples=image_examples,
-            inputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
-            outputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
-            fn=process_example,
-            run_on_click=True,
-            examples_per_page=10,
-            cache_examples=False,
-        )
-
-block.launch()
+block = gr.Blocks(
+        theme=gr.themes.Soft(
+            radius_size=gr.themes.sizes.radius_none,
+            text_size=gr.themes.sizes.text_md
+        )
+    )
+with block:
+    with gr.Row():
+        with gr.Column():
+            gr.HTML(head)
+
+            gr.Markdown(descriptions)
+
+    with gr.Accordion(label="🛠️ Instructions:", open=True, elem_id="accordion"):
+        with gr.Row(equal_height=True):
+            gr.Markdown(instructions)
+
+
+    # device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    device = torch.device("cuda")
+    unet_path = 'models/unet.ckpt'
+    image_controlnet_path = 'models/image_controlnet.ckpt'
+    flow_controlnet_path = 'models/flow_controlnet.ckpt'
+    ImageConductor_net = ImageConductor(device=device,
+                                        unet_path=unet_path,
+                                        image_controlnet_path=image_controlnet_path,
+                                        flow_controlnet_path=flow_controlnet_path,
+                                        height=256,
+                                        width=384,
+                                        model_length=16
+                                        )
+    first_frame_path_var = gr.State(value=None)
+    tracking_points_var = gr.State([])
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            image_upload_button = gr.UploadButton(label="Upload Image",file_types=["image"])
+            add_drag_button = gr.Button(value="Add Drag")
+            reset_button = gr.Button(value="Reset")
+            delete_last_drag_button = gr.Button(value="Delete last drag")
+            delete_last_step_button = gr.Button(value="Delete last step")
+
+
 
+        with gr.Column(scale=7):
+            with gr.Row():
+                with gr.Column(scale=6):
+                    input_image = gr.Image(label="Input Image",
+                                           interactive=True,
+                                           height=300,
+                                           width=384,)
+                with gr.Column(scale=6):
+                    output_image = gr.Image(label="Motion Path",
+                                            interactive=False,
+                                            height=256,
                                             width=384,)
+            with gr.Row():
+                with gr.Column(scale=1):
+                    prompt = gr.Textbox(value="a wonderful elf.", label="Prompt (highly-recommended)", interactive=True, visible=True)
+                    negative_prompt = gr.Text(
+                        label="Negative Prompt",
+                        max_lines=5,
+                        placeholder="Please input your negative prompt",
+                        value='worst quality, low quality, letterboxed',lines=1
+                    )
+                    drag_mode = gr.Radio(['camera', 'object'], label='Drag mode: ', value='object', scale=2)
+                    run_button = gr.Button(value="Run")
+
+                    with gr.Accordion("More input params", open=False, elem_id="accordion1"):
+                        with gr.Group():
+                            seed = gr.Textbox(
+                                label="Seed: ", value=561793204,
+                            )
+                            randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
+
+                        with gr.Group():
+                            with gr.Row():
+                                guidance_scale = gr.Slider(
+                                    label="Guidance scale",
+                                    minimum=1,
+                                    maximum=12,
+                                    step=0.1,
+                                    value=8.5,
                                 )
+                                num_inference_steps = gr.Slider(
+                                    label="Number of inference steps",
+                                    minimum=1,
+                                    maximum=50,
+                                    step=1,
+                                    value=25,
                                 )
+
+                        with gr.Group():
+                            personalized = gr.Dropdown(label="Personalized", choices=['HelloObject', 'TUSUN', ""], value="")
+                            examples_type = gr.Textbox(label="Examples Type (Ignore) ", value="", visible=False)
+
+                with gr.Column(scale=7):
+                    output_video = gr.Video(
+                        label="Output Video",
+                        width=384,
+                        height=256)
 
 
+    with gr.Row():
+        def process_example(input_image, prompt, drag_mode, seed, personalized, examples_type):
+
+            return input_image, prompt, drag_mode, seed, personalized, examples_type
+
+        example = gr.Examples(
+            label="Input Example",
+            examples=image_examples,
+            inputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
+            outputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
+            fn=process_example,
+            run_on_click=True,
+            examples_per_page=10,
+            cache_examples=False,
+        )
+
 
+    with gr.Row():
+        gr.Markdown(citation)
+
+
+    image_upload_button.upload(preprocess_image, image_upload_button, [input_image, first_frame_path_var, tracking_points_var])
 
+    add_drag_button.click(add_drag, [tracking_points_var], tracking_points_var)
 
+    delete_last_drag_button.click(delete_last_drag, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    delete_last_step_button.click(delete_last_step, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    reset_button.click(reset_states, [first_frame_path_var, tracking_points_var], [input_image, first_frame_path_var, tracking_points_var])
 
+    input_image.select(add_tracking_points, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    run_button.click(ImageConductor_net.run, [first_frame_path_var, tracking_points_var, prompt, drag_mode,
+                                              negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, personalized, examples_type],
+                                              [output_image, output_video])
 
+block.queue().launch()