Video-Diffusion-WebUI

Runtime error

File size: 5,136 Bytes

4045f37

import os

import gradio as gr
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from PIL import Image

from video_diffusion.inpaint_zoom.utils.zoom_out_utils import (
    dummy,
    preprocess_image,
    preprocess_mask_image,
    write_video,
)

# Expose only GPU index 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


# Inpainting checkpoints offered in the model dropdown; the first entry is the default.
stable_paint_model_list = [
    "stabilityai/stable-diffusion-2-inpainting",
    "runwayml/stable-diffusion-inpainting",
]

# Example prompts; the first entry pre-fills the prompt textbox.
# The long prompt is split with implicit string concatenation purely for
# readability -- the resulting string is unchanged (including its spacing).
stable_paint_prompt_list = [
    (
        "children running in the forest , sunny, bright, "
        "by studio ghibli painting, superior quality, masterpiece,  "
        "traditional Japanese colors, by Grzegorz Rutkowski, concept art"
    ),
    "A beautiful landscape of a mountain range with a lake in the foreground",
]

# Example negative prompts; the first entry pre-fills the negative-prompt textbox.
# The leading term was previously the truncated, meaningless token "lurry".
stable_paint_negative_prompt_list = [
    "blurry, bad art, blurred, text, watermark",
]


class StableDiffusionZoomOut:
    """Build a "zoom-out" video by repeatedly running an inpainting pipeline.

    A first frame is generated from an empty 512x512 RGBA canvas. Every
    following frame is produced by passing the previous frame through
    ``preprocess_image`` / ``preprocess_mask_image``, running the pipeline on
    the result, and pasting the preprocessed previous frame back on top.

    Attributes:
        pipe: The loaded diffusers pipeline, or ``None`` before the first load.
        g_cuda: A CUDA ``torch.Generator`` created whenever a pipeline is loaded.
    """

    def __init__(self):
        self.pipe = None
        # Checkpoint id held in ``self.pipe``. ``None`` means "unknown", e.g.
        # nothing loaded yet or a pipeline assigned to ``self.pipe`` externally.
        self._loaded_model_id = None

    def load_model(self, model_id):
        """Return a CUDA pipeline for ``model_id``, loading it only when needed.

        The cached pipeline is reused when it was loaded for the same
        ``model_id`` (or when its origin is unknown because it was assigned
        externally). Requesting a different checkpoint replaces the cached one.

        Args:
            model_id: Checkpoint id or path accepted by
                ``DiffusionPipeline.from_pretrained``.

        Returns:
            The ready-to-use pipeline, also stored in ``self.pipe``.
        """
        if self.pipe is not None and self._loaded_model_id in (None, model_id):
            return self.pipe

        # Release the previous pipeline before loading another one so both are
        # not kept alive at the same time.
        self.pipe = None
        self._loaded_model_id = None

        # Configure a local first: if any step raises, no half-initialised
        # pipeline is left cached on the instance.
        pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
        pipe.set_use_memory_efficient_attention_xformers(True)
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        pipe = pipe.to("cuda")
        pipe.safety_checker = dummy

        self.g_cuda = torch.Generator(device="cuda")
        self.pipe = pipe
        self._loaded_model_id = model_id
        return self.pipe

    @staticmethod
    def _inpaint(pipe, prompt, negative_prompt, image, mask_image, num_inference_steps, guidance_scale):
        """Run one inpainting pass and return the first generated image."""
        return pipe(
            prompt=[prompt],
            negative_prompt=[negative_prompt],
            image=image,
            mask_image=mask_image,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
        ).images[0]

    def generate_video(
        self,
        model_id,
        prompt,
        negative_prompt,
        guidance_scale,
        num_inference_steps,
        num_frames,
        step_size,
    ):
        """Generate the zoom-out frames and write them to ``output.mp4``.

        Args:
            model_id: Inpainting checkpoint to use (see ``load_model``).
            prompt: Text prompt applied to every frame.
            negative_prompt: Negative text prompt applied to every frame.
            guidance_scale: Guidance scale applied to every frame.
            num_inference_steps: Denoising steps per frame.
            num_frames: Number of frames generated after the initial one, so
                the video holds ``num_frames + 1`` frames.
            step_size: Forwarded to ``preprocess_image`` for each new frame
                (presumably the per-frame shrink in pixels -- confirm in
                ``zoom_out_utils``).

        Returns:
            The path of the written video, always ``"output.mp4"``.
        """
        pipe = self.load_model(model_id)

        # UI sliders may deliver floats; ``range`` below requires an int.
        num_frames = int(num_frames)
        num_inference_steps = int(num_inference_steps)

        # The first frame starts from an empty RGBA canvas.
        blank_canvas = Image.new(mode="RGBA", size=(512, 512))
        current_image, mask_image = preprocess_mask_image(blank_canvas)
        current_image = self._inpaint(
            pipe, prompt, negative_prompt, current_image, mask_image, num_inference_steps, guidance_scale
        )

        all_frames = [current_image]

        for _ in range(num_frames):
            prev_image = preprocess_image(current_image, step_size, 512)
            current_image, mask_image = preprocess_mask_image(prev_image)
            # guidance_scale is passed here as well so every frame is generated
            # with the same settings as the first one.
            current_image = self._inpaint(
                pipe, prompt, negative_prompt, current_image, mask_image, num_inference_steps, guidance_scale
            )
            # Paste the preprocessed previous frame back using itself as the
            # mask (assumes ``preprocess_image`` returns an image with an
            # alpha channel -- confirm in ``zoom_out_utils``).
            current_image.paste(prev_image, mask=prev_image)
            all_frames.append(current_image)

        # NOTE(review): fixed file name in the working directory; concurrent
        # requests would overwrite each other's output.
        save_path = "output.mp4"
        write_video(save_path, all_frames, fps=30)
        return save_path

    @staticmethod
    def app():
        """Build the Gradio widgets for this tab and wire up the button.

        Declared as a static method so it can be called through either the
        class or an instance. The ``inputs`` list passed to ``click`` must
        follow the positional parameter order of ``generate_video``.
        """
        with gr.Blocks():
            with gr.Row():
                with gr.Column():
                    text2image_out_model_path = gr.Dropdown(
                        choices=stable_paint_model_list, value=stable_paint_model_list[0], label="Text-Image Model Id"
                    )

                    text2image_out_prompt = gr.Textbox(lines=2, value=stable_paint_prompt_list[0], label="Prompt")

                    text2image_out_negative_prompt = gr.Textbox(
                        lines=1, value=stable_paint_negative_prompt_list[0], label="Negative Prompt"
                    )

                    with gr.Row():
                        with gr.Column():
                            text2image_out_guidance_scale = gr.Slider(
                                minimum=0.1, maximum=15, step=0.1, value=7.5, label="Guidance Scale"
                            )

                            text2image_out_num_inference_step = gr.Slider(
                                minimum=1, maximum=100, step=1, value=50, label="Num Inference Step"
                            )
                        with gr.Row():
                            with gr.Column():
                                text2image_out_step_size = gr.Slider(
                                    minimum=1, maximum=100, step=1, value=10, label="Step Size"
                                )

                                text2image_out_num_frames = gr.Slider(
                                    minimum=1, maximum=100, step=1, value=10, label="Frames"
                                )

                    text2image_out_predict = gr.Button(value="Generator")

                with gr.Column():
                    output_image = gr.Video(label="Output")

            text2image_out_predict.click(
                fn=StableDiffusionZoomOut().generate_video,
                # Order matches generate_video(model_id, prompt, negative_prompt,
                # guidance_scale, num_inference_steps, num_frames, step_size).
                inputs=[
                    text2image_out_model_path,
                    text2image_out_prompt,
                    text2image_out_negative_prompt,
                    text2image_out_guidance_scale,
                    text2image_out_num_inference_step,
                    text2image_out_num_frames,
                    text2image_out_step_size,
                ],
                outputs=output_image,
            )