File size: 5,121 Bytes
2a37fe9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import gradio as gr
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video

from video_diffusion.utils.scheduler_list import diff_scheduler_list, get_scheduler_list

# Hugging Face model IDs selectable from the UI dropdown.
stable_model_list = [
    "damo-vilab/text-to-video-ms-1.7b",
    "cerspense/zeroscope_v2_576w",
]

class DamoText2VideoGenerator:
    """Text-to-video generator backed by a diffusers ``DiffusionPipeline``.

    The pipeline is loaded lazily on first use and cached on the instance.
    Unlike the original implementation, the cache is invalidated whenever a
    different model or scheduler is requested, so UI dropdown changes
    actually take effect.
    """

    def __init__(self):
        # Cached pipeline and the (model, scheduler) pair it was built for.
        self.pipe = None
        self._loaded_key = None

    def load_model(self, stable_model: str, scheduler: str):
        """Return a fp16 CUDA pipeline for *stable_model* with *scheduler*.

        Rebuilds the pipeline when either selection changes; the original
        code cached the first pipeline forever and silently ignored later
        model/scheduler choices from the UI.
        """
        key = (stable_model, scheduler)
        if self.pipe is None or self._loaded_key != key:
            pipe = DiffusionPipeline.from_pretrained(
                stable_model, torch_dtype=torch.float16, variant="fp16"
            )
            pipe = get_scheduler_list(pipe=pipe, scheduler=scheduler)
            pipe.to("cuda")
            pipe.enable_xformers_memory_efficient_attention()
            self.pipe = pipe
            self._loaded_key = key
        return self.pipe

    def generate_video(
        self,
        prompt: str,
        negative_prompt: str,
        stable_model: str,
        num_frames: int,
        num_inference_steps: int,
        guidance_scale: int,
        height: int,
        width: int,
        scheduler: str,
    ) -> str:
        """Run the pipeline and return the path of the exported video file.

        Slider values arrive from Gradio and may be floats, so the integer
        parameters are coerced explicitly before being passed to the pipe.
        """
        pipe = self.load_model(stable_model=stable_model, scheduler=scheduler)
        video = pipe(
            prompt,
            negative_prompt=negative_prompt,
            num_frames=int(num_frames),
            height=int(height),
            width=int(width),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=guidance_scale,
        ).frames

        video_path = export_to_video(video)
        return video_path

    @staticmethod
    def app():
        """Build the Gradio UI for this generator.

        Declared ``@staticmethod`` because it takes no ``self`` (the original
        bare ``def app():`` raised ``TypeError`` when called on an instance).
        The ``.click`` handler is registered *inside* the ``gr.Blocks``
        context — registering it outside (as the original did) is an error
        in current Gradio releases.
        """
        # One shared generator so the pipeline cache survives across clicks
        # (the original constructed a fresh instance inside the handler).
        generator = DamoText2VideoGenerator()

        with gr.Blocks():
            with gr.Row():
                with gr.Column():
                    damo_text2video_prompt = gr.Textbox(lines=1, placeholder="Prompt", show_label=False)
                    damo_text2video_negative_prompt = gr.Textbox(
                        lines=1, placeholder="Negative Prompt", show_label=False
                    )
                    with gr.Row():
                        with gr.Column():
                            damo_text2video_model_list = gr.Dropdown(
                                choices=stable_model_list,
                                label="Model List",
                                value=stable_model_list[0],
                            )
                            damo_text2video_num_inference_steps = gr.Slider(
                                minimum=1,
                                maximum=100,
                                value=50,
                                step=1,
                                label="Inference Steps",
                            )
                            damo_text2video_guidance_scale = gr.Slider(
                                minimum=1,
                                maximum=15,
                                value=7,
                                step=1,
                                label="Guidance Scale",
                            )
                            damo_text2video_num_frames = gr.Slider(
                                minimum=1,
                                maximum=50,
                                value=16,
                                step=1,
                                label="Number of Frames",
                            )
                        with gr.Row():
                            with gr.Column():
                                damo_text2video_height = gr.Slider(
                                    minimum=128,
                                    maximum=1280,
                                    value=512,
                                    step=32,
                                    label="Height",
                                )
                                damo_text2video_width = gr.Slider(
                                    minimum=128,
                                    maximum=1280,
                                    value=512,
                                    step=32,
                                    label="Width",
                                )
                                damo_text2video_scheduler = gr.Dropdown(
                                    choices=diff_scheduler_list,
                                    label="Scheduler",
                                    value=diff_scheduler_list[6],
                                )
                    damo_text2video_generate = gr.Button(value="Generator")
                with gr.Column():
                    damo_output = gr.Video(label="Output")

            # Input order must match generate_video's parameter order.
            damo_text2video_generate.click(
                fn=generator.generate_video,
                inputs=[
                    damo_text2video_prompt,
                    damo_text2video_negative_prompt,
                    damo_text2video_model_list,
                    damo_text2video_num_frames,
                    damo_text2video_num_inference_steps,
                    damo_text2video_guidance_scale,
                    damo_text2video_height,
                    damo_text2video_width,
                    damo_text2video_scheduler,
                ],
                outputs=damo_output,
            )