# Duplicated from ArtGAN/Video-Diffusion-WebUI
import gradio as gr
import numpy as np
import torch
from video_diffusion.stable_diffusion_video.stable_diffusion_pipeline import StableDiffusionWalkPipeline
from video_diffusion.utils.model_list import stable_model_list


class StableDiffusionText2VideoGenerator:
    def __init__(self):
        self.pipe = None

    def load_model(
        self,
        model_path,
    ):
        # Load the walk pipeline once in fp16 and cache it on the instance.
        if self.pipe is None:
            self.pipe = StableDiffusionWalkPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
                revision="fp16",
            )

        self.pipe.to("cuda")
        self.pipe.enable_xformers_memory_efficient_attention()
        self.pipe.enable_attention_slicing()

        return self.pipe

    def generate_video(
        self,
        model_path: str,
        first_prompts: str,
        second_prompts: str,
        negative_prompt: str,
        num_interpolation_steps: int,
        guidance_scale: float,
        num_inference_step: int,
        height: int,
        width: int,
        upsample: bool,
        fps: int,
    ):
        # Draw a random seed per prompt so each run yields a different interpolation.
        first_seed = np.random.randint(0, 100000)
        second_seed = np.random.randint(0, 100000)
        seeds = [first_seed, second_seed]
        prompts = [first_prompts, second_prompts]
        pipe = self.load_model(model_path=model_path)

        # Walk the latent space between the two prompts and render the frames as a video.
        output_video = pipe.walk(
            prompts=prompts,
            num_interpolation_steps=int(num_interpolation_steps),
            height=height,
            width=width,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_step,
            negative_prompt=negative_prompt,
            seeds=seeds,
            upsample=upsample,
            fps=fps,
        )

        return output_video
def app():
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                stable_text2video_first_prompt = gr.Textbox(
                    lines=1,
                    placeholder="First Prompt",
                    show_label=False,
                )
                stable_text2video_second_prompt = gr.Textbox(
                    lines=1,
                    placeholder="Second Prompt",
                    show_label=False,
                )
                stable_text2video_negative_prompt = gr.Textbox(
                    lines=1,
                    placeholder="Negative Prompt",
                    show_label=False,
                )
                with gr.Row():
                    with gr.Column():
                        stable_text2video_model_path = gr.Dropdown(
                            choices=stable_model_list,
                            label="Stable Model List",
                            value=stable_model_list[0],
                        )
                        stable_text2video_guidance_scale = gr.Slider(
                            minimum=0,
                            maximum=15,
                            step=0.5,
                            value=8.5,
                            label="Guidance Scale",
                        )
                        stable_text2video_num_inference_steps = gr.Slider(
                            minimum=1,
                            maximum=100,
                            step=1,
                            value=30,
                            label="Number of Inference Steps",
                        )
                        stable_text2video_fps = gr.Slider(
                            minimum=1,
                            maximum=60,
                            step=1,
                            value=10,
                            label="FPS",
                        )
                    with gr.Column():
                        stable_text2video_num_interpolation_steps = gr.Number(
                            value=10,
                            label="Number of Interpolation Steps",
                        )
                        stable_text2video_height = gr.Slider(
                            minimum=1,
                            maximum=1000,
                            step=1,
                            value=512,
                            label="Height",
                        )
                        stable_text2video_width = gr.Slider(
                            minimum=1,
                            maximum=1000,
                            step=1,
                            value=512,
                            label="Width",
                        )
                        stable_text2video_upsample = gr.Checkbox(
                            label="Upsample",
                            value=False,
                        )

                text2video_generate = gr.Button(value="Generate")

            with gr.Column():
                text2video_output = gr.Video(label="Output")

        # Inputs are passed positionally in the order generate_video expects them.
        text2video_generate.click(
            fn=StableDiffusionText2VideoGenerator().generate_video,
            inputs=[
                stable_text2video_model_path,
                stable_text2video_first_prompt,
                stable_text2video_second_prompt,
                stable_text2video_negative_prompt,
                stable_text2video_num_interpolation_steps,
                stable_text2video_guidance_scale,
                stable_text2video_num_inference_steps,
                stable_text2video_height,
                stable_text2video_width,
                stable_text2video_upsample,
                stable_text2video_fps,
            ],
            outputs=text2video_output,
        )
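

# Usage sketch (not in the original file): the upstream WebUI mounts app() inside a
# parent gr.Blocks/gr.Tab layout, so the wrapper below is only an assumed way to run
# this tab standalone, relying on Gradio's support for nesting Blocks.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        app()
    demo.launch()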