|
import gradio as gr |
|
import torch |
|
from torch import autocast |
|
from diffusers import StableDiffusionPipeline |
|
import argparse |
|
from moviepy.editor import AudioFileClip, ImageClip |
|
|
|
# Command-line options for the demo.
#   --setshare / --no-setshare : whether Gradio should create a public share
#   link when the app is launched (default: enabled).
parser = argparse.ArgumentParser()
parser.add_argument(
    "--setshare",
    default=True,
    action=argparse.BooleanOptionalAction,
    help="create a public Gradio share link (default: enabled)",
)

# add_argument() returns the argparse Action object, not the parsed value, so
# the command line has to be parsed explicitly to obtain the boolean.
# parse_known_args() is used instead of parse_args() so that extra arguments
# injected by a launcher keep being ignored rather than aborting start-up.
setshare = parser.parse_known_args()[0].setshare
|
|
|
def process_inputs(prompt, audio):
    """Gradio callback: turn a text prompt and an audio input into a video.

    An image is generated from ``prompt``, combined with ``audio`` into a
    clip, and the clip is rendered to an ``.mp4`` file.

    Args:
        prompt: Text prompt forwarded to ``get_stable_diffusion_image``.
        audio: Audio input forwarded to ``add_static_image_to_audio``.

    Returns:
        Path (``str``) of the rendered video file. A Gradio ``"video"``
        output expects a file path, not an in-memory MoviePy clip.
    """
    import tempfile

    image = get_stable_diffusion_image(prompt)
    video = add_static_image_to_audio(image, audio)

    # Only the file name is needed here; MoviePy opens the path itself.
    # delete=False because Gradio reads the file after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        output_path = tmp.name

    try:
        video.write_videofile(output_path)
    finally:
        # Release the readers/subprocesses held by the clip and its audio.
        video.close()

    return output_path
|
|
|
|
|
def add_static_image_to_audio(image, audio):
    """Combine a still image with an audio track into one video clip.

    Args:
        image: The picture shown for the whole clip. A file path or an
            array-like with a ``shape`` attribute is handed to ``ImageClip``
            unchanged; any other object (e.g. a PIL image) is first
            converted with ``numpy.asarray``.
        audio: Audio source passed to ``AudioFileClip`` (a file path).

    Returns:
        The image clip with the audio attached, its duration set to the
        audio's duration and its frame rate set to 1 fps. Nothing is
        written to disk by this function.
    """
    import os

    import numpy as np

    audio_clip = AudioFileClip(audio)

    # ImageClip accepts a path or a pixel array; convert other image objects
    # to an array so they are not rejected.
    is_path = isinstance(image, (str, os.PathLike))
    if not is_path and not hasattr(image, "shape"):
        image = np.asarray(image)
    image_clip = ImageClip(image)

    # Attach the loaded clip (not the raw `audio` argument) and take the
    # duration from it; the raw argument has no `duration` attribute.
    video_clip = image_clip.set_audio(audio_clip)
    video_clip.duration = audio_clip.duration

    # The picture never changes, so one frame per second is sufficient.
    video_clip.fps = 1

    return video_clip
|
|
|
# Loaded pipelines keyed by (model_id, device), so the weights are read and
# moved to the device once per process instead of on every request.
_PIPELINE_CACHE = {}


def _get_pipeline(model_id, device):
    """Return the pipeline for ``model_id`` on ``device``, loading it once."""
    key = (model_id, device)
    if key not in _PIPELINE_CACHE:
        pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
        _PIPELINE_CACHE[key] = pipe.to(device)
    return _PIPELINE_CACHE[key]


def get_stable_diffusion_image(prompt):
    """Generate one image for ``prompt`` with Stable Diffusion v1.4.

    Args:
        prompt: Text prompt passed to the pipeline.

    Returns:
        The first image produced by the pipeline.
    """
    from contextlib import nullcontext

    model_id = "CompVis/stable-diffusion-v1-4"
    # Use the GPU when one is present; otherwise fall back to the CPU
    # instead of failing when the pipeline is moved to "cuda".
    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = _get_pipeline(model_id, device)

    # Mixed-precision autocast is only applied on the GPU path.
    precision_ctx = autocast("cuda") if device == "cuda" else nullcontext()
    with precision_ctx:
        result = pipe(prompt, guidance_scale=7.5)

    # Newer diffusers releases expose the images as `.images`; older ones
    # returned a dict with a "sample" entry. Support both.
    images = getattr(result, "images", None)
    if images is None:
        images = result["sample"]
    return images[0]
|
|
|
|
|
# Web UI: a text prompt plus an audio input, producing a video.
# The audio component is asked for a file path because the callback passes
# its value to MoviePy's AudioFileClip, which opens a file; the plain "audio"
# shortcut would deliver a (sample_rate, samples) tuple instead.
iface = gr.Interface(
    fn=process_inputs,
    inputs=["text", gr.Audio(type="filepath")],
    outputs="video",
)

# `share` controls whether Gradio creates a public share link.
iface.launch(share=setshare)