|
import contextlib
import functools
import os

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
from moviepy.editor import AudioFileClip, ImageClip
from torch import autocast
|
|
|
def process_inputs(prompt, audio):
    """Generate an image for `prompt` and pair it with `audio` as a video.

    Args:
        prompt: Text passed to `get_stable_diffusion_image`.
        audio: Audio input passed to `add_static_image_to_audio`.

    Returns:
        Whatever `add_static_image_to_audio` returns for the generated
        image and the given audio.
    """
    generated_image = get_stable_diffusion_image(prompt)
    return add_static_image_to_audio(generated_image, audio)
|
|
|
|
|
def add_static_image_to_audio(image, audio):
    """Combine a still image and an audio track into a video file.

    The image is shown for the full length of the audio and the result is
    written to ``temp/video_out.mp4``.

    Args:
        image: Image source handed to ``ImageClip`` (the caller in this
            file passes a file path).
        audio: Audio source handed to ``AudioFileClip`` (the caller in
            this file passes a file path).

    Returns:
        The path of the written video file, ``"temp/video_out.mp4"``.
    """
    path = "temp/video_out.mp4"
    # The output directory is not guaranteed to exist (e.g. fresh checkout),
    # and the writer does not create it.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    audio_clip = AudioFileClip(audio)
    try:
        video_clip = ImageClip(image).set_audio(audio_clip)
        # A still image has no length of its own; use the audio's duration.
        video_clip.duration = audio_clip.duration
        # The picture never changes, so one frame per second is used.
        video_clip.fps = 1
        video_clip.write_videofile(path)
    finally:
        # Release whatever the audio reader holds, even if writing failed.
        audio_clip.close()
    return path
|
|
|
@functools.lru_cache(maxsize=1)
def _load_pipeline(model_id, device):
    """Load the pipeline for `model_id` once and return it moved to `device`."""
    # Prefer the HF_TOKEN environment variable; when it is unset or empty,
    # pass True exactly as the original conditional did.
    auth_token = os.getenv("HF_TOKEN") or True
    pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=auth_token)
    return pipe.to(device)


def get_stable_diffusion_image(prompt):
    """Generate one image for `prompt` and save it to ``temp/out.jpg``.

    The pipeline is loaded on first use and reused on later calls instead of
    being re-created for every request. CUDA is used when available;
    otherwise the pipeline runs on the CPU without autocast.

    Args:
        prompt: Text prompt passed to the Stable Diffusion pipeline.

    Returns:
        The path of the saved image, ``"temp/out.jpg"``.
    """
    model_id = "CompVis/stable-diffusion-v1-4"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = _load_pipeline(model_id, device)

    # Mixed precision is only requested on the GPU, as in the original code.
    if device == "cuda":
        precision = autocast("cuda")
    else:
        precision = contextlib.nullcontext()
    with precision:
        result = pipe(prompt, guidance_scale=7.5)

    # Newer diffusers releases expose the pictures as `.images`; early
    # releases returned a mapping with a "sample" entry.
    images = getattr(result, "images", None)
    if images is None:
        images = result["sample"]
    image = images[0]

    path = "temp/out.jpg"
    # Make sure the output directory exists before saving.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    image.save(path)
    return path
|
|
|
|
|
# Web UI: a text prompt plus an uploaded audio file (delivered to the handler
# as a file path) go in, and the video returned by `process_inputs` comes out.
iface = gr.Interface(
    fn=process_inputs,
    inputs=["text", gr.Audio(type="filepath")],
    outputs="video",
)
iface.launch()
|
|