import argparse

import gradio as gr
import numpy as np
from torch import autocast
from diffusers import StableDiffusionPipeline
from moviepy.editor import AudioFileClip, ImageClip

parser = argparse.ArgumentParser()
parser.add_argument('--setshare', default=True, action=argparse.BooleanOptionalAction)
args = parser.parse_args()


def process_inputs(prompt, audio):
    image = get_stable_diffusion_image(prompt)
    video = add_static_image_to_audio(image, audio)
    return video


def add_static_image_to_audio(image, audio):
    """Combine a static image with the audio file at `audio` and return
    the path of the resulting video file."""
    # create the audio clip object from the uploaded file path
    audio_clip = AudioFileClip(audio)
    # create the image clip object (ImageClip expects a file path or a
    # numpy array, so convert the PIL image)
    image_clip = ImageClip(np.array(image))
    # use set_audio from the image clip to combine the audio with the image
    video_clip = image_clip.set_audio(audio_clip)
    # make the new clip as long as the audio clip
    video_clip = video_clip.set_duration(audio_clip.duration)
    # write the resulting video clip; a static image only needs 1 fps
    output_path = "output.mp4"
    video_clip.write_videofile(output_path, fps=1)
    return output_path


def get_stable_diffusion_image(prompt):
    model_id = "CompVis/stable-diffusion-v1-4"
    device = "cuda"
    pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
    pipe = pipe.to(device)
    with autocast("cuda"):
        image = pipe(prompt, guidance_scale=7.5).images[0]
    return image


iface = gr.Interface(
    fn=process_inputs,
    # pass the uploaded audio through as a file path so moviepy can open it
    inputs=["text", gr.Audio(type="filepath")],
    outputs="video",
)
iface.launch(share=args.setshare)
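
# A minimal usage sketch, assuming the script is saved as app.py (the
# filename is an assumption, not from the source):
#
#   python app.py                 # launch with a public Gradio share link
#   python app.py --no-setshare   # local-only launch
#
# --no-setshare is the negative flag that argparse.BooleanOptionalAction
# generates automatically for --setshare.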