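"""Gradio demo: generate an image from a text prompt with Stable Diffusion
v1.4, then combine it with an uploaded audio file into a static-image video.

Assumed environment: gradio, torch, diffusers, and moviepy 1.x, plus a
Hugging Face access token (`huggingface-cli login`) to download the
Stable Diffusion weights.
"""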
import os

import gradio as gr
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline
# moviepy 1.x; in moviepy >= 2.0 the `editor` module was removed
# and this becomes `from moviepy import AudioFileClip, ImageClip`
from moviepy.editor import AudioFileClip, ImageClip

def process_inputs(prompt, audio):
    """Generate an image from `prompt`, then pair it with the uploaded audio."""
    image = get_stable_diffusion_image(prompt)
    video = add_static_image_to_audio(image, audio)
    return video


def add_static_image_to_audio(image_path, audio_path):
    """Combine the static image at `image_path` with the audio file at
    `audio_path` into a video file, and return the video's path."""
    # create the audio clip object
    audio_clip = AudioFileClip(audio_path)
    # create the image clip and show it for the full duration of the audio
    image_clip = ImageClip(image_path).set_duration(audio_clip.duration)
    # use set_audio to attach the audio track to the image clip
    video_clip = image_clip.set_audio(audio_clip)
    # write the resulting video clip; 1 FPS is enough for a static image
    os.makedirs("temp", exist_ok=True)
    path = "temp/video_out.mp4"
    video_clip.write_videofile(path, fps=1)
    return path

def get_stable_diffusion_image(prompt):
    """Run Stable Diffusion v1.4 on `prompt` and return the saved image's path."""
    model_id = "CompVis/stable-diffusion-v1-4"
    device = "cuda"
    # NOTE: loading the pipeline on every call is slow; in a real app, load it
    # once at module level and reuse it across requests.
    pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
    pipe = pipe.to(device)
    with autocast("cuda"):
        # recent diffusers returns a pipeline output object; the generated
        # PIL images live in its `.images` attribute
        image = pipe(prompt, guidance_scale=7.5).images[0]
    os.makedirs("temp", exist_ok=True)
    path = "temp/out.jpg"
    image.save(path)
    return path

# Wire everything into a Gradio UI: a text prompt and an audio upload in, a video out.
# launch() starts a local server and prints its URL.
iface = gr.Interface(fn=process_inputs, inputs=["text", gr.Audio(type="filepath")], outputs="video")
iface.launch()