import gradio as gr
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video
import torch
import spaces

# Load the pre-trained pipeline once at startup.
# DiffusionPipeline auto-resolves to StableVideoDiffusionPipeline for this checkpoint.
# fp16 weights assume a CUDA GPU; drop torch_dtype/variant to run on CPU (very slow).
pipeline = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipeline.to("cuda")


# Define the function to generate the video
@spaces.GPU  # request a GPU on ZeroGPU Spaces; safe to remove when running locally
def generate_video(img):
    # SVD was trained at 1024x576; resize the input to match
    img = img.convert("RGB").resize((1024, 576))

    # The pipeline accepts a PIL image directly -- no manual tensor conversion needed.
    # decode_chunk_size trades VRAM for decoding speed.
    output = pipeline(img, decode_chunk_size=8)

    # output.frames holds one list of PIL frames per input image; take the first
    video_frames = output.frames[0]

    # Write the frames to an .mp4 file that gr.Video can play, and return its path
    return export_to_video(video_frames, "generated.mp4", fps=7)


# Define the Gradio interface
interface = gr.Interface(
    fn=generate_video,
    inputs=gr.Image(type="pil"),
    outputs=gr.Video(),
    title="Stable Video Diffusion",
    description="Upload an image to generate a video",
    theme="soft",
)

# Launch the Gradio app
interface.launch()