File size: 1,521 Bytes
897f5fa
 
 
 
 
 
1bfb10e
897f5fa
 
 
 
1bfb10e
 
 
209b93d
1bfb10e
897f5fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import tempfile

import spaces
import torch
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image
import gradio as gr

# Load the CogVideoX-5b image-to-video pipeline once, at import time, with
# bfloat16 weights so every request reuses the same model instance.
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    "THUDM/CogVideoX-5b-I2V",
    torch_dtype=torch.bfloat16,
)

# generate_video() seeds a CUDA torch.Generator. diffusers refuses to draw
# CPU tensors from a CUDA generator, so the pipeline must live on the GPU as
# well; without this move the model stays on the CPU and generation fails.
pipe.to("cuda")

# Turn on the VAE's tiled and sliced modes (diffusers memory-saving helpers).
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()

@spaces.GPU(duration=120)
def generate_video(prompt, image):
    """Generate a short video from a still image and a text prompt.

    Runs the module-level CogVideoX image-to-video pipeline with a fixed seed
    (42), 50 inference steps, 49 frames and guidance scale 6, then writes the
    frames to an MP4 file at 8 fps.

    Args:
        prompt: Text describing the desired video.
        image: Conditioning image passed straight to the pipeline (the UI
            supplies a PIL image, or None when nothing was uploaded).

    Returns:
        Path of the MP4 file that was written.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail early with a readable message instead of an opaque pipeline error.
    if image is None:
        raise gr.Error("Please upload an image before generating a video.")

    result = pipe(
        prompt=prompt,
        image=image,
        num_videos_per_prompt=1,
        num_inference_steps=50,
        num_frames=49,
        guidance_scale=6,
        generator=torch.Generator(device="cuda").manual_seed(42),
    )
    frames = result.frames[0]

    # Give every request its own output file: a shared "output.mp4" would be
    # overwritten by the next (or a concurrent) request. The file is kept
    # (delete=False) because the caller reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
        video_path = handle.name
    export_to_video(frames, video_path, fps=8)

    return video_path

# Gradio interface: prompt box and image upload in one row, a trigger button,
# and a player for the resulting clip.
with gr.Blocks() as demo:
    gr.Markdown("# Image to Video Generation")

    with gr.Row():
        # Text prompt, pre-filled with an example
        prompt_input = gr.Textbox(
            label="Prompt",
            value="A little girl is riding a bicycle at high speed. Focused, detailed, realistic.",
        )

        # Image upload, delivered to the callback as a PIL image
        image_input = gr.Image(
            label="Upload an Image",
            type="pil",
        )

    # Button that starts video generation
    generate_button = gr.Button("Generate Video")

    # Player for the generated video
    video_output = gr.Video(label="Generated Video")

    # Run generate_video on click and show its result in the player
    generate_button.click(
        fn=generate_video,
        inputs=[prompt_input, image_input],
        outputs=video_output,
    )

# Start the app
demo.launch()