from tuneavideo.pipelines.pipeline_tuneavideo import TuneAVideoPipeline
from tuneavideo.models.unet import UNet3DConditionModel
from tuneavideo.util import save_videos_grid
import torch
import gradio as gr
from bs4 import BeautifulSoup
import requests
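
# Gradio demo for Tune-A-Video text-to-video generation: a fine-tuned 3D UNet is
# paired with a user-selected Stable Diffusion / Dreambooth checkpoint, and
# candidate checkpoints are scraped from the Hugging Face Hub search pages below.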


def model_url_list():
    # Build the first five Hub search-result pages for Dreambooth models,
    # sorted by download count.
    url_list = []
    for i in range(0, 5):
        url_list.append(f"https://huggingface.co/models?p={i}&sort=downloads&search=dreambooth")
    return url_list

def data_scraping(url_list):
    # Scrape model repository links from each Hub search-result page.
    model_list = []
    for url in url_list:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")
        div_class = 'grid grid-cols-1 gap-5 2xl:grid-cols-2'
        div = soup.find('div', {'class': div_class})
        if div is None:
            # Skip pages whose layout does not match the expected results grid.
            continue
        for a in div.find_all('a', href=True):
            model_list.append(a['href'])
    return model_list

model_list = data_scraping(model_url_list())
# Hub hrefs start with a leading slash; strip it to get plain "owner/model" ids.
model_list = [model_id.lstrip('/') for model_id in model_list]

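# A handful of well-known base checkpoints are pinned to the top of the dropdown,
# ahead of the scraped Dreambooth models.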
best_model_list = [
    "runwayml/stable-diffusion-v1-5",
    "CompVis/stable-diffusion-v1-4",
    "prompthero/openjourney",
    "dreamlike-art/dreamlike-photoreal-2.0",
    "dreamlike-art/dreamlike-diffusion-1.0"
]

model_list = best_model_list + model_list

def tune_video_predict(
    pipe_id: str,
    prompt: str,
    video_length: int,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
):
    # Load the fine-tuned temporal UNet from Tune-A-Video and pair it with the
    # selected base Stable Diffusion checkpoint.
    unet = UNet3DConditionModel.from_pretrained("Tune-A-Video-library/a-man-is-surfing", subfolder='unet', torch_dtype=torch.float16).to('cuda')
    pipe = TuneAVideoPipeline.from_pretrained(pipe_id, unet=unet, torch_dtype=torch.float16).to("cuda")
    # Generate the video frames and save them as an animated GIF.
    video = pipe(prompt, video_length=video_length, height=height, width=width, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale).videos
    output_path = save_videos_grid(video, save_path='output', path=f"{prompt}.gif")
    return output_path

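# Illustrative direct call (assumes a CUDA device is available; values mirror the
# Gradio defaults below):
#   tune_video_predict("CompVis/stable-diffusion-v1-4", "a flower blooming", 8, 416, 416, 50, 7.5)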

demo_inputs = [
    gr.Dropdown(
        label="Model",
        choices=model_list,
        value="CompVis/stable-diffusion-v1-4",
    ),
    gr.Textbox(
        label="Prompt",
        value='a flower blooming'

    ),
    gr.Slider(
        label="Video Length",
        minimum=1,
        maximum=50,
        value=8,
        step=1,
    ),
    gr.Slider(
        label="Height",
        minimum=128,
        maximum=1280,
        value=416,
        step=32,

    ),  
    gr.Slider(
        label="Width",
        minimum=128,
        maximum=1280,
        value=416,
        step=32,
    ),
    gr.Slider(
        label="Num Inference Steps",
        minimum=1,
        maximum=100,
        value=50,
        step=1,
    ),
    gr.Slider(
        label="Guidance Scale",
        minimum=0.0,
        maximum=100,
        value=7.5,
        step=0.5,
    )
]

demo_outputs = gr.outputs.Video(type="gif", label="Output")

examples = [
    ["CompVis/stable-diffusion-v1-4", "a panda is surfing", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/disco-diffusion-style", "ddfusion style on the church", 5, 416, 416, 50, 7.5],
    #["sd-dreambooth-library/nasa-space-v2-768", "nasa style galaxy moving", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/mr-potato-head", "sks mr potato head, wearing a pink hat, is surfing.", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/mr-potato-head", "sks mr potato head is surfing in the forest.", 5, 416, 416, 50, 7.5],
]
    
description = "This application generates a video from a text prompt. To get started, simply enter a prompt. The default model in the dropdown is a generic model with which you can generate anything. Alternatively, for more photorealistic generations, you can use the other models in the dropdown. These are Dreambooth models trained on a specific object name, so make sure you know what the object is called. You can find example prompts for Dreambooth models in the Examples section right below the interface."
title = "Tune-A-Video: One-Shot Tuning of Image Diffusion Models for Text-to-Video Generation"

demo_app = gr.Interface(
    fn=tune_video_predict,
    inputs=demo_inputs,
    outputs=demo_outputs,
    examples=examples,
    cache_examples=True,
    title=title,
    theme="huggingface",
    description=description
)

demo_app.launch(debug=True, enable_queue=True)