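# Gradio demo app for Tune-A-Video: text-to-video generation via one-shot tuning of
# image diffusion models (https://arxiv.org/abs/2212.11565).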
from tuneavideo.pipelines.pipeline_tuneavideo import TuneAVideoPipeline
from tuneavideo.models.unet import UNet3DConditionModel
from tuneavideo.util import save_videos_grid
import torch
import gradio as gr
from bs4 import BeautifulSoup
import requests
def model_url_list():
    # Paginate through the first five Hub search pages for sd-dreambooth-library models.
    url_list = []
    for i in range(1, 6):
        url_list.append(f"https://huggingface.co/models?p={i}&sort=downloads&search=sd-dreambooth-library")
    return url_list
def data_scraping(url_list):
    # Scrape each search-results page and collect the model repository links.
    model_list = []
    for url in url_list:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")
        div_class = 'grid gap-5 grid-cols-1 2xl:grid-cols-2'
        div = soup.find('div', {'class': div_class})
        for a in div.find_all('a', href=True):
            model_list.append(a['href'])
    return model_list
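# Build the model dropdown choices once at startup by scraping the Hub search results.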
model_list = data_scraping(model_url_list())
def tune_video_predict(
    pipe_id: str,
    prompt: str,
    video_length: int,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
):
    # Load the fine-tuned temporal UNet and plug it into the selected base pipeline.
    unet = UNet3DConditionModel.from_pretrained("Tune-A-Video-library/a-man-is-surfing", subfolder='unet', torch_dtype=torch.float16).to('cuda')
    pipe = TuneAVideoPipeline.from_pretrained(pipe_id, unet=unet, torch_dtype=torch.float16).to("cuda")
    # Run inference and save the generated frames as a GIF named after the prompt.
    video = pipe(prompt, video_length=video_length, height=height, width=width, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale).videos
    output_path = save_videos_grid(video, save_path='output', path=f"{prompt}.gif")
    return output_path
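# Gradio input widgets; their order matches the positional arguments of tune_video_predict.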
demo_inputs = [
    gr.inputs.Dropdown(
        label="Model",
        choices=model_list
    ),
    gr.inputs.Textbox(
        label="Prompt",
        default='a flower blooming'
    ),
    gr.inputs.Slider(
        label="Video Length",
        minimum=1,
        maximum=50,
        default=8,
        step=1,
    ),
    gr.inputs.Slider(
        label="Height",
        minimum=128,
        maximum=1280,
        default=416,
        step=32,
    ),
    gr.inputs.Slider(
        label="Width",
        minimum=128,
        maximum=1280,
        default=416,
        step=32,
    ),
    gr.inputs.Slider(
        label="Num Inference Steps",
        minimum=1,
        maximum=100,
        default=50,
        step=1,
    ),
    gr.inputs.Slider(
        label="Guidance Scale",
        minimum=0.0,
        maximum=100,
        default=7.5,
        step=0.5,
    )
]
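# save_videos_grid writes a GIF, which the video output component displays directly.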
demo_outputs = gr.outputs.Video(type="gif", label="Output")
examples = [
    ["Tune-A-Video-library/a-man-is-surfing", "a panda is surfing", 5, 416, 416, 50, 7.5],
    ["Tune-A-Video-library/a-man-is-surfing", "a flower blooming", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/mr-potato-head", "sks mr potato head, wearing a pink hat, is surfing.", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/mr-potato-head", "sks mr potato head is surfing in the forest.", 5, 416, 416, 50, 7.5],
]
description = "This generates a video from an input text, using [one-shot tuning of diffusion models](https://arxiv.org/abs/2212.11565). To use it, simply enter a text prompt."
demo_app = gr.Interface(
    fn=tune_video_predict,
    inputs=demo_inputs,
    outputs=demo_outputs,
    examples=examples,
    cache_examples=False,
    title="Tune-A-Video",
    theme="huggingface",
    description=description
)
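# Queueing keeps long video generations from hitting the request timeout.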
demo_app.launch(debug=True, enable_queue=True)