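# Gradio demo for DynamiCrafter (320x512): generative frame interpolation
# between two input images, and looping video generation from a single image,
# built on an image-conditioned video diffusion model.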
import spaces
import gradio as gr
import os
import sys
import time
from omegaconf import OmegaConf
import torch
from pytorch_lightning import seed_everything
from huggingface_hub import hf_hub_download
from einops import repeat
import torchvision.transforms as transforms
from utils.utils import instantiate_from_config
sys.path.insert(0, "scripts/evaluation")
from funcs import (
    batch_ddim_sampling,
    load_model_checkpoint,
    get_latent_z,
    save_videos
)
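# Fetch the pretrained 512-interpolation checkpoint from the Hugging Face Hub
# into ./checkpoints/ if it is not already present locally.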
def download_model():
    REPO_ID = 'Doubiiu/DynamiCrafter_512_Interp'
    filename_list = ['model.ckpt']
    if not os.path.exists('./checkpoints/dynamicrafter_512_interp_v1/'):
        os.makedirs('./checkpoints/dynamicrafter_512_interp_v1/')
    for filename in filename_list:
        local_file = os.path.join('./checkpoints/dynamicrafter_512_interp_v1/', filename)
        if not os.path.exists(local_file):
            hf_hub_download(repo_id=REPO_ID, filename=filename, local_dir='./checkpoints/dynamicrafter_512_interp_v1/', force_download=True)
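# Download the weights, then build the model from the inference config and
# load the checkpoint once at import time; gradient checkpointing
# (use_checkpoint) is disabled since the model is only used for inference.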
download_model()
ckpt_path = 'checkpoints/dynamicrafter_512_interp_v1/model.ckpt'
config_file = 'configs/inference_512_v1.0.yaml'
config = OmegaConf.load(config_file)
model_config = config.pop("model", OmegaConf.create())
model_config['params']['unet_config']['params']['use_checkpoint'] = False
model = instantiate_from_config(model_config)
assert os.path.exists(ckpt_path), "Error: checkpoint not found!"
model = load_model_checkpoint(model, ckpt_path)
model.eval()
model = model.cuda()
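# `@spaces.GPU` reserves a ZeroGPU worker for up to 300 s per call on Hugging
# Face Spaces. `fs` is the motion/FPS conditioning fed to the model; passing a
# second image switches from looping generation to frame interpolation.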
@spaces.GPU(duration=300)
def infer(image, prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, image2=None):
    resolution = (320, 512)
    save_fps = 8
    seed_everything(seed)
    transform = transforms.Compose([
        transforms.Resize(min(resolution)),
        transforms.CenterCrop(resolution),
    ])
    torch.cuda.empty_cache()
    print('start:', prompt, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    start = time.time()
    if steps > 60:
        steps = 60

    batch_size = 1
    channels = model.model.diffusion_model.out_channels
    frames = model.temporal_length
    h, w = resolution[0] // 8, resolution[1] // 8
    noise_shape = [batch_size, channels, frames, h, w]
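    # The VAE encoder downsamples 8x spatially, so 320x512 frames map to 40x64
    # latents; `frames` is the model's fixed temporal length.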
    with torch.no_grad(), torch.cuda.amp.autocast():
        # text cond
        text_emb = model.get_learned_conditioning([prompt])

        # img cond: normalize to [-1, 1], resize/crop to 320x512, and encode
        img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device)
        img_tensor = (img_tensor / 255. - 0.5) * 2
        image_tensor_resized = transform(img_tensor)  # 3, 320, 512
        videos = image_tensor_resized.unsqueeze(0)  # b, c, h, w
        z = get_latent_z(model, videos.unsqueeze(2))  # b, c, 1, h, w

        if image2 is not None:
            img_tensor2 = torch.from_numpy(image2).permute(2, 0, 1).float().to(model.device)
            img_tensor2 = (img_tensor2 / 255. - 0.5) * 2
            image_tensor_resized2 = transform(img_tensor2)  # 3, h, w
            videos2 = image_tensor_resized2.unsqueeze(0)  # b, c, h, w
            z2 = get_latent_z(model, videos2.unsqueeze(2))  # b, c, 1, h, w

        # Frame-axis conditioning: zeros everywhere except the first latent
        # frame (always the first image) and the last latent frame (the second
        # image for interpolation, or the first image again for looping).
        img_tensor_repeat = repeat(z, 'b c t h w -> b c (repeat t) h w', repeat=frames)
        img_tensor_repeat = torch.zeros_like(img_tensor_repeat)
        img_tensor_repeat[:, :, :1, :, :] = z
        if image2 is not None:
            img_tensor_repeat[:, :, -1:, :, :] = z2
        else:
            img_tensor_repeat[:, :, -1:, :, :] = z

        cond_images = model.embedder(img_tensor.unsqueeze(0))  # b, l, c
        img_emb = model.image_proj_model(cond_images)
        imtext_cond = torch.cat([text_emb, img_emb], dim=1)

        fs = torch.tensor([fs], dtype=torch.long, device=model.device)
        cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat]}

        ## inference
        batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale)
        ## b, samples, c, t, h, w
        ## remove the last frame so the looping video wraps seamlessly
        if image2 is None:
            batch_samples = batch_samples[:, :, :, :-1, ...]

    video_path = './output.mp4'
    save_videos(batch_samples, './', filenames=['output'], fps=save_fps)
    return video_path
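# A minimal sketch of calling `infer` directly, outside Gradio (illustrative
# only; assumes imageio is installed and that, as with gr.Image, the inputs
# are HxWx3 uint8 RGB numpy arrays):
#   import imageio.v2 as imageio
#   frame0 = imageio.imread('prompts/512_interp/smile_01.png')
#   frame1 = imageio.imread('prompts/512_interp/smile_02.png')
#   out_path = infer(frame0, 'a smiling girl', steps=50, cfg_scale=7.5,
#                    eta=1.0, fs=5, seed=12306, image2=frame1)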
i2v_examples_interp_512 = [
    ['prompts/512_interp/smile_01.png', 'a smiling girl', 50, 7.5, 1.0, 5, 12306, 'prompts/512_interp/smile_02.png'],
    ['prompts/512_interp/stone01_01.png', 'rotating view', 50, 7.5, 1.0, 5, 123, 'prompts/512_interp/stone01_02.png'],
    ['prompts/512_interp/walk_01.png', 'man walking', 50, 7.5, 1.0, 5, 345, 'prompts/512_interp/walk_02.png'],
]
i2v_examples_loop_512 = [
    ['prompts/512_loop/24.png', 'a beach with waves and clouds at sunset', 50, 7.5, 1.0, 5, 234],
    ['prompts/512_loop/36.png', 'clothes swaying in the wind', 50, 7.5, 1.0, 5, 123],
    ['prompts/512_loop/40.png', 'flowers swaying in the wind', 50, 7.5, 1.0, 5, 234],
]
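# Each example row above follows the `inputs` order wired to gr.Examples
# below: image, prompt, steps, cfg_scale, eta, motion (fs), seed[, image2].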
css = """#input_img {max-width: 512px !important} #input_img2 {max-width: 512px !important} #output_vid {max-width: 512px; max-height: 320px} """
with gr.Blocks(analytics_enabled=False, css=css) as dynamicrafter_iface:
    gr.Markdown("<div align='center'> <h1> DynamiCrafter: Animating Open-domain Images with Video Diffusion Priors </h1> \
                <h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
                <a href='https://doubiiu.github.io/'>Jinbo Xing</a>, \
                <a href='https://menghanxia.github.io/'>Menghan Xia</a>, <a href='https://yzhang2016.github.io/'>Yong Zhang</a>, \
                <a href=''>Haoxin Chen</a>, <a href=''> Wangbo Yu</a>,\
                <a href='https://github.com/hyliu'>Hanyuan Liu</a>, <a href='https://xinntao.github.io/'>Xintao Wang</a>,\
                <a href='https://www.cse.cuhk.edu.hk/~ttwong/myself.html'>Tien-Tsin Wong</a>,\
                <a href='https://scholar.google.com/citations?user=4oXBp9UAAAAJ&hl=zh-CN'>Ying Shan</a>\
                </h2> \
                <a style='font-size:18px;color: #000000'>If you find DynamiCrafter useful, please help star the </a>\
                <a style='font-size:18px;color: #000000' href='https://github.com/Doubiiu/DynamiCrafter'>[GitHub Repo]</a>\
                <a style='font-size:18px;color: #000000'>; it means a lot to open-source projects. Thanks!</a>\
                <a style='font-size:18px;color: #000000' href='https://arxiv.org/abs/2310.12190'> [ArXiv] </a>\
                <a style='font-size:18px;color: #000000' href='https://doubiiu.github.io/projects/DynamiCrafter/'> [Project Page] </a> </div>")
    ####### Generative frame interpolation (320x512) #######
    with gr.Tab(label='Generative Frame Interpolation_320x512'):
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        i2v_input_image = gr.Image(label="Input Image1", elem_id="input_img")
                    with gr.Row():
                        i2v_input_text = gr.Text(label='Prompts')
                    with gr.Row():
                        i2v_seed = gr.Slider(label='Random Seed', minimum=0, maximum=50000, step=1, value=123)
                        i2v_eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='ETA', value=1.0, elem_id="i2v_eta")
                        i2v_cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.5, elem_id="i2v_cfg_scale")
                    with gr.Row():
                        i2v_steps = gr.Slider(minimum=1, maximum=50, step=1, elem_id="i2v_steps", label="Sampling steps", value=50)
                        i2v_motion = gr.Slider(minimum=5, maximum=30, step=1, elem_id="i2v_motion", label="FPS", value=10)
                    i2v_end_btn = gr.Button("Generate")
                with gr.Column():
                    with gr.Row():
                        i2v_input_image2 = gr.Image(label="Input Image2", elem_id="input_img2")
                    with gr.Row():
                        i2v_output_video = gr.Video(label="Generated Video", elem_id="output_vid", autoplay=True, show_share_button=True)
            gr.Examples(examples=i2v_examples_interp_512,
                        inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed, i2v_input_image2],
                        outputs=[i2v_output_video],
                        fn=infer,
                        cache_examples=True,
                        )
        i2v_end_btn.click(inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed, i2v_input_image2],
                          outputs=[i2v_output_video],
                          fn=infer
                          )
    ####### Looping video generation (320x512) #######
    with gr.Tab(label='Looping Video Generation_320x512'):
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        i2v_input_image = gr.Image(label="Input Image", elem_id="input_img")
                    with gr.Row():
                        i2v_input_text = gr.Text(label='Prompts')
                    with gr.Row():
                        i2v_seed = gr.Slider(label='Random Seed', minimum=0, maximum=50000, step=1, value=123)
                        i2v_eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='ETA', value=1.0, elem_id="i2v_eta")
                        i2v_cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.5, elem_id="i2v_cfg_scale")
                    with gr.Row():
                        i2v_steps = gr.Slider(minimum=1, maximum=50, step=1, elem_id="i2v_steps", label="Sampling steps", value=50)
                        i2v_motion = gr.Slider(minimum=5, maximum=30, step=1, elem_id="i2v_motion", label="FPS", value=5)
                    i2v_end_btn = gr.Button("Generate")
                with gr.Row():
                    i2v_output_video = gr.Video(label="Generated Video", elem_id="output_vid", autoplay=True, show_share_button=True)
            gr.Examples(examples=i2v_examples_loop_512,
                        inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed],
                        outputs=[i2v_output_video],
                        fn=infer,
                        cache_examples=True,
                        )
        i2v_end_btn.click(inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed],
                          outputs=[i2v_output_video],
                          fn=infer
                          )
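# queue(max_size=12) caps the number of pending requests; show_api=True keeps
# Gradio's auto-generated "Use via API" page enabled.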
dynamicrafter_iface.queue(max_size=12).launch(show_api=True)