|
import tempfile

import ffmpegio
import gradio as gr
import numpy as np
import omegaconf
import tensorflow as tf
from huggingface_hub import hf_hub_download
from pyprojroot.pyprojroot import here

from ganime.model.vqgan_clean.experimental.net2net_v3 import Net2Net
|
|
# (height, width, channels) of the frames the model works with.
IMAGE_SHAPE = (64, 128, 3)
|
|
# The model is loaded lazily on the first call to generate().
model = None
|
|
def save_video(video): |
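    """Encode a generated video tensor to an MP4 file and return its path."""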
|
    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    filename = output_file.name

    video = video.numpy()
    # The prediction is batched as (batch, frames, height, width, channels);
    # ffmpegio expects (frames, height, width, channels), so fold the batch
    # dimension (a single sample here) into the frame dimension.
    b, f, h, w, c = video.shape
    video = video.reshape(b * f, h, w, c)

    # Convert to uint8 pixels, assuming the decoder outputs values in [0, 1].
    video = video * 255
    video = video.astype(np.uint8)

    # Write the frames as an MP4 at 20 fps.
    ffmpegio.video.write(filename, 20, video, overwrite=True)
    return filename
|
|
def resize_if_necessary(image): |
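    """Resize an input image to 64x128 if it is not already that size."""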
|
    # Resize unless the image already matches the 64x128 model resolution.
    if image.shape[0] != 64 or image.shape[1] != 128:
        image = tf.image.resize(image, (64, 128))
    return image
|
|
def normalize(image): |
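    """Map uint8 pixel values from [0, 255] to the [-1, 1] range used as model input."""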
|
    image = (tf.cast(image, tf.float32) / 127.5) - 1
    return image
|
|
def load_model(): |
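    """Download the VQGAN and GPT checkpoints from the Hugging Face Hub and build the Net2Net model."""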
|
hf_hub_download(repo_id="Kurokabe/VQGAN_Kimetsu-no-yaiba_Tensorflow", filename="checkpoint.data-00000-of-00001", subfolder="vqgan_kny_image_full") |
|
hf_hub_download(repo_id="Kurokabe/VQGAN_Kimetsu-no-yaiba_Tensorflow", filename="checkpoint.index", subfolder="vqgan_kny_image_full") |
|
vqgan_path = hf_hub_download(repo_id="Kurokabe/VQGAN_Kimetsu-no-yaiba_Tensorflow", filename="checkpoint", subfolder="vqgan_kny_image_full") |
|
|
|
hf_hub_download(repo_id="Kurokabe/GANime_Kimetsu-no-yaiba_Tensorflow", filename="checkpoint.data-00000-of-00001", subfolder="ganime_kny_video_full") |
|
hf_hub_download(repo_id="Kurokabe/GANime_Kimetsu-no-yaiba_Tensorflow", filename="checkpoint.index", subfolder="ganime_kny_video_full") |
|
gpt_path = hf_hub_download(repo_id="Kurokabe/GANime_Kimetsu-no-yaiba_Tensorflow", filename="checkpoint", subfolder="ganime_kny_video_full") |
|
|
|
cfg = omegaconf.OmegaConf.load(here("configs/kny_video_gpt2_large_gradio.yaml")) |
|
cfg["model"]["first_stage_config"]["checkpoint_path"] = vqgan_path |
|
cfg["model"]["transformer_config"]["checkpoint_path"] = gpt_path |
|
|
|
model = Net2Net(**cfg["model"], trainer_config=cfg["train"], num_replicas=1) |
|
model.first_stage_model.build((20, *IMAGE_SHAPE)) |
|
return model |
|
|
|
|
|
def generate(first, last, n_frames): |
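    """Generate the in-between video from the first and last frames and return the MP4 path."""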
|
    global model

    # Load the model lazily on the first request so the interface starts quickly.
    if model is None:
        model = load_model()

    n_frames = int(n_frames)
    first = resize_if_necessary(first)
    last = resize_if_necessary(last)
    first = normalize(first)
    last = normalize(last)

    data = {
        "first_frame": np.expand_dims(first, axis=0),
        "last_frame": np.expand_dims(last, axis=0),
        "y": None,
        "n_frames": [n_frames],
        # Countdown of the frames left to generate, from n_frames - 1 down to 0.
        "remaining_frames": [list(reversed(range(n_frames)))],
    }
    generated = model.predict(data)

    return save_video(generated)
|
|
# Minimal Gradio UI: two input images, a frame-count slider, and a video output.
gr.Interface(
    generate,
    inputs=[
        gr.Image(label="Upload the first image"),
        gr.Image(label="Upload the last image"),
        gr.Slider(
            label="Number of frames to generate",
            minimum=15,
            maximum=100,
            value=15,
            step=1,
        ),
    ],
    outputs="video",
    title="Generate a video from the first and last frame",
).launch(share=True)
|
|