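# Gradio demo for GANime: generate the animation between a first and a last
# frame with a pretrained VQGAN + GPT-2 model (trained on Kimetsu no Yaiba).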
import tempfile

import ffmpegio
import gradio as gr
import numpy as np
import omegaconf
import tensorflow as tf
from pyprojroot.pyprojroot import here
from huggingface_hub import hf_hub_url, cached_download

from ganime.model.vqgan_clean.experimental.net2net_v3 import Net2Net

IMAGE_SHAPE = (64, 128, 3)

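# Download the pretrained VQGAN and transformer checkpoints from the Hugging Face Hub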
vqgan_path = cached_download(
    hf_hub_url("Kurokabe", "VQGAN_Kimetsu-no-yaiba_Tensorflow/vqgan_kny_image_full")
)
gpt_path = cached_download(
    hf_hub_url("Kurokabe", "GANime_Kimetsu-no-yaiba_Tensorflow/ganime_kny_video_full")
)

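# Load the run configuration and point it at the downloaded checkpoints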
cfg = omegaconf.OmegaConf.load(here("configs/kny_video_gpt2_large_gradio.yaml"))
cfg["model"]["first_stage_config"]["checkpoint_path"] = vqgan_path + "/checkpoint"
cfg["model"]["transformer_config"]["checkpoint_path"] = gpt_path + "/checkpoint"

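# Instantiate the model; the first stage (VQGAN) must be built explicitly on a 20-frame batch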
model = Net2Net(**cfg["model"], trainer_config=cfg["train"], num_replicas=1)
model.first_stage_model.build((20, *IMAGE_SHAPE))


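# Write a generated video tensor of shape (batch, frames, height, width, channels),
# assumed to hold values in [0, 1], to a temporary MP4 file and return its path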
def save_video(video):
    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    b, f, h, w, c = video.shape

    filename = output_file.name
    video = video.numpy()
    video = video * 255
    video = video.astype(np.uint8)
    # ffmpegio expects frames shaped (frames, height, width, channels), so
    # write the single batch element instead of the full 5-D tensor
    ffmpegio.video.write(filename, 20, video[0], overwrite=True)
    return filename


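# Resize an input image to the model's 64x128 resolution when needed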
def resize_if_necessary(image):
    # use `or` so the image is resized when either dimension is off
    if image.shape[0] != 64 or image.shape[1] != 128:
        image = tf.image.resize(image, (64, 128))
    return image


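# Map pixel values from [0, 255] to the [-1, 1] range the model expects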
def normalize(image):
    image = (tf.cast(image, tf.float32) / 127.5) - 1

    return image


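# Gradio callback: preprocess both frames, run the model, and return the rendered video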
def generate(first, last, n_frames):
    n_frames = int(n_frames)
    first = resize_if_necessary(first)
    last = resize_if_necessary(last)
    first = normalize(first)
    last = normalize(last)
    data = {
        "first_frame": np.expand_dims(first, axis=0),
        "last_frame": np.expand_dims(last, axis=0),
        "y": None,
        "n_frames": [n_frames],
        "remaining_frames": [list(reversed(range(n_frames)))],
    }
    generated = model.predict(data)

    return save_video(generated)


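# Build the web UI: two image inputs, a frame-count slider, and a video output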
gr.Interface(
    generate,
    inputs=[
        gr.Image(label="Upload the first image"),
        gr.Image(label="Upload the last image"),
        gr.Slider(
            label="Number of frame to generate",
            minimum=15,
            maximum=100,
            value=15,
            step=1,
        ),
    ],
    outputs="video",
    title="Generate a video from the first and last frame",
).launch(share=True)