Running on A10G

Commit 0ff2c60
Parent(s): f157d20
Update app.py

app.py CHANGED
@@ -1,4 +1,3 @@
-import spaces
 
 import gradio as gr
 # import gradio.helpers
@@ -8,6 +7,13 @@ from glob import glob
 from pathlib import Path
 from typing import Optional
 
+import tempfile
+import numpy as np
+import cv2
+import subprocess
+
+from DeepCache import DeepCacheSDHelper
+
 from PIL import Image
 from diffusers.utils import load_image, export_to_video
 from pipeline import StableVideoDiffusionPipeline
@@ -16,6 +22,13 @@ import random
 from safetensors import safe_open
 from lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler
 
+SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
+
+# is that 8 or 25?
+
+hardcoded_fps = 25
+hardcoded_duration_sec = 3
+
 
 def get_safetensors_files():
     models_dir = "./safetensors"
@@ -39,6 +52,118 @@ def model_select(selected_file):
     return
 
 
+
+# ----------------------------- FRAME INTERPOLATION ---------------------------------
+# we cannot afford to use AI-based algorithms such as FILM or ST-MFNet,
+# those are way too slow for AiTube which needs things to be as fast as possible
+# -----------------------------------------------------------------------------------
+
+def interpolate_video_frames(
+    input_file_path,
+    output_file_path,
+    output_fps=hardcoded_fps,
+    desired_duration=hardcoded_duration_sec,
+    original_duration=hardcoded_duration_sec,
+    output_width=None,
+    output_height=None,
+    use_cuda=False, # this requires FFmpeg to have been compiled with CUDA support (to try - I'm not sure the Hugging Face image has that by default)
+    verbose=False):
+
+    scale_factor = desired_duration / original_duration
+
+    filters = []
+
+    # Scaling if dimensions are provided
+    # note: upscaling produces disastrous results,
+    # it will double the compute time
+    # I think that's either because we are not hardware-accelerated,
+    # or because of the interpolation done after it, which thus become more computationally intensive
+    if output_width and output_height:
+        filters.append(f'scale={output_width}:{output_height}')
+
+
+    # note: from all fact, it looks like using a small macroblock is important for us,
+    # since the video resolution is very small (usually 512x288px)
+    interpolation_filter = f'minterpolate=mi_mode=mci:mc_mode=obmc:me=hexbs:vsbmc=1:mb_size=4:fps={output_fps}:scd=none,setpts={scale_factor}*PTS'
+    #- `mi_mode=mci`: Specifies motion compensated interpolation.
+    #- `mc_mode=obmc`: Overlapped block motion compensation is used.
+    #- `me=hexbs`: Hexagon-based search (motion estimation method).
+    #- `vsbmc=1`: Variable-size block motion compensation is enabled.
+    #- `mb_size=4`: Sets the macroblock size.
+    #- `fps={output_fps}`: Defines the output frame rate.
+    #- `scd=none`: Disables scene change detection entirely.
+    #- `setpts={scale_factor}*PTS`: Adjusts for the stretching of the video duration.
+
+    # Frame interpolation setup
+    filters.append(interpolation_filter)
+
+    # Combine all filters into a single filter complex
+    filter_complex = ','.join(filters)
+
+
+    cmd = [
+        'ffmpeg',
+        '-i', input_file_path,
+    ]
+
+    # not supported by the current image, we will have to build it
+    if use_cuda:
+        cmd.extend(['-hwaccel', 'cuda', '-hwaccel_output_format', 'cuda'])
+
+    cmd.extend([
+        '-filter:v', filter_complex,
+        '-r', str(output_fps),
+        output_file_path
+    ])
+
+    # Adjust the log level based on the verbosity input
+    if not verbose:
+        cmd.insert(1, '-loglevel')
+        cmd.insert(2, 'error')
+
+    # Logging for debugging if verbose
+    if verbose:
+        print("output_fps:", output_fps)
+        print("desired_duration:", desired_duration)
+        print("original_duration:", original_duration)
+        print("cmd:", cmd)
+
+    try:
+        subprocess.run(cmd, check=True)
+        return output_file_path
+    except subprocess.CalledProcessError as e:
+        print("Failed to interpolate video. Error:", e)
+        return input_file_path # In case of error, return original path
+
+# ----------------------------------- VIDEO ENCODING ---------------------------------
+# The Diffusers utils hardcode MP4V as a codec which is not supported by all browsers.
+# This is a critical issue for AiTube so we are forced to implement our own routine.
+# ------------------------------------------------------------------------------------
+
+def export_to_video_file(video_frames, output_video_path=None, fps=hardcoded_fps):
+    if output_video_path is None:
+        output_video_path = tempfile.NamedTemporaryFile(suffix=".webm").name
+
+    if isinstance(video_frames[0], np.ndarray):
+        video_frames = [(frame * 255).astype(np.uint8) for frame in video_frames]
+    elif isinstance(video_frames[0], Image.Image):
+        video_frames = [np.array(frame) for frame in video_frames]
+
+    # Use VP9 codec - don't freak out: yes, this will throw an exception, but this still works
+    # https://stackoverflow.com/a/61116338
+    # I suspect there is a bug somewhere and the actual hex code should be different
+    fourcc = cv2.VideoWriter_fourcc(*'VP90')
+    h, w, c = video_frames[0].shape
+    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h), True)
+
+    for frame in video_frames:
+        # Ensure the video frame is in the correct color format
+        img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        video_writer.write(img)
+    video_writer.release()
+
+    return output_video_path
+
 noise_scheduler = AnimateLCMSVDStochasticIterativeScheduler(
     num_train_timesteps=40,
     sigma_min=0.002,
@@ -62,7 +187,6 @@ model_select("AnimateLCM-SVD-xt-1.1.safetensors")
 
 max_64_bit_int = 2**63 - 1
 
-@spaces.GPU
 def sample(
     image: Image,
     seed: Optional[int] = 42,