Commit f9a691e
Parent(s): 7e1bff8
Update app.py

app.py CHANGED
@@ -3,12 +3,17 @@ import torch
 import os
 import base64
 import uuid
+import tempfile
+import numpy as np
+import cv2
+import subprocess
 
 from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
 from PIL import Image
 
+
 SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
 
 # Constants
@@ -29,9 +34,11 @@ dtype = torch.float16
 pipe = AnimateDiffPipeline.from_pretrained(bases[base_loaded], torch_dtype=dtype).to(device)
 pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
 
-
-
-
+# ----------------------------- VIDEO ENCODING ---------------------------------
+# Unfortunately, the Hugging Face Diffusers utils hardcode MP4V as a codec,
+# which is not supported by all browsers. This is a critical issue for AiTube,
+# so we are forced to implement our own encoding algorithm.
+# ------------------------------------------------------------------------------
 
 def export_to_video_file(video_frames, output_video_path=None, fps=10):
     if output_video_path is None:
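The body of export_to_video_file is unchanged by this commit, so it does not appear in the diff. For context, a minimal sketch of what a browser-friendly OpenCV encoder along these lines might look like, assuming a build of OpenCV with VP8/libvpx support (the helper name, WebM/VP8 codec choice, and frame handling are illustrative assumptions, not the repository's actual implementation):

import uuid
import cv2
import numpy as np

def export_to_webm_sketch(video_frames, output_video_path=None, fps=10):
    # Encode frames as VP8 in a WebM container, which browsers can play
    # natively (unlike the MP4V fourcc hardcoded in the Diffusers utils).
    if output_video_path is None:
        output_video_path = f"{uuid.uuid4()}.webm"
    frames = [np.asarray(frame) for frame in video_frames]
    height, width = frames[0].shape[:2]
    writer = cv2.VideoWriter(
        output_video_path, cv2.VideoWriter_fourcc(*"VP80"), fps, (width, height)
    )
    for frame in frames:
        # OpenCV expects BGR channel order; PIL frames are RGB
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()
    return output_video_path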
@@ -55,7 +62,76 @@ def export_to_video_file(video_frames, output_video_path=None, fps=10):
 
     return output_video_path
 
-def generate_image(secret_token, prompt, base, width, height, motion, step):
+# ----------------------------- FRAME INTERPOLATION ---------------------------------
+# we cannot afford to use AI-based algorithms such as FILM or ST-MFNet,
+# those are way too slow for AiTube, which needs things to be as fast as possible
+# -----------------------------------------------------------------------------------
+
+def interpolate_video_frames(file_path, output_fps=10, desired_duration=2):
+    """
+    Interpolates frames in a video file to adjust frame rate and duration using ffmpeg's minterpolate.
+
+    Parameters:
+    file_path (str): Path to the input video file.
+    output_fps (int): Target frames per second for the output video.
+    desired_duration (int): Desired duration of the video in seconds.
+
+    Returns:
+    str: The file path of the modified video.
+    """
+    # Calculate the input fps required to stretch the video to the desired duration
+    input_fps = find_input_fps(file_path, desired_duration)
+
+    # Construct the ffmpeg command for interpolation
+    cmd = [
+        'ffmpeg',
+        '-i', file_path,  # input file
+        '-filter:v', f'minterpolate=fps={output_fps}',  # minterpolate filter options
+        '-r', str(output_fps),  # output frame rate
+        '-y',  # Overwrite output files without asking
+        file_path  # Output file (Overwrites the original)
+    ]
+
+    # Execute the command
+    try:
+        subprocess.run(cmd, check=True)
+        print("Video interpolation successful.")
+    except subprocess.CalledProcessError as e:
+        print("Failed to interpolate video. Error:", e)
+
+    return file_path
+
+def find_input_fps(file_path, desired_duration):
+    """
+    Determine the input fps that, when stretched to the desired duration, matches the original video length.
+
+    Parameters:
+    file_path (str): Path to the video file.
+    desired_duration (int or float): Desired duration in seconds.
+
+    Returns:
+    float: Calculated input fps.
+    """
+    # FFprobe command to find the duration of the video
+    ffprobe_cmd = [
+        'ffprobe',
+        '-v', 'error',
+        '-show_entries', 'format=duration',
+        '-of', 'default=noprint_wrappers=1:nokey=1',
+        file_path
+    ]
+
+    try:
+        result = subprocess.run(ffprobe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        duration = float(result.stdout.strip())
+        input_fps = duration / desired_duration
+    except Exception as e:
+        print("Failed to get video duration. Error:", e)
+        input_fps = 10  # Assume a default value if unable to fetch duration
+
+    return input_fps
+
+def generate_image(secret_token, prompt, base, width, height, motion, step, desired_duration, desired_fps):
     if secret_token != SECRET_TOKEN:
         raise gr.Error(
             f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
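Two details of the added helpers are worth noting: the computed input_fps ratio is never passed to ffmpeg, so desired_duration currently has no effect on the output, and ffmpeg is asked to write its output over its own input, which recent builds reject outright. A sketch of a variant that addresses both, assuming the same find_input_fps helper from this commit (the setpts-based stretching and the temporary-file step are suggestions, not code from this commit):

import os
import subprocess

def interpolate_video_frames_sketch(file_path, output_fps=10, desired_duration=2):
    # Slowdown ratio: original duration divided by the desired duration
    ratio = find_input_fps(file_path, desired_duration)
    # Stretch the timestamps first (setpts), then let minterpolate synthesize
    # the intermediate frames needed to reach the target frame rate
    temp_path = file_path + ".tmp.mp4"
    cmd = [
        'ffmpeg',
        '-i', file_path,
        '-filter:v', f'setpts=PTS/{ratio},minterpolate=fps={output_fps}',
        '-r', str(output_fps),
        '-y',
        temp_path,  # ffmpeg cannot safely overwrite its own input
    ]
    subprocess.run(cmd, check=True)
    os.replace(temp_path, file_path)  # move the result over the original
    return file_path

Motion-interpolation artifacts grow with the stretch factor, so whether minterpolate can fill a large stretch convincingly is a separate question from making the parameters take effect.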
@@ -98,7 +174,11 @@ def generate_image(secret_token, prompt, base, width, height, motion, step):
 
     # I think we are losing time here too, converting to webm is too slow, we should return
     # the frames unencoded to the frontend renderer
-    export_to_video_file(output.frames[0], path, fps=10)
+    path = export_to_video_file(output.frames[0], path, fps=10)
+
+    # Optional frame interpolation
+    if desired_duration != 2 or desired_fps != 10:
+        path = interpolate_video_frames(path, output_fps=desired_fps, desired_duration=desired_duration)
 
     # Read the content of the video file and encode it to base64
     with open(path, "rb") as video_file:
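The comment above leaves the "return unencoded frames" idea as a TODO. One possible shape for it, assuming the frontend renderer can sequence still images itself (the helper name and the per-frame JPEG choice are assumptions, not part of this commit):

import base64
import io

def frames_to_base64_list_sketch(video_frames):
    # Serialize each PIL frame as JPEG and base64-encode it, so the frontend
    # can decode and play the frames without any server-side video encoding
    encoded = []
    for frame in video_frames:
        buf = io.BytesIO()
        frame.save(buf, format="JPEG")
        encoded.append(base64.b64encode(buf.getvalue()).decode("utf-8"))
    return encoded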
@@ -139,22 +219,21 @@ with gr.Blocks() as demo:
                     "ToonYou",
                     "epiCRealism",
                 ],
-                value=base_loaded
-                interactive=True
+                value=base_loaded
             )
             width = gr.Slider(
                 label='Width',
                 minimum=128,
                 maximum=2048,
                 step=32,
-                value=
+                value=512,
             )
             height = gr.Slider(
                 label='Height',
                 minimum=128,
                 maximum=2048,
                 step=32,
-                value=
+                value=256,
             )
             select_motion = gr.Dropdown(
                 label='Motion',
@@ -170,7 +249,6 @@ with gr.Blocks() as demo:
                     ("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
                 ],
                 value="",
-                interactive=True
             )
             select_step = gr.Dropdown(
                 label='Inference steps',
@@ -180,15 +258,17 @@ with gr.Blocks() as demo:
                     ('4-Step', 4),
                     ('8-Step', 8)],
                 value=4,
-                interactive=True
             )
+            duration_slider = gr.Slider(label="Desired Duration (seconds)", minimum=2, maximum=30, value=2, step=1)
+            fps_slider = gr.Slider(label="Desired Frames Per Second", minimum=10, maximum=60, value=10, step=1)
+
             submit = gr.Button()
 
             output_video_base64 = gr.Text()
 
             submit.click(
                 fn=generate_image,
-                inputs=[secret_token, prompt, select_base, width, height, select_motion, select_step],
+                inputs=[secret_token, prompt, select_base, width, height, select_motion, select_step, duration_slider, fps_slider],
                 outputs=output_video_base64,
             )
 
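On the client side, the endpoint returns the video as a base64 string in output_video_base64. A minimal decoding sketch, assuming the returned string is the raw base64 payload with no data: URI prefix (the function name and output path are illustrative):

import base64

def save_base64_video(b64_string, out_path="animation.mp4"):
    # Reverse of the server-side step that reads the video file
    # and base64-encodes its bytes before returning it
    with open(out_path, "wb") as f:
        f.write(base64.b64decode(b64_string))
    return out_path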