Commit 6204823 · Update app.py
Parent(s): c9d5420

app.py CHANGED
@@ -26,12 +26,9 @@ from lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler
 
 SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
 
-
-
-hardcoded_fps = 25
+hardcoded_fps = 8
 hardcoded_duration_sec = 3
 
-
 def get_safetensors_files():
     models_dir = "./safetensors"
     safetensors_files = [
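Note on the fps change: dropping hardcoded_fps from 25 to 8 pairs with the frame-interpolation step this commit adds after generation. Assuming the SVD-xt default of 25 generated frames (an assumption; the frame count is not visible in this diff), the raw clip now roughly matches the hardcoded 3-second duration:

frames_generated = 25                 # assumed SVD-xt output length, not shown in this diff
base_duration = frames_generated / 8  # ≈ 3.1 s at the new hardcoded_fps = 8
# previously: 25 frames / 25 fps = 1.0 s, far short of hardcoded_duration_sec = 3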
@@ -192,35 +189,20 @@ model_select("AnimateLCM-SVD-xt-1.1.safetensors")
 # pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
 
 
-helper = DeepCacheSDHelper(pipe=pipe)
-helper.set_params(
-    # cache_interval means the frequency of feature caching, specified as the number of steps between each cache operation.
-    # with AnimateDiff this seems to have large effects, so we cannot use large values,
-    # even with cache_interval=3 I notice a big degradation in quality
-    cache_interval=2,
-
-    # cache_branch_id identifies which branch of the network (ordered from the shallowest to the deepest layer) is responsible for executing the caching processes.
-    # Note Julian: I should create my own benchmarks for this
-    cache_branch_id=0,
-
-    # Opting for a lower cache_branch_id or a larger cache_interval can lead to faster inference speed at the expense of reduced image quality
-    # (ablation experiments of these two hyperparameters can be found in the paper).
-)
-helper.enable()
-
 max_64_bit_int = 2**63 - 1
 
 def sample(
     secret_token: str,
     input_image_base64: str,
     seed: Optional[int] = 42,
-    randomize_seed: bool =
-    motion_bucket_id: int =
-
+    randomize_seed: bool = True,
+    motion_bucket_id: int = 33,
+    desired_duration: int = hardcoded_duration_sec,
+    desired_fps: int = hardcoded_fps,
     max_guidance_scale: float = 1.2,
     min_guidance_scale: float = 1,
-    width: int =
-    height: int =
+    width: int = 832,
+    height: int = 448,
     num_inference_steps: int = 4,
     decoding_t: int = 4, # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
     output_folder: str = "outputs_gradio",
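For reference, the deleted block was a standard DeepCache setup. A minimal sketch of the same pattern, using the DeepCacheSDHelper API exactly as the removed code did (the import line is an assumption, since it sits outside this hunk):

from DeepCache import DeepCacheSDHelper  # assumed import, not shown in the diff

helper = DeepCacheSDHelper(pipe=pipe)
helper.set_params(
    cache_interval=2,   # steps between cache refreshes; larger = faster, lower quality
    cache_branch_id=0,  # which shallow-to-deep branch drives the caching
)
helper.enable()
# ... run pipe(...) ...
helper.disable()  # restore uncached inference

Per the removed comments, lowering cache_branch_id or raising cache_interval trades image quality for speed.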
@@ -256,9 +238,16 @@ def sample(
         min_guidance_scale=min_guidance_scale,
         max_guidance_scale=max_guidance_scale,
     ).frames[0]
-
+
+    # we leave default values here
+    # alternatively we have implemented our own here: export_to_video_file(...)
+    export_to_video(frames, video_path, fps=hardcoded_fps)
+
     torch.manual_seed(seed)
 
+    final_video_path = interpolate_video_frames(video_path, enhanced_video_path, output_fps=desired_fps, desired_duration=desired_duration)
+
+
     # Read the content of the video file and encode it to base64
     with open(video_path, "rb") as video_file:
         video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
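interpolate_video_frames is a helper defined elsewhere in app.py and not shown in this diff; note that, as committed, the base64 encoding above still reads video_path rather than final_video_path. A hypothetical sketch of what such a helper could look like, assuming an ffmpeg-based approach (the real implementation may differ entirely):

import subprocess

def interpolate_video_frames(input_path, output_path, output_fps=24, desired_duration=3):
    # probe the input duration so the clip can be stretched to the desired length
    probe = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", input_path],
        capture_output=True, text=True, check=True)
    factor = desired_duration / float(probe.stdout.strip())
    # slow the clip down to the target duration, then synthesize the missing
    # frames with ffmpeg's motion-compensated interpolation filter
    subprocess.run(
        ["ffmpeg", "-y", "-i", input_path,
         "-filter:v", f"setpts={factor}*PTS,minterpolate=fps={output_fps}",
         output_path],
        check=True)
    return output_path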
@@ -297,20 +286,16 @@ with gr.Blocks() as demo:
            minimum=1,
            maximum=255,
        )
-
-
-
-           value=8,
-           minimum=5,
-           maximum=30,
-       )
+       duration_slider = gr.Slider(label="Desired Duration (seconds)", minimum=1, maximum=120, value=hardcoded_duration_sec, step=0.1)
+       fps_slider = gr.Slider(label="Desired Frames Per Second", minimum=5, maximum=60, value=hardcoded_fps, step=1)
+
        # note: we want something that is close to 16:9 (1.7777)
        # 576 / 320 = 1.8
        # 448 / 256 = 1.75
        width = gr.Slider(
            label="Width of input image",
            info="It should be divisible by 64",
-           value=
+           value=832, # 576, # 256, 320, 384, 448
            minimum=256,
            maximum=2048,
            step=64,
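The comment block above targets roughly 16:9; for comparison, the new defaults land slightly wider than both listed ratios:

# 576 / 320 = 1.800
# 448 / 256 = 1.750
# 832 / 448 ≈ 1.857   (new width/height defaults; 16:9 = 1.778)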
@@ -318,7 +303,7 @@ with gr.Blocks() as demo:
        height = gr.Slider(
            label="Height of input image",
            info="It should be divisible by 64",
-           value=
+           value=448, # 320, # 256, 320, 384, 448
            minimum=256,
            maximum=1152,
        )
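Both sliders advertise the divisible-by-64 constraint in their info text; the width slider enforces it with step=64. A hypothetical server-side guard (not part of this commit) for dimensions arriving through the API rather than the UI:

def snap_to_multiple_of_64(value: int, floor: int = 256) -> int:
    # hypothetical helper: round down to the nearest multiple of 64, with a lower bound
    return max(floor, (value // 64) * 64)

assert snap_to_multiple_of_64(833) == 832
assert snap_to_multiple_of_64(100) == 256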
@@ -353,7 +338,8 @@ with gr.Blocks() as demo:
            seed,
            randomize_seed,
            motion_bucket_id,
-
+           duration_slider,
+           fps_slider,
            max_guidance_scale,
            min_guidance_scale,
            width,
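The two new sliders are inserted positionally, so their spot in this inputs list must match the parameter order of sample(...). A minimal, self-contained sketch of the wiring pattern (component set trimmed; sample_stub stands in for the real sample function, which additionally takes the secret token and image ahead of these):

import gradio as gr

def sample_stub(seed, randomize_seed, motion_bucket_id,
                desired_duration, desired_fps,
                max_guidance_scale, min_guidance_scale, width):
    # Gradio passes component values in the order of the inputs list below
    return f"{desired_duration}s @ {desired_fps} fps, width={width}"

with gr.Blocks() as demo:
    seed = gr.Number(value=42, label="Seed")
    randomize_seed = gr.Checkbox(value=True, label="Randomize seed")
    motion_bucket_id = gr.Slider(minimum=1, maximum=255, value=33, label="Motion bucket id")
    duration_slider = gr.Slider(minimum=1, maximum=120, value=3, step=0.1, label="Desired Duration (seconds)")
    fps_slider = gr.Slider(minimum=5, maximum=60, value=8, step=1, label="Desired Frames Per Second")
    max_gs = gr.Slider(minimum=1, maximum=2, value=1.2, label="Max guidance scale")
    min_gs = gr.Slider(minimum=1, maximum=2, value=1, label="Min guidance scale")
    width = gr.Slider(minimum=256, maximum=2048, value=832, step=64, label="Width")
    out = gr.Textbox(label="Result")
    gr.Button("Run").click(
        fn=sample_stub,
        inputs=[seed, randomize_seed, motion_bucket_id,
                duration_slider, fps_slider,
                min_gs.maximum and max_gs or max_gs,  # placeholder removed below
                min_gs, width],
        outputs=out,
    )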