jbilcke-hf committed
Commit 6204823
1 Parent(s): c9d5420

Update app.py

Files changed (1)
  1. app.py +22 -36
app.py CHANGED
@@ -26,12 +26,9 @@ from lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler
 
 SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
 
-# is that 8 or 25?
-
-hardcoded_fps = 25
+hardcoded_fps = 8
 hardcoded_duration_sec = 3
 
-
 def get_safetensors_files():
     models_dir = "./safetensors"
     safetensors_files = [
@@ -192,35 +189,20 @@ model_select("AnimateLCM-SVD-xt-1.1.safetensors")
 # pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
 
 
-helper = DeepCacheSDHelper(pipe=pipe)
-helper.set_params(
-    # cache_interval means the frequency of feature caching, specified as the number of steps between each cache operation.
-    # with AnimateDiff this seems to have large effects, so we cannot use large values,
-    # even with cache_interval=3 I notice a big degradation in quality
-    cache_interval=2,
-
-    # cache_branch_id identifies which branch of the network (ordered from the shallowest to the deepest layer) is responsible for executing the caching processes.
-    # Note Julian: I should create my own benchmarks for this
-    cache_branch_id=0,
-
-    # Opting for a lower cache_branch_id or a larger cache_interval can lead to faster inference speed at the expense of reduced image quality
-    # (ablation experiments of these two hyperparameters can be found in the paper).
-)
-helper.enable()
-
 max_64_bit_int = 2**63 - 1
 
 def sample(
     secret_token: str,
     input_image_base64: str,
     seed: Optional[int] = 42,
-    randomize_seed: bool = False,
-    motion_bucket_id: int = 80,
-    fps_id: int = 8,
+    randomize_seed: bool = True,
+    motion_bucket_id: int = 33,
+    desired_duration: int = hardcoded_duration_sec,
+    desired_fps: int = hardcoded_fps,
     max_guidance_scale: float = 1.2,
     min_guidance_scale: float = 1,
-    width: int = 768,
-    height: int = 384,
+    width: int = 832,
+    height: int = 448,
     num_inference_steps: int = 4,
     decoding_t: int = 4, # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
     output_folder: str = "outputs_gradio",
@@ -256,9 +238,16 @@ def sample(
         min_guidance_scale=min_guidance_scale,
         max_guidance_scale=max_guidance_scale,
     ).frames[0]
-    export_to_video(frames, video_path, fps=fps_id)
+
+    # we leave default values here
+    # alternatively we have implemented our own here: export_to_video_file(...)
+    export_to_video(frames, video_path, fps=hardcoded_fps)
+
     torch.manual_seed(seed)
 
+    final_video_path = interpolate_video_frames(video_path, enhanced_video_path, output_fps=desired_fps, desired_duration=desired_duration)
+
+
     # Read the content of the video file and encode it to base64
     with open(video_path, "rb") as video_file:
         video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
@@ -297,20 +286,16 @@ with gr.Blocks() as demo:
         minimum=1,
         maximum=255,
     )
-    fps_id = gr.Slider(
-        label="Frames per second",
-        info="The length of your video in seconds will be 25/fps",
-        value=8,
-        minimum=5,
-        maximum=30,
-    )
+    duration_slider = gr.Slider(label="Desired Duration (seconds)", min_value=1, max_value=120, value=hardcoded_duration_sec, step=0.1)
+    fps_slider = gr.Slider(label="Desired Frames Per Second", min_value=5, max_value=60, value=hardcoded_fps, step=1)
+
     # note: we want something that is close to 16:9 (1.7777)
     # 576 / 320 = 1.8
     # 448 / 256 = 1.75
     width = gr.Slider(
         label="Width of input image",
         info="It should be divisible by 64",
-        value=768, # 576, # 256, 320, 384, 448
+        value=832, # 576, # 256, 320, 384, 448
         minimum=256,
         maximum=2048,
         step=64,
@@ -318,7 +303,7 @@ with gr.Blocks() as demo:
     height = gr.Slider(
         label="Height of input image",
         info="It should be divisible by 64",
-        value=384, # 320, # 256, 320, 384, 448
+        value=448, # 320, # 256, 320, 384, 448
         minimum=256,
         maximum=1152,
     )
@@ -353,7 +338,8 @@ with gr.Blocks() as demo:
         seed,
         randomize_seed,
         motion_bucket_id,
-        fps_id,
+        duration_slider,
+        fps_slider,
         max_guidance_scale,
         min_guidance_scale,
         width,
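The commit decouples generation from playback timing: the pipeline still produces 25 frames, export_to_video now writes them at the fixed hardcoded_fps = 8 (roughly 3 seconds of video), and interpolate_video_frames retimes that clip to the requested desired_duration and desired_fps. That helper is not shown in this diff, so the following is only a sketch of how such retiming could be done with ffmpeg's setpts and minterpolate filters; the function name, arguments and defaults below are placeholders, not the repository's implementation.

import subprocess

def interpolate_video_frames_sketch(
    input_path: str,
    output_path: str,
    output_fps: int = 24,
    desired_duration: float = 3.0,
    original_duration: float = 25 / 8,  # 25 generated frames at hardcoded_fps = 8
) -> str:
    # Hypothetical stand-in for interpolate_video_frames(): stretch the clip
    # to the requested duration, then synthesize intermediate frames so the
    # output plays at the requested frame rate.
    speed_factor = desired_duration / original_duration
    filters = (
        f"setpts={speed_factor}*PTS,"                  # retime to desired_duration
        f"minterpolate=fps={output_fps}:mi_mode=mci"   # motion-compensated interpolation
    )
    subprocess.run(
        ["ffmpeg", "-y", "-i", input_path, "-vf", filters, output_path],
        check=True,
    )
    return output_path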
 
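On the UI side, the new duration_slider and fps_slider are declared as one-liners. For reference, here is the same pair of controls spelled out in the multi-line keyword style used by the neighbouring sliders in app.py (gr.Slider's range arguments are minimum and maximum); the values mirror hardcoded_duration_sec and hardcoded_fps from the top of the file.

import gradio as gr

with gr.Blocks() as demo:
    # The same duration / fps controls, written in the keyword style
    # used by the other sliders in app.py.
    duration_slider = gr.Slider(
        label="Desired Duration (seconds)",
        value=3,      # hardcoded_duration_sec
        minimum=1,
        maximum=120,
        step=0.1,
    )
    fps_slider = gr.Slider(
        label="Desired Frames Per Second",
        value=8,      # hardcoded_fps
        minimum=5,
        maximum=60,
        step=1,
    )

With the new defaults the input resolution becomes 832 × 448, an aspect ratio of about 1.86, which is closer to the 16:9 target (≈ 1.78) mentioned in the comments than the previous 768 × 384 (exactly 2.0).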