Update app.py
app.py CHANGED
@@ -34,27 +34,31 @@ dtype = torch.float16
 pipe = AnimateDiffPipeline.from_pretrained(bases[base_loaded], torch_dtype=dtype).to(device)
 pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
 
+step = 2
+repo = "ByteDance/AnimateDiff-Lightning"
+ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
+pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device), strict=False)
+step_loaded = step
 
 # Note Julian: I'm not sure this works well when the pipeline changes dynamically.. to check
 helper = DeepCacheSDHelper(pipe=pipe)
 helper.set_params(
     # cache_interval means the frequency of feature caching, specified as the number of steps between each cache operation.
-
-    cache_interval=4,
+    cache_interval=2,
 
-    # cache_branch_id identifies which branch of the network (ordered from the shallowest to the deepest layer) is responsible for executing the caching processes.
-    cache_branch_id=
+    # cache_branch_id identifies which branch of the network (ordered from the shallowest to the deepest layer) is responsible for executing the caching processes.
+    # Note Julian: I have tried cache_branch_id=0 but quality was very "smoothed out"
+    cache_branch_id=0,
 
     # Opting for a lower cache_branch_id or a larger cache_interval can lead to faster inference speed at the expense of reduced image quality
     #(ablation experiments of these two hyperparameters can be found in the paper).
 )
 helper.enable()
 
-#
-#
-#
-#
-# ------------------------------------------------------------------------------
+# ----------------------------------- VIDEO ENCODING ---------------------------------
+# The Diffusers utils hardcode MP4V as a codec which is not supported by all browsers.
+# This is a critical issue for AiTube so we are forced to implement our own routine.
+# ------------------------------------------------------------------------------------
 
 def export_to_video_file(video_frames, output_video_path=None, fps=10):
     if output_video_path is None:
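This hunk swaps in the distilled AnimateDiff-Lightning weights and tightens the DeepCache parameters. For reference, a minimal sketch of how such a pipeline is typically invoked afterwards, assuming the `pipe` and `step` objects set up above; the prompt is illustrative and not part of this commit. Lightning checkpoints are distilled without classifier-free guidance, so `guidance_scale` stays at 1.0 and `num_inference_steps` has to match the step count in the checkpoint name:

output = pipe(
    prompt="a corgi running on the beach, photorealistic",  # illustrative prompt
    guidance_scale=1.0,         # CFG is distilled out of the Lightning weights
    num_inference_steps=step,   # must match the `{step}step` checkpoint
)
frames = output.frames[0]       # list of PIL images for the export routine below

With a 2-step schedule there is at most one step whose features DeepCache can actually reuse, which is likely what the "to check" note above is flagging.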
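The new VIDEO ENCODING header explains why `export_to_video_file` exists at all: Diffusers' own export utility hardcodes the MP4V fourcc, which browsers generally refuse to play. The function body is not part of this diff, so the following is only a hedged sketch of such a routine, assuming an OpenCV build with an H.264 ("avc1") encoder is available:

import cv2
import numpy as np

def export_to_video_file_sketch(video_frames, output_video_path, fps=10):
    # "avc1" requests H.264, which browsers decode natively; this assumes an
    # OpenCV build with an H.264 encoder (e.g. openh264) on the machine.
    fourcc = cv2.VideoWriter_fourcc(*"avc1")
    height, width = np.asarray(video_frames[0]).shape[:2]
    writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    for frame in video_frames:
        rgb = np.asarray(frame)
        if rgb.dtype != np.uint8:  # diffusers may hand back float arrays in [0, 1]
            rgb = (rgb * 255).clip(0, 255).astype(np.uint8)
        writer.write(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))  # OpenCV expects BGR
    writer.release()
    return output_video_path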
@@ -85,7 +89,7 @@ def export_to_video_file(video_frames, output_video_path=None, fps=10):
 # those are way too slow for a AiTube which needs things to be as fast as possible
 # -----------------------------------------------------------------------------------
 
-def interpolate_video_frames(input_file_path, output_file_path, output_fps=10, desired_duration=
+def interpolate_video_frames(input_file_path, output_file_path, output_fps=10, desired_duration=1):
     """
     Interpolates frames in a video file to adjust frame rate and duration using ffmpeg's minterpolate.
 
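Only the signature of `interpolate_video_frames` changes here (the duration default becomes 1 second); per its docstring, the body drives ffmpeg's minterpolate filter. A hedged sketch of what such a routine typically looks like, where the `original_duration` parameter and the exact filter chain are illustrative assumptions rather than the committed code:

import subprocess

def interpolate_video_frames_sketch(input_file_path, output_file_path,
                                    output_fps=10, desired_duration=1,
                                    original_duration=1.0):
    # setpts stretches or compresses the clip to the desired duration, then
    # minterpolate synthesizes motion-compensated frames at the target fps.
    stretch = desired_duration / original_duration
    filters = (f"setpts={stretch}*PTS,"
               f"minterpolate=fps={output_fps}:mi_mode=mci:mc_mode=aobmc")
    subprocess.run(["ffmpeg", "-y", "-i", input_file_path,
                    "-vf", filters, output_file_path], check=True)
    return output_file_path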
@@ -202,7 +206,7 @@ def generate_image(secret_token, prompt, base, width, height, motion, step, desi
     final_video_path = raw_video_path
 
     # Optional frame interpolation
-    if desired_duration !=
+    if desired_duration != 1 or desired_fps != 10:
         final_video_path = interpolate_video_frames(raw_video_path, enhanced_video_path, output_fps=desired_fps, desired_duration=desired_duration)
 
     # Read the content of the video file and encode it to base64
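The rewritten condition makes interpolation strictly opt-in: at the defaults (1 second, 10 fps) the raw render passes through untouched, keeping the fast path fast. The trailing comment refers to the read-back step that returns the video to the caller; a minimal sketch of that step, where the helper name and the data-URI framing are illustrative assumptions:

import base64

def video_to_data_uri(path):
    # Read the finished MP4 and encode it so a JSON/Gradio API can return it
    # as text instead of a file handle.
    with open(path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:video/mp4;base64,{encoded}"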
@@ -290,7 +294,7 @@ with gr.Blocks() as demo:
                 ('8-Step', 8)],
             value=4,
         )
-        duration_slider = gr.Slider(label="Desired Duration (seconds)", min_value=
+        duration_slider = gr.Slider(label="Desired Duration (seconds)", min_value=1, max_value=30, value=1, step=1)
         fps_slider = gr.Slider(label="Desired Frames Per Second", min_value=10, max_value=60, value=10, step=1)
 
         submit = gr.Button()
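The new duration slider feeds the interpolation branch above. For context, a hedged sketch of how such sliders are typically wired into the submit handler; the input list mirrors the `generate_image` signature visible in the earlier hunk header, while `output_video` and the `api_name` are illustrative assumptions (current Gradio releases spell slider bounds `minimum`/`maximum` rather than `min_value`/`max_value`, so the kwargs above may need renaming to run):

submit.click(
    fn=generate_image,
    inputs=[secret_token, prompt, base, width, height, motion, step,
            duration_slider, fps_slider],
    outputs=[output_video],
    api_name="run",
)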