Update app.py
app.py CHANGED

@@ -34,6 +34,10 @@ dtype = torch.float16
 pipe = AnimateDiffPipeline.from_pretrained(bases[base_loaded], torch_dtype=dtype).to(device)
 pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
 
+# those are AnimateDiff defaults - we don't touch them for now
+hardcoded_fps = 10
+hardcoded_duration_sec = 1.6
+
 # unfortunately 2 steps isn't good enough for AiTube, we need 4 steps
 step = 4
 repo = "ByteDance/AnimateDiff-Lightning"
@@ -63,7 +67,7 @@ step_loaded = step
 # This is a critical issue for AiTube so we are forced to implement our own routine.
 # ------------------------------------------------------------------------------------
 
-def export_to_video_file(video_frames, output_video_path=None, fps=10):
+def export_to_video_file(video_frames, output_video_path=None, fps=hardcoded_fps):
     if output_video_path is None:
         output_video_path = tempfile.NamedTemporaryFile(suffix=".webm").name
 
@@ -92,7 +96,7 @@ def export_to_video_file(video_frames, output_video_path=None, fps=10):
 # those are way too slow for AiTube, which needs things to be as fast as possible
 # -----------------------------------------------------------------------------------
 
-def interpolate_video_frames(input_file_path, output_file_path, output_fps=10, desired_duration=1.6, original_duration=1.6):
+def interpolate_video_frames(input_file_path, output_file_path, output_fps=hardcoded_fps, desired_duration=hardcoded_duration_sec, original_duration=hardcoded_duration_sec):
     scale_factor = original_duration / desired_duration
     interpolation_filter = f'minterpolate=fps={output_fps},setpts={scale_factor}*PTS'
 
@@ -165,12 +169,12 @@ def generate_image(secret_token, prompt, base, width, height, motion, step, desi
     #
     # maybe to make things faster, we could *not* encode the video (as this uses files and external processes, which can be slow)
     # and instead return the unencoded frames to the frontend renderer?
-    raw_video_path = export_to_video_file(output.frames[0], raw_video_path, fps=10)
+    raw_video_path = export_to_video_file(output.frames[0], raw_video_path, fps=hardcoded_fps)
 
     final_video_path = raw_video_path
 
     # Optional frame interpolation
-    if desired_duration != 1.6 or desired_fps != 10:
+    if desired_duration > hardcoded_duration_sec or desired_duration < hardcoded_duration_sec or desired_fps > hardcoded_fps or desired_fps < hardcoded_fps:
         final_video_path = interpolate_video_frames(raw_video_path, enhanced_video_path, output_fps=desired_fps, desired_duration=desired_duration)
 
     # Read the content of the video file and encode it to base64
@@ -258,8 +262,8 @@ with gr.Blocks() as demo:
             ('8-Step', 8)],
         value=4,
     )
-    duration_slider = gr.Slider(label="Desired Duration (seconds)", min_value=1, max_value=120, value=1.6, step=0.1)
-    fps_slider = gr.Slider(label="Desired Frames Per Second", min_value=10, max_value=60, value=10, step=1)
+    duration_slider = gr.Slider(label="Desired Duration (seconds)", min_value=1, max_value=120, value=hardcoded_duration_sec, step=0.1)
+    fps_slider = gr.Slider(label="Desired Frames Per Second", min_value=10, max_value=60, value=hardcoded_fps, step=1)
 
     submit = gr.Button()
 
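The body of export_to_video_file sits mostly outside the hunks above. For reference, a minimal sketch of what such a hand-rolled .webm export can look like, assuming OpenCV built with VP9 support and frames arriving as RGB arrays or PIL images; everything past the signature is an assumption, not the commit's code:

import tempfile

import cv2
import numpy as np

def export_to_video_file_sketch(video_frames, output_video_path=None, fps=10):
    # same contract as the routine in the diff: pick a temp .webm path if none given
    if output_video_path is None:
        output_video_path = tempfile.NamedTemporaryFile(suffix=".webm", delete=False).name
    frames = [np.asarray(frame) for frame in video_frames]
    height, width = frames[0].shape[:2]
    # "VP90" is the OpenCV FourCC for VP9, the usual codec inside .webm
    writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"VP90"), fps, (width, height))
    for frame in frames:
        if frame.dtype != np.uint8:
            # diffusers pipelines commonly return float frames in [0, 1]
            frame = (frame * 255.0).clip(0, 255).astype(np.uint8)
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # OpenCV expects BGR order
    writer.release()
    return output_video_path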
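interpolate_video_frames only shows how the filter string is built. A hedged sketch of how such a chain is typically handed to ffmpeg follows; the subprocess invocation is an assumption, not the commit's code. Two caveats on the filter itself: with setpts=k*PTS, k > 1 stretches the clip, so mapping original_duration onto a longer desired_duration needs k = desired / original (the diff computes the inverse ratio, which would shorten the clip instead), and setpts should run before minterpolate so that output_fps really is the frame rate of the final stream:

import subprocess

def interpolate_video_frames_sketch(input_file_path, output_file_path, output_fps=10,
                                    desired_duration=1.6, original_duration=1.6):
    # setpts=k*PTS rescales timestamps (k > 1 slows/stretches the clip), then
    # minterpolate synthesizes motion-compensated frames at the target frame rate
    scale_factor = desired_duration / original_duration
    video_filter = f"setpts={scale_factor}*PTS,minterpolate=fps={output_fps}"
    subprocess.run(
        ["ffmpeg", "-y", "-i", input_file_path, "-filter:v", video_filter, output_file_path],
        check=True,
    )
    return output_file_path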
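The comparison chain guarding the interpolation step spells out two inequality tests; a more direct equivalent, shown only to make the intent explicit:

# interpolate only when the request deviates from the AnimateDiff defaults
needs_interpolation = desired_duration != hardcoded_duration_sec or desired_fps != hardcoded_fps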
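The base64 step referenced in the last context line needs only the standard library; a minimal sketch (whether the frontend expects a bare string or a data URI is not visible in the diff):

import base64

def video_file_to_base64(video_path):
    # read the finished .webm back and encode it for the response;
    # prepend "data:video/webm;base64," if the frontend expects a data URI
    with open(video_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")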
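One caveat on the two new sliders: stock Gradio spells the Slider bounds minimum and maximum, not min_value and max_value, so depending on the Gradio version the committed lines either raise a TypeError or silently drop the bounds. If upstream Gradio is the target, they would read:

duration_slider = gr.Slider(label="Desired Duration (seconds)", minimum=1, maximum=120, value=hardcoded_duration_sec, step=0.1)
fps_slider = gr.Slider(label="Desired Frames Per Second", minimum=10, maximum=60, value=hardcoded_fps, step=1)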