Commit 6204823 · Update app.py
Parent(s): c9d5420

app.py CHANGED
@@ -26,12 +26,9 @@ from lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler
 
 SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
 
-
-
-hardcoded_fps = 25
+hardcoded_fps = 8
 hardcoded_duration_sec = 3
 
-
 def get_safetensors_files():
     models_dir = "./safetensors"
     safetensors_files = [
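Note on the fps change: dropping hardcoded_fps from 25 to 8 pairs with the frame-interpolation step this commit adds after generation. Assuming the SVD-xt default of 25 generated frames (an assumption; the frame count is not visible in this diff), the raw clip now roughly matches the hardcoded 3-second duration:

frames_generated = 25                 # assumed SVD-xt output length, not shown in this diff
base_duration = frames_generated / 8  # ≈ 3.1 s at the new hardcoded_fps = 8
# previously: 25 frames / 25 fps = 1.0 s, far short of hardcoded_duration_sec = 3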
@@ -192,35 +189,20 @@ model_select("AnimateLCM-SVD-xt-1.1.safetensors")
 # pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
 
 
-helper = DeepCacheSDHelper(pipe=pipe)
-helper.set_params(
-    # cache_interval means the frequency of feature caching, specified as the number of steps between each cache operation.
-    # with AnimateDiff this seems to have large effects, so we cannot use large values,
-    # even with cache_interval=3 I notice a big degradation in quality
-    cache_interval=2,
-
-    # cache_branch_id identifies which branch of the network (ordered from the shallowest to the deepest layer) is responsible for executing the caching processes.
-    # Note Julian: I should create my own benchmarks for this
-    cache_branch_id=0,
-
-    # Opting for a lower cache_branch_id or a larger cache_interval can lead to faster inference speed at the expense of reduced image quality
-    # (ablation experiments of these two hyperparameters can be found in the paper).
-)
-helper.enable()
-
 max_64_bit_int = 2**63 - 1
 
 def sample(
     secret_token: str,
     input_image_base64: str,
     seed: Optional[int] = 42,
-    randomize_seed: bool =
-    motion_bucket_id: int =
-
+    randomize_seed: bool = True,
+    motion_bucket_id: int = 33,
+    desired_duration: int = hardcoded_duration_sec,
+    desired_fps: int = hardcoded_fps,
     max_guidance_scale: float = 1.2,
     min_guidance_scale: float = 1,
-    width: int =
-    height: int =
+    width: int = 832,
+    height: int = 448,
     num_inference_steps: int = 4,
     decoding_t: int = 4, # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
     output_folder: str = "outputs_gradio",
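For reference, the deleted block was a standard DeepCache setup. A minimal sketch of the same pattern, using the DeepCacheSDHelper API exactly as the removed code did (the import line is an assumption, since it sits outside this hunk):

from DeepCache import DeepCacheSDHelper  # assumed import, not shown in the diff

helper = DeepCacheSDHelper(pipe=pipe)
helper.set_params(
    cache_interval=2,   # steps between cache refreshes; larger = faster, lower quality
    cache_branch_id=0,  # which shallow-to-deep branch drives the caching
)
helper.enable()
# ... run pipe(...) ...
helper.disable()  # restore uncached inference

Per the removed comments, lowering cache_branch_id or raising cache_interval trades image quality for speed.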
@@ -256,9 +238,16 @@ def sample(
         min_guidance_scale=min_guidance_scale,
         max_guidance_scale=max_guidance_scale,
     ).frames[0]
-
+
+    # we leave default values here
+    # alternatively we have implemented our own here: export_to_video_file(...)
+    export_to_video(frames, video_path, fps=hardcoded_fps)
+
     torch.manual_seed(seed)
 
+    final_video_path = interpolate_video_frames(video_path, enhanced_video_path, output_fps=desired_fps, desired_duration=desired_duration)
+
+
     # Read the content of the video file and encode it to base64
     with open(video_path, "rb") as video_file:
         video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
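interpolate_video_frames is a helper defined elsewhere in app.py and not shown in this diff; note that, as committed, the base64 encoding above still reads video_path rather than final_video_path. A hypothetical sketch of what such a helper could look like, assuming an ffmpeg-based approach (the real implementation may differ entirely):

import subprocess

def interpolate_video_frames(input_path, output_path, output_fps=24, desired_duration=3):
    # probe the input duration so the clip can be stretched to the desired length
    probe = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", input_path],
        capture_output=True, text=True, check=True)
    factor = desired_duration / float(probe.stdout.strip())
    # slow the clip down to the target duration, then synthesize the missing
    # frames with ffmpeg's motion-compensated interpolation filter
    subprocess.run(
        ["ffmpeg", "-y", "-i", input_path,
         "-filter:v", f"setpts={factor}*PTS,minterpolate=fps={output_fps}",
         output_path],
        check=True)
    return output_path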
@@ -297,20 +286,16 @@ with gr.Blocks() as demo:
            minimum=1,
            maximum=255,
        )
-
-
-
-           value=8,
-           minimum=5,
-           maximum=30,
-       )
+       duration_slider = gr.Slider(label="Desired Duration (seconds)", minimum=1, maximum=120, value=hardcoded_duration_sec, step=0.1)
+       fps_slider = gr.Slider(label="Desired Frames Per Second", minimum=5, maximum=60, value=hardcoded_fps, step=1)
+
        # note: we want something that is close to 16:9 (1.7777)
        # 576 / 320 = 1.8
        # 448 / 256 = 1.75
        width = gr.Slider(
            label="Width of input image",
            info="It should be divisible by 64",
-           value=
+           value=832, # 576, # 256, 320, 384, 448
            minimum=256,
            maximum=2048,
            step=64,
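The comment block above targets roughly 16:9; for comparison, the new defaults land slightly wider than both listed ratios:

# 576 / 320 = 1.800
# 448 / 256 = 1.750
# 832 / 448 ≈ 1.857   (new width/height defaults; 16:9 = 1.778)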
@@ -318,7 +303,7 @@ with gr.Blocks() as demo:
        height = gr.Slider(
            label="Height of input image",
            info="It should be divisible by 64",
-           value=
+           value=448, # 320, # 256, 320, 384, 448
            minimum=256,
            maximum=1152,
        )
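Both sliders advertise the divisible-by-64 constraint in their info text; the width slider enforces it with step=64. A hypothetical server-side guard (not part of this commit) for dimensions arriving through the API rather than the UI:

def snap_to_multiple_of_64(value: int, floor: int = 256) -> int:
    # hypothetical helper: round down to the nearest multiple of 64, with a lower bound
    return max(floor, (value // 64) * 64)

assert snap_to_multiple_of_64(833) == 832
assert snap_to_multiple_of_64(100) == 256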
@@ -353,7 +338,8 @@ with gr.Blocks() as demo:
            seed,
            randomize_seed,
            motion_bucket_id,
-
+           duration_slider,
+           fps_slider,
            max_guidance_scale,
            min_guidance_scale,
            width,
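The two new sliders are inserted positionally, so their spot in this inputs list must match the parameter order of sample(...). A minimal, self-contained sketch of the wiring pattern (component set trimmed; sample_stub stands in for the real sample function, which additionally takes the secret token and image ahead of these):

import gradio as gr

def sample_stub(seed, randomize_seed, motion_bucket_id,
                desired_duration, desired_fps,
                max_guidance_scale, min_guidance_scale, width):
    # Gradio passes component values in the order of the inputs list below
    return f"{desired_duration}s @ {desired_fps} fps, width={width}"

with gr.Blocks() as demo:
    seed = gr.Number(value=42, label="Seed")
    randomize_seed = gr.Checkbox(value=True, label="Randomize seed")
    motion_bucket_id = gr.Slider(minimum=1, maximum=255, value=33, label="Motion bucket id")
    duration_slider = gr.Slider(minimum=1, maximum=120, value=3, step=0.1, label="Desired Duration (seconds)")
    fps_slider = gr.Slider(minimum=5, maximum=60, value=8, step=1, label="Desired Frames Per Second")
    max_gs = gr.Slider(minimum=1, maximum=2, value=1.2, label="Max guidance scale")
    min_gs = gr.Slider(minimum=1, maximum=2, value=1, label="Min guidance scale")
    width = gr.Slider(minimum=256, maximum=2048, value=832, step=64, label="Width")
    out = gr.Textbox(label="Result")
    gr.Button("Run").click(
        fn=sample_stub,
        inputs=[seed, randomize_seed, motion_bucket_id,
                duration_slider, fps_slider,
                min_gs.maximum and max_gs or max_gs,  # placeholder removed below
                min_gs, width],
        outputs=out,
    )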