ai-tube-model-als-9

Running on A10G

App Files Files Community

jbilcke-hf HF staff committed on May 14

Commit

c9d5420

•

1 Parent(s): ef4b87c

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -5

app.py CHANGED Viewed

@@ -187,16 +187,32 @@ pipe = StableVideoDiffusionPipeline.from_pretrained(
     variant="fp16",
 )
 pipe.to("cuda")
-# pipe.enable_model_cpu_offload()  # for smaller cost
 model_select("AnimateLCM-SVD-xt-1.1.safetensors")
-pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
 max_64_bit_int = 2**63 - 1
 def sample(
     secret_token: str,
-    input_image_base64: Image,
     seed: Optional[int] = 42,
     randomize_seed: bool = False,
     motion_bucket_id: int = 80,
@@ -214,7 +230,7 @@ def sample(
             f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
     image = decode_data_uri_to_image(input_image_base64)
     print(f"seed={seed}\nrandomize_seed={randomize_seed}\nmotion_bucket_id={motion_bucket_id}\nfps_id={fps_id}\nmax_guidance_scale={max_guidance_scale}\nmin_guidance_scale={min_guidance_scale}\nwidth={width}\nheight={height}\nnum_inference_steps={num_inference_steps}\ndecoding_t={decoding_t}")
     if image.mode == "RGBA":
@@ -246,7 +262,7 @@ def sample(
     # Read the content of the video file and encode it to base64
     with open(video_path, "rb") as video_file:
         video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
     # Prepend the appropriate data URI header with MIME type
     return 'data:video/mp4;base64,' + video_base64

     variant="fp16",
 )
 pipe.to("cuda")
+pipe.enable_model_cpu_offload()  # for smaller cost
 model_select("AnimateLCM-SVD-xt-1.1.safetensors")
+# pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
+helper = DeepCacheSDHelper(pipe=pipe)
+helper.set_params(
+    # cache_interval is the frequency of feature caching, specified as the number of steps between each cache operation.
+    # With AnimateDiff this seems to have a large effect, so we cannot use large values:
+    # even with cache_interval=3 I notice a big degradation in quality.
+    cache_interval=2,
+    # cache_branch_id identifies which branch of the network (ordered from the shallowest to the deepest layer) is responsible for executing the caching processes.
+    # Note Julian: I should create my own benchmarks for this
+    cache_branch_id=0,
+    # Opting for a lower cache_branch_id or a larger cache_interval can lead to faster inference speed, at the expense of reduced image quality
+    # (ablation experiments of these two hyperparameters can be found in the paper).
+)
+helper.enable()
 max_64_bit_int = 2**63 - 1
 def sample(
     secret_token: str,
+    input_image_base64: str,
     seed: Optional[int] = 42,
     randomize_seed: bool = False,
     motion_bucket_id: int = 80,
             f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
     image = decode_data_uri_to_image(input_image_base64)
     print(f"seed={seed}\nrandomize_seed={randomize_seed}\nmotion_bucket_id={motion_bucket_id}\nfps_id={fps_id}\nmax_guidance_scale={max_guidance_scale}\nmin_guidance_scale={min_guidance_scale}\nwidth={width}\nheight={height}\nnum_inference_steps={num_inference_steps}\ndecoding_t={decoding_t}")
     if image.mode == "RGBA":
     # Read the content of the video file and encode it to base64
     with open(video_path, "rb") as video_file:
         video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
     # Prepend the appropriate data URI header with MIME type
     return 'data:video/mp4;base64,' + video_base64