jbilcke-hf HF staff committed on
Commit
c9d5420
1 Parent(s): ef4b87c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -187,16 +187,32 @@ pipe = StableVideoDiffusionPipeline.from_pretrained(
187
  variant="fp16",
188
  )
189
  pipe.to("cuda")
190
- # pipe.enable_model_cpu_offload() # for smaller cost
191
  model_select("AnimateLCM-SVD-xt-1.1.safetensors")
192
- pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
193
 
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  max_64_bit_int = 2**63 - 1
196
 
197
  def sample(
198
  secret_token: str,
199
- input_image_base64: Image,
200
  seed: Optional[int] = 42,
201
  randomize_seed: bool = False,
202
  motion_bucket_id: int = 80,
@@ -214,7 +230,7 @@ def sample(
214
  f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
215
 
216
  image = decode_data_uri_to_image(input_image_base64)
217
-
218
  print(f"seed={seed}\nrandomize_seed={randomize_seed}\nmotion_bucket_id={motion_bucket_id}\nfps_id={fps_id}\nmax_guidance_scale={max_guidance_scale}\nmin_guidance_scale={min_guidance_scale}\nwidth={width}\nheight={height}\nnum_inference_steps={num_inference_steps}\ndecoding_t={decoding_t}")
219
 
220
  if image.mode == "RGBA":
@@ -246,7 +262,7 @@ def sample(
246
  # Read the content of the video file and encode it to base64
247
  with open(video_path, "rb") as video_file:
248
  video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
249
-
250
  # Prepend the appropriate data URI header with MIME type
251
  return 'data:video/mp4;base64,' + video_base64
252
 
 
187
  variant="fp16",
188
  )
189
  pipe.to("cuda")
190
+ pipe.enable_model_cpu_offload() # for smaller cost
191
  model_select("AnimateLCM-SVD-xt-1.1.safetensors")
192
+ # pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
193
 
194
 
195
+ helper = DeepCacheSDHelper(pipe=pipe)
196
+ helper.set_params(
197
+ # cache_interval is the frequency of feature caching, specified as the number of steps between consecutive cache operations.
198
+ # With AnimateDiff the cache interval seems to have a large effect on the output, so we cannot use large values;
199
+ # even with cache_interval=3 I notice a big degradation in quality.
200
+ cache_interval=2,
201
+
202
+ # cache_branch_id identifies which branch of the network (ordered from the shallowest to the deepest layer) is responsible for executing the caching processes.
203
+ # NOTE(Julian): I should create my own benchmarks for this.
204
+ cache_branch_id=0,
205
+
206
+ # Opting for a lower cache_branch_id or a larger cache_interval can lead to faster inference speed at the expense of reduced image quality
207
+ # (ablation experiments of these two hyperparameters can be found in the DeepCache paper).
208
+ )
209
+ helper.enable()
210
+
211
  max_64_bit_int = 2**63 - 1
212
 
213
  def sample(
214
  secret_token: str,
215
+ input_image_base64: str,
216
  seed: Optional[int] = 42,
217
  randomize_seed: bool = False,
218
  motion_bucket_id: int = 80,
 
230
  f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
231
 
232
  image = decode_data_uri_to_image(input_image_base64)
233
+
234
  print(f"seed={seed}\nrandomize_seed={randomize_seed}\nmotion_bucket_id={motion_bucket_id}\nfps_id={fps_id}\nmax_guidance_scale={max_guidance_scale}\nmin_guidance_scale={min_guidance_scale}\nwidth={width}\nheight={height}\nnum_inference_steps={num_inference_steps}\ndecoding_t={decoding_t}")
235
 
236
  if image.mode == "RGBA":
 
262
  # Read the content of the video file and encode it to base64
263
  with open(video_path, "rb") as video_file:
264
  video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
265
+
266
  # Prepend the appropriate data URI header with MIME type
267
  return 'data:video/mp4;base64,' + video_base64
268