xiangfan00 committed on
Commit
7baf5d2
·
1 Parent(s): 6f279fd

Increase chunk time and log progress

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -579,6 +579,7 @@ def _run_diffusion_steps(
579
  scheduler = GENERATION_PIPE.scheduler
580
  with torch.no_grad():
581
  for i in range(start_step, end_step):
 
582
  t = timesteps[i]
583
  latent_model_input = torch.cat([latents, condition], dim=1).to(transformer_dtype)
584
  timestep = t.expand(latents.shape[0])
@@ -601,10 +602,16 @@ def _run_diffusion_steps(
601
  )[0]
602
  noise_pred = noise_uncond + GUIDANCE_SCALE * (noise_pred - noise_uncond)
603
  latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0]
 
 
 
 
 
 
604
  return latents
605
 
606
 
607
- @spaces.GPU(duration=50)
608
  def generate_latents_setup_on_gpu(resized_image, prompt, seed, height, width):
609
  """Encode prompt+image, prepare initial latents and condition. NO denoising.
610
 
@@ -669,7 +676,7 @@ def generate_latents_setup_on_gpu(resized_image, prompt, seed, height, width):
669
  return state
670
 
671
 
672
- @spaces.GPU(duration=50)
673
  def generate_latents_chunk_on_gpu(state, end_step):
674
  """Run denoising steps from state['step_idx'] to end_step. Only transformer is moved to GPU."""
675
  log_cuda_mem(f"start latents chunk -> step {end_step}")
 
579
  scheduler = GENERATION_PIPE.scheduler
580
  with torch.no_grad():
581
  for i in range(start_step, end_step):
582
+ step_start = time.perf_counter()
583
  t = timesteps[i]
584
  latent_model_input = torch.cat([latents, condition], dim=1).to(transformer_dtype)
585
  timestep = t.expand(latents.shape[0])
 
602
  )[0]
603
  noise_pred = noise_uncond + GUIDANCE_SCALE * (noise_pred - noise_uncond)
604
  latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0]
605
+ step_secs = time.perf_counter() - step_start
606
+ print(
607
+ f"[diffusion] step {i + 1}/{NUM_INFERENCE_STEPS} "
608
+ f"(t={float(t):.1f}, {step_secs:.2f}s)",
609
+ flush=True,
610
+ )
611
  return latents
612
 
613
 
614
+ @spaces.GPU(duration=60)
615
  def generate_latents_setup_on_gpu(resized_image, prompt, seed, height, width):
616
  """Encode prompt+image, prepare initial latents and condition. NO denoising.
617
 
 
676
  return state
677
 
678
 
679
+ @spaces.GPU(duration=60)
680
  def generate_latents_chunk_on_gpu(state, end_step):
681
  """Run denoising steps from state['step_idx'] to end_step. Only transformer is moved to GPU."""
682
  log_cuda_mem(f"start latents chunk -> step {end_step}")