Test

Paused

App Files Community

eeuuia committed on Oct 12

Commit

a18d7a7

verified ·

1 Parent(s): da423da

Update LTX-Video/ltx_video/pipelines/pipeline_ltx_video.py

Browse files

Files changed (1) hide show

LTX-Video/ltx_video/pipelines/pipeline_ltx_video.py +23 -63

LTX-Video/ltx_video/pipelines/pipeline_ltx_video.py CHANGED Viewed

@@ -107,11 +107,6 @@ class SpyLatent:
                                                   necessária se o tensor de entrada for 3D.
             save_visual (bool): Se True, decodifica com o VAE e salva uma imagem.
         """
-        #print(f"\n--- [INSPEÇÃO DE LATENTE: {tag}] ---")
-        #if not isinstance(tensor, torch.Tensor):
-        #    print(f"  AVISO: O objeto fornecido para '{tag}' não é um tensor.")
-        #    print("--- [FIM DA INSPEÇÃO] ---\n")
-        #    return
         try:
             # --- Imprime Estatísticas do Tensor Original ---
@@ -120,7 +115,7 @@ class SpyLatent:
             # --- Converte para 5D se necessário ---
             tensor_5d = self._to_5d(tensor, reference_shape_5d)
             if tensor_5d is not None and tensor.ndim == 3:
-                self._print_stats("Convertido para 5D", tensor_5d)
             # --- Visualização com VAE ---
             if save_visual and self.vae is not None and tensor_5d is not None:
@@ -129,7 +124,7 @@ class SpyLatent:
                 frame_idx_to_viz = min(1, tensor_5d.shape[2] - 1)
                 if frame_idx_to_viz < 0:
-                    print("  VISUALIZAÇÃO (VAE): Tensor não tem frames para visualizar.")
                 else:
                     #print(f"  VISUALIZAÇÃO (VAE): Usando frame de índice {frame_idx_to_viz}.")
                     latent_slice = tensor_5d[:, :, frame_idx_to_viz:frame_idx_to_viz+1, :, :]
@@ -138,7 +133,7 @@ class SpyLatent:
                         pixel_slice = self.vae.decode(latent_slice / self.vae.config.scaling_factor).sample
                     save_image((pixel_slice / 2 + 0.5).clamp(0, 1), os.path.join(self.output_dir, f"inspect_{tag.lower()}.png"))
-                    print("  VISUALIZAÇÃO (VAE): Imagem salva.")
         except Exception as e:
             #print(f"  ERRO na inspeção: {e}")
@@ -163,7 +158,7 @@ class SpyLatent:
         std = tensor.std().item()
         min_val = tensor.min().item()
         max_val = tensor.max().item()
-        print(f"  {prefix}: {tensor.shape}")
@@ -1086,11 +1081,7 @@ class LTXVideoPipeline(DiffusionPipeline):
             **retrieve_timesteps_kwargs,
         )
-        try:
-            print(f"[LTX2]LATENTS {latents.shape}")
-        except Exception:
-            pass
         if self.allowed_inference_steps is not None:
             for timestep in [round(x, 4) for x in timesteps.tolist()]:
                 assert (
@@ -1159,11 +1150,7 @@ class LTXVideoPipeline(DiffusionPipeline):
                 max_new_tokens=text_encoder_max_tokens,
             )
-        try:
-            print(f"[LTX3]LATENTS {latents.shape}")
-        except Exception:
-            pass
         # 3. Encode input prompt
         if self.text_encoder is not None:
             self.text_encoder = self.text_encoder.to(self._execution_device)
@@ -1230,7 +1217,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         )
         try:
-            print(f"[LTX4]LATENTS {latents.shape}")
             original_shape = latents
         except Exception:
             pass
@@ -1252,20 +1239,11 @@ class LTXVideoPipeline(DiffusionPipeline):
         init_latents = latents.clone()  # Used for image_cond_noise_update
         try:
-            print(f"[LTXCond]conditioning_mask {conditioning_mask.shape}")
-        except Exception:
-            pass
-        try:
-            print(f"[LTXCond]pixel_coords {pixel_coords.shape}")
-        except Exception:
-            pass
-        try:
-            print(f"[LTXCond]pixel_coords {pixel_coords.shape}")
         except Exception:
             pass
@@ -1273,10 +1251,6 @@ class LTXVideoPipeline(DiffusionPipeline):
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
-        try:
-            print(f"[LTX5]LATENTS {latents.shape}")
-        except Exception:
-            pass
         # 7. Denoising loop
         num_warmup_steps = max(
@@ -1337,11 +1311,7 @@ class LTXVideoPipeline(DiffusionPipeline):
                         generator,
                     )
-                try:
-                   print(f"[LTX6]LATENTS {latents.shape}")
-                   self.spy.inspect(latents, "LTX6_After_Patchify", reference_shape_5d=original_shape)
-                except Exception:
-                   pass
@@ -1352,11 +1322,7 @@ class LTXVideoPipeline(DiffusionPipeline):
                     latent_model_input, t
                 )
-                try:
-                   print(f"[LTX7]LATENTS {latent_model_input.shape}")
-                   self.spy.inspect(latents, "LTX7_After_Patchify", reference_shape_5d=original_shape)
-                except Exception:
-                   pass
                 current_timestep = t
                 if not torch.is_tensor(current_timestep):
@@ -1473,12 +1439,7 @@ class LTXVideoPipeline(DiffusionPipeline):
                     stochastic_sampling=stochastic_sampling,
                 )
-                try:
-                   print(f"[LTX8]LATENTS {latents.shape}")
-                   self.spy.inspect(latents, "LTX8_After_Patchify", reference_shape_5d=original_shape)
-                except Exception:
-                   pass
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or (
                     (i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
@@ -1490,12 +1451,7 @@ class LTXVideoPipeline(DiffusionPipeline):
-        try:
-            print(f"[LTX9]LATENTS {latents.shape}")
-            self.spy.inspect(latents, "LTX9_After_Patchify", reference_shape_5d=original_shape)
-        except Exception:
-            pass
         if offload_to_cpu:
@@ -1507,11 +1463,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         latents = latents[:, num_cond_latents:]
-        try:
-             print(f"[LTX10]LATENTS {latents.shape}")
-             self.spy.inspect(latents, "LTX10_After_Patchify", reference_shape_5d=original_shape)
-        except Exception:
-            pass
         latents = self.patchifier.unpatchify(
             latents=latents,
@@ -1520,6 +1472,14 @@ class LTXVideoPipeline(DiffusionPipeline):
             out_channels=self.transformer.in_channels
             // math.prod(self.patchifier.patch_size),
         )
         if output_type != "latent":
             if self.vae.decoder.timestep_conditioning:
                 noise = torch.randn_like(latents)
@@ -1549,7 +1509,7 @@ class LTXVideoPipeline(DiffusionPipeline):
             )
             try:
-                 print(f"[LTX11]LATENTS {latents.shape}")
             except Exception:
                  pass

                                                   necessária se o tensor de entrada for 3D.
             save_visual (bool): Se True, decodifica com o VAE e salva uma imagem.
         """
         try:
             # --- Imprime Estatísticas do Tensor Original ---
             # --- Converte para 5D se necessário ---
             tensor_5d = self._to_5d(tensor, reference_shape_5d)
             if tensor_5d is not None and tensor.ndim == 3:
+                #self._print_stats("Convertido para 5D", tensor_5d)
             # --- Visualização com VAE ---
             if save_visual and self.vae is not None and tensor_5d is not None:
                 frame_idx_to_viz = min(1, tensor_5d.shape[2] - 1)
                 if frame_idx_to_viz < 0:
+                    #print("  VISUALIZAÇÃO (VAE): Tensor não tem frames para visualizar.")
                 else:
                     #print(f"  VISUALIZAÇÃO (VAE): Usando frame de índice {frame_idx_to_viz}.")
                     latent_slice = tensor_5d[:, :, frame_idx_to_viz:frame_idx_to_viz+1, :, :]
                         pixel_slice = self.vae.decode(latent_slice / self.vae.config.scaling_factor).sample
                     save_image((pixel_slice / 2 + 0.5).clamp(0, 1), os.path.join(self.output_dir, f"inspect_{tag.lower()}.png"))
+                    #print("  VISUALIZAÇÃO (VAE): Imagem salva.")
         except Exception as e:
             #print(f"  ERRO na inspeção: {e}")
         std = tensor.std().item()
         min_val = tensor.min().item()
         max_val = tensor.max().item()
+        print(f"{tensor.shape}")
             **retrieve_timesteps_kwargs,
         )
         if self.allowed_inference_steps is not None:
             for timestep in [round(x, 4) for x in timesteps.tolist()]:
                 assert (
                 max_new_tokens=text_encoder_max_tokens,
             )
         # 3. Encode input prompt
         if self.text_encoder is not None:
             self.text_encoder = self.text_encoder.to(self._execution_device)
         )
         try:
+            print(f"[LTX]RUIDO-LATENTS-INICIAL {latents.shape}")
             original_shape = latents
         except Exception:
             pass
         init_latents = latents.clone()  # Used for image_cond_noise_update
         try:
+            print(f"[LTXCond]conditioning_items {conditioning_items.shape}")
         except Exception:
             pass
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
         # 7. Denoising loop
         num_warmup_steps = max(
                         generator,
                     )
                     latent_model_input, t
                 )
                 current_timestep = t
                 if not torch.is_tensor(current_timestep):
                     stochastic_sampling=stochastic_sampling,
                 )
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or (
                     (i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
         if offload_to_cpu:
         latents = latents[:, num_cond_latents:]
         latents = self.patchifier.unpatchify(
             latents=latents,
             out_channels=self.transformer.in_channels
             // math.prod(self.patchifier.patch_size),
         )
+        try:
+             print(f"[LTX10]LATENTS Fim{latents.shape}")
+             #self.spy.inspect(latents, "LTX_After_Patchify", reference_shape_5d=original_shape)
+        except Exception:
+            pass
         if output_type != "latent":
             if self.vae.decoder.timestep_conditioning:
                 noise = torch.randn_like(latents)
             )
             try:
+                 print(f"[LTX11]LATENTS_pix_fim{latents.shape}")
             except Exception:
                  pass