euiia committed · verified
Commit 82712d4 · Parent(s): 431a182

Update deformes4D_engine.py

Files changed (1):
  1. deformes4D_engine.py +67 -78
deformes4D_engine.py CHANGED
@@ -1,13 +1,5 @@
- # deformes4D_engine.py
  # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
- #
- # MODIFICATIONS FOR ADUC-SDR:
- # Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
- #
- # This file is part of the ADUC-SDR project. It contains the core logic for
- # video fragment generation, latent manipulation, and dynamic editing,
- # governed by the ADUC orchestrator.
- # This component is licensed under the GNU Affero General Public License v3.0.

  import os
  import time
@@ -21,7 +13,6 @@ import gradio as gr
  import subprocess
  import gc

- # Specialist imports, with the audio specialist removed
  from ltx_manager_helpers import ltx_manager_singleton
  from gemini_helpers import gemini_singleton
  from upscaler_specialist import upscaler_specialist_singleton
@@ -33,22 +24,17 @@ logger = logging.getLogger(__name__)

  @dataclass
  class LatentConditioningItem:
-     """Represents a conditioning anchor in latent space for the Camera (Ψ)."""
      latent_tensor: torch.Tensor
      media_frame_number: int
      conditioning_strength: float

  class Deformes4DEngine:
-     """
-     Implements the Camera (Ψ) and the Distiller (Δ) of the ADUC-SDR architecture.
-     Orchestrates generation, latent post-production, and final rendering of the video fragments.
-     """
      def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
          self.ltx_manager = ltx_manager
          self.workspace_dir = workspace_dir
          self._vae = None
          self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-         logger.info("Deformes4D specialist (ADUC-SDR executor: Camera Ψ and Distiller Δ) initialized.")

      @property
      def vae(self):
@@ -58,7 +44,6 @@ class Deformes4DEngine:
          return self._vae

      # --- HELPER METHODS ---
-
      @torch.no_grad()
      def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
          tensor = tensor.to(self.device, dtype=self.vae.dtype)
@@ -89,7 +74,7 @@
                          progress: gr.Progress = gr.Progress()):

          num_transitions_to_generate = len(keyframes) - 1
-         TOTAL_STEPS = num_transitions_to_generate + 3  # fragments + rendering + HD
          current_step = 0

          FPS = 24
@@ -100,9 +85,6 @@
          frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
          latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK

-         if total_frames_brutos // FRAMES_PER_LATENT_CHUNK <= latents_a_podar + 1:
-             raise gr.Error("The combination of duration and trimming is too aggressive.")
-
          DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
          DESTINATION_FRAME_TARGET = total_frames_brutos - 1
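To follow the trim arithmetic above, here is a minimal, self-contained sketch, assuming _quantize_to_multiple rounds down to the nearest multiple and FRAMES_PER_LATENT_CHUNK = 8 (both are defined in code elided from this diff, so these are illustrative assumptions only):

    # Hypothetical stand-in for the engine's _quantize_to_multiple helper,
    # assumed here to round down to the nearest multiple.
    def quantize_to_multiple(value: int, multiple: int) -> int:
        return (value // multiple) * multiple

    FRAMES_PER_LATENT_CHUNK = 8   # assumed value, not shown in this diff
    total_frames_brutos = 120     # illustrative raw frame count
    trim_percent = 20

    frames_a_podar = quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
    latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
    print(frames_a_podar, latents_a_podar)   # 24 3  -> 3 latent chunks are trimmed
    print(frames_a_podar - 1)                # 23    -> DEJAVU_FRAME_TARGET
    print(total_frames_brutos - 1)           # 119   -> DESTINATION_FRAME_TARGET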

@@ -112,14 +94,15 @@

          eco_latent_for_next_loop = None
          dejavu_latent_for_next_loop = None
-         processed_latent_fragments = []

-         # --- ACT I: LATENT GENERATION (FRAGMENT LOOP) ---
          for i in range(num_transitions_to_generate):
              fragment_index = i + 1
              current_step += 1
-             progress(current_step / TOTAL_STEPS, desc=f"Generating Fragment {fragment_index}/{num_transitions_to_generate}")

              past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
              start_keyframe_path = keyframe_paths[i]
              destination_keyframe_path = keyframe_paths[i + 1]
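The three-keyframe window used in the loop above (past, start, destination) can be traced with placeholder paths; for the first transition (i = 0) the past keyframe simply falls back to the start keyframe:

    keyframe_paths = ["kf_0.png", "kf_1.png", "kf_2.png", "kf_3.png"]   # placeholder paths

    for i in range(len(keyframe_paths) - 1):
        past = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
        start = keyframe_paths[i]
        destination = keyframe_paths[i + 1]
        print(i, past, start, destination)
    # 0 kf_0.png kf_0.png kf_1.png
    # 1 kf_0.png kf_1.png kf_2.png
    # 2 kf_1.png kf_2.png kf_3.png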
@@ -135,7 +118,8 @@
              downscaled_height = self._quantize_to_multiple(int(expected_height * downscale_factor), 8)
              downscaled_width = self._quantize_to_multiple(int(expected_width * downscale_factor), 8)
              target_resolution_tuple = (downscaled_height, downscaled_width)
-
              conditioning_items = []
              if eco_latent_for_next_loop is None:
                  img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
@@ -145,10 +129,11 @@
                  conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
              img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
              conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
-
              current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
              latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
-
              last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
              eco_latent_for_next_loop = last_trim[:, :, :ECO_LATENT_CHUNKS, :, :].clone()
              dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
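The tail slicing that produces the eco and déjà-vu anchors is easiest to see on a dummy latent of shape (B, C, T, H, W); latents_a_podar and ECO_LATENT_CHUNKS below are illustrative values, since the real ones come from code elided in this diff:

    import torch

    latents_brutos = torch.randn(1, 4, 16, 32, 32)   # dummy (B, C, T, H, W) latent
    latents_a_podar = 3                              # illustrative value
    ECO_LATENT_CHUNKS = 2                            # illustrative value

    last_trim = latents_brutos[:, :, -(latents_a_podar + 1):, :, :]   # last 4 temporal chunks
    eco = last_trim[:, :, :ECO_LATENT_CHUNKS, :, :]                   # first 2 chunks of the tail
    dejavu = last_trim[:, :, -1:, :, :]                               # the very last chunk
    print(last_trim.shape[2], eco.shape[2], dejavu.shape[2])          # 4 2 1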
@@ -158,72 +143,79 @@
              if transition_type == "cut":
                  eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None

-             upscaled_latents = self.upscale_latents(latents_video)
-             refined_latents = self.refine_latents(upscaled_latents, motion_prompt=f"refining scene: {motion_prompt}")
-             processed_latent_fragments.append(refined_latents)

-         # --- ACT II: PRIMARY RENDERING (WITH OOM FIX) ---
          base_name = f"movie_{int(time.time())}"
          current_step += 1
-         progress(current_step / TOTAL_STEPS, desc="Rendering video (in batches)...")
          refined_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_refined_silent.mp4")

          with imageio.get_writer(refined_silent_video_path, fps=FPS, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
-             for i, latent_fragment in enumerate(processed_latent_fragments):
-                 logger.info(f"Decoding fragment {i+1}/{len(processed_latent_fragments)} to pixels...")
-                 pixel_tensor_fragment = self.latents_to_pixels(latent_fragment)

-                 pixel_tensor_fragment = pixel_tensor_fragment.squeeze(0).permute(1, 2, 3, 0)
-                 pixel_tensor_fragment = (pixel_tensor_fragment.clamp(-1, 1) + 1) / 2.0
-                 video_np_fragment = (pixel_tensor_fragment.detach().cpu().float().numpy() * 255).astype(np.uint8)

-                 for frame in video_np_fragment:
                      writer.append_data(frame)

-                 del pixel_tensor_fragment, video_np_fragment
                  gc.collect()
                  torch.cuda.empty_cache()
-
-         logger.info(f"Base video successfully rendered at: {refined_silent_video_path}")
-         del processed_latent_fragments
-         gc.collect()
-         torch.cuda.empty_cache()

-         # --- ACT III: FINAL MASTERING (HD ONLY) ---
          current_step += 1
          progress(current_step / TOTAL_STEPS, desc="Final enhancement (HD)...")
-         hq_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_hq_silent.mp4")

          try:
              hd_specialist_singleton.process_video(
                  input_video_path=refined_silent_video_path,
-                 output_video_path=hq_silent_video_path,
                  prompt=global_prompt
              )
          except Exception as e:
              logger.error(f"HD enhancement failed: {e}. Using standard-quality video.")
-             os.rename(refined_silent_video_path, hq_silent_video_path)

-         current_step += 1
-         progress(current_step / TOTAL_STEPS, desc="Finalizing...")
-         final_video_path = os.path.join(self.workspace_dir, f"{base_name}_FINAL.mp4")
-         os.rename(hq_silent_video_path, final_video_path)
-
-         logger.info(f"Process complete! Final (silent) video saved at: {final_video_path}")
          yield {"final_path": final_video_path}

-     def refine_latents(self, latents: torch.Tensor,
-                        fps: int = 24,
-                        denoise_strength: float = 0.35,
-                        refine_steps: int = 12,
-                        motion_prompt: str = "refining video, improving details, cinematic quality") -> torch.Tensor:
-         """
-         Applies a refinement (denoise) pass to a latent tensor.
-         """
-         logger.info(f"Refining latent tensor with shape {latents.shape}.")
-
          _, _, num_latent_frames, latent_h, latent_w = latents.shape
-
          video_scale_factor = getattr(self.vae.config, 'temporal_scale_factor', 8)
          vae_scale_factor = getattr(self.vae.config, 'spatial_downscale_factor', 8)
@@ -231,23 +223,20 @@ class Deformes4DEngine:
          pixel_width = latent_w * vae_scale_factor
          pixel_frames = (num_latent_frames - 1) * video_scale_factor

-         refined_latents_tensor, _ = self.ltx_manager.refine_latents(
-             latents,
-             height=pixel_height,
-             width=pixel_width,
-             video_total_frames=pixel_frames,
-             video_fps=fps,
-             motion_prompt=motion_prompt,
-             current_fragment_index=int(time.time()),
-             denoise_strength=denoise_strength,
-             refine_steps=refine_steps
-         )

          logger.info(f"Returning refined latent tensor with shape: {refined_latents_tensor.shape}")
          return refined_latents_tensor

      def upscale_latents(self, latents: torch.Tensor) -> torch.Tensor:
-         """Interface to the UpscalerSpecialist."""
          logger.info(f"Upscaling latent tensor with shape {latents.shape}.")
          return upscaler_specialist_singleton.upscale(latents)
+ # deformes4D_engine.py (Experimental flow 2: concat -> global denoise -> batched upscale/render)
  # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos

  import os
  import time

  import subprocess
  import gc

  from ltx_manager_helpers import ltx_manager_singleton
  from gemini_helpers import gemini_singleton
  from upscaler_specialist import upscaler_specialist_singleton

  @dataclass
  class LatentConditioningItem:
      latent_tensor: torch.Tensor
      media_frame_number: int
      conditioning_strength: float
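For orientation, a LatentConditioningItem simply pins a latent tensor to a target media frame with a given strength; a minimal construction sketch (the tensor shape and values below are illustrative, not taken from the engine):

    import torch
    from dataclasses import dataclass

    @dataclass
    class LatentConditioningItem:
        latent_tensor: torch.Tensor
        media_frame_number: int
        conditioning_strength: float

    # Dummy start-keyframe latent anchored on media frame 0 at full strength.
    start_anchor = LatentConditioningItem(torch.randn(1, 4, 1, 32, 32), 0, 1.0)
    print(start_anchor.media_frame_number, start_anchor.conditioning_strength)   # 0 1.0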
  class Deformes4DEngine:
      def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
          self.ltx_manager = ltx_manager
          self.workspace_dir = workspace_dir
          self._vae = None
          self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+         logger.info("Deformes4D specialist (ADUC-SDR executor) initialized.")

      @property
      def vae(self):
          return self._vae

      # --- HELPER METHODS ---
      @torch.no_grad()
      def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
          tensor = tensor.to(self.device, dtype=self.vae.dtype)

                          progress: gr.Progress = gr.Progress()):

          num_transitions_to_generate = len(keyframes) - 1
+         TOTAL_STEPS = num_transitions_to_generate + 4
          current_step = 0

          FPS = 24

          frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
          latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK

          DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
          DESTINATION_FRAME_TARGET = total_frames_brutos - 1

          eco_latent_for_next_loop = None
          dejavu_latent_for_next_loop = None
+         raw_latent_fragments = []

+         # --- ACT I: RAW, SEQUENTIAL FRAGMENT GENERATION ---
          for i in range(num_transitions_to_generate):
              fragment_index = i + 1
              current_step += 1
+             progress(current_step / TOTAL_STEPS, desc=f"Generating raw fragment {fragment_index}/{num_transitions_to_generate}")

+             # ... (Gemini logic that produces the motion_prompt - unchanged) ...
              past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
              start_keyframe_path = keyframe_paths[i]
              destination_keyframe_path = keyframe_paths[i + 1]

              downscaled_height = self._quantize_to_multiple(int(expected_height * downscale_factor), 8)
              downscaled_width = self._quantize_to_multiple(int(expected_width * downscale_factor), 8)
              target_resolution_tuple = (downscaled_height, downscaled_width)
+
+             # ... (Conditioning logic - unchanged) ...
              conditioning_items = []
              if eco_latent_for_next_loop is None:
                  img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)

                  conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
              img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
              conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
+
              current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
              latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
+
+             # ... (Trimming and Eco/Déjà-Vu extraction logic - unchanged) ...
              last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
              eco_latent_for_next_loop = last_trim[:, :, :ECO_LATENT_CHUNKS, :, :].clone()
              dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()

              if transition_type == "cut":
                  eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None

+             raw_latent_fragments.append(latents_video)
+
+         # --- ACT II: GLOBAL LATENT POST-PRODUCTION ---
+         current_step += 1
+         progress(current_step / TOTAL_STEPS, desc="Causal unification (concatenation)...")
+         concatenated_raw_latents = torch.cat(raw_latent_fragments, dim=2)
+         del raw_latent_fragments; gc.collect(); torch.cuda.empty_cache()
+         logger.info(f"Raw latents unified. Shape: {concatenated_raw_latents.shape}")
+
+         current_step += 1
+         progress(current_step / TOTAL_STEPS, desc="Global polish (denoise)...")
+         # [CRITICAL CHANGE] An empty prompt and guidance_scale=1.0 give an unconditional refinement pass.
+         denoised_latents = self.refine_latents(
+             concatenated_raw_latents,
+             motion_prompt="",
+             guidance_scale=1.0
+         )
+         del concatenated_raw_latents; gc.collect(); torch.cuda.empty_cache()
+         logger.info(f"Global polish applied. Shape: {denoised_latents.shape}")
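The unification step concatenates the per-fragment latents along the temporal axis (dim=2 of a (B, C, T, H, W) tensor) so the denoise pass sees one continuous sequence; a minimal sketch with dummy shapes:

    import torch

    # Three dummy latent fragments, (B, C, T, H, W); only T differs per fragment.
    fragments = [torch.randn(1, 4, t, 32, 32) for t in (6, 6, 8)]

    concatenated = torch.cat(fragments, dim=2)   # dim=2 is the temporal axis
    print(concatenated.shape)                    # torch.Size([1, 4, 20, 32, 32])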

+         # --- ACT III: BATCHED UPSCALE AND RENDERING ---
          base_name = f"movie_{int(time.time())}"
          current_step += 1
+         progress(current_step / TOTAL_STEPS, desc="Final rendering (in batches)...")
          refined_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_refined_silent.mp4")

          with imageio.get_writer(refined_silent_video_path, fps=FPS, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
+             chunk_size = 7  # batch size that is safe for VRAM
+             latent_chunks = torch.split(denoised_latents, chunk_size, dim=2)
+
+             for i, latent_chunk in enumerate(latent_chunks):
+                 logger.info(f"Processing and rendering batch {i+1}/{len(latent_chunks)}...")
+
+                 # Per-batch upscale and decode
+                 upscaled_chunk = self.upscale_latents(latent_chunk)
+                 pixel_tensor_chunk = self.latents_to_pixels(upscaled_chunk)

+                 # Convert and write the frames
+                 pixel_tensor_chunk = pixel_tensor_chunk.squeeze(0).permute(1, 2, 3, 0)
+                 pixel_tensor_chunk = (pixel_tensor_chunk.clamp(-1, 1) + 1) / 2.0
+                 video_np_chunk = (pixel_tensor_chunk.detach().cpu().float().numpy() * 255).astype(np.uint8)

+                 for frame in video_np_chunk:
                      writer.append_data(frame)

+                 del latent_chunk, upscaled_chunk, pixel_tensor_chunk, video_np_chunk
                  gc.collect()
                  torch.cuda.empty_cache()

+         del denoised_latents; gc.collect(); torch.cuda.empty_cache()
+         logger.info(f"High-quality (silent) video rendered at: {refined_silent_video_path}")
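On the chunking above: torch.split yields chunks of at most chunk_size latent frames along dim=2, with a shorter final chunk when the temporal length is not an exact multiple; a minimal sketch:

    import torch

    denoised = torch.randn(1, 4, 20, 32, 32)   # dummy (B, C, T, H, W) latent
    chunks = torch.split(denoised, 7, dim=2)
    print([c.shape[2] for c in chunks])        # [7, 7, 6]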
+
+         # --- ACT IV: FINAL HD ENHANCEMENT ---
          current_step += 1
          progress(current_step / TOTAL_STEPS, desc="Final enhancement (HD)...")
+         final_video_path = os.path.join(self.workspace_dir, f"{base_name}_FINAL.mp4")

          try:
              hd_specialist_singleton.process_video(
                  input_video_path=refined_silent_video_path,
+                 output_video_path=final_video_path,
                  prompt=global_prompt
              )
          except Exception as e:
              logger.error(f"HD enhancement failed: {e}. Using standard-quality video.")
+             os.rename(refined_silent_video_path, final_video_path)

+         logger.info(f"Process complete! Final video saved at: {final_video_path}")
          yield {"final_path": final_video_path}
+     def refine_latents(self, latents: torch.Tensor, fps: int = 24, denoise_strength: float = 0.35, refine_steps: int = 12, motion_prompt: str = "...", **kwargs) -> torch.Tensor:
+         logger.info(f"Refining latent tensor with shape {latents.shape}.")
          _, _, num_latent_frames, latent_h, latent_w = latents.shape
          video_scale_factor = getattr(self.vae.config, 'temporal_scale_factor', 8)
          vae_scale_factor = getattr(self.vae.config, 'spatial_downscale_factor', 8)

          pixel_width = latent_w * vae_scale_factor
          pixel_frames = (num_latent_frames - 1) * video_scale_factor

+         # [CHANGE] Allows guidance_scale to be passed through as an argument
+         final_ltx_params = {
+             "height": pixel_height, "width": pixel_width, "video_total_frames": pixel_frames,
+             "video_fps": fps, "motion_prompt": motion_prompt, "current_fragment_index": int(time.time()),
+             "denoise_strength": denoise_strength, "refine_steps": refine_steps,
+             "guidance_scale": kwargs.get('guidance_scale', 2.0)  # uses the passed value or the default
+         }
+
+         refined_latents_tensor, _ = self.ltx_manager.refine_latents(latents, **final_ltx_params)

          logger.info(f"Returning refined latent tensor with shape: {refined_latents_tensor.shape}")
          return refined_latents_tensor
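A worked example of the dimension math in refine_latents, assuming the default scale factors of 8 (the real values come from the VAE config): a latent with 9 temporal frames over a 60x104 grid maps to 480x832 pixels and (9 - 1) * 8 = 64 pixel frames.

    num_latent_frames, latent_h, latent_w = 9, 60, 104   # illustrative latent dimensions
    video_scale_factor = 8                                # assumed temporal_scale_factor
    vae_scale_factor = 8                                  # assumed spatial_downscale_factor

    pixel_height = latent_h * vae_scale_factor                      # 480
    pixel_width = latent_w * vae_scale_factor                       # 832
    pixel_frames = (num_latent_frames - 1) * video_scale_factor     # 64
    print(pixel_height, pixel_width, pixel_frames)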

      def upscale_latents(self, latents: torch.Tensor) -> torch.Tensor:
          logger.info(f"Upscaling latent tensor with shape {latents.shape}.")
          return upscaler_specialist_singleton.upscale(latents)