euiia committed · verified
Commit 82712d4 · Parent(s): 431a182

Update deformes4D_engine.py

Files changed (1):
  1. deformes4D_engine.py +67 -78
deformes4D_engine.py CHANGED
@@ -1,13 +1,5 @@
- # deformes4D_engine.py
  # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
- #
- # MODIFICATIONS FOR ADUC-SDR:
- # Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
- #
- # This file is part of the ADUC-SDR project. It contains the core logic for
- # video fragment generation, latent manipulation, and dynamic editing,
- # governed by the ADUC orchestrator.
- # This component is licensed under the GNU Affero General Public License v3.0.

  import os
  import time
@@ -21,7 +13,6 @@ import gradio as gr
  import subprocess
  import gc

- # Specialist imports, with the audio specialist removed
  from ltx_manager_helpers import ltx_manager_singleton
  from gemini_helpers import gemini_singleton
  from upscaler_specialist import upscaler_specialist_singleton
@@ -33,22 +24,17 @@ logger = logging.getLogger(__name__)

  @dataclass
  class LatentConditioningItem:
-     """Represents a conditioning anchor in latent space for the Camera (Ψ)."""
      latent_tensor: torch.Tensor
      media_frame_number: int
      conditioning_strength: float

  class Deformes4DEngine:
-     """
-     Implements the Camera (Ψ) and the Distiller (Δ) of the ADUC-SDR architecture.
-     Orchestrates generation, latent post-production, and final rendering of the video fragments.
-     """
      def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
          self.ltx_manager = ltx_manager
          self.workspace_dir = workspace_dir
          self._vae = None
          self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-         logger.info("Deformes4D specialist (ADUC-SDR executor: Camera Ψ and Distiller Δ) initialized.")

      @property
      def vae(self):
@@ -58,7 +44,6 @@ class Deformes4DEngine:
          return self._vae

      # --- HELPER METHODS ---
-
      @torch.no_grad()
      def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
          tensor = tensor.to(self.device, dtype=self.vae.dtype)
@@ -89,7 +74,7 @@
                          progress: gr.Progress = gr.Progress()):

          num_transitions_to_generate = len(keyframes) - 1
-         TOTAL_STEPS = num_transitions_to_generate + 3  # fragments + rendering + HD
          current_step = 0

          FPS = 24
@@ -100,9 +85,6 @@
          frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
          latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK

-         if total_frames_brutos // FRAMES_PER_LATENT_CHUNK <= latents_a_podar + 1:
-             raise gr.Error("The combination of duration and trimming is too aggressive.")
-
          DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
          DESTINATION_FRAME_TARGET = total_frames_brutos - 1
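To follow the trim arithmetic above, here is a minimal, self-contained sketch, assuming _quantize_to_multiple rounds down to the nearest multiple and FRAMES_PER_LATENT_CHUNK = 8 (both are defined in code elided from this diff, so these are illustrative assumptions only):

    # Hypothetical stand-in for the engine's _quantize_to_multiple helper,
    # assumed here to round down to the nearest multiple.
    def quantize_to_multiple(value: int, multiple: int) -> int:
        return (value // multiple) * multiple

    FRAMES_PER_LATENT_CHUNK = 8   # assumed value, not shown in this diff
    total_frames_brutos = 120     # illustrative raw frame count
    trim_percent = 20

    frames_a_podar = quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
    latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
    print(frames_a_podar, latents_a_podar)   # 24 3  -> 3 latent chunks are trimmed
    print(frames_a_podar - 1)                # 23    -> DEJAVU_FRAME_TARGET
    print(total_frames_brutos - 1)           # 119   -> DESTINATION_FRAME_TARGET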

@@ -112,14 +94,15 @@

          eco_latent_for_next_loop = None
          dejavu_latent_for_next_loop = None
-         processed_latent_fragments = []

-         # --- ACT I: LATENT GENERATION (FRAGMENT LOOP) ---
          for i in range(num_transitions_to_generate):
              fragment_index = i + 1
              current_step += 1
-             progress(current_step / TOTAL_STEPS, desc=f"Generating Fragment {fragment_index}/{num_transitions_to_generate}")

              past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
              start_keyframe_path = keyframe_paths[i]
              destination_keyframe_path = keyframe_paths[i + 1]
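The three-keyframe window used in the loop above (past, start, destination) can be traced with placeholder paths; for the first transition (i = 0) the past keyframe simply falls back to the start keyframe:

    keyframe_paths = ["kf_0.png", "kf_1.png", "kf_2.png", "kf_3.png"]   # placeholder paths

    for i in range(len(keyframe_paths) - 1):
        past = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
        start = keyframe_paths[i]
        destination = keyframe_paths[i + 1]
        print(i, past, start, destination)
    # 0 kf_0.png kf_0.png kf_1.png
    # 1 kf_0.png kf_1.png kf_2.png
    # 2 kf_1.png kf_2.png kf_3.png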
@@ -135,7 +118,8 @@
              downscaled_height = self._quantize_to_multiple(int(expected_height * downscale_factor), 8)
              downscaled_width = self._quantize_to_multiple(int(expected_width * downscale_factor), 8)
              target_resolution_tuple = (downscaled_height, downscaled_width)
-
              conditioning_items = []
              if eco_latent_for_next_loop is None:
                  img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
@@ -145,10 +129,11 @@
                  conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
              img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
              conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
-
              current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
              latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
-
              last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
              eco_latent_for_next_loop = last_trim[:, :, :ECO_LATENT_CHUNKS, :, :].clone()
              dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
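The tail slicing that produces the eco and déjà-vu anchors is easiest to see on a dummy latent of shape (B, C, T, H, W); latents_a_podar and ECO_LATENT_CHUNKS below are illustrative values, since the real ones come from code elided in this diff:

    import torch

    latents_brutos = torch.randn(1, 4, 16, 32, 32)   # dummy (B, C, T, H, W) latent
    latents_a_podar = 3                              # illustrative value
    ECO_LATENT_CHUNKS = 2                            # illustrative value

    last_trim = latents_brutos[:, :, -(latents_a_podar + 1):, :, :]   # last 4 temporal chunks
    eco = last_trim[:, :, :ECO_LATENT_CHUNKS, :, :]                   # first 2 chunks of the tail
    dejavu = last_trim[:, :, -1:, :, :]                               # the very last chunk
    print(last_trim.shape[2], eco.shape[2], dejavu.shape[2])          # 4 2 1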
@@ -158,72 +143,79 @@
              if transition_type == "cut":
                  eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None

-             upscaled_latents = self.upscale_latents(latents_video)
-             refined_latents = self.refine_latents(upscaled_latents, motion_prompt=f"refining scene: {motion_prompt}")
-             processed_latent_fragments.append(refined_latents)

-         # --- ACT II: PRIMARY RENDERING (WITH OOM FIX) ---
          base_name = f"movie_{int(time.time())}"
          current_step += 1
-         progress(current_step / TOTAL_STEPS, desc="Rendering video (in batches)...")
          refined_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_refined_silent.mp4")

          with imageio.get_writer(refined_silent_video_path, fps=FPS, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
-             for i, latent_fragment in enumerate(processed_latent_fragments):
-                 logger.info(f"Decoding fragment {i+1}/{len(processed_latent_fragments)} to pixels...")
-                 pixel_tensor_fragment = self.latents_to_pixels(latent_fragment)

-                 pixel_tensor_fragment = pixel_tensor_fragment.squeeze(0).permute(1, 2, 3, 0)
-                 pixel_tensor_fragment = (pixel_tensor_fragment.clamp(-1, 1) + 1) / 2.0
-                 video_np_fragment = (pixel_tensor_fragment.detach().cpu().float().numpy() * 255).astype(np.uint8)

-                 for frame in video_np_fragment:
                      writer.append_data(frame)

-                 del pixel_tensor_fragment, video_np_fragment
                  gc.collect()
                  torch.cuda.empty_cache()
-
-         logger.info(f"Base video successfully rendered at: {refined_silent_video_path}")
-         del processed_latent_fragments
-         gc.collect()
-         torch.cuda.empty_cache()

-         # --- ACT III: FINAL MASTERING (HD ONLY) ---
          current_step += 1
          progress(current_step / TOTAL_STEPS, desc="Final enhancement (HD)...")
-         hq_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_hq_silent.mp4")

          try:
              hd_specialist_singleton.process_video(
                  input_video_path=refined_silent_video_path,
-                 output_video_path=hq_silent_video_path,
                  prompt=global_prompt
              )
          except Exception as e:
              logger.error(f"HD enhancement failed: {e}. Using standard-quality video.")
-             os.rename(refined_silent_video_path, hq_silent_video_path)

-         current_step += 1
-         progress(current_step / TOTAL_STEPS, desc="Finalizing...")
-         final_video_path = os.path.join(self.workspace_dir, f"{base_name}_FINAL.mp4")
-         os.rename(hq_silent_video_path, final_video_path)
-
-         logger.info(f"Process complete! Final (silent) video saved at: {final_video_path}")
          yield {"final_path": final_video_path}

-     def refine_latents(self, latents: torch.Tensor,
-                        fps: int = 24,
-                        denoise_strength: float = 0.35,
-                        refine_steps: int = 12,
-                        motion_prompt: str = "refining video, improving details, cinematic quality") -> torch.Tensor:
-         """
-         Applies a refinement (denoise) pass to a latent tensor.
-         """
-         logger.info(f"Refining latent tensor with shape {latents.shape}.")
-
          _, _, num_latent_frames, latent_h, latent_w = latents.shape
-
          video_scale_factor = getattr(self.vae.config, 'temporal_scale_factor', 8)
          vae_scale_factor = getattr(self.vae.config, 'spatial_downscale_factor', 8)
@@ -231,23 +223,20 @@ class Deformes4DEngine:
          pixel_width = latent_w * vae_scale_factor
          pixel_frames = (num_latent_frames - 1) * video_scale_factor

-         refined_latents_tensor, _ = self.ltx_manager.refine_latents(
-             latents,
-             height=pixel_height,
-             width=pixel_width,
-             video_total_frames=pixel_frames,
-             video_fps=fps,
-             motion_prompt=motion_prompt,
-             current_fragment_index=int(time.time()),
-             denoise_strength=denoise_strength,
-             refine_steps=refine_steps
-         )

          logger.info(f"Returning refined latent tensor with shape: {refined_latents_tensor.shape}")
          return refined_latents_tensor

      def upscale_latents(self, latents: torch.Tensor) -> torch.Tensor:
-         """Interface to the UpscalerSpecialist."""
          logger.info(f"Upscaling latent tensor with shape {latents.shape}.")
          return upscaler_specialist_singleton.upscale(latents)
+ # deformes4D_engine.py (Experimental flow 2: concat -> global denoise -> batched upscale/render)
  # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos

  import os
  import time

  import subprocess
  import gc

  from ltx_manager_helpers import ltx_manager_singleton
  from gemini_helpers import gemini_singleton
  from upscaler_specialist import upscaler_specialist_singleton

  @dataclass
  class LatentConditioningItem:
      latent_tensor: torch.Tensor
      media_frame_number: int
      conditioning_strength: float
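For orientation, a LatentConditioningItem simply pins a latent tensor to a target media frame with a given strength; a minimal construction sketch (the tensor shape and values below are illustrative, not taken from the engine):

    import torch
    from dataclasses import dataclass

    @dataclass
    class LatentConditioningItem:
        latent_tensor: torch.Tensor
        media_frame_number: int
        conditioning_strength: float

    # Dummy start-keyframe latent anchored on media frame 0 at full strength.
    start_anchor = LatentConditioningItem(torch.randn(1, 4, 1, 32, 32), 0, 1.0)
    print(start_anchor.media_frame_number, start_anchor.conditioning_strength)   # 0 1.0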
  class Deformes4DEngine:
      def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
          self.ltx_manager = ltx_manager
          self.workspace_dir = workspace_dir
          self._vae = None
          self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+         logger.info("Deformes4D specialist (ADUC-SDR executor) initialized.")

      @property
      def vae(self):
          return self._vae

      # --- HELPER METHODS ---
      @torch.no_grad()
      def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
          tensor = tensor.to(self.device, dtype=self.vae.dtype)

                          progress: gr.Progress = gr.Progress()):

          num_transitions_to_generate = len(keyframes) - 1
+         TOTAL_STEPS = num_transitions_to_generate + 4
          current_step = 0

          FPS = 24

          frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
          latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK

          DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
          DESTINATION_FRAME_TARGET = total_frames_brutos - 1

          eco_latent_for_next_loop = None
          dejavu_latent_for_next_loop = None
+         raw_latent_fragments = []

+         # --- ACT I: RAW, SEQUENTIAL FRAGMENT GENERATION ---
          for i in range(num_transitions_to_generate):
              fragment_index = i + 1
              current_step += 1
+             progress(current_step / TOTAL_STEPS, desc=f"Generating raw fragment {fragment_index}/{num_transitions_to_generate}")

+             # ... (Gemini logic that produces the motion_prompt - unchanged) ...
              past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
              start_keyframe_path = keyframe_paths[i]
              destination_keyframe_path = keyframe_paths[i + 1]

              downscaled_height = self._quantize_to_multiple(int(expected_height * downscale_factor), 8)
              downscaled_width = self._quantize_to_multiple(int(expected_width * downscale_factor), 8)
              target_resolution_tuple = (downscaled_height, downscaled_width)
+
+             # ... (Conditioning logic - unchanged) ...
              conditioning_items = []
              if eco_latent_for_next_loop is None:
                  img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)

                  conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
              img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
              conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
+
              current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
              latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
+
+             # ... (Trimming and Eco/Déjà-Vu extraction logic - unchanged) ...
              last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
              eco_latent_for_next_loop = last_trim[:, :, :ECO_LATENT_CHUNKS, :, :].clone()
              dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()

              if transition_type == "cut":
                  eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None

+             raw_latent_fragments.append(latents_video)
+
+         # --- ACT II: GLOBAL LATENT POST-PRODUCTION ---
+         current_step += 1
+         progress(current_step / TOTAL_STEPS, desc="Causal unification (concatenation)...")
+         concatenated_raw_latents = torch.cat(raw_latent_fragments, dim=2)
+         del raw_latent_fragments; gc.collect(); torch.cuda.empty_cache()
+         logger.info(f"Raw latents unified. Shape: {concatenated_raw_latents.shape}")
+
+         current_step += 1
+         progress(current_step / TOTAL_STEPS, desc="Global polish (denoise)...")
+         # [CRITICAL CHANGE] An empty prompt and guidance_scale=1.0 give an unconditional refinement pass.
+         denoised_latents = self.refine_latents(
+             concatenated_raw_latents,
+             motion_prompt="",
+             guidance_scale=1.0
+         )
+         del concatenated_raw_latents; gc.collect(); torch.cuda.empty_cache()
+         logger.info(f"Global polish applied. Shape: {denoised_latents.shape}")
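The unification step concatenates the per-fragment latents along the temporal axis (dim=2 of a (B, C, T, H, W) tensor) so the denoise pass sees one continuous sequence; a minimal sketch with dummy shapes:

    import torch

    # Three dummy latent fragments, (B, C, T, H, W); only T differs per fragment.
    fragments = [torch.randn(1, 4, t, 32, 32) for t in (6, 6, 8)]

    concatenated = torch.cat(fragments, dim=2)   # dim=2 is the temporal axis
    print(concatenated.shape)                    # torch.Size([1, 4, 20, 32, 32])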

+         # --- ACT III: BATCHED UPSCALE AND RENDERING ---
          base_name = f"movie_{int(time.time())}"
          current_step += 1
+         progress(current_step / TOTAL_STEPS, desc="Final rendering (in batches)...")
          refined_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_refined_silent.mp4")

          with imageio.get_writer(refined_silent_video_path, fps=FPS, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
+             chunk_size = 7  # batch size that is safe for VRAM
+             latent_chunks = torch.split(denoised_latents, chunk_size, dim=2)
+
+             for i, latent_chunk in enumerate(latent_chunks):
+                 logger.info(f"Processing and rendering batch {i+1}/{len(latent_chunks)}...")
+
+                 # Per-batch upscale and decode
+                 upscaled_chunk = self.upscale_latents(latent_chunk)
+                 pixel_tensor_chunk = self.latents_to_pixels(upscaled_chunk)

+                 # Convert and write the frames
+                 pixel_tensor_chunk = pixel_tensor_chunk.squeeze(0).permute(1, 2, 3, 0)
+                 pixel_tensor_chunk = (pixel_tensor_chunk.clamp(-1, 1) + 1) / 2.0
+                 video_np_chunk = (pixel_tensor_chunk.detach().cpu().float().numpy() * 255).astype(np.uint8)

+                 for frame in video_np_chunk:
                      writer.append_data(frame)

+                 del latent_chunk, upscaled_chunk, pixel_tensor_chunk, video_np_chunk
                  gc.collect()
                  torch.cuda.empty_cache()

+         del denoised_latents; gc.collect(); torch.cuda.empty_cache()
+         logger.info(f"High-quality (silent) video rendered at: {refined_silent_video_path}")
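On the chunking above: torch.split yields chunks of at most chunk_size latent frames along dim=2, with a shorter final chunk when the temporal length is not an exact multiple; a minimal sketch:

    import torch

    denoised = torch.randn(1, 4, 20, 32, 32)   # dummy (B, C, T, H, W) latent
    chunks = torch.split(denoised, 7, dim=2)
    print([c.shape[2] for c in chunks])        # [7, 7, 6]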
+
+         # --- ACT IV: FINAL HD ENHANCEMENT ---
          current_step += 1
          progress(current_step / TOTAL_STEPS, desc="Final enhancement (HD)...")
+         final_video_path = os.path.join(self.workspace_dir, f"{base_name}_FINAL.mp4")

          try:
              hd_specialist_singleton.process_video(
                  input_video_path=refined_silent_video_path,
+                 output_video_path=final_video_path,
                  prompt=global_prompt
              )
          except Exception as e:
              logger.error(f"HD enhancement failed: {e}. Using standard-quality video.")
+             os.rename(refined_silent_video_path, final_video_path)

+         logger.info(f"Process complete! Final video saved at: {final_video_path}")
          yield {"final_path": final_video_path}
+     def refine_latents(self, latents: torch.Tensor, fps: int = 24, denoise_strength: float = 0.35, refine_steps: int = 12, motion_prompt: str = "...", **kwargs) -> torch.Tensor:
+         logger.info(f"Refining latent tensor with shape {latents.shape}.")
          _, _, num_latent_frames, latent_h, latent_w = latents.shape
          video_scale_factor = getattr(self.vae.config, 'temporal_scale_factor', 8)
          vae_scale_factor = getattr(self.vae.config, 'spatial_downscale_factor', 8)

          pixel_width = latent_w * vae_scale_factor
          pixel_frames = (num_latent_frames - 1) * video_scale_factor

+         # [CHANGE] Allows guidance_scale to be passed through as an argument
+         final_ltx_params = {
+             "height": pixel_height, "width": pixel_width, "video_total_frames": pixel_frames,
+             "video_fps": fps, "motion_prompt": motion_prompt, "current_fragment_index": int(time.time()),
+             "denoise_strength": denoise_strength, "refine_steps": refine_steps,
+             "guidance_scale": kwargs.get('guidance_scale', 2.0)  # uses the passed value or the default
+         }
+
+         refined_latents_tensor, _ = self.ltx_manager.refine_latents(latents, **final_ltx_params)

          logger.info(f"Returning refined latent tensor with shape: {refined_latents_tensor.shape}")
          return refined_latents_tensor
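A worked example of the dimension math in refine_latents, assuming the default scale factors of 8 (the real values come from the VAE config): a latent with 9 temporal frames over a 60x104 grid maps to 480x832 pixels and (9 - 1) * 8 = 64 pixel frames.

    num_latent_frames, latent_h, latent_w = 9, 60, 104   # illustrative latent dimensions
    video_scale_factor = 8                                # assumed temporal_scale_factor
    vae_scale_factor = 8                                  # assumed spatial_downscale_factor

    pixel_height = latent_h * vae_scale_factor                      # 480
    pixel_width = latent_w * vae_scale_factor                       # 832
    pixel_frames = (num_latent_frames - 1) * video_scale_factor     # 64
    print(pixel_height, pixel_width, pixel_frames)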

      def upscale_latents(self, latents: torch.Tensor) -> torch.Tensor:
          logger.info(f"Upscaling latent tensor with shape {latents.shape}.")
          return upscaler_specialist_singleton.upscale(latents)