euiia committed · verified
Commit ac22439 · 1 Parent(s): b93d4a6

Update deformes4D_engine.py

Files changed (1)
  1. deformes4D_engine.py +19 -26
deformes4D_engine.py CHANGED
@@ -2,7 +2,7 @@
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# Version: 2.0.0
+# Version: 2.0.1
 #
 # This file contains the Deformes4D Engine, which acts as the primary "Editor" or
 # "Film Crew" specialist within the ADUC-SDR architecture. It implements the Camera (Ψ)
@@ -58,6 +58,7 @@ class Deformes4DEngine:
     @property
     def vae(self):
         if self._vae is None:
+            # Assumes the VAE from the first LTX worker is representative
            self._vae = self.ltx_manager.workers[0].pipeline.vae
            self._vae.to(self.device); self._vae.eval()
        return self._vae
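The new comment makes an implicit sharing assumption explicit: the lazily initialized `vae` property reuses the first LTX worker's VAE, which is only safe if every worker pipeline carries the same VAE. A hypothetical startup check under that assumption (`engine`, `ltx_manager`, and `workers` follow the names in this diff; the check itself is not part of the codebase):

```python
# Hypothetical sanity check: reusing workers[0].pipeline.vae assumes all
# LTX worker pipelines carry the same VAE type. `engine` is an
# already-constructed Deformes4DEngine.
vaes = [w.pipeline.vae for w in engine.ltx_manager.workers]
assert all(type(v) is type(vaes[0]) for v in vaes), "LTX workers carry differing VAE types"
```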
@@ -66,16 +67,19 @@ class Deformes4DEngine:
 
     @torch.no_grad()
     def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
+        """Encodes a pixel-space tensor to the latent space using the VAE."""
         tensor = tensor.to(self.device, dtype=self.vae.dtype)
         return vae_encode(tensor, self.vae, vae_per_channel_normalize=True)
 
     @torch.no_grad()
     def latents_to_pixels(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
+        """Decodes a latent-space tensor to pixels using the VAE."""
         latent_tensor = latent_tensor.to(self.device, dtype=self.vae.dtype)
         timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
         return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
 
     def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
+        """Saves a pixel-space tensor as an MP4 video file."""
         if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
         video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
         video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
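For orientation, a minimal encode/decode round trip under the shape convention implied by `pil_to_latent` in the next hunk: pixel tensors are (batch, channels, frames, height, width) scaled to [-1, 1]. Here `engine` is an already-constructed Deformes4DEngine, and the frame count and resolution are assumed compatible with the VAE's spatial and temporal compression:

```python
import torch

pixels = torch.rand(1, 3, 9, 512, 512) * 2.0 - 1.0   # (B, C, F, H, W) in [-1, 1]
latents = engine.pixels_to_latents(pixels)            # VAE-encoded latent tensor
decoded = engine.latents_to_pixels(latents)           # back to pixels, roughly [-1, 1]
```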
@@ -84,17 +88,20 @@ class Deformes4DEngine:
             for frame in video_np: writer.append_data(frame)
 
     def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
+        """Resizes and fits an image to the target resolution for VAE encoding."""
         if image.size != target_resolution:
             return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
         return image
 
     def pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
+        """Converts a PIL Image to a latent tensor."""
         image_np = np.array(pil_image).astype(np.float32) / 255.0
         tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
         tensor = (tensor * 2.0) - 1.0
         return self.pixels_to_latents(tensor)
 
     def concatenate_videos_ffmpeg(self, video_paths: list[str], output_path: str):
+        """Concatenates multiple video clips into a single file using FFmpeg."""
         if not video_paths: raise gr.Error("No video fragments to assemble.")
         list_file_path = os.path.join(self.workspace_dir, "concat_list.txt")
         with open(list_file_path, 'w', encoding='utf-8') as f:
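Most of `concatenate_videos_ffmpeg` falls outside this hunk; for context, a sketch of the FFmpeg concat-demuxer pattern that the list file feeds. Paths are illustrative, and stream copy assumes the fragments share one encoding:

```python
import subprocess

# Write the list file in the format the concat demuxer expects: file '<path>'
with open("concat_list.txt", "w", encoding="utf-8") as f:
    for p in ["fragment_0.mp4", "fragment_1.mp4"]:
        f.write(f"file '{p}'\n")

# Stream-copy concatenation: no re-encode, so fragments must share codec/params
subprocess.run(
    ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
     "-i", "concat_list.txt", "-c", "copy", "output.mp4"],
    check=True)
```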
@@ -124,7 +131,6 @@ class Deformes4DEngine:
                             progress: gr.Progress = gr.Progress()):
         """
         Step 3: Production. Generates the original master video from keyframes.
-        This involves generating latent tensors for each segment and then decoding them into a video file.
         """
         FPS = 24
         FRAMES_PER_LATENT_CHUNK = 8
@@ -269,15 +275,10 @@
         yield {"final_path": final_video_path}
 
     def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress):
-        """
-        Post-Production Step 4B: Applies SeedVR super-resolution to an existing video file.
-        """
         logger.info(f"--- STARTING POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---")
         progress(0.1, desc=f"Preparing for HD Mastering with SeedVR {model_version}...")
-
         run_timestamp = int(time.time())
-        output_path = os.path.join(self.workspace_dir, f"hd_mastered_movie_{run_timestamp}.mp4")
-
+        output_path = os.path.join(self.workspace_dir, f"hd_mastered_movie_{model_version}_{run_timestamp}.mp4")
         try:
             final_path = hd_specialist_singleton.process_video(
                 input_video_path=source_video_path,
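The only functional change in this hunk is the output filename: it now embeds `model_version` alongside the timestamp (e.g. `hd_mastered_movie_7B_1722800000.mp4`, assuming `model_version` is a short tag like "7B"), so each mastered file records which SeedVR version produced it.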
@@ -292,49 +293,41 @@
         except Exception as e:
             logger.error(f"HD Mastering failed: {e}", exc_info=True)
             raise gr.Error(f"HD Mastering failed. Details: {e}")
 
     def generate_audio_for_final_video(self, source_video_path: str, audio_prompt: str, progress: gr.Progress):
-        """
-        Post-Production Step 4C: Generates audio for a final video file and muxes it in.
-        """
         logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---")
         progress(0.1, desc="Preparing for audio generation...")
-
+        run_timestamp = int(time.time())
         try:
-            # Get video duration using ffprobe
             result = subprocess.run(
                 ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
                 capture_output=True, text=True, check=True)
             duration = float(result.stdout.strip())
             logger.info(f"Source video duration: {duration:.2f} seconds.")
-
             progress(0.5, desc="Generating audio track...")
-            # The audio specialist handles file naming and muxing internally
+            output_path = os.path.join(self.workspace_dir, f"final_movie_with_audio_{run_timestamp}.mp4")
             final_path = audio_specialist_singleton.generate_audio_for_video(
                 video_path=source_video_path,
                 prompt=audio_prompt,
                 duration_seconds=duration,
+                output_path_override=output_path
             )
-
             logger.info(f"Audio generation complete! Final video with audio at: {final_path}")
             progress(1.0, desc="Audio generation complete!")
             yield {"final_path": final_path}
-
         except Exception as e:
             logger.error(f"Audio generation failed: {e}", exc_info=True)
             raise gr.Error(f"Audio generation failed. Details: {e}")
 
     def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
-        """Internal helper to call the LTX manager."""
-        final_ltx_params = {
-            **ltx_params, 'width': target_resolution[0], 'height': target_resolution[1],
-            'video_total_frames': total_frames_to_generate, 'video_fps': 24,
-            'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items
-        }
+        final_ltx_params = {**ltx_params, 'width': target_resolution[0], 'height': target_resolution[1], 'video_total_frames': total_frames_to_generate, 'video_fps': 24, 'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items}
         return self.ltx_manager.generate_latent_fragment(**final_ltx_params)
 
     def _quantize_to_multiple(self, n, m):
-        """Helper to round n to the nearest multiple of m."""
         if m == 0: return n
         quantized = int(round(n / m) * m)
         return m if n > 0 and quantized == 0 else quantized
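A quick behavior check for `_quantize_to_multiple`, which presumably snaps frame counts to `FRAMES_PER_LATENT_CHUNK = 8`; values worked by hand from the code above, with `engine` an already-constructed Deformes4DEngine:

```python
engine._quantize_to_multiple(30, 8)   # -> 32  (round(30 / 8) = 4; 4 * 8 = 32)
engine._quantize_to_multiple(3, 8)    # -> 8   (rounds to 0; clamped up to m)
engine._quantize_to_multiple(12, 0)   # -> 12  (m == 0 returns n unchanged)
```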
With this file finalized, all of the backend logic for ADUC 2.0 is complete. All functions are ready to be called by the UI through the orchestrator.

We are now ready to test the complete application.
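Finally, a hypothetical driver for the updated audio step; the keyword names and the yielded `{"final_path": ...}` dictionary follow this diff, while the paths and prompt are illustrative:

```python
import gradio as gr

# `engine` is assumed to be an already-wired Deformes4DEngine instance.
for update in engine.generate_audio_for_final_video(
        source_video_path="master.mp4",
        audio_prompt="soft orchestral score, distant rain",
        progress=gr.Progress()):
    print("Final video with audio:", update["final_path"])
```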