Update deformes4D_engine.py

deformes4D_engine.py (CHANGED, +19 -26)
@@ -2,7 +2,7 @@
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# Version: 2.0.
+# Version: 2.0.1
 #
 # This file contains the Deformes4D Engine, which acts as the primary "Editor" or
 # "Film Crew" specialist within the ADUC-SDR architecture. It implements the Camera (Ψ)
@@ -58,6 +58,7 @@ class Deformes4DEngine:
     @property
     def vae(self):
         if self._vae is None:
+            # Assumes the VAE from the first LTX worker is representative
             self._vae = self.ltx_manager.workers[0].pipeline.vae
             self._vae.to(self.device); self._vae.eval()
         return self._vae
@@ -66,16 +67,19 @@ class Deformes4DEngine:
 
     @torch.no_grad()
     def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
+        """Encodes a pixel-space tensor to the latent space using the VAE."""
         tensor = tensor.to(self.device, dtype=self.vae.dtype)
         return vae_encode(tensor, self.vae, vae_per_channel_normalize=True)
 
     @torch.no_grad()
     def latents_to_pixels(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
+        """Decodes a latent-space tensor to pixels using the VAE."""
         latent_tensor = latent_tensor.to(self.device, dtype=self.vae.dtype)
         timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
         return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
 
     def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
+        """Saves a pixel-space tensor as an MP4 video file."""
         if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
         video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
         video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
@@ -84,17 +88,20 @@ class Deformes4DEngine:
         for frame in video_np: writer.append_data(frame)
 
     def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
+        """Resizes and fits an image to the target resolution for VAE encoding."""
         if image.size != target_resolution:
             return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
         return image
 
     def pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
+        """Converts a PIL Image to a latent tensor."""
         image_np = np.array(pil_image).astype(np.float32) / 255.0
         tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
         tensor = (tensor * 2.0) - 1.0
         return self.pixels_to_latents(tensor)
 
     def concatenate_videos_ffmpeg(self, video_paths: list[str], output_path: str):
+        """Concatenates multiple video clips into a single file using FFmpeg."""
         if not video_paths: raise gr.Error("No video fragments to assemble.")
         list_file_path = os.path.join(self.workspace_dir, "concat_list.txt")
         with open(list_file_path, 'w', encoding='utf-8') as f:
@@ -124,7 +131,6 @@ class Deformes4DEngine:
                            progress: gr.Progress = gr.Progress()):
         """
         Step 3: Production. Generates the original master video from keyframes.
-        This involves generating latent tensors for each segment and then decoding them into a video file.
         """
         FPS = 24
         FRAMES_PER_LATENT_CHUNK = 8
@@ -269,15 +275,10 @@ class Deformes4DEngine:
         yield {"final_path": final_video_path}
 
     def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress):
-        """
-        Post-Production Step 4B: Applies SeedVR super-resolution to an existing video file.
-        """
         logger.info(f"--- STARTING POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---")
         progress(0.1, desc=f"Preparing for HD Mastering with SeedVR {model_version}...")
-
         run_timestamp = int(time.time())
-        output_path = os.path.join(self.workspace_dir, f"hd_mastered_movie_{run_timestamp}.mp4")
-
+        output_path = os.path.join(self.workspace_dir, f"hd_mastered_movie_{model_version}_{run_timestamp}.mp4")
         try:
             final_path = hd_specialist_singleton.process_video(
                 input_video_path=source_video_path,
@@ -292,49 +293,41 @@ class Deformes4DEngine:
         except Exception as e:
             logger.error(f"HD Mastering failed: {e}", exc_info=True)
             raise gr.Error(f"HD Mastering failed. Details: {e}")
 
     def generate_audio_for_final_video(self, source_video_path: str, audio_prompt: str, progress: gr.Progress):
-        """
-        Post-Production Step 4C: Generates audio for a final video file and muxes it in.
-        """
         logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---")
         progress(0.1, desc="Preparing for audio generation...")
-
+        run_timestamp = int(time.time())
         try:
-            # Get video duration using ffprobe
             result = subprocess.run(
                 ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
                 capture_output=True, text=True, check=True)
             duration = float(result.stdout.strip())
             logger.info(f"Source video duration: {duration:.2f} seconds.")
-
             progress(0.5, desc="Generating audio track...")
-
+            output_path = os.path.join(self.workspace_dir, f"final_movie_with_audio_{run_timestamp}.mp4")
             final_path = audio_specialist_singleton.generate_audio_for_video(
                 video_path=source_video_path,
                 prompt=audio_prompt,
                 duration_seconds=duration,
+                output_path_override=output_path
             )
-
             logger.info(f"Audio generation complete! Final video with audio at: {final_path}")
             progress(1.0, desc="Audio generation complete!")
             yield {"final_path": final_path}
-
         except Exception as e:
             logger.error(f"Audio generation failed: {e}", exc_info=True)
             raise gr.Error(f"Audio generation failed. Details: {e}")
 
     def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
-
-        final_ltx_params = {
-            **ltx_params, 'width': target_resolution[0], 'height': target_resolution[1],
-            'video_total_frames': total_frames_to_generate, 'video_fps': 24,
-            'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items
-        }
+        final_ltx_params = {**ltx_params, 'width': target_resolution[0], 'height': target_resolution[1], 'video_total_frames': total_frames_to_generate, 'video_fps': 24, 'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items}
         return self.ltx_manager.generate_latent_fragment(**final_ltx_params)
 
     def _quantize_to_multiple(self, n, m):
-        """Helper to round n to the nearest multiple of m."""
        if m == 0: return n
        quantized = int(round(n / m) * m)
        return m if n > 0 and quantized == 0 else quantized
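For reference, the production step in the diff defines FPS = 24 and FRAMES_PER_LATENT_CHUNK = 8, and `_quantize_to_multiple` rounds a count to the nearest multiple without ever collapsing a positive request to zero. The standalone sketch below reproduces that helper outside the class to show its rounding behaviour; the duration-to-frames loop is illustrative only and not part of the committed code.

FPS = 24
FRAMES_PER_LATENT_CHUNK = 8

def quantize_to_multiple(n, m):
    # Mirrors Deformes4DEngine._quantize_to_multiple from the diff above.
    if m == 0:
        return n
    quantized = int(round(n / m) * m)
    # A positive request never collapses to zero; it gets one full chunk instead.
    return m if n > 0 and quantized == 0 else quantized

for seconds in (0.1, 1.0, 2.5, 5.0):
    raw_frames = int(seconds * FPS)
    frames = quantize_to_multiple(raw_frames, FRAMES_PER_LATENT_CHUNK)
    print(f"{seconds:>4}s -> {raw_frames:3d} raw frames -> {frames:3d} frames "
          f"({frames // FRAMES_PER_LATENT_CHUNK} latent chunks)")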
With this file finalized, all of the backend logic for ADUC 2.0 is complete. Every function is ready to be called from the UI through the orchestrator.

We are now ready to test the complete application.
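As a rough illustration of that closing note (the UI calling the engine through the orchestrator), here is a minimal sketch of a Gradio event handler driving the audio post-production step. The instance name `deformes4d_engine` is a hypothetical stand-in, since the diff does not show how the engine is constructed or exported; the method signature and the yielded {"final_path": ...} dict follow the diff above.

import gradio as gr
# Hypothetical wiring: the diff does not show how the engine instance reaches the UI,
# so 'deformes4d_engine' is a stand-in for whatever object the orchestrator holds.
from deformes4D_engine import deformes4d_engine

def add_audio_track(master_video_path: str, audio_prompt: str,
                    progress: gr.Progress = gr.Progress()) -> str:
    """Gradio event-handler sketch: drive the generator-style audio step to completion."""
    final_path = master_video_path
    # Per the diff, generate_audio_for_final_video yields {"final_path": ...} dicts;
    # the last yielded value points at the muxed output file.
    for update in deformes4d_engine.generate_audio_for_final_video(
            master_video_path, audio_prompt, progress=progress):
        final_path = update.get("final_path", final_path)
    return final_path

# Illustrative call (path and prompt are placeholders):
# final = add_audio_track("workspace/original_master_movie.mp4", "soft ambient score")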
|