Aduc-sdr-2_5s

Paused

App Files Community

x2XcarleX2x committed on Sep 24

Commit

def1a3e

verified ·

1 Parent(s): f93bb97

Update aduc_framework/managers/wan_manager.py

Browse files

Files changed (1) hide show

aduc_framework/managers/wan_manager.py +24 -41

aduc_framework/managers/wan_manager.py CHANGED Viewed

@@ -3,7 +3,7 @@
 import os
 import tempfile
 import random
-from typing import List, Any
 import numpy as np
 import torch
@@ -17,13 +17,13 @@ from diffusers.utils.export_utils import export_to_video
 class WanManager:
     """
-    Serviço responsável por:
-    - Carregar a pipeline Wan I2V com dois transformadores (alto/baixo ruído).
-    - Aplicar e fundir LoRA Lightning para geração rápida (8 passos).
-    - Processar imagens e gerar vídeo a partir de uma lista images_condition_items.
     """
-    # Constantes espelhadas da UI
     MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
     MAX_DIMENSION = 832
@@ -44,7 +44,7 @@ class WanManager:
     def __init__(self) -> None:
         print("Loading models into memory. This may take a few minutes...")
-        # Carrega a pipeline principal com dois transformadores
         self.pipe = WanImageToVideoPipeline.from_pretrained(
             self.MODEL_ID,
             transformer=WanTransformer3DModel.from_pretrained(
@@ -62,12 +62,12 @@ class WanManager:
             torch_dtype=torch.bfloat16,
         )
-        # Scheduler FlowMatch Euler com shift igual ao do app
         self.pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
             self.pipe.scheduler.config, shift=32.0
         )
-        # Fusão do LoRA Lightning (dois adaptadores: transformer e transformer_2)
         print("Applying 8-step Lightning LoRA...")
         try:
             self.pipe.load_lora_weights(
@@ -88,7 +88,7 @@ class WanManager:
             self.pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
             self.pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
-            # Após a fusão, descarta adaptadores da memória
             self.pipe.unload_lora_weights()
             print("Lightning LoRA successfully fused. Model is ready for fast 8-step generation.")
         except Exception as e:
@@ -96,15 +96,9 @@ class WanManager:
         print("All models loaded. Service is ready.")
-    # ===== Utilidades de imagem (espelho da UI) =====
     def process_image_for_video(self, image: Image.Image) -> Image.Image:
-        """
-        Reamostra a imagem respeitando:
-        - Mín/Máx dimensões
-        - Múltiplo de 16
-        - Caso quadrada, força SQUARE_SIZE
-        """
         width, height = image.size
         if width == height:
             return image.resize((self.SQUARE_SIZE, self.SQUARE_SIZE), Image.Resampling.LANCZOS)
@@ -124,7 +118,7 @@ class WanManager:
             new_width *= scale
             new_height *= scale
-        # Múltiplo e mínimos finais
         final_width = int(round(new_width / self.DIMENSION_MULTIPLE) * self.DIMENSION_MULTIPLE)
         final_height = int(round(new_height / self.DIMENSION_MULTIPLE) * self.DIMENSION_MULTIPLE)
@@ -134,9 +128,6 @@ class WanManager:
         return image.resize((final_width, final_height), Image.Resampling.LANCZOS)
     def resize_and_crop_to_match(self, target_image: Image.Image, reference_image: Image.Image) -> Image.Image:
-        """
-        Redimensiona e faz center-crop para igualar (W,H) da imagem de referência.
-        """
         ref_width, ref_height = reference_image.size
         target_width, target_height = target_image.size
         scale = max(ref_width / target_width, ref_height / target_height)
@@ -145,63 +136,55 @@ class WanManager:
         left, top = (new_width - ref_width) // 2, (new_height - ref_height) // 2
         return resized.crop((left, top, left + ref_width, top + ref_height))
-    # ===== API principal =====
     def generate_video_from_conditions(
         self,
         images_condition_items: List[List[Any]],  # [[patch(Image), frame(int|str), peso(float)], ...]
         prompt: str,
-        negative_prompt: str,
         duration_seconds: float,
         steps: int,
         guidance_scale: float,
         guidance_scale_2: float,
         seed: int,
         randomize_seed: bool,
     ):
         """
-        Usos atuais:
-        - Usa SOMENTE o primeiro item como imagem inicial (image)
-        - Usa SOMENTE o último item como last_image (endpoint)
-        - Mantém todo o restante do contrato da pipeline i2v
         """
         if not images_condition_items or len(images_condition_items) < 2:
-            raise ValueError("Forneça ao menos dois itens em images_condition_items (início e fim).")
         first_item = images_condition_items[0]
         last_item = images_condition_items[-1]
-        # Estrutura: [patch, frame, peso]; por ora só o patch é utilizado.
         start_image = first_item[0]
         end_image = last_item[0]
         if start_image is None or end_image is None:
             raise ValueError("As imagens inicial e final não podem ser vazias.")
-        if not isinstance(start_image, Image.Image):
-            raise TypeError("O 'patch' do primeiro item deve ser uma PIL.Image.")
-        if not isinstance(end_image, Image.Image):
-            raise TypeError("O 'patch' do último item deve ser uma PIL.Image.")
-        # Pré-processamento idêntico ao da UI
         processed_start = self.process_image_for_video(start_image)
         processed_end = self.resize_and_crop_to_match(end_image, processed_start)
         target_height, target_width = processed_start.height, processed_start.width
-        # Frames do vídeo
         num_frames = int(round(duration_seconds * self.FIXED_FPS))
         num_frames = int(np.clip(num_frames, self.MIN_FRAMES_MODEL, self.MAX_FRAMES_MODEL))
-        # Semente
         current_seed = random.randint(0, np.iinfo(np.int32).max) if randomize_seed else int(seed)
         generator = torch.Generator().manual_seed(current_seed)
-        # Chamada direta da pipeline (image/last_image)
         result = self.pipe(
             image=processed_start,
             last_image=processed_end,
             prompt=prompt,
-            negative_prompt=negative_prompt,
             height=target_height,
             width=target_width,
             num_frames=num_frames,
@@ -209,11 +192,11 @@ class WanManager:
             guidance_scale_2=float(guidance_scale_2),
             num_inference_steps=int(steps),
             generator=generator,
         )
         frames = result.frames[0]
-        # Exporta para vídeo temporário
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
             video_path = tmp.name
         export_to_video(frames, video_path, fps=self.FIXED_FPS)

 import os
 import tempfile
 import random
+from typing import List, Any, Optional, Union
 import numpy as np
 import torch
 class WanManager:
     """
+    Serviço que encapsula:
+    - Carregamento da pipeline Wan I2V com dois transformadores (alto/baixo ruído).
+    - Fusão da LoRA Lightning para 8 passos rápidos.
+    - Pré-processamento de imagens e geração de vídeo a partir de images_condition_items.
     """
+    # Constantes alinhadas ao app
     MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
     MAX_DIMENSION = 832
     def __init__(self) -> None:
         print("Loading models into memory. This may take a few minutes...")
+        # Pipeline com dois transformadores (bf16 + device_map='auto')
         self.pipe = WanImageToVideoPipeline.from_pretrained(
             self.MODEL_ID,
             transformer=WanTransformer3DModel.from_pretrained(
             torch_dtype=torch.bfloat16,
         )
+        # Scheduler FlowMatch Euler (shift = 32.0)
         self.pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
             self.pipe.scheduler.config, shift=32.0
         )
+        # Fusão da LoRA Lightning (dois adaptadores, um por transformer)
         print("Applying 8-step Lightning LoRA...")
         try:
             self.pipe.load_lora_weights(
             self.pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
             self.pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
+            # Libera adaptadores após a fusão
             self.pipe.unload_lora_weights()
             print("Lightning LoRA successfully fused. Model is ready for fast 8-step generation.")
         except Exception as e:
         print("All models loaded. Service is ready.")
+    # ============ Utilidades de imagem ============
     def process_image_for_video(self, image: Image.Image) -> Image.Image:
         width, height = image.size
         if width == height:
             return image.resize((self.SQUARE_SIZE, self.SQUARE_SIZE), Image.Resampling.LANCZOS)
             new_width *= scale
             new_height *= scale
+        # Múltiplo de 16 e mínimos finais
         final_width = int(round(new_width / self.DIMENSION_MULTIPLE) * self.DIMENSION_MULTIPLE)
         final_height = int(round(new_height / self.DIMENSION_MULTIPLE) * self.DIMENSION_MULTIPLE)
         return image.resize((final_width, final_height), Image.Resampling.LANCZOS)
     def resize_and_crop_to_match(self, target_image: Image.Image, reference_image: Image.Image) -> Image.Image:
         ref_width, ref_height = reference_image.size
         target_width, target_height = target_image.size
         scale = max(ref_width / target_width, ref_height / target_height)
         left, top = (new_width - ref_width) // 2, (new_height - ref_height) // 2
         return resized.crop((left, top, left + ref_width, top + ref_height))
+    # ============ API principal ============
     def generate_video_from_conditions(
         self,
         images_condition_items: List[List[Any]],  # [[patch(Image), frame(int|str), peso(float)], ...]
         prompt: str,
+        negative_prompt: Optional[str],
         duration_seconds: float,
         steps: int,
         guidance_scale: float,
         guidance_scale_2: float,
         seed: int,
         randomize_seed: bool,
+        output_type: str = "np",
     ):
         """
+        Usa apenas:
+        - Primeiro item como imagem inicial (image)
+        - Último item como last_image (endpoint)
+        Mantém todo o restante do contrato i2v.
         """
         if not images_condition_items or len(images_condition_items) < 2:
+            raise ValueError("Forneça ao menos dois itens (início e fim).")
         first_item = images_condition_items[0]
         last_item = images_condition_items[-1]
         start_image = first_item[0]
         end_image = last_item[0]
         if start_image is None or end_image is None:
             raise ValueError("As imagens inicial e final não podem ser vazias.")
+        if not isinstance(start_image, Image.Image) or not isinstance(end_image, Image.Image):
+            raise TypeError("Os 'patches' devem ser PIL.Image.")
         processed_start = self.process_image_for_video(start_image)
         processed_end = self.resize_and_crop_to_match(end_image, processed_start)
         target_height, target_width = processed_start.height, processed_start.width
         num_frames = int(round(duration_seconds * self.FIXED_FPS))
         num_frames = int(np.clip(num_frames, self.MIN_FRAMES_MODEL, self.MAX_FRAMES_MODEL))
         current_seed = random.randint(0, np.iinfo(np.int32).max) if randomize_seed else int(seed)
         generator = torch.Generator().manual_seed(current_seed)
         result = self.pipe(
             image=processed_start,
             last_image=processed_end,
             prompt=prompt,
+            negative_prompt=negative_prompt if negative_prompt is not None else self.default_negative_prompt,
             height=target_height,
             width=target_width,
             num_frames=num_frames,
             guidance_scale_2=float(guidance_scale_2),
             num_inference_steps=int(steps),
             generator=generator,
+            output_type=output_type,
         )
         frames = result.frames[0]
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
             video_path = tmp.name
         export_to_video(frames, video_path, fps=self.FIXED_FPS)