Update app.py
app.py CHANGED
@@ -15,6 +15,9 @@ import cv2
 import shutil
 import glob
 from pathlib import Path
+from diffusers import AutoModel
+from diffusers.hooks import apply_group_offloading
+
 
 import warnings
 import logging
@@ -34,33 +37,54 @@ FPS = 24
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-
+
+# 1. Define the base repository
 base_model_repo = "Lightricks/LTX-Video"
+
+# 2. Load the Transformer separately so FP8 casting can be applied
+print("Loading Transformer for FP8 optimization...")
+transformer = AutoModel.from_pretrained(
+    base_model_repo,
+    subfolder="transformer",
+    torch_dtype=dtype
+)
+# Enable dynamic casting to FP8 (requires compatible hardware to take effect)
+print("Enabling layerwise casting to FP8...")
+transformer.enable_layerwise_casting(
+    storage_dtype=torch.float8_e4m3fn, compute_dtype=dtype
+)
+
+# 3. Load the full pipeline, injecting the already-optimized Transformer
+print(f"Loading the pipeline architecture from {base_model_repo}...")
 pipeline = LTXConditionPipeline.from_pretrained(
     base_model_repo,
+    transformer=transformer,  # inject the optimized transformer
     torch_dtype=dtype,
     cache_dir=os.getenv("HF_HOME_CACHE"),
     token=os.getenv("HF_TOKEN"),
 )
 
-#
-fp8_transformer_weights_url = "https://huggingface.co/Lightricks/LTX-Video/ltxv-13b-0.9.8-distilled-fp8.safetensors"
-print(f"Overwriting the Transformer weights with the FP8 file from: {fp8_transformer_weights_url}")
-
-pipeline.load_lora_weights(fp8_transformer_weights_url, from_diffusers=True)
-
+# 4. Load the upsampler (its repository is separate and already correct)
 print("Loading upsampler...")
 pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained(
     "Lightricks/ltxv-spatial-upscaler-0.9.7",
     cache_dir=os.getenv("HF_HOME_CACHE"),
-    vae=pipeline.vae,
+    vae=pipeline.vae,
     torch_dtype=dtype
 )
 
-
-
-
-
+
+# 5. Apply group offloading to save VRAM
+print("Applying group-offloading optimizations to save VRAM...")
+onload_device = torch.device("cuda")
+offload_device = torch.device("cpu")
+# The Transformer already has a built-in method for this
+pipeline.transformer.enable_group_offload(onload_device=onload_device, offload_device=offload_device, offload_type="leaf_level", use_stream=True)
+# For the other components, use the helper function
+apply_group_offloading(pipeline.text_encoder, onload_device=onload_device, offload_type="block_level", num_blocks_per_group=2)
+apply_group_offloading(pipeline.vae, onload_device=onload_device, offload_type="leaf_level")
+
+
 
 current_dir = Path(__file__).parent
 
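A note on step 2 above: with layerwise casting enabled, the transformer's weights are stored in torch.float8_e4m3fn while idle and cast back to bfloat16 only for each layer's forward pass, roughly halving weight memory versus plain bf16. A minimal sanity-check sketch, assuming only the transformer object from the diff (diffusers typically skips normalization and embedding layers, so not every parameter will report FP8):

import torch

# Share of parameters actually stored in FP8 after enable_layerwise_casting.
fp8_numel = sum(p.numel() for p in transformer.parameters()
                if p.dtype == torch.float8_e4m3fn)
total_numel = sum(p.numel() for p in transformer.parameters())
print(f"FP8-stored share of weights: {fp8_numel / max(total_numel, 1):.1%}")

# Rough storage estimate: 1 byte per FP8 value, 2 bytes per bf16 value.
approx_gib = (fp8_numel + 2 * (total_numel - fp8_numel)) / 1024**3
print(f"Approximate weight storage: {approx_gib:.2f} GiB")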
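The commit only constructs the two pipelines; the usual pairing at inference time is to generate low-resolution latents with pipeline, upscale them spatially with pipe_upsample, then run a short denoising pass at the larger size to refine and decode. A hedged sketch of that flow, modeled on the documented LTX-Video latent-upsampler workflow; the prompt, resolutions, step counts, and denoise_strength are illustrative assumptions, not values from this repository:

import torch
from diffusers.utils import export_to_video

prompt = "A drone shot over a rocky coastline at sunset"  # illustrative only

# Stage 1: generate at reduced resolution, keeping latents for the upsampler.
latents = pipeline(
    prompt=prompt,
    width=512,
    height=288,
    num_frames=97,
    num_inference_steps=30,
    output_type="latent",
).frames

# Stage 2: 2x spatial upscaling in latent space (no intermediate decode).
upscaled_latents = pipe_upsample(latents=latents, output_type="latent").frames

# Stage 3: short denoising pass at the upscaled size, then decode to frames.
video = pipeline(
    prompt=prompt,
    width=1024,
    height=576,
    num_frames=97,
    latents=upscaled_latents,
    denoise_strength=0.4,  # re-denoise only the tail of the schedule
    num_inference_steps=10,
    output_type="pil",
).frames[0]

export_to_video(video, "output.mp4", fps=24)

Because group offloading is already attached to the transformer, text encoder, and VAE, no explicit pipeline.to("cuda") call should be needed before these calls.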