Test4

Paused

App Files Files Community

Eueuiaa committed on Oct 8

Commit

21e9173

verified ·

1 Parent(s): 35be4e2

Update api/ltx_server.py

Browse files

Files changed (1) hide show

api/ltx_server.py +189 -318

api/ltx_server.py CHANGED Viewed

@@ -25,6 +25,7 @@ import yaml
 from typing import List, Dict
 from pathlib import Path
 import imageio
 import tempfile
 from huggingface_hub import hf_hub_download
 import sys
@@ -40,9 +41,8 @@ from managers.vae_manager import vae_manager_singleton
 from tools.video_encode_tool import video_encode_tool_singleton
 DEPS_DIR = Path("/data")
 LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
-if not LTX_VIDEO_REPO_DIR.exists():
-    print(f"[DEBUG] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Rodando setup...")
-    run_setup()
 def run_setup():
     setup_script_path = "setup.py"
     if not os.path.exists(setup_script_path):
@@ -55,6 +55,11 @@ def run_setup():
     except subprocess.CalledProcessError as e:
         print(f"[DEBUG] ERRO no setup.py (code {e.returncode}). Abortando.")
         sys.exit(1)
 def add_deps_to_path():
     repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
     if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
@@ -117,61 +122,39 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
                 continue
     return results
 def calculate_new_dimensions(orig_w, orig_h, divisor=8):
-    """
-    Calcula novas dimensões mantendo a proporção, garantindo que ambos os
-    lados sejam divisíveis pelo divisor especificado (padrão 8).
-    """
     if orig_w == 0 or orig_h == 0:
-        # Retorna um valor padrão seguro
         return 512, 512
-    # Preserva a orientação (paisagem vs. retrato)
     if orig_w >= orig_h:
-        # Paisagem ou quadrado
         aspect_ratio = orig_w / orig_h
-        # Começa com uma altura base e calcula a largura
-        new_h = 512 # Altura base para paisagem
         new_w = new_h * aspect_ratio
     else:
-        # Retrato
         aspect_ratio = orig_h / orig_w
-        # Começa com uma largura base e calcula a altura
-        new_w = 512 # Largura base para retrato
         new_h = new_w * aspect_ratio
-    # Arredonda AMBOS os valores para o múltiplo mais próximo do divisor
     final_w = int(round(new_w / divisor)) * divisor
     final_h = int(round(new_h / divisor)) * divisor
-    # Garante que as dimensões não sejam zero após o arredondamento
     final_w = max(divisor, final_w)
     final_h = max(divisor, final_h)
     print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Calculado: {new_w:.0f}x{new_h:.0f} -> Final (divisível por {divisor}): {final_w}x{final_h}")
-    return final_h, final_w # Retorna (altura, largura)
 def handle_media_upload_for_dims(filepath, current_h, current_w):
-    """
-    Esta função agora usará o novo cálculo robusto.
-    (O corpo desta função não precisa de alterações, pois ela já chama a função de cálculo)
-    """
     if not filepath or not os.path.exists(str(filepath)):
-        return gr.update(value=current_h), gr.update(value=current_w)
     try:
         if str(filepath).lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
             with Image.open(filepath) as img:
                 orig_w, orig_h = img.size
-        else: # Assumir que é um vídeo
             with imageio.get_reader(filepath) as reader:
                 meta = reader.get_meta_data()
                 orig_w, orig_h = meta.get('size', (current_w, current_h))
-        # Chama a nova função corrigida
         new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
-        return gr.update(value=new_h), gr.update(value=new_w)
     except Exception as e:
         print(f"Erro ao processar mídia para dimensões: {e}")
-        return gr.update(value=current_h), gr.update(value=current_w)
 def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
     if not processes:
         return "  - Processos ativos: (nenhum)\n"
@@ -233,7 +216,6 @@ class VideoService:
         self._apply_precision_policy()
         print(f"[DEBUG] runtime_autocast_dtype = {getattr(self, 'runtime_autocast_dtype', None)}")
-        # Injeta pipeline/vae no manager (impede vae=None)
         vae_manager_singleton.attach_pipeline(
             self.pipeline,
             device=self.device,
@@ -398,17 +380,10 @@ class VideoService:
                 pass
         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
     @torch.no_grad()
     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
-        """
-        Lógica extraída diretamente da LTXMultiScalePipeline para upscale de latentes.
-        """
         if not self.latent_upsampler:
             raise ValueError("Latent Upsampler não está carregado.")
-        # Garante que os modelos estejam no dispositivo correto
         self.latent_upsampler.to(self.device)
         self.pipeline.vae.to(self.device)
         print(f"[DEBUG-UPSAMPLE] Shape de entrada: {tuple(latents.shape)}")
@@ -416,11 +391,8 @@ class VideoService:
         upsampled_latents = self.latent_upsampler(latents)
         upsampled_latents = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
         print(f"[DEBUG-UPSAMPLE] Shape de saída: {tuple(upsampled_latents.shape)}")
         return upsampled_latents
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
@@ -454,156 +426,124 @@ class VideoService:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
     def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
-        """
-        Divide o tensor de latentes em chunks com tamanho definido em número de latentes.
-        Args:
-            latents_brutos: tensor [B, C, T, H, W]
-            num_latente_por_chunk: número de latentes por chunk
-            overlap: número de frames que se sobrepõem entre chunks
-        Returns:
-            List[tensor]: lista de chunks cloneados
-        """
         sum_latent = latents_brutos.shape[2]
         chunks = []
         if num_latente_por_chunk >= sum_latent:
-            return [latents_brutos]
-        n_chunks = (sum_latent) // num_latente_por_chunk
-        steps = sum_latent//n_chunks
-        print("================PODA CAUSAL=================")
-        print(f"[DEBUG] TOTAL LATENTES = {sum_latent}")
-        print(f"[DEBUG] LATENTES min por chunk = {num_latente_por_chunk}")
-        print(f"[DEBUG] Número de chunks = {n_chunks}")
-        if n_chunks > 1:
-            i=0
-            while i < n_chunks:
-                start = (num_latente_por_chunk*i)
-                end = (start+num_latente_por_chunk+overlap)
-                if i+1 < n_chunks:
-                    chunk = latents_brutos[:, :, start:end, :, :].clone().detach()
-                    print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")
-                else:
-                    chunk = latents_brutos[:, :, start:, :, :].clone().detach()
-                    print(f"[DEBUG] chunk{i+1}[:, :, {start}:, :, :] = {chunk.shape[2]}")
-                chunks.append(chunk)
-                i+=1
-        else:
-            print(f"[DEBUG] numero chunks minimo ")
-            print(f"[DEBUG] latents_brutos[:, :, :, :, :] = {latents_brutos.shape[2]}")
-            chunks.append(latents_brutos)
-        print("================PODA CAUSAL=================")
         return chunks
     def _get_total_frames(self, video_path: str) -> int:
         cmd = [
-            "ffprobe",
-            "-v", "error",
-            "-select_streams", "v:0",
-            "-count_frames",
-            "-show_entries", "stream=nb_read_frames",
-            "-of", "default=nokey=1:noprint_wrappers=1",
-            video_path
         ]
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
         return int(result.stdout.strip())
     def _gerar_lista_com_transicoes(self, pasta: str, video_paths: list[str], crossfade_frames: int = 8) -> list[str]:
-        """
-        Gera uma nova lista de vídeos aplicando transições suaves (blend frame a frame)
-        seguindo exatamente a lógica linear de Carlos.
-        """
-        import os, subprocess, shutil
-        poda = crossfade_frames
-        total_partes = len(video_paths)
-        video_fade_fim = None
-        video_fade_ini = None
-        nova_lista = []
-        print("===========CONCATECAO CAUSAL=============")
-        print(f"[DEBUG] Iniciando pipeline com {total_partes} vídeos e {poda} frames de crossfade")
-        for i in range(total_partes):
-            base = video_paths[i]
-            # --- PODA ---
-            video_podado = os.path.join(pasta, f"{base}_podado_{i}.mp4")
-            if i<total_partes-1:
-                end_frame = self._get_total_frames(base) - poda
-            else:
-                end_frame = self._get_total_frames(base)
-            if i>0:
-                start_frame = poda
-            else:
-                start_frame = 0
-            cmd_fim = (
-               f'ffmpeg -y -hide_banner -loglevel error -i "{base}" '
-               f'-vf "trim=start_frame={start_frame}:end_frame={end_frame},setpts=PTS-STARTPTS" '
-               f'-an "{video_podado}"'
-            )
-            subprocess.run(cmd_fim, shell=True, check=True)
-            # --- FADE_INI ---
-            if i > 0:
-                video_fade_ini = os.path.join(pasta, f"{base}_fade_ini_{i}.mp4")
-                cmd_ini = (
-                    f'ffmpeg -y -hide_banner -loglevel error -i "{base}" '
-                    f'-vf "trim=end_frame={poda},setpts=PTS-STARTPTS" -an "{video_fade_ini}"'
-                )
-                subprocess.run(cmd_ini, shell=True, check=True)
-            # --- TRANSIÇÃO ---
-            if video_fade_fim and video_fade_ini:
-                video_fade  = os.path.join(pasta, f"transicao_{i}_{i+1}.mp4")
-                cmd_blend = (
-                    f'ffmpeg -y -hide_banner -loglevel error '
-                    f'-i "{video_fade_fim}" -i "{video_fade_ini}" '
-                    f'-filter_complex "[0:v][1:v]blend=all_expr=\'A*(1-T/{poda})+B*(T/{poda})\',format=yuv420p" '
-                    f'-frames:v {poda} "{video_fade}"'
-                )
-                subprocess.run(cmd_blend, shell=True, check=True)
-                print(f"[DEBUG] transicao adicionada {i}/{i+1} {self._get_total_frames(video_fade)} frames ✅")
-                nova_lista.append(video_fade)
-            # --- FADE_FIM ---
-            if i<=total_partes-1:
-                video_fade_fim = os.path.join(pasta, f"{base}_fade_fim_{i}.mp4")
-                cmd_fim = (
-                    f'ffmpeg -y -hide_banner -loglevel error -i "{base}" '
-                    f'-vf "trim=start_frame={end_frame-poda},setpts=PTS-STARTPTS" -an "{video_fade_fim}"'
-                )
-                subprocess.run(cmd_fim, shell=True, check=True)
-            nova_lista.append(video_podado)
-            print(f"[DEBUG] Video podado {i+1} adicionado {self._get_total_frames(video_podado)} frames ✅")
-        print("===========CONCATECAO CAUSAL=============")
-        print(f"[DEBUG] {nova_lista}")
-        return nova_lista
     def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
-        """
-        Concatena múltiplos MP4s sem reencode usando o demuxer do ffmpeg.
-        ATENÇÃO: todos os arquivos precisam ter mesmo codec, fps, resolução etc.
-        """
-        if not mp4_list or len(mp4_list) < 2:
-            raise ValueError("Forneça pelo menos dois arquivos MP4 para concatenar.")
-        # Cria lista temporária para o ffmpeg
         with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
             for mp4 in mp4_list:
                 f.write(f"file '{os.path.abspath(mp4)}'\n")
@@ -620,10 +560,6 @@ class VideoService:
             except Exception:
                 pass
-    # ==============================================================================
-    # --- FUNÇÃO GENERATE COMPLETA E ATUALIZADA ---
-    # ==============================================================================
     def generate(
         self,
         prompt,
@@ -639,21 +575,20 @@ class VideoService:
         height=512,
         width=704,
         duration=2.0,
-        frames_to_use=9,
         seed=42,
         randomize_seed=True,
         guidance_scale=3.0,
         improve_texture=True,
         progress_callback=None,
-        external_decode=True,
     ):
         t_all = time.perf_counter()
-        print(f"[DEBUG] generate() begin mode={mode} external_decode={external_decode} improve_texture={improve_texture}")
         if self.device == "cuda":
             torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
         self._log_gpu_memory("Início da Geração")
-        # --- Setup Inicial (como antes) ---
         if mode == "image-to-video" and not start_image_filepath:
             raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
         used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
@@ -682,49 +617,33 @@ class VideoService:
             print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
         call_kwargs = {
-            "prompt": prompt,
-            "negative_prompt": negative_prompt,
-            "height": height_padded,
-            "width": width_padded,
-            "num_frames": actual_num_frames,
-            "frame_rate": int(FPS),
-            "generator": generator,
-            "output_type": "latent",
-            "conditioning_items": conditioning_items if conditioning_items else None,
-            "media_items": None,
-            "decode_timestep": self.config["decode_timestep"],
-            "decode_noise_scale": self.config["decode_noise_scale"],
-            "stochastic_sampling": self.config["stochastic_sampling"],
-            "image_cond_noise_scale": 0.01,
-            "is_video": True,
-            "vae_per_channel_normalize": True,
-            "mixed_precision": (self.config["precision"] == "mixed_precision"),
-            "offload_to_cpu": False,
-            "enhance_prompt": False,
-            "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
         }
-        latents = None
-        latents_list[]
         temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
         results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
         try:
             if improve_texture:
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
                     if not self.latent_upsampler:
                         raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")
-                    # --- ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---
                     print("\n--- INICIANDO ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---")
                     t_pass1 = time.perf_counter()
                     first_pass_config = self.config.get("first_pass", {}).copy()
                     first_pass_config.pop("num_inference_steps", None)
                     downscale_factor = self.config.get("downscale_factor", 0.6666666)
-                    vae_scale_factor = self.pipeline.vae_scale_factor # Geralmente 8
                     x_width = int(width_padded * downscale_factor)
                     downscaled_width = x_width - (x_width % vae_scale_factor)
                     x_height = int(height_padded * downscale_factor)
@@ -733,155 +652,107 @@ class VideoService:
                     first_pass_kwargs = call_kwargs.copy()
                     first_pass_kwargs.update({
-                        "output_type": "latent",
-                        "width": downscaled_width,
-                        "height": downscaled_height,
-                        "guidance_scale": float(guidance_scale),
-                        **first_pass_config
                     })
                     print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
                     latents = self.pipeline(**first_pass_kwargs).images
                     log_tensor_info(latents, "Latentes Base (First Pass)")
                     print(f"[DEBUG] First Pass concluída em {time.perf_counter() - t_pass1:.2f}s")
-                    del pipeline
-                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
                     print("\n--- INICIANDO ETAPA 2: UPSCALE DOS LATENTES ---")
                     t_upscale = time.perf_counter()
                     upsampled_latents = self._upsample_latents_internal(latents)
                     upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents)
                     print(f"[DEBUG] Upscale de Latentes concluído em {time.perf_counter() - t_upscale:.2f}s")
-                    latents_cpu = upsampled_latents.detach().to("cpu", non_blocking=True)
-                    del upsampled_latents;
-                    del latents; gc.collect(); torch.cuda.empty_cache()
-                    del spatial_upscaler_path
-                    #latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,4,1)
-                    latents_parts_up[latents_cpu]
-                    #del latents_cpu_up
-                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
-                with ctx:
-                    for latents in latents_parts_up:
-                        latents = adain_filter_latent(latents=latents, reference_latents=latents_cpu_up)
-                        # # --- ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---
-                        print("\n--- INICIANDO ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
                         second_pass_config = self.config.get("second_pass", {}).copy()
                         second_pass_config.pop("num_inference_steps", None)
-                        second_pass_width = downscaled_width * 2
-                        second_pass_height = downscaled_height * 2
                         print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
                         t_pass2 = time.perf_counter()
                         second_pass_kwargs = call_kwargs.copy()
                         second_pass_kwargs.update({
-                           "output_type": "latent",
-                           "width": second_pass_width,
-                           "height": second_pass_height,
-                           "latents": latents,
                            "guidance_scale": float(guidance_scale),
                            **second_pass_config
                         })
-                        print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
                         final_latents = self.pipeline(**second_pass_kwargs).images
                         log_tensor_info(final_latents, "Latentes Finais (Pós-Second Pass)")
                         print(f"[DEBUG] Second part Pass concluída em {time.perf_counter() - t_pass2:.2f}s")
                         latents_cpu = final_latents.detach().to("cpu", non_blocking=True)
                         latents_list.append(latents_cpu)
-                        del final_latents; gc.collect(); torch.cuda.empty_cache()
-                        del pipeline
             else:
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
                     print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
                     t_single = time.perf_counter()
                     single_pass_call_kwargs = call_kwargs.copy()
-                    first_pass_config_from_yaml = self.config.get("first_pass", {})
-                    single_pass_call_kwargs["timesteps"] = first_pass_config_from_yaml.get("timesteps")
-                    single_pass_call_kwargs["guidance_scale"] = float(guidance_scale)
-                    single_pass_call_kwargs["stg_scale"] = first_pass_config_from_yaml.get("stg_scale")
-                    single_pass_call_kwargs["rescaling_scale"] = first_pass_config_from_yaml.get("rescaling_scale")
-                    single_pass_call_kwargs["skip_block_list"] = first_pass_config_from_yaml.get("skip_block_list")
-                    single_pass_call_kwargs.pop("num_inference_steps", None)
-                    single_pass_call_kwargs.pop("first_pass", None)
-                    single_pass_call_kwargs.pop("second_pass", None)
-                    single_pass_call_kwargs.pop("downscale_factor", None)
-                    latents_single_pass = pipeline_instance(**single_pass_call_kwargs).images
                     log_tensor_info(latents_single_pass, "Latentes Finais (Etapa Única)")
                     print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
                     latents_cpu = latents_single_pass.detach().to("cpu", non_blocking=True)
-                    latents_list.append(latents_single_pass)
                     del latents_single_pass; gc.collect(); torch.cuda.empty_cache()
-                    del pipeline
-            ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
-                with ctx:
-                    # --- ETAPA FINAL: DECODIFICAÇÃO E CODIFICAÇÃO MP4 ---
-                    print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")
-                latents_parts[]
-                for latents in latents_list:
-                    latents_parts.append(self._dividir_latentes_por_tamanho(latents_cpu,4,1))
-                partes_mp4 = []
-                par = 0
-                for latents in latents_parts:
-                    latents = adain_filter_latent(latents=latents, reference_latents=latents_cpu)
-                    print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
-                    par = par + 1
-                    output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
-                    final_output_path = None
-                    print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
-                    # Usar manager om timestep por item; previne target_shape e rota NoneType.decode
-                    pixel_tensor = vae_manager_singleton.decode(
-                        latents.to(self.device, non_blocking=True),
-                        decode_timestep=float(self.config.get("decode_timestep", 0.05))
-                    )
-                    log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
-                    print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
-                    video_encode_tool_singleton.save_video_from_tensor(
-                       pixel_tensor,
-                       output_video_path,
-                       fps=call_kwargs["frame_rate"],
-                       progress_callback=progress_callback
-                    )
-                    candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
-                    try:
-                       shutil.move(output_video_path, candidate)
-                       final_output_path = candidate
-                       print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
-                       partes_mp4.append(final_output_path)
-                    except Exception as e:
-                        final_output_path = output_video_path
-                        print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
-                    del pixel_tensor
-                    del latents; gc.collect(); torch.cuda.empty_cache()
-                    del candidate
-            total_partes = len(partes_mp4)
-            if (total_partes>1):
-                final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
-                partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
                 self._concat_mp4s_no_reencode(partes_mp4_fade, final_vid)
             else:
-                final_vid = partes_mp4[0]
-            del partes_mp4_fade
-            del latents_list
-            del latents_parts
-            del partes_mp4
             self._log_gpu_memory("Fim da Geração")
             return final_vid, used_seed
         except Exception as e:
             print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
             print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
@@ -889,10 +760,10 @@ class VideoService:
         finally:
             gc.collect()
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-            self.finalize(keep_paths=[])
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
 video_generation_service = VideoService()

 from typing import List, Dict
 from pathlib import Path
 import imageio
+from PIL import Image # Import adicionado para handle_media_upload_for_dims
 import tempfile
 from huggingface_hub import hf_hub_download
 import sys
 from tools.video_encode_tool import video_encode_tool_singleton
 DEPS_DIR = Path("/data")
 LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+# CORREÇÃO: Movido run_setup para o início para garantir que seja definido antes de ser chamado.
 def run_setup():
     setup_script_path = "setup.py"
     if not os.path.exists(setup_script_path):
     except subprocess.CalledProcessError as e:
         print(f"[DEBUG] ERRO no setup.py (code {e.returncode}). Abortando.")
         sys.exit(1)
+if not LTX_VIDEO_REPO_DIR.exists():
+    print(f"[DEBUG] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Rodando setup...")
+    run_setup()
 def add_deps_to_path():
     repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
     if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
                 continue
     return results
 def calculate_new_dimensions(orig_w, orig_h, divisor=8):
     """Rescale a media size so its shorter side is 512 and both sides are multiples of ``divisor``.

     The aspect ratio is preserved before rounding; each side is then rounded
     to the nearest multiple of ``divisor`` and clamped to at least ``divisor``.

     Args:
         orig_w: Original width in pixels.
         orig_h: Original height in pixels.
         divisor: Positive value both returned sides must be divisible by.

     Returns:
         A ``(height, width)`` tuple -- note the order is height first.
         Falls back to ``(512, 512)`` when either original side is missing
         (``None``), zero, or negative.

     Raises:
         ValueError: If ``divisor`` is not positive.
     """
     if divisor <= 0:
         raise ValueError(f"divisor must be positive, got {divisor!r}")
     # A missing or non-positive side gives no usable aspect ratio, so use the safe default.
     if not orig_w or not orig_h or orig_w < 0 or orig_h < 0:
         return 512, 512
     if orig_w >= orig_h:
         # Landscape or square: pin the height and derive the width.
         aspect_ratio = orig_w / orig_h
         new_h = 512
         new_w = new_h * aspect_ratio
     else:
         # Portrait: pin the width and derive the height.
         aspect_ratio = orig_h / orig_w
         new_w = 512
         new_h = new_w * aspect_ratio
     # Snap to the nearest multiple of the divisor, never letting a side collapse to zero.
     final_w = max(divisor, int(round(new_w / divisor)) * divisor)
     final_h = max(divisor, int(round(new_h / divisor)) * divisor)
     print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Calculado: {new_w:.0f}x{new_h:.0f} -> Final (divisível por {divisor}): {final_w}x{final_h}")
     return final_h, final_w
 def handle_media_upload_for_dims(filepath, current_h, current_w):
     """Suggest (height, width) values for an uploaded image or video.

     Returns plain tuples rather than Gradio update objects. When the path is
     empty, does not exist, or the media cannot be read, the current values
     are returned unchanged as ``(current_h, current_w)``.
     """
     unchanged = (current_h, current_w)
     if not filepath:
         return unchanged
     path_text = str(filepath)
     if not os.path.exists(path_text):
         return unchanged
     try:
         looks_like_image = path_text.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))
         if looks_like_image:
             with Image.open(filepath) as picture:
                 src_w, src_h = picture.size
         else:
             # Anything that is not a known still-image extension is read as a video.
             with imageio.get_reader(filepath) as video:
                 src_w, src_h = video.get_meta_data().get('size', (current_w, current_h))
         out_h, out_w = calculate_new_dimensions(src_w, src_h)
         return out_h, out_w
     except Exception as e:
         # Best effort: report the problem and keep whatever dimensions were already set.
         print(f"Erro ao processar mídia para dimensões: {e}")
         return unchanged
 def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
     if not processes:
         return "  - Processos ativos: (nenhum)\n"
         self._apply_precision_policy()
         print(f"[DEBUG] runtime_autocast_dtype = {getattr(self, 'runtime_autocast_dtype', None)}")
         vae_manager_singleton.attach_pipeline(
             self.pipeline,
             device=self.device,
                 pass
         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
     @torch.no_grad()
     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
         if not self.latent_upsampler:
             raise ValueError("Latent Upsampler não está carregado.")
         self.latent_upsampler.to(self.device)
         self.pipeline.vae.to(self.device)
         print(f"[DEBUG-UPSAMPLE] Shape de entrada: {tuple(latents.shape)}")
         upsampled_latents = self.latent_upsampler(latents)
         upsampled_latents = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
         print(f"[DEBUG-UPSAMPLE] Shape de saída: {tuple(upsampled_latents.shape)}")
         return upsampled_latents
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
     def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
         sum_latent = latents_brutos.shape[2]
         chunks = []
         if num_latente_por_chunk >= sum_latent:
+            return [latents_brutos.clone().detach()] # CORREÇÃO: Retornar uma lista e clonar
+        # CORREÇÃO: Lógica de chunking simplificada e corrigida para evitar estouro de índice
+        start = 0
+        while start < sum_latent:
+            end = min(start + num_latente_por_chunk, sum_latent)
+            # Para o overlap, pegamos um pouco do chunk anterior, exceto para o primeiro
+            overlap_start = max(0, start - overlap)
+            # O chunk a ser processado vai de `overlap_start` até `end`
+            # mas o chunk "real" para junção posterior seria de `start` a `end`
+            # A lógica atual já faz um overlap simples, vamos refinar
+            effective_end = min(start + num_latente_por_chunk, sum_latent)
+            chunk = latents_brutos[:, :, start:effective_end, :, :].clone().detach()
+            # Adiciona overlap no final se não for o último chunk
+            if effective_end < sum_latent:
+                overlap_end = min(effective_end + overlap, sum_latent)
+                chunk = latents_brutos[:, :, start:overlap_end, :, :].clone().detach()
+            print(f"[DEBUG] Chunk: start={start}, end={chunk.shape[2]}, total_latents={sum_latent}")
+            chunks.append(chunk)
+            # Avança para o próximo chunk
+            if start + num_latente_por_chunk >= sum_latent:
+                break
+            start += num_latente_por_chunk
         return chunks
     def _get_total_frames(self, video_path: str) -> int:
         """Return the frame count of the first video stream in ``video_path``.

         Runs ``ffprobe`` with ``-count_frames`` and parses the ``nb_read_frames``
         value it prints for stream ``v:0``.

         Raises:
             subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
             ValueError: If ffprobe's output is not an integer (for example empty or ``N/A``).
         """
         cmd = [
             "ffprobe", "-v", "error", "-select_streams", "v:0", "-count_frames",
             "-show_entries", "stream=nb_read_frames", "-of", "default=nokey=1:noprint_wrappers=1", video_path
         ]
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
         raw_count = result.stdout.strip()
         try:
             return int(raw_count)
         except ValueError as err:
             # Same exception type as before, but it now names the file and the offending output.
             raise ValueError(
                 f"ffprobe did not report a frame count for {video_path!r}: {raw_count!r}"
             ) from err
     def _gerar_lista_com_transicoes(self, pasta: str, video_paths: list[str], crossfade_frames: int = 8) -> list[str]:
         """Build the playback-ordered list of trimmed clips and crossfade transitions.

         For every pair of consecutive videos, the last ``crossfade_frames`` frames
         of the earlier clip are blended with the first ``crossfade_frames`` frames
         of the later clip into ``transicao_<i>_<i+1>.mp4``. Those frames are cut
         from the clips themselves, which are written as ``podado_<i>.mp4``.

         Args:
             pasta: Directory where all intermediate files are written.
             video_paths: Source clips in playback order.
             crossfade_frames: Number of frames blended at each junction.

         Returns:
             Paths in playback order (trimmed clip, transition, trimmed clip, ...).
             A clip too short to keep any frame after trimming is left out, but
             its transitions are still produced. ``video_paths`` itself is returned
             when there is at most one clip or ``crossfade_frames`` is not positive.

         Raises:
             subprocess.CalledProcessError: If an ffmpeg/ffprobe call fails.
         """
         # Nothing to blend: a single clip, or a zero-length fade (which would
         # also put a division by zero into the blend expression).
         if len(video_paths) <= 1 or crossfade_frames <= 0:
             return video_paths

         ffmpeg_base = ['ffmpeg', '-y', '-hide_banner', '-loglevel', 'error']
         last_index = len(video_paths) - 1
         # Probe every clip once; the counts are needed by both stages below.
         frame_counts = [self._get_total_frames(path) for path in video_paths]

         # Stage 1: trim the frames that will be covered by the transitions.
         # `trimmed` stays index-aligned with `video_paths`; None marks a clip
         # that has no frames left after trimming.
         trimmed = []
         for i, (base, total_frames) in enumerate(zip(video_paths, frame_counts)):
             start_frame = crossfade_frames if i > 0 else 0
             end_frame = total_frames - crossfade_frames if i < last_index else total_frames
             if start_frame >= end_frame:
                 trimmed.append(None)
                 continue
             video_podado = os.path.join(pasta, f"podado_{i}.mp4")
             cmd = [
                 *ffmpeg_base, '-i', base,
                 '-vf', f'trim=start_frame={start_frame}:end_frame={end_frame},setpts=PTS-STARTPTS',
                 '-an', video_podado,
             ]
             subprocess.run(cmd, check=True)
             trimmed.append(video_podado)

         # Stage 2: build each transition and interleave it with the trimmed clips.
         lista_final = [] if trimmed[0] is None else [trimmed[0]]
         for i in range(last_index):
             video_anterior = video_paths[i]
             video_seguinte = video_paths[i + 1]

             # Tail of the earlier clip (clamped so short clips never yield a
             # negative start frame).
             fade_fim_path = os.path.join(pasta, f"fade_fim_{i}.mp4")
             tail_start = max(0, frame_counts[i] - crossfade_frames)
             cmd_fim = [
                 *ffmpeg_base, '-i', video_anterior,
                 '-vf', f'trim=start_frame={tail_start},setpts=PTS-STARTPTS',
                 '-an', fade_fim_path,
             ]
             subprocess.run(cmd_fim, check=True)

             # Head of the later clip.
             fade_ini_path = os.path.join(pasta, f"fade_ini_{i+1}.mp4")
             cmd_ini = [
                 *ffmpeg_base, '-i', video_seguinte,
                 '-vf', f'trim=end_frame={crossfade_frames},setpts=PTS-STARTPTS',
                 '-an', fade_ini_path,
             ]
             subprocess.run(cmd_ini, check=True)

             # Linear crossfade driven by the frame index N. The blend filter's T
             # variable is time in seconds, so dividing it by a frame count would
             # leave the mix almost entirely on the first input.
             transicao_path = os.path.join(pasta, f"transicao_{i}_{i+1}.mp4")
             blend_expr = f"A*(1-N/{crossfade_frames})+B*(N/{crossfade_frames})"
             cmd_blend = [
                 *ffmpeg_base,
                 '-i', fade_fim_path, '-i', fade_ini_path,
                 '-filter_complex', f"[0:v][1:v]blend=all_expr='{blend_expr}',format=yuv420p",
                 '-frames:v', str(crossfade_frames), transicao_path,
             ]
             subprocess.run(cmd_blend, check=True)

             lista_final.append(transicao_path)
             if trimmed[i + 1] is not None:
                 lista_final.append(trimmed[i + 1])
         return lista_final
     def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
         # Join the MP4 files in `mp4_list`, in order, into `out_path`.
         # Raises ValueError when `mp4_list` is empty. A single-element list is
         # not concatenated: the file is moved to `out_path` with shutil.move.
+        if not mp4_list:
+            raise ValueError("A lista de MP4s para concatenar está vazia.")
+        # If there is only one video, just copy/move it
+        if len(mp4_list) == 1:
+            shutil.move(mp4_list[0], out_path)
+            print(f"[DEBUG] Apenas um vídeo, movido para: {out_path}")
+            return
         # Write a temporary list file with one `file '<absolute path>'` line per
         # input. NOTE(review): presumably consumed by ffmpeg's concat demuxer; the
         # invocation and the `try` matching the `except` below are not visible in
         # this excerpt — confirm against the full file.
         with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
             for mp4 in mp4_list:
                 f.write(f"file '{os.path.abspath(mp4)}'\n")
             except Exception:
                 pass
     def generate(
         self,
         prompt,
         height=512,
         width=704,
         duration=2.0,
+        frames_to_use=9, # Unused parameter, kept for consistency
         seed=42,
         randomize_seed=True,
         guidance_scale=3.0,
         improve_texture=True,
         progress_callback=None,
+        external_decode=True, # Unused parameter, but kept
     ):
         # Generate a video for `prompt` and return `(final_vid, used_seed)`, where
         # `final_vid` is the path of the MP4 written under /app/output.
         #
         # Flow visible here:
         #   1. Produce latents. With `improve_texture` the route is a downscaled
         #      first pass, a latent upscale, and a second pass run chunk by chunk;
         #      otherwise a single pipeline call.
         #   2. Decode every latent partition through the VAE manager and encode it
         #      to an MP4 inside a temporary directory.
         #   3. Crossfade + concatenate the parts (or move the single part) into
         #      the results directory.
         #
         # NOTE(review): this listing is a diff excerpt with elided lines. Names
         # such as `mode`, `start_image_filepath`, `negative_prompt`,
         # `height_padded`, `width_padded`, `actual_num_frames`, `generator`,
         # `conditioning_items` and `downscaled_height` are used below but are not
         # assigned in the visible lines — confirm they are bound in the full file.
         t_all = time.perf_counter()
+        print(f"[DEBUG] generate() begin mode={mode} improve_texture={improve_texture}")
         if self.device == "cuda":
             torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
         self._log_gpu_memory("Início da Geração")
         if mode == "image-to-video" and not start_image_filepath:
             raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
         used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
             print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
         call_kwargs = {
+            "prompt": prompt, "negative_prompt": negative_prompt, "height": height_padded, "width": width_padded,
+            "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": generator, "output_type": "latent",
+            "conditioning_items": conditioning_items if conditioning_items else None, "media_items": None,
+            "decode_timestep": self.config["decode_timestep"], "decode_noise_scale": self.config["decode_noise_scale"],
+            "stochastic_sampling": self.config["stochastic_sampling"], "image_cond_noise_scale": 0.01, "is_video": True,
+            "vae_per_channel_normalize": True, "mixed_precision": (self.config["precision"] == "mixed_precision"),
+            "offload_to_cpu": False, "enhance_prompt": False, "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
         }
+        # FIX: initialize the lists
+        latents_list = []
         temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
         results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
         try:
             if improve_texture:
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
                     if not self.latent_upsampler:
                         raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")
                     print("\n--- INICIANDO ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---")
                     t_pass1 = time.perf_counter()
                     first_pass_config = self.config.get("first_pass", {}).copy()
                     first_pass_config.pop("num_inference_steps", None)
                     downscale_factor = self.config.get("downscale_factor", 0.6666666)
+                    vae_scale_factor = self.pipeline.vae_scale_factor
                     x_width = int(width_padded * downscale_factor)
                     downscaled_width = x_width - (x_width % vae_scale_factor)
                     x_height = int(height_padded * downscale_factor)
                     first_pass_kwargs = call_kwargs.copy()
                     first_pass_kwargs.update({
+                        "output_type": "latent", "width": downscaled_width, "height": downscaled_height,
+                        "guidance_scale": float(guidance_scale), **first_pass_config
                     })
                     print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
+                    # FIX: use self.pipeline, not the deleted 'pipeline' variable
                     latents = self.pipeline(**first_pass_kwargs).images
                     log_tensor_info(latents, "Latentes Base (First Pass)")
                     print(f"[DEBUG] First Pass concluída em {time.perf_counter() - t_pass1:.2f}s")
                 with ctx:
                     print("\n--- INICIANDO ETAPA 2: UPSCALE DOS LATENTES ---")
                     t_upscale = time.perf_counter()
                     upsampled_latents = self._upsample_latents_internal(latents)
                     upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents)
                     print(f"[DEBUG] Upscale de Latentes concluído em {time.perf_counter() - t_upscale:.2f}s")
+                    # FIX: keep the original latents for AdaIN and pass the upscaled latents to the second pass
+                    reference_latents_cpu = latents.detach().to("cpu", non_blocking=True)
+                    latents_to_refine = upsampled_latents
+                    del upsampled_latents; del latents; gc.collect(); torch.cuda.empty_cache()
+                # FIX: chunking logic for the second pass
+                latents_parts = self._dividir_latentes_por_tamanho(latents_to_refine, 32, 8) # Example: chunks of 32 frames with 8 of overlap
+                del latents_to_refine
+                with ctx:
+                    for i, latents_chunk in enumerate(latents_parts):
+                        print(f"\n--- INICIANDO ETAPA 3.{i+1}: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
+                        # FIX: AdaIN needs reference latents with the same H/W, which is not the case here.
+                        # We could apply AdaIN with the chunk itself for normalization, or skip it. Skipping for simplicity.
                         second_pass_config = self.config.get("second_pass", {}).copy()
                         second_pass_config.pop("num_inference_steps", None)
+                        # The second-pass size must be the size of the input latent (after upscale)
+                        second_pass_height, second_pass_width = latents_chunk.shape[3] * 8, latents_chunk.shape[4] * 8
                         print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
                         t_pass2 = time.perf_counter()
                         second_pass_kwargs = call_kwargs.copy()
                         second_pass_kwargs.update({
+                           "output_type": "latent", "width": second_pass_width, "height": second_pass_height,
+                           "latents": latents_chunk.to(self.device), # Move the chunk to the GPU
                             "guidance_scale": float(guidance_scale),
+                           "num_frames": latents_chunk.shape[2], # Use the chunk's number of frames
                             **second_pass_config
                         })
+                        print(f"[DEBUG] Second Pass: Refinando chunk {i+1}/{len(latents_parts)}...")
                         final_latents = self.pipeline(**second_pass_kwargs).images
                         log_tensor_info(final_latents, "Latentes Finais (Pós-Second Pass)")
                         print(f"[DEBUG] Second part Pass concluída em {time.perf_counter() - t_pass2:.2f}s")
                         latents_cpu = final_latents.detach().to("cpu", non_blocking=True)
                         latents_list.append(latents_cpu)
+                        del final_latents; del latents_chunk; gc.collect(); torch.cuda.empty_cache()
             else:
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
                     print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
                     t_single = time.perf_counter()
                     single_pass_call_kwargs = call_kwargs.copy()
+                    # FIX: `pipeline_instance` does not exist, use `self.pipeline`.
+                    latents_single_pass = self.pipeline(**single_pass_call_kwargs).images
                     log_tensor_info(latents_single_pass, "Latentes Finais (Etapa Única)")
                     print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
                     latents_cpu = latents_single_pass.detach().to("cpu", non_blocking=True)
+                    latents_list.append(latents_cpu) # FIX: this must be latents_cpu, not latents_single_pass
                     del latents_single_pass; gc.collect(); torch.cuda.empty_cache()
+            # --- FINAL STAGE: DECODING AND MP4 ENCODING ---
+            print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")
+            partes_mp4 = []
+            for i, latents in enumerate(latents_list):
+                print(f"[DEBUG] Decodificando partição {i+1}/{len(latents_list)}: {tuple(latents.shape)}")
+                output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{i}.mp4")
+                pixel_tensor = vae_manager_singleton.decode(
+                    latents.to(self.device, non_blocking=True),
+                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                )
+                log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
+                video_encode_tool_singleton.save_video_from_tensor(
+                   pixel_tensor, output_video_path, fps=call_kwargs["frame_rate"], progress_callback=progress_callback
+                )
+                partes_mp4.append(output_video_path)
+                del pixel_tensor; del latents; gc.collect(); torch.cuda.empty_cache()
+            final_vid = os.path.join(results_dir, f"final_video_{used_seed}.mp4")
+            if len(partes_mp4) > 1:
+                # The _gerar_lista_com_transicoes function is complex; using a direct concatenation as a robust fallback.
+                # To use the transition, the overlap logic in the latent split must be perfect.
                 # NOTE(review): despite the two comments above, the code below still
                 # routes the parts through _gerar_lista_com_transicoes before
                 # concatenating.
+                print("[DEBUG] Múltiplas partes geradas, concatenando...")
+                partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=temp_dir, video_paths=partes_mp4, crossfade_frames=8)
                 self._concat_mp4s_no_reencode(partes_mp4_fade, final_vid)
             else:
+                shutil.move(partes_mp4[0], final_vid)
             self._log_gpu_memory("Fim da Geração")
             return final_vid, used_seed
         except Exception as e:
             # NOTE(review): the traceback is printed but the exception is not
             # re-raised, so on failure this method falls through and returns None
             # instead of the (path, seed) tuple.
             print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
             print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
         finally:
             gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.ipc_collect()
+            self.finalize(keep_paths=[]) # The final result has already been moved
 # Module-level instance: VideoService is constructed here, at import time.
 # NOTE(review): per the log message below, model loading is expected to start
 # inside the constructor — confirm against VideoService.__init__.
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
 video_generation_service = VideoService()