Eueuiaa commited on
Commit
2701e1f
·
verified ·
1 Parent(s): 07293af

Update api/ltx_server_refactored.py

Browse files
Files changed (1) hide show
  1. api/ltx_server_refactored.py +313 -344
api/ltx_server_refactored.py CHANGED
@@ -1,425 +1,394 @@
1
- # ltx_server.py — VideoService (beta 1.3 - Fiel ao Original)
2
- # DESCRIÇÃO:
3
- # - Versão completa e fiel ao código original, restaurando toda a lógica de múltiplos passes,
4
- # chunking, e concatenação que foi previamente omitida.
5
- # - Inclui a função 'generate_low' para o primeiro passe de geração.
6
- # - Mantém a divisão de latentes (`_dividir_latentes_por_tamanho`) e a montagem de vídeo
7
- # com transições (`_gerar_lista_com_transicoes`).
8
- # - Corrigido para ser funcional e completo, sem omissões deliberadas.
9
 
10
- # --- 0. WARNINGS, IMPORTS E CONFIGURAÇÃO DE AMBIENTE ---
11
  import warnings
12
  warnings.filterwarnings("ignore", category=UserWarning)
13
  warnings.filterwarnings("ignore", category=FutureWarning)
14
- from huggingface_hub import logging as hf_logging, hf_hub_download
15
- hf_logging.set_verbosity_error()
16
-
17
- import os, sys, subprocess, shlex, tempfile, gc, shutil, contextlib, time, traceback, json, yaml, random
18
- from typing import List, Dict
19
- from pathlib import Path
20
-
 
 
21
  import torch
22
- import torch.nn.functional as F
23
  import numpy as np
 
 
 
 
 
 
24
  import imageio
25
  from PIL import Image
 
 
 
 
 
 
 
 
 
26
  from einops import rearrange
27
-
28
- # --- Constantes e Configuração de Ambiente ---
29
- LTXV_DEBUG = os.getenv("LTXV_DEBUG", "1") == "1"
30
- LTXV_FRAME_LOG_EVERY = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
31
  DEPS_DIR = Path("/data")
32
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
33
 
34
- # --- 1. SETUP E GERENCIAMENTO DE DEPENDÊNCIAS ---
 
35
  def run_setup():
36
  setup_script_path = "setup.py"
37
  if not os.path.exists(setup_script_path):
38
- print("[DEBUG] 'setup.py' não encontrado. Pulando dependências.")
39
  return
40
  try:
41
- print("[DEBUG] Executando setup.py para instalar dependências...")
42
- subprocess.run([sys.executable, setup_script_path], check=True, capture_output=True, text=True)
43
- print("[DEBUG] Setup concluído.")
44
  except subprocess.CalledProcessError as e:
45
- print(f"[ERROR] Falha crítica ao executar setup.py: {e.stderr}")
46
  sys.exit(1)
47
-
 
 
48
  def add_deps_to_path():
49
  repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
50
- if repo_path not in sys.path:
51
  sys.path.insert(0, repo_path)
52
- print(f"[DEBUG] Repositório LTX-Video adicionado ao sys.path.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- if not LTX_VIDEO_REPO_DIR.exists():
55
- run_setup()
56
  add_deps_to_path()
57
-
58
- from managers.vae_manager import vae_manager_singleton
59
- from tools.video_encode_tool import video_encode_tool_singleton
60
- from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline, adain_filter_latent
61
  from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
62
  from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
 
63
  from api.ltx.inference import (
64
- create_ltx_video_pipeline, create_latent_upsampler,
65
- load_image_to_tensor_with_resize_and_crop, seed_everething,
66
- calculate_padding, load_media_file
 
67
  )
68
 
69
- # --- 2. FUNÇÕES UTILITÁRIAS ---
70
- def calculate_new_dimensions(orig_w, orig_h, target_area=512*768, divisor=8):
71
- if orig_w <= 0 or orig_h <= 0: return 512, 768
72
- aspect_ratio = orig_w / orig_h
73
- new_h = int((target_area / aspect_ratio)**0.5)
74
- new_w = int(new_h * aspect_ratio)
75
- final_w = max(divisor, round(new_w / divisor) * divisor)
76
- final_h = max(divisor, round(new_h / divisor) * divisor)
77
- return final_h, final_w
78
-
79
- def log_tensor_info(tensor, name="Tensor"):
80
- if not LTXV_DEBUG: return
81
- if not isinstance(tensor, torch.Tensor): print(f"\n[INFO] '{name}' não é um tensor."); return
82
- print(f"\n--- Tensor: {name} ---\n - Shape: {tuple(tensor.shape)}\n - Dtype: {tensor.dtype}\n - Device: {tensor.device}")
83
- if tensor.numel() > 0:
84
- try: print(f" - Stats: Min={tensor.min().item():.4f}, Max={tensor.max().item():.4f}, Mean={tensor.mean().item():.4f}")
85
- except: pass
86
- print("------------------------------------------\n")
87
-
88
- # --- 3. CLASSE PRINCIPAL DO SERVIÇO DE VÍDEO ---
89
  class VideoService:
90
  def __init__(self):
91
  t0 = time.perf_counter()
92
- print("[INFO] Inicializando VideoService...")
93
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
94
  self.config = self._load_config()
95
- print(f"[INFO] Config: {self.config.get('precision')}, Sampler: {self.config.get('sampler')}, Device: {self.device}")
96
- self._tmp_dirs, self._tmp_files = set(), set()
97
  self.pipeline, self.latent_upsampler = self._load_models()
98
  self.pipeline.to(self.device)
99
- if self.latent_upsampler: self.latent_upsampler.to(self.device)
 
100
  self._apply_precision_policy()
101
- vae_manager_singleton.attach_pipeline(self.pipeline, device=self.device, autocast_dtype=self.runtime_autocast_dtype)
102
- if self.device == "cuda": torch.cuda.empty_cache()
103
- print(f"[SUCCESS] VideoService pronto. ({time.perf_counter()-t0:.2f}s)")
 
 
 
 
104
 
105
  def _load_config(self):
106
- # ... (Implementação completa, sem omissões)
107
  base = LTX_VIDEO_REPO_DIR / "configs"
108
- candidates = [
109
- base / "ltxv-13b-0.9.8-dev-fp8.yaml",
110
- base / "ltxv-13b-0.9.8-distilled-fp8.yaml",
111
- base / "ltxv-13b-0.9.8-distilled.yaml",
112
- ]
113
- for cfg_path in candidates:
114
- if cfg_path.exists():
115
- with open(cfg_path, "r") as file: return yaml.safe_load(file)
116
- raise FileNotFoundError(f"Nenhum arquivo de config YAML encontrado em {base}.")
117
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def _load_models(self):
119
- # ... (Implementação completa, sem omissões)
120
  t0 = time.perf_counter()
121
- repo_id = self.config.get("repo_id", "Lightricks/LTX-Video")
122
-
123
- ckpt_path = hf_hub_download(repo_id=repo_id, filename=self.config["checkpoint_path"], token=os.getenv("HF_TOKEN"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  pipeline = create_ltx_video_pipeline(
125
- ckpt_path=ckpt_path,
126
  precision=self.config["precision"],
127
  text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
128
  sampler=self.config["sampler"],
129
- device="cpu"
 
 
 
130
  )
131
-
 
132
  latent_upsampler = None
133
  if self.config.get("spatial_upscaler_model_path"):
134
- upscaler_path = hf_hub_download(repo_id=repo_id, filename=self.config["spatial_upscaler_model_path"], token=os.getenv("HF_TOKEN"))
135
- latent_upsampler = create_latent_upsampler(upscaler_path, device="cpu")
136
-
137
- print(f"[DEBUG] Modelos carregados em {time.perf_counter() - t0:.2f}s")
138
  return pipeline, latent_upsampler
139
 
140
  def _apply_precision_policy(self):
141
  prec = str(self.config.get("precision", "")).lower()
142
  self.runtime_autocast_dtype = torch.float32
143
- if "bfloat16" in prec or "fp8" in prec: self.runtime_autocast_dtype = torch.bfloat16
144
- elif "mixed_precision" in prec or "fp16" in prec: self.runtime_autocast_dtype = torch.float16
145
- print(f"[DEBUG] Dtype para Autocast: {self.runtime_autocast_dtype}")
146
-
147
- def finalize(self, keep_paths=None, clear_gpu=True):
148
- # ... (Implementação robusta de limpeza)
149
- print("[INFO] Finalize: iniciando limpeza de recursos...")
150
- keep = set(keep_paths or [])
151
- for f in list(self._tmp_files):
152
- try:
153
- if f not in keep and os.path.isfile(f): os.remove(f)
154
- except Exception as e: print(f"[WARN] Falha ao remover tmp file {f}: {e}")
155
- finally: self._tmp_files.discard(f)
156
- for d in list(self._tmp_dirs):
157
- try:
158
- if d not in keep and os.path.isdir(d): shutil.rmtree(d, ignore_errors=True)
159
- except Exception as e: print(f"[WARN] Falha ao remover tmp dir {d}: {e}")
160
- finally: self._tmp_dirs.discard(d)
161
- gc.collect()
162
- if clear_gpu and self.device == "cuda":
163
- try:
164
- torch.cuda.empty_cache(); torch.cuda.ipc_collect()
165
- except Exception as e: print(f"[ERROR] Falha na limpeza da GPU: {e}")
166
-
167
- # --- LÓGICA DE GERAÇÃO E CHUNKING RESTAURADA ---
168
 
169
- def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
170
- total_latentes = latents_brutos.shape[2]
171
- if num_latente_por_chunk >= total_latentes:
172
- return [latents_brutos]
173
-
174
- chunks = []
175
- start = 0
176
- while start < total_latentes:
177
- end = min(start + num_latente_por_chunk, total_latentes)
178
- # Adiciona overlap, exceto no último chunk
179
- end_with_overlap = min(end + overlap, total_latentes) if end < total_latentes else end
180
- chunk = latents_brutos[:, :, start:end_with_overlap, :, :].clone().detach()
181
- chunks.append(chunk)
182
- if LTXV_DEBUG: print(f"[DEBUG] Chunk criado: frames {start} a {end_with_overlap}")
183
- start = end
184
- return chunks
185
-
186
- def _get_total_frames(self, video_path: str) -> int:
187
- cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-count_frames", "-show_entries", "stream=nb_read_frames", "-of", "default=nokey=1:noprint_wrappers=1", str(video_path)]
188
- try:
189
- result = subprocess.run(cmd, capture_output=True, text=True, check=True)
190
- return int(result.stdout.strip())
191
- except (subprocess.CalledProcessError, ValueError) as e:
192
- print(f"[ERROR] FFprobe falhou para {video_path}: {e}")
193
- return 0
194
 
195
- def _gerar_lista_com_transicoes(self, pasta: str, video_paths: list[str], crossfade_frames: int = 8) -> list[str]:
196
- if len(video_paths) <= 1: return video_paths
197
-
198
- print("[DEBUG] Iniciando processo de concatenação com transições...")
199
- arquivos_para_concatenar = []
200
- temp_blend_files = []
201
-
202
- # 1. Trata o primeiro vídeo (só corta o final)
203
- primeiro_video = video_paths[0]
204
- total_frames_primeiro = self._get_total_frames(primeiro_video)
205
- path_primeiro_cortado = os.path.join(pasta, "0_head.mp4")
206
- cmd_primeiro = f'ffmpeg -y -hide_banner -loglevel error -i "{primeiro_video}" -vf "trim=end_frame={total_frames_primeiro - crossfade_frames},setpts=PTS-STARTPTS" -an "{path_primeiro_cortado}"'
207
- subprocess.run(cmd_primeiro, shell=True, check=True)
208
- arquivos_para_concatenar.append(path_primeiro_cortado)
209
-
210
- # 2. Itera pelos vídeos intermediários, criando blends
211
- for i in range(len(video_paths) - 1):
212
- video_A_path = video_paths[i]
213
- video_B_path = video_paths[i+1]
214
-
215
- total_frames_A = self._get_total_frames(video_A_path)
216
-
217
- # Extrai cauda de A e cabeça de B
218
- cauda_A = os.path.join(pasta, f"{i}_tail_A.mp4")
219
- cabeca_B = os.path.join(pasta, f"{i+1}_head_B.mp4")
220
- cmd_cauda_A = f'ffmpeg -y -hide_banner -loglevel error -i "{video_A_path}" -vf "trim=start_frame={total_frames_A - crossfade_frames},setpts=PTS-STARTPTS" -an "{cauda_A}"'
221
- cmd_cabeca_B = f'ffmpeg -y -hide_banner -loglevel error -i "{video_B_path}" -vf "trim=end_frame={crossfade_frames},setpts=PTS-STARTPTS" -an "{cabeca_B}"'
222
- subprocess.run(cmd_cauda_A, shell=True, check=True)
223
- subprocess.run(cmd_cabeca_B, shell=True, check=True)
224
-
225
- # Cria o blend
226
- blend_path = os.path.join(pasta, f"blend_{i}_{i+1}.mp4")
227
- cmd_blend = f'ffmpeg -y -hide_banner -loglevel error -i "{cauda_A}" -i "{cabeca_B}" -filter_complex "[0:v][1:v]blend=all_expr=\'A*(1-T/{crossfade_frames})+B*(T/{crossfade_frames})\',format=yuv420p" -an "{blend_path}"'
228
- subprocess.run(cmd_blend, shell=True, check=True)
229
- arquivos_para_concatenar.append(blend_path)
230
- temp_blend_files.extend([cauda_A, cabeca_B])
231
-
232
- # Pega o meio do vídeo B (se não for o último)
233
- if i + 1 < len(video_paths) - 1:
234
- meio_B = os.path.join(pasta, f"{i+1}_body.mp4")
235
- total_frames_B = self._get_total_frames(video_B_path)
236
- cmd_meio_B = f'ffmpeg -y -hide_banner -loglevel error -i "{video_B_path}" -vf "trim=start_frame={crossfade_frames}:end_frame={total_frames_B - crossfade_frames},setpts=PTS-STARTPTS" -an "{meio_B}"'
237
- subprocess.run(cmd_meio_B, shell=True, check=True)
238
- arquivos_para_concatenar.append(meio_B)
239
 
240
- # 3. Trata o último vídeo (só corta o começo)
241
- ultimo_video = video_paths[-1]
242
- path_ultimo_cortado = os.path.join(pasta, f"{len(video_paths)-1}_tail.mp4")
243
- cmd_ultimo = f'ffmpeg -y -hide_banner -loglevel error -i "{ultimo_video}" -vf "trim=start_frame={crossfade_frames},setpts=PTS-STARTPTS" -an "{path_ultimo_cortado}"'
244
- subprocess.run(cmd_ultimo, shell=True, check=True)
245
- arquivos_para_concatenar.append(path_ultimo_cortado)
246
-
247
- # Limpa arquivos intermediários de blend
248
- for f in temp_blend_files: os.remove(f)
249
 
250
- return arquivos_para_concatenar
251
-
252
  def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
253
- if not mp4_list: raise ValueError("Lista de MP4s para concatenar está vazia.")
254
  if len(mp4_list) == 1:
255
  shutil.move(mp4_list[0], out_path)
256
  return
257
-
258
- with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt", dir=os.path.dirname(out_path)) as f:
259
  for mp4 in mp4_list:
260
  f.write(f"file '{os.path.abspath(mp4)}'\n")
261
  list_path = f.name
262
-
263
  cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
264
  try:
265
- subprocess.run(shlex.split(cmd), check=True, capture_output=True, text=True)
266
- except subprocess.CalledProcessError as e:
267
- print(f"[ERROR] Concatenação falhou: {e.stderr}")
268
- raise
269
  finally:
270
  os.remove(list_path)
271
 
272
- # --- FUNÇÃO GENERATE_LOW RESTAURADA ---
273
- @torch.no_grad()
274
- def generate_low(self, call_kwargs, guidance_scale, width, height):
275
- first_pass_config = self.config.get("first_pass", {}).copy()
276
- first_pass_config.pop("num_inference_steps", None) # Evita duplicidade
277
-
278
- first_pass_kwargs = call_kwargs.copy()
279
- first_pass_kwargs.update({
280
- "output_type": "latent",
281
- "width": width,
282
- "height": height,
283
- "guidance_scale": float(guidance_scale),
284
- **first_pass_config
285
- })
286
-
287
- print(f"[DEBUG] First Pass: Gerando em {width}x{height}...")
288
- latents = self.pipeline(**first_pass_kwargs).images
289
- log_tensor_info(latents, "Latentes Base (First Pass)")
290
-
291
- partes_mp4 = [latents]
292
-
293
- if len(partes_mp4) > 1:
294
- print("[INFO] Múltiplos chunks gerados. Concatenando com transições...")
295
- final_output_path = os.path.join(results_dir, f"final_{used_seed}.mp4")
296
- partes_para_concatenar = self._gerar_lista_com_transicoes(temp_dir, partes_mp4, crossfade_frames=8)
297
- self._concat_mp4s_no_reencode(partes_para_concatenar, final_output_path)
298
- elif partes_mp4:
299
- print("[INFO] Apenas um chunk gerado. Movendo para o destino final.")
300
- final_output_path = os.path.join(results_dir, f"final_{used_seed}.mp4")
301
- shutil.move(partes_mp4[0], final_output_path)
302
- else:
303
- raise RuntimeError("Nenhum vídeo foi gerado.")
304
-
305
- return final_output_path
306
 
307
  # ==============================================================================
308
- # --- FUNÇÃO DE GERAÇÃO PRINCIPAL (COMPLETA) ---
309
  # ==============================================================================
310
- def generate(self, prompt: str, **kwargs):
311
- final_output_path, used_seed = None, None
312
- try:
313
- t_all = time.perf_counter()
314
- print(f"\n{'='*20} INICIANDO NOVA GERAÇÃO {'='*20}")
315
- if self.device == "cuda": torch.cuda.empty_cache()
316
-
317
- # --- 1. Setup da Geração ---
318
- negative_prompt = kwargs.get("negative_prompt", "")
319
- mode = kwargs.get("mode", "text-to-video")
320
- height = kwargs.get("height", 512)
321
- width = kwargs.get("width", 704)
322
- duration = kwargs.get("duration", 2.0)
323
- guidance_scale = kwargs.get("guidance_scale", 3.0)
324
- improve_texture = kwargs.get("improve_texture", True)
325
-
326
- used_seed = random.randint(0, 2**32 - 1) if kwargs.get("randomize_seed", True) else int(kwargs.get("seed", 42))
327
- seed_everething(used_seed)
328
- print(f"[INFO] Geração com Seed: {used_seed}")
329
 
330
- FPS = 24.0
331
- actual_num_frames = max(9, int(round(duration * FPS) / 8) * 8 + 1)
332
- height_padded = ((height - 1) // 8 + 1) * 8
333
- width_padded = ((width - 1) // 8 + 1) * 8
334
- padding_values = calculate_padding(height, width, height_padded, width_padded)
335
- generator = torch.Generator(device=self.device).manual_seed(used_seed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
- temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._tmp_dirs.add(temp_dir)
338
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
 
 
 
 
 
339
 
340
- # --- 2. Condicionamento ---
341
- conditioning_items = []
342
- # (Adicionar lógica de condicionamento de imagem aqui se necessário)
343
-
344
- # --- 3. Argumentos da Pipeline ---
345
- call_kwargs = { "prompt": prompt, "negative_prompt": negative_prompt, "height": height_padded, "width": width_padded, "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": generator, "output_type": "latent", "conditioning_items": conditioning_items or None }
346
-
347
- # --- 4. Geração dos Latentes (com lógica de 2 passes restaurada) ---
348
- latents_list = []
349
- ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
350
-
351
- with ctx:
352
- if improve_texture:
353
- # ETAPA 1: Geração Base com generate_low
354
- downscale_factor = self.config.get("downscale_factor", 0.66666)
355
- low_res_area = (width * height) * (downscale_factor**2)
356
- downscaled_h, downscaled_w = calculate_new_dimensions(width, height, target_area=low_res_area)
357
-
358
- base_latents = self.generate_low(call_kwargs, guidance_scale, downscaled_w, downscaled_h)
359
-
360
- # ETAPA 2: Upsample
361
- upsampled_latents = self._upsample_latents_internal(base_latents)
362
- upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=base_latents)
363
- del base_latents; gc.collect(); torch.cuda.empty_cache()
364
-
365
- # ETAPA 3: Refinamento (Second Pass)
366
- second_pass_config = self.config.get("second_pass", {}).copy()
367
- second_pass_kwargs = call_kwargs.copy()
368
- second_pass_kwargs.update({
369
- "latents": upsampled_latents, "guidance_scale": guidance_scale, **second_pass_config
370
- })
371
- final_latents = self.pipeline(**second_pass_kwargs).images
372
- latents_list.append(final_latents.detach().cpu())
373
- del final_latents, upsampled_latents; gc.collect(); torch.cuda.empty_cache()
374
  else:
375
- # Geração de Passe Único
376
- single_pass_latents = self.pipeline(**call_kwargs).images
377
- latents_list.append(single_pass_latents.detach().cpu())
378
- del single_pass_latents; gc.collect(); torch.cuda.empty_cache()
379
 
380
- # --- 5. Decodificação em Chunks e Concatenação ---
381
- partes_mp4 = []
382
- chunk_count = 0
383
- for i, latents_cpu in enumerate(latents_list):
384
- # Dividir os latentes em partes menores para decodificar
385
- latents_parts = self._dividir_latentes_por_tamanho(latents_cpu, 16, 8)
386
-
387
- for chunk in latents_parts:
388
- chunk_count += 1
389
- print(f"[INFO] Decodificando chunk {chunk_count}/{len(latents_parts) * len(latents_list)}...")
390
- pixel_tensor = vae_manager_singleton.decode(chunk.to(self.device), decode_timestep=self.config.get("decode_timestep", 0.05))
391
-
392
- chunk_video_path = os.path.join(temp_dir, f"part_{chunk_count}.mp4")
393
- video_encode_tool_singleton.save_video_from_tensor(pixel_tensor, chunk_video_path, fps=FPS)
394
-
395
- partes_mp4.append(chunk_video_path)
396
- del pixel_tensor, chunk; gc.collect(); torch.cuda.empty_cache()
397
 
398
- # --- 6. Montagem Final do Vídeo ---
399
- if len(partes_mp4) > 1:
400
- print("[INFO] Múltiplos chunks gerados. Concatenando com transições...")
401
- final_output_path = os.path.join(results_dir, f"final_{used_seed}.mp4")
402
- partes_para_concatenar = self._gerar_lista_com_transicoes(temp_dir, partes_mp4, crossfade_frames=8)
403
- self._concat_mp4s_no_reencode(partes_para_concatenar, final_output_path)
404
- elif partes_mp4:
405
- print("[INFO] Apenas um chunk gerado. Movendo para o destino final.")
406
- final_output_path = os.path.join(results_dir, f"final_{used_seed}.mp4")
407
- shutil.move(partes_mp4[0], final_output_path)
408
- else:
409
- raise RuntimeError("Nenhum vídeo foi gerado.")
410
-
411
- print(f"[SUCCESS] Geração concluída em {time.perf_counter() - t_all:.2f}s. Vídeo: {final_output_path}")
412
- return final_output_path, used_seed
413
-
414
- except Exception as e:
415
- print(f"[FATAL ERROR] A geração falhou: {type(e).__name__} - {e}")
416
- traceback.print_exc()
417
- raise
418
  finally:
419
- self.finalize(keep_paths=[final_output_path] if final_output_path else [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
- # --- Ponto de Entrada ---
422
- if __name__ == "__main__":
423
- print("Iniciando carregamento do VideoService...")
424
- video_generation_service = VideoService()
425
- print("\n[INFO] VideoService pronto para receber tarefas.")
 
1
+ # ltx_server_refactored.py — VideoService (Modular Version with Simple Overlap Chunking)
 
 
 
 
 
 
 
2
 
# --- 0. WARNINGS AND ENVIRONMENT ---
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
# NOTE(review): this catch-all filter silences every warning, which makes the
# two category filters above redundant — confirm that is intended.
warnings.filterwarnings("ignore", message=".*")

# Standard library (each module imported once; the scraped original imported
# os, shlex, tempfile and subprocess twice).
import contextlib
import gc
import json
import os
import random
import shlex
import shutil
import subprocess
import sys
import tempfile
import time
import traceback
from pathlib import Path
from typing import List, Dict

# NOTE: this name shadows the standard-library ``logging`` module inside this file.
from huggingface_hub import logging

# The original called set_verbosity_error/warning/info/debug back to back.
# Each call overrides the previous one, so only the final (debug) level was
# ever in effect; the dead calls are collapsed into this single call.
logging.set_verbosity_debug()

LTXV_DEBUG = 1
LTXV_FRAME_LOG_EVERY = 8

# Third-party packages (relative order of the original kept).
import torch
import numpy as np
import yaml
import imageio
from PIL import Image
from huggingface_hub import hf_hub_download
from einops import rearrange
import torch.nn.functional as F

# Project-local singletons.
from managers.vae_manager import vae_manager_singleton
from tools.video_encode_tool import video_encode_tool_singleton

# Location of the LTX-Video checkout that is later put on sys.path.
DEPS_DIR = Path("/data")
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
42
 
43
+ # (Todas as funções de setup, helpers e inicialização da classe permanecem inalteradas)
44
+ # ... (run_setup, add_deps_to_path, _query_gpu_processes_via_nvml, etc.)
def run_setup():
    """Run the local ``setup.py`` helper script, if there is one.

    Looks for ``setup.py`` in the current working directory. When it is
    missing the function logs a message and returns without doing anything.
    Otherwise the script is executed with the current interpreter; a
    non-zero exit status terminates this process with exit code 1.
    """
    setup_script_path = "setup.py"
    if not os.path.exists(setup_script_path):
        print("[DEBUG] 'setup.py' não encontrado. Pulando clonagem de dependências.")
        return

    print("[DEBUG] Executando setup.py para dependências...")
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run([sys.executable, setup_script_path], check=True)
    except subprocess.CalledProcessError as e:
        print(f"[DEBUG] ERRO no setup.py (code {e.returncode}). Abortando.")
        sys.exit(1)
    print("[DEBUG] Setup concluído com sucesso.")
# Run the setup script when the LTX-Video checkout is not on disk yet.
if not LTX_VIDEO_REPO_DIR.exists():
    print(f"[DEBUG] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Rodando setup...")
    run_setup()
def add_deps_to_path():
    """Put the LTX-Video checkout at the front of ``sys.path``.

    Does nothing when the resolved repository path is already present, so
    calling it more than once does not add duplicate entries.
    """
    # Resolve once and reuse; the original resolved the same path twice.
    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
    if repo_path in sys.path:
        return
    sys.path.insert(0, repo_path)
    print(f"[DEBUG] Repo adicionado ao sys.path: {repo_path}")
def calculate_padding(orig_h, orig_w, target_h, target_w):
    """Split the size difference between original and target into per-side padding.

    Args:
        orig_h: Original height.
        orig_w: Original width.
        target_h: Target height.
        target_w: Target width.

    Returns:
        A tuple ``(pad_left, pad_right, pad_top, pad_bottom)``. When a
        difference is odd, the extra unit goes to the bottom / right side.
    """
    pad_h = target_h - orig_h
    pad_w = target_w - orig_w
    # Floor-divide for the leading side; the remainder goes to the trailing side.
    pad_top = pad_h // 2
    pad_bottom = pad_h - pad_top
    pad_left = pad_w // 2
    pad_right = pad_w - pad_left
    return (pad_left, pad_right, pad_top, pad_bottom)
def log_tensor_info(tensor, name="Tensor"):
    """Print shape, dtype, device and basic statistics of a tensor.

    Args:
        tensor: Object to describe. Anything that is not a ``torch.Tensor``
            only produces a one-line notice.
        name: Label used in the printed header.

    Statistics are printed for non-empty tensors only and remain
    best-effort: a dtype that does not support the reductions is skipped
    without raising.
    """
    if not isinstance(tensor, torch.Tensor):
        print(f"\n[INFO] '{name}' não é tensor.")
        return

    print(f"\n--- Tensor: {name} ---")
    print(f" - Shape: {tuple(tensor.shape)}")
    print(f" - Dtype: {tensor.dtype}")
    print(f" - Device: {tensor.device}")
    if tensor.numel() > 0:
        try:
            # mean() is undefined for integer/bool tensors; cast those so the
            # stats line is not silently dropped. Float tensors are used as-is
            # to avoid an extra copy.
            stats = tensor if tensor.is_floating_point() else tensor.detach().float()
            print(f" - Min: {stats.min().item():.4f} Max: {stats.max().item():.4f} Mean: {stats.mean().item():.4f}")
        except (RuntimeError, TypeError):
            # Logging must never break the caller.
            pass
    print("------------------------------------------\n")
87
 
 
 
88
  add_deps_to_path()
89
+ from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
 
 
 
90
  from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
91
  from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
92
+ from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
93
  from api.ltx.inference import (
94
+ create_ltx_video_pipeline,
95
+ create_latent_upsampler,
96
+ load_image_to_tensor_with_resize_and_crop,
97
+ seed_everething,
98
  )
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  class VideoService:
def __init__(self):
    """Set up the service (VideoService constructor).

    Picks the device, loads the YAML config and the models, moves the
    pipeline (and the latent upsampler, when one was loaded) to the device,
    applies the precision policy and attaches the pipeline to the shared
    VAE manager.
    """
    t0 = time.perf_counter()
    print("[DEBUG] Inicializando VideoService...")
    # Temp-path registries are created first so that finalize() can run even
    # when a later initialization step fails. finalize() reads both sets.
    self._tmp_dirs = set()
    self._tmp_files = set()

    self.device = "cuda" if torch.cuda.is_available() else "cpu"
    self.config = self._load_config()
    self.pipeline, self.latent_upsampler = self._load_models()
    self.pipeline.to(self.device)
    if self.latent_upsampler:
        self.latent_upsampler.to(self.device)
    # Sets self.runtime_autocast_dtype, which the VAE manager needs below.
    self._apply_precision_policy()
    vae_manager_singleton.attach_pipeline(
        self.pipeline,
        device=self.device,
        autocast_dtype=self.runtime_autocast_dtype,
    )
    print(f"[DEBUG] VideoService pronto. boot_time={time.perf_counter()-t0:.3f}s")
118
 
def _load_config(self):
    """Load the pipeline configuration from the LTX-Video checkout.

    Returns:
        The parsed content of ``configs/ltxv-13b-0.9.8-distilled-fp8.yaml``.

    Raises:
        FileNotFoundError: If the config file does not exist.
    """
    base = LTX_VIDEO_REPO_DIR / "configs"
    config_path = base / "ltxv-13b-0.9.8-distilled-fp8.yaml"
    if not config_path.is_file():
        # Same exception type open() would raise, with a clearer message.
        raise FileNotFoundError(f"Arquivo de config YAML não encontrado: {config_path}")
    with open(config_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)
124
+
def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
    """Remove tracked temporary files/directories and release cached GPU memory.

    Args:
        keep_paths: Paths that must not be deleted.
        extra_paths: Additional file paths to delete besides the tracked ones.
        clear_gpu: When true and CUDA is available, empty the CUDA cache.

    Every tracked path is dropped from the registries after the pass,
    whether or not it was deleted. All steps are best-effort: failures are
    logged and never raised.
    """
    print("[DEBUG] Finalize: iniciando limpeza...")
    keep = set(keep_paths or [])
    extras = set(extra_paths or [])

    # Tolerate instances on which a registry was never created instead of
    # failing with AttributeError in the middle of cleanup.
    tmp_files = getattr(self, "_tmp_files", None)
    if tmp_files is None:
        tmp_files = self._tmp_files = set()
    tmp_dirs = getattr(self, "_tmp_dirs", None)
    if tmp_dirs is None:
        tmp_dirs = self._tmp_dirs = set()

    removed_files = 0
    for f in list(tmp_files | extras):
        try:
            if f not in keep and os.path.isfile(f):
                os.remove(f)
                removed_files += 1
                print(f"[DEBUG] Removido arquivo tmp: {f}")
        except Exception as e:
            print(f"[DEBUG] Falha removendo arquivo {f}: {e}")
        finally:
            tmp_files.discard(f)

    removed_dirs = 0
    for d in list(tmp_dirs):
        try:
            if d not in keep and os.path.isdir(d):
                shutil.rmtree(d, ignore_errors=True)
                removed_dirs += 1
                print(f"[DEBUG] Removido diretório tmp: {d}")
        except Exception as e:
            print(f"[DEBUG] Falha removendo diretório {d}: {e}")
        finally:
            tmp_dirs.discard(d)
    print(f"[DEBUG] Finalize: arquivos removidos={removed_files}, dirs removidos={removed_dirs}")

    gc.collect()
    try:
        if clear_gpu and torch.cuda.is_available():
            torch.cuda.empty_cache()
            try:
                torch.cuda.ipc_collect()
            except Exception:
                # ipc_collect is an optional extra; ignore if it fails.
                pass
    except Exception as e:
        print(f"[DEBUG] Finalize: limpeza GPU falhou: {e}")

    try:
        # NOTE(review): _log_gpu_memory is not defined in the visible part of
        # the file; the guard keeps finalize usable if it is missing.
        self._log_gpu_memory("Após finalize")
    except Exception as e:
        print(f"[DEBUG] Log GPU pós-finalize falhou: {e}")
161
+
def _load_models(self):
    """Download the checkpoints and build the pipeline and latent upsampler.

    The ``checkpoint_path`` and ``spatial_upscaler_model_path`` config
    entries are overwritten with the local paths returned by the download.
    Both models are created on the CPU.

    Returns:
        ``(pipeline, latent_upsampler)``; ``latent_upsampler`` is ``None``
        when no spatial upscaler is configured.
    """
    t0 = time.perf_counter()
    LTX_REPO = "Lightricks/LTX-Video"

    def _download(filename):
        # Single place for the download options shared by both files.
        return hf_hub_download(
            repo_id=LTX_REPO,
            filename=filename,
            local_dir=os.getenv("HF_HOME"),
            cache_dir=os.getenv("HF_HOME_CACHE"),
            token=os.getenv("HF_TOKEN"),
        )

    print("[DEBUG] Baixando checkpoint principal...")
    distilled_model_path = _download(self.config["checkpoint_path"])
    self.config["checkpoint_path"] = distilled_model_path
    print(f"[DEBUG] Checkpoint em: {distilled_model_path}")

    # The upscaler is optional (the build step below already treats it so);
    # only download it when configured instead of failing with KeyError.
    if self.config.get("spatial_upscaler_model_path"):
        print("[DEBUG] Baixando upscaler espacial...")
        spatial_upscaler_path = _download(self.config["spatial_upscaler_model_path"])
        self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
        print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")

    print("[DEBUG] Construindo pipeline...")
    pipeline = create_ltx_video_pipeline(
        ckpt_path=self.config["checkpoint_path"],
        precision=self.config["precision"],
        text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
        sampler=self.config["sampler"],
        device="cpu",
        enhance_prompt=False,
        # Prompt enhancement is disabled, so these keys are read with .get()
        # and a config without them does not raise KeyError.
        prompt_enhancer_image_caption_model_name_or_path=self.config.get(
            "prompt_enhancer_image_caption_model_name_or_path"
        ),
        prompt_enhancer_llm_model_name_or_path=self.config.get(
            "prompt_enhancer_llm_model_name_or_path"
        ),
    )
    print("[DEBUG] Pipeline pronto.")

    latent_upsampler = None
    if self.config.get("spatial_upscaler_model_path"):
        print("[DEBUG] Construindo latent_upsampler...")
        latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
        print("[DEBUG] Upsampler pronto.")
    print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
    return pipeline, latent_upsampler
207
 
208
  def _apply_precision_policy(self):
209
  prec = str(self.config.get("precision", "")).lower()
210
  self.runtime_autocast_dtype = torch.float32
211
+ if prec in ["float8_e4m3fn", "bfloat16"]:
212
+ self.runtime_autocast_dtype = torch.bfloat16
213
+ elif prec == "mixed_precision":
214
+ self.runtime_autocast_dtype = torch.float16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
+ def _register_tmp_dir(self, d: str):
217
+ if d and os.path.isdir(d):
218
+ self._tmp_dirs.add(d); print(f"[DEBUG] Registrado tmp dir: {d}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
+ @torch.no_grad()
221
+ def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
222
+ if not self.latent_upsampler:
223
+ raise ValueError("Latent Upsampler não está carregado.")
224
+ latents_unnormalized = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
225
+ upsampled_latents = self.latent_upsampler(latents_unnormalized)
226
+ return normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
227
+ finally:
228
+ torch.cuda.empty_cache()
229
+ torch.cuda.ipc_collect()
230
+ self.finalize(keep_paths=[])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
    """Load an image as a conditioning tensor on the service device.

    The image is loaded with ``load_image_to_tensor_with_resize_and_crop``
    for the given ``height``/``width``, padded with ``padding_values`` (in
    the format expected by ``torch.nn.functional.pad``) and moved to
    ``self.device`` using ``self.runtime_autocast_dtype``.
    """
    tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
    tensor = torch.nn.functional.pad(tensor, padding_values)
    return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
 
 
 
 
 
236
 
 
 
237
  def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
 
238
  if len(mp4_list) == 1:
239
  shutil.move(mp4_list[0], out_path)
240
  return
241
+ with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
 
242
  for mp4 in mp4_list:
243
  f.write(f"file '{os.path.abspath(mp4)}'\n")
244
  list_path = f.name
 
245
  cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
246
  try:
247
+ subprocess.check_call(shlex.split(cmd))
 
 
 
248
  finally:
249
  os.remove(list_path)
250
 
def _save_and_log_video(self, pixel_tensor, base_filename, fps, temp_dir, results_dir, used_seed, progress_callback=None):
    """Encode ``pixel_tensor`` to MP4 in ``temp_dir`` and move it to ``results_dir``.

    The file is named ``{base_filename}_{used_seed}.mp4`` in both places.

    Returns:
        The final path of the video inside ``results_dir``.
    """
    filename = f"{base_filename}_{used_seed}.mp4"
    output_path = os.path.join(temp_dir, filename)
    video_encode_tool_singleton.save_video_from_tensor(
        pixel_tensor, output_path, fps=fps, progress_callback=progress_callback
    )
    # Make sure the destination exists so the move cannot fail on a fresh setup.
    os.makedirs(results_dir, exist_ok=True)
    final_path = os.path.join(results_dir, filename)
    shutil.move(output_path, final_path)
    print(f"[DEBUG] Vídeo salvo em: {final_path}")
    return final_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
  # ==============================================================================
262
+ # --- FUNÇÕES MODULARES COM A LÓGICA DE CHUNKING SIMPLIFICADA ---
263
  # ==============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int):
    """Build ``ConditioningItem`` objects from ``(media, frame, weight)`` triples.

    String media are treated as file paths and loaded/padded through
    ``_prepare_conditioning_tensor``; any other media is moved to the service
    device and dtype via its ``.to`` method. The frame index is clamped to
    ``[0, num_frames - 1]``.

    Returns:
        A list of ``ConditioningItem``; empty when ``items_list`` is falsy.
    """
    if not items_list:
        return []

    # Round each spatial dimension up to the next multiple of 8.
    padded_h = ((height - 1) // 8 + 1) * 8
    padded_w = ((width - 1) // 8 + 1) * 8
    pad = calculate_padding(height, width, padded_h, padded_w)
    last_frame = num_frames - 1

    def _as_tensor(media):
        if isinstance(media, str):
            return self._prepare_conditioning_tensor(media, height, width, pad)
        return media.to(self.device, dtype=self.runtime_autocast_dtype)

    return [
        ConditioningItem(
            _as_tensor(media),
            max(0, min(int(frame), last_frame)),
            float(weight),
        )
        for media, frame, weight in items_list
    ]
276
+
277
def generate_low(self, prompt, negative_prompt, height, width, duration, guidance_scale, seed, conditioning_items=None):
    """Run the first (downscaled) generation pass and save its video and latents.

    Args:
        prompt: text prompt forwarded to the pipeline.
        negative_prompt: negative prompt forwarded to the pipeline.
        height: requested height; rounded up to a multiple of 8, then scaled
            by the configured ``downscale_factor``.
        width: requested width; handled like ``height``.
        duration: clip length, multiplied by a fixed 24 FPS to get the frame
            count (snapped to ``8 * k + 1`` with a minimum of 9).
        guidance_scale: forwarded to the pipeline as a float.
        seed: integer seed, or ``None`` to draw a random one.
        conditioning_items: optional conditioning items forwarded as-is.

    Returns:
        ``(video_path, tensor_path, used_seed)``: the decoded preview video,
        the saved CPU latents (``.pt``) and the seed actually used.
    """
    # The cleanup used to sit in a `finally:` with no matching `try:`, which
    # is a SyntaxError; the whole pass is now wrapped so cleanup always runs.
    try:
        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
        seed_everething(used_seed)  # (sic) helper name as defined elsewhere in the file

        FPS = 24.0
        actual_num_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))
        height_padded = ((height - 1) // 8 + 1) * 8
        width_padded = ((width - 1) // 8 + 1) * 8

        temp_dir = tempfile.mkdtemp(prefix="ltxv_low_")
        self._register_tmp_dir(temp_dir)
        results_dir = "/app/output"
        os.makedirs(results_dir, exist_ok=True)

        # Scale the padded size down, then floor to a multiple of the VAE scale factor.
        downscale_factor = self.config.get("downscale_factor", 0.6666666)
        vae_scale_factor = self.pipeline.vae_scale_factor
        x_width = int(width_padded * downscale_factor)
        downscaled_width = x_width - (x_width % vae_scale_factor)
        x_height = int(height_padded * downscale_factor)
        downscaled_height = x_height - (x_height % vae_scale_factor)

        first_pass_kwargs = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "height": downscaled_height,
            "width": downscaled_width,
            "num_frames": actual_num_frames,
            "frame_rate": int(FPS),
            "generator": torch.Generator(device=self.device).manual_seed(used_seed),
            "output_type": "latent",
            "conditioning_items": conditioning_items,
            "guidance_scale": float(guidance_scale),
            # Config entries come last so they override the defaults above.
            **(self.config.get("first_pass", {})),
        }

        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
            latents = self.pipeline(**first_pass_kwargs).images
            pixel_tensor = vae_manager_singleton.decode(
                latents.clone(),
                decode_timestep=float(self.config.get("decode_timestep", 0.05)),
            )
            video_path = self._save_and_log_video(
                pixel_tensor, "low_res_video", FPS, temp_dir, results_dir, used_seed
            )
            latents_cpu = latents.detach().to("cpu")
            tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
            torch.save(latents_cpu, tensor_path)
        return video_path, tensor_path, used_seed
    finally:
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
        self.finalize(keep_paths=[])
310
+
311
def generate_upscale_denoise(self, latents_path, prompt, negative_prompt, guidance_scale, seed):
    """Upsample saved latents, refine them in two overlapping chunks, and decode.

    The latents at ``latents_path`` are upsampled, AdaIN-filtered against the
    low-resolution reference, split along dim 2 into two chunks that share
    one frame, and each chunk is run through the pipeline's second pass. The
    shared frame is dropped from the first refined chunk before the chunks
    are concatenated again.

    Args:
        latents_path: path to a tensor file written with ``torch.save``.
        prompt: text prompt forwarded to the pipeline.
        negative_prompt: negative prompt forwarded to the pipeline.
        guidance_scale: forwarded to the pipeline as a float.
        seed: integer seed, or ``None`` to draw a random one.

    Returns:
        ``(video_path, tensor_path)``: the refined video and the saved
        refined CPU latents (``.pt``).
    """
    # The cleanup used to sit in a `finally:` with no matching `try:`, which
    # is a SyntaxError; the whole pass is now wrapped so cleanup always runs.
    try:
        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
        seed_everething(used_seed)  # (sic) helper name as defined elsewhere in the file

        temp_dir = tempfile.mkdtemp(prefix="ltxv_up_")
        self._register_tmp_dir(temp_dir)
        results_dir = "/app/output"
        os.makedirs(results_dir, exist_ok=True)

        # NOTE(review): torch.load unpickles the file; only pass paths to
        # latents produced by this service.
        latents_low = torch.load(latents_path).to(self.device)

        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
            upsampled_latents = self._upsample_latents_internal(latents_low)
            upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents_low)
            del latents_low
            torch.cuda.empty_cache()

            # --- Simple two-way split with a one-frame overlap ---
            total_frames = upsampled_latents.shape[2]
            # Keep mid_point >= 1 so the second slice never starts at index -1.
            mid_point = max(1, total_frames // 2)
            chunk1 = upsampled_latents[:, :, :mid_point, :, :]
            # The second chunk starts one frame earlier to create the overlap.
            chunk2 = upsampled_latents[:, :, mid_point - 1:, :, :]

            final_latents_list = []
            for i, chunk in enumerate([chunk1, chunk2]):
                if chunk.shape[2] <= 1:
                    continue  # skip empty or single-frame chunks
                second_pass_height = chunk.shape[3] * self.pipeline.vae_scale_factor
                second_pass_width = chunk.shape[4] * self.pipeline.vae_scale_factor
                second_pass_kwargs = {
                    "prompt": prompt,
                    "negative_prompt": negative_prompt,
                    "height": second_pass_height,
                    "width": second_pass_width,
                    "num_frames": chunk.shape[2],
                    "latents": chunk,
                    "guidance_scale": float(guidance_scale),
                    "output_type": "latent",
                    "generator": torch.Generator(device=self.device).manual_seed(used_seed),
                    # Config entries come last so they override the defaults above.
                    **(self.config.get("second_pass", {})),
                }
                refined_chunk = self.pipeline(**second_pass_kwargs).images
                # Drop the overlapping last frame from the first refined chunk.
                if i == 0:
                    final_latents_list.append(refined_chunk[:, :, :-1, :, :])
                else:
                    final_latents_list.append(refined_chunk)

            final_latents = torch.cat(final_latents_list, dim=2)
            log_tensor_info(final_latents, "Latentes Upscaled/Refinados Finais")

            latents_cpu = final_latents.detach().to("cpu")
            tensor_path = os.path.join(results_dir, f"latents_refined_{used_seed}.pt")
            torch.save(latents_cpu, tensor_path)
            pixel_tensor = vae_manager_singleton.decode(
                final_latents,
                decode_timestep=float(self.config.get("decode_timestep", 0.05)),
            )
            video_path = self._save_and_log_video(
                pixel_tensor, "refined_video", 24.0, temp_dir, results_dir, used_seed
            )
            return video_path, tensor_path
    finally:
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
        self.finalize(keep_paths=[])
361
+
362
def encode_mp4(self, latents_path: str, fps: int = 24):
    """Decode saved latents in two overlapping chunks and write a single MP4.

    The latents are split along dim 2 into two chunks sharing one frame, each
    chunk is decoded through ``vae_manager_singleton``, the last frame of the
    first decoded chunk is dropped, and the pieces are concatenated and
    encoded.

    Args:
        latents_path: path to a tensor file written with ``torch.save``.
        fps: frame rate passed to the video encoder.

    Returns:
        Path of the encoded video inside ``/app/output``.
    """
    # Load on CPU: chunks are moved to the device one at a time below, so the
    # full tensor never has to sit on the accelerator.
    # NOTE(review): torch.load unpickles the file; only pass paths to
    # latents produced by this service.
    latents = torch.load(latents_path, map_location="cpu")
    seed = random.randint(0, 99999)
    temp_dir = tempfile.mkdtemp(prefix="ltxv_enc_")
    self._register_tmp_dir(temp_dir)
    results_dir = "/app/output"
    os.makedirs(results_dir, exist_ok=True)

    # --- Simple two-way split with a one-frame overlap ---
    total_frames = latents.shape[2]
    mid_point = max(1, total_frames // 2)
    latent_chunks = [
        latents[:, :, :mid_point, :, :],
        latents[:, :, mid_point - 1:, :, :],
    ]
    decode_timestep = float(self.config.get("decode_timestep", 0.05))

    pixel_chunks_to_concat = []
    with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
        for i, chunk in enumerate(latent_chunks):
            if chunk.shape[2] == 0:
                continue
            pixel_chunk = vae_manager_singleton.decode(
                chunk.to(self.device), decode_timestep=decode_timestep
            )
            # Drop the last frame of the first decoded chunk (the overlap).
            # NOTE(review): the overlap is one *latent* frame; confirm that
            # it decodes to exactly one pixel frame.
            if i == 0:
                pixel_chunk = pixel_chunk[:, :, :-1, :, :]
            pixel_chunks_to_concat.append(pixel_chunk)

    final_pixel_tensor = torch.cat(pixel_chunks_to_concat, dim=2)
    final_video_path = self._save_and_log_video(
        final_pixel_tensor, f"final_concatenated_{seed}", fps, temp_dir, results_dir, seed
    )
    return final_video_path
389
+
390
 
391
# --- SERVICE INSTANTIATION ---
# Module-level instance created at import time; per the log message below,
# model loading starts when VideoService() is constructed.
print("Criando instância do VideoService. O carregamento do modelo começará agora...")
video_generation_service = VideoService()
print("Instância do VideoService pronta para uso.")