Eueuiaa committed
Commit 95f3cf4 · verified · 1 parent: a272948

Update api/ltx_server_refactored.py

Files changed (1):
  1. api/ltx_server_refactored.py +0 -258
api/ltx_server_refactored.py CHANGED
@@ -265,264 +265,6 @@ class VideoService:
         torch.cuda.ipc_collect()
         self.finalize(keep_paths=[])
 
-    # ==============================================================================
-    # --- FUNCTION #1: SINGLE-CHUNK GENERATOR (INTERNAL HELPER) ---
-    # ==============================================================================
-    def _generate_single_chunk_low(
-        self, prompt, negative_prompt,
-        height, width, num_frames, guidance_scale:str="4",
-        seed, initial_latent_condition=None, image_conditions=None,
-        fp_num_inference_steps:int=30, ship_initial_inference_steps:int=0, ship_final_inference_steps:int=0,
-        ltx_configs_override=None):
-        """
-        [GENERATION NODE]
-        Generates a SINGLE chunk of raw latents. This is the fundamental unit of work.
-        """
-        print("\n" + "-"*20 + " START: _generate_single_chunk_low " + "-"*20)
-
-        # --- NODE 1.1: PARAMETER SETUP ---
-        height_padded = ((height - 1) // 8 + 1) * 8
-        width_padded = ((width - 1) // 8 + 1) * 8
-        generator = torch.Generator(device=self.device).manual_seed(seed)
-
-        downscale_factor = self.config.get("downscale_factor", 0.6666666)
-        vae_scale_factor = self.pipeline.vae_scale_factor
-
-        x_width = int(width_padded * downscale_factor)
-        downscaled_width = x_width - (x_width % vae_scale_factor)
-        x_height = int(height_padded * downscale_factor)
-        downscaled_height = x_height - (x_height % vae_scale_factor)
-
-        # --- NODE 1.2: ASSEMBLING CONDITIONS AND OVERRIDES ---
-        all_conditions = []
-        if image_conditions: all_conditions.extend(image_conditions)
-        if initial_latent_condition: all_conditions.append(initial_latent_condition)
-
-        first_pass_config = self.config.get("first_pass", {}).copy()
-
-        if ltx_configs_override:
-            print("[DEBUG] Overriding LTX settings with values from the UI...")
-            preset = ltx_configs_override.get("guidance_preset")
-            if preset == "Customizado":
-                try:
-                    first_pass_config["guidance_scale"] = json.loads(ltx_configs_override["guidance_scale_list"])
-                    first_pass_config["stg_scale"] = json.loads(ltx_configs_override["stg_scale_list"])
-                    #first_pass_config["guidance_timesteps"] = json.loads(ltx_configs_override["timesteps_list"])
-                except Exception as e:
-                    print(f" > ERROR parsing custom values: {e}. Falling back to the default preset.")
-            elif preset == "Agressivo":
-                first_pass_config["guidance_scale"] = [1, 2, 8, 12, 8, 2, 1]
-                first_pass_config["stg_scale"] = [0, 0, 5, 6, 5, 3, 2]
-            elif preset == "Suave":
-                first_pass_config["guidance_scale"] = [1, 1, 4, 5, 4, 1, 1]
-                first_pass_config["stg_scale"] = [0, 0, 2, 2, 2, 1, 0]
-
-        if fp_num_inference_steps!="0":
-            first_pass_config["num_inference_steps"] = fp_num_inference_steps
-        else:
-            first_pass_config["num_inference_steps"] = ltx_configs_override["first_pass_num_inference_steps"]
-
-        if ship_initial_inference_steps!="0":
-            first_pass_config["skip_initial_inference_steps"] = ship_initial_inference_steps
-        else:
-            first_pass_config["skip_initial_inference_steps"] = ltx_configs_override["skip_initial_inference_steps"]
-
-        if ship_final_inference_steps!="0":
-            first_pass_config["skip_final_inference_steps"] = ship_final_inference_steps
-        else:
-            first_pass_config["skip_final_inference_steps"] = ltx_configs_override["skip_final_inference_steps"]
-
-        first_pass_kwargs = {
-            "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
-            "num_frames": num_frames, "frame_rate": 24, "generator": generator, "output_type": "latent",
-            "conditioning_items": all_conditions if all_conditions else None,
-            **first_pass_config
-        }
-
-        # --- NODE 1.3: PIPELINE CALL ---
-        try:
-            with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
-                latents_bruto = self.pipeline(**first_pass_kwargs).images
-            latents_cpu_bruto = latents_bruto.detach().to("cpu")
-            tensor_path_cpu = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
-            torch.save(latents_cpu_bruto, tensor_path_cpu)
-            log_tensor_info(latents_bruto, f"Raw latent generated for: '{prompt[:40]}...'")
-
-            print("-" * 20 + " END: _generate_single_chunk_low " + "-"*20)
-            return tensor_path_cpu
-
-        except Exception as e:
-            print("-" * 20 + f" ERROR: _generate_single_chunk_low {e} " + "-"*20)
-        finally:
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-            self.finalize(keep_paths=[])
-
-    # ==============================================================================
-    # --- FUNCTION #2: NARRATIVE ORCHESTRATOR (MULTIPLE PROMPTS) ---
-    # ==============================================================================
-    def generate_narrative_low(
-        self, prompt: str, negative_prompt,
-        height, width, duration, guidance_scale:str="4",
-        seed, initial_image_conditions=None, overlap_frames: int = 8,
-        fp_num_inference_steps:int=30, ship_initial_inference_steps:int=0, ship_final_inference_steps:int=0,
-        ltx_configs_override: dict = None):
-        """
-        [NARRATIVE ORCHESTRATOR]
-        Generates a video in multiple sequential chunks from a multi-line prompt.
-        """
-        print("\n" + "="*80)
-        print("====== STARTING NARRATIVE CHUNKED GENERATION (LOW-RES) ======")
-        print("="*80)
-
-        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
-        seed_everething(used_seed)
-        FPS = 24.0
-
-        prompt_list = [p.strip() for p in prompt.splitlines() if p.strip()]
-        num_chunks = len(prompt_list)
-        if num_chunks == 0: raise ValueError("The prompt is empty or contains no valid lines.")
-
-        total_actual_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))
-
-
-        if num_chunks > 1:
-            total_blocks = (total_actual_frames - 1) // 8
-            blocks_per_chunk = total_blocks // num_chunks
-            blocks_last_chunk = total_blocks - (blocks_per_chunk * (num_chunks - 1))
-            frames_per_chunk = blocks_per_chunk * 8 + 1
-            frames_per_chunk_last = blocks_last_chunk * 8 + 1
-        else:
-            frames_per_chunk = total_actual_frames
-            frames_per_chunk_last = total_actual_frames
-
-        frames_per_chunk = max(9, frames_per_chunk)
-        frames_per_chunk_last = max(9, frames_per_chunk_last)
-
-        poda_latents_num = overlap_frames // self.pipeline.video_scale_factor if self.pipeline.video_scale_factor > 0 else 0
-
-        latentes_chunk_video = []
-        condition_item_latent_overlap = None
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_narrative_"); self._register_tmp_dir(temp_dir)
-        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-
-        for i, chunk_prompt in enumerate(prompt_list):
-            print(f"\n--- Generating narrative chunk {i+1}/{num_chunks}: '{chunk_prompt}' ---")
-
-            current_image_conditions = []
-            if initial_image_conditions:
-                cond_item_original = initial_image_conditions[0]
-                if i == 0:
-                    current_image_conditions.append(cond_item_original)
-                else:
-                    cond_item_fraco = ConditioningItem(
-                        media_item=cond_item_original.media_item, media_frame_number=0, conditioning_strength=0.1
-                    )
-                    current_image_conditions.append(cond_item_fraco)
-
-            num_frames_para_gerar = frames_per_chunk_last if i == num_chunks - 1 else frames_per_chunk
-            if i > 0 and poda_latents_num > 0:
-                num_frames_para_gerar += overlap_frames
-
-            latentes_bruto = self._generate_single_chunk_low(
-                prompt=chunk_prompt, negative_prompt=negative_prompt, height=height, width=width,
-                num_frames=num_frames_para_gerar, guidance_scale=guidance_scale, seed=used_seed + i,
-                initial_latent_condition=condition_item_latent_overlap, image_conditions=current_image_conditions,
-                fp_num_inference_steps=fp_num_inference_steps, ship_initial_inference_steps=ship_initial_inference_steps, ship_final_inference_steps=ship_final_inference_steps,
-                ltx_configs_override=ltx_configs_override
-            )
-
-            if i > 0 and poda_latents_num > 0:
-                latentes_bruto = latentes_bruto[:, :, poda_latents_num:, :, :]
-
-            latentes_podado = latentes_bruto.clone().detach()
-            if i < num_chunks - 1 and poda_latents_num > 0:
-                latentes_podado = latentes_bruto[:, :, :-poda_latents_num, :, :].clone()
-                overlap_latents = latentes_bruto[:, :, -poda_latents_num:, :, :].clone()
-                condition_item_latent_overlap = ConditioningItem(
-                    media_item=overlap_latents, media_frame_number=0, conditioning_strength=1.0
-                )
-            latentes_chunk_video.append(latentes_podado)
-
-        print("\n--- Finishing narrative: concatenating chunks ---")
-        final_latents = torch.cat(latentes_chunk_video, dim=2)
-        log_tensor_info(final_latents, "Final concatenated latent tensor")
-
-        try:
-            with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
-                pixel_tensor = vae_manager_singleton.decode(final_latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
-            video_path = self._save_and_log_video(pixel_tensor, "narrative_video", FPS, temp_dir, results_dir, used_seed)
-            latents_cpu = latents.detach().to("cpu")
-            tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
-            torch.save(latents_cpu, tensor_path)
-            return video_path, tensor_path, used_seed
-
-        except Exception as e:
-            pass
-        finally:
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-            self.finalize(keep_paths=[])
-
-    # ==============================================================================
-    # --- FUNCTION #3: SIMPLE ORCHESTRATOR (SINGLE PROMPT) ---
-    # ==============================================================================
-    def generate_single_low(
-        self, prompt: str, negative_prompt,
-        height, width, duration, guidance_scale:str="4",
-        seed, initial_image_conditions=None,
-        fp_num_inference_steps:int=30, ship_initial_inference_steps:int=0, ship_final_inference_steps:int=0,
-        ltx_configs_override: dict = None):
-        """
-        [SIMPLE ORCHESTRATOR]
-        Generates a complete video in a single chunk. Ideal for short, simple prompts.
-        """
-        print("\n" + "="*80)
-        print("====== STARTING SIMPLE SINGLE-CHUNK GENERATION (LOW-RES) ======")
-        print("="*80)
-
-        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
-        seed_everething(used_seed)
-        FPS = 24.0
-
-        total_actual_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))
-
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_single_"); self._register_tmp_dir(temp_dir)
-        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-
-        # Call the single-chunk generation function to do all the work
-        final_latents = self._generate_single_chunk_low(
-            prompt=prompt, negative_prompt=negative_prompt, height=height, width=width,
-            num_frames=total_actual_frames, guidance_scale=guidance_scale, seed=used_seed,
-            image_conditions=initial_image_conditions,
-            fp_num_inference_steps=fp_num_inference_steps, ship_initial_inference_steps=ship_initial_inference_steps, ship_final_inference_steps=ship_final_inference_steps,
-            ltx_configs_override=ltx_configs_override
-        )
-
-        print("\n--- Finishing simple generation: saving and decoding ---")
-        log_tensor_info(final_latents, "Final latent tensor")
-
-        try:
-            with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
-                pixel_tensor = vae_manager_singleton.decode(final_latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
-            video_path = self._save_and_log_video(pixel_tensor, "single_video", FPS, temp_dir, results_dir, used_seed)
-            latents_cpu = latents.detach().to("cpu")
-            tensor_path = os.path.join(results_dir, f"latents_single_{used_seed}.pt")
-            torch.save(latents_cpu, tensor_path)
-            return video_path, tensor_path, used_seed
-
-        except Exception as e:
-            pass
-        finally:
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-            self.finalize(keep_paths=[])
-
-
-
-
-    # In api/ltx_server_refactored.py -> inside the VideoService class
-
     # ==============================================================================
     # --- UPDATED AND MODULAR GENERATION FUNCTIONS ---
     # ==============================================================================
 
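For readers skimming the removed code, the core of the narrative orchestrator was its frame arithmetic: LTX-Video works on frame counts of the form 8k + 1, so the requested duration is snapped to that grid and then split across one chunk per prompt line, with later chunks generated slightly longer to provide an overlap that is trimmed before concatenation. The sketch below is illustrative only (the function and variable names are not from the repository) and assumes the latent-time overlap is non-zero, as it is with the defaults shown in the diff (overlap_frames=8).

```python
# Illustrative sketch, not part of api/ltx_server_refactored.py: reproduces the
# frame-count planning done by the removed generate_narrative_low.
FPS = 24.0

def plan_narrative_chunks(duration_s: float, num_chunks: int, overlap_frames: int = 8):
    # Snap the requested duration to an 8*k + 1 frame count (minimum 9 frames).
    total_frames = max(9, int(round((round(duration_s * FPS) - 1) / 8.0) * 8 + 1))

    if num_chunks > 1:
        total_blocks = (total_frames - 1) // 8
        blocks_per_chunk = total_blocks // num_chunks
        blocks_last = total_blocks - blocks_per_chunk * (num_chunks - 1)
        frames_per_chunk = max(9, blocks_per_chunk * 8 + 1)
        frames_last = max(9, blocks_last * 8 + 1)
    else:
        frames_per_chunk = frames_last = total_frames

    # Chunks after the first are generated overlap_frames longer; the removed code
    # trimmed the overlapping latents again before concatenating along time.
    per_chunk = []
    for i in range(num_chunks):
        base = frames_last if i == num_chunks - 1 else frames_per_chunk
        per_chunk.append(base + (overlap_frames if i > 0 else 0))
    return total_frames, per_chunk

# Example: a 10 s request split across 3 prompt lines.
print(plan_narrative_chunks(10.0, 3))  # -> (241, [81, 89, 89])
```

The last chunk absorbs the remainder of the integer division, so the prompt lines do not have to divide the total duration evenly.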
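The removed single-chunk helper also carried the resolution handling for the low-resolution first pass: the requested size is padded up to a multiple of 8, scaled by downscale_factor, and then snapped down to a multiple of the VAE scale factor. A minimal sketch of that arithmetic follows; the default downscale_factor matches the fallback used in the diff, while vae_scale_factor=32 is only an assumed example value (the real value comes from self.pipeline.vae_scale_factor).

```python
# Illustrative sketch, not part of the repository: the first-pass resolution math
# from the removed _generate_single_chunk_low. vae_scale_factor=32 is an assumed
# example; the actual value is read from the loaded pipeline.
def first_pass_resolution(height: int, width: int,
                          downscale_factor: float = 0.6666666,
                          vae_scale_factor: int = 32):
    # Pad the requested size up to a multiple of 8.
    height_padded = ((height - 1) // 8 + 1) * 8
    width_padded = ((width - 1) // 8 + 1) * 8
    # Downscale for the first pass, then snap down to the VAE grid.
    x_h = int(height_padded * downscale_factor)
    x_w = int(width_padded * downscale_factor)
    return x_h - (x_h % vae_scale_factor), x_w - (x_w % vae_scale_factor)

# Example: a 720x1280 request would run its first pass at 448x832 with these values.
print(first_pass_resolution(720, 1280))  # -> (448, 832)
```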