dagloop5 committed on
Commit
1e66590
·
verified ·
1 Parent(s): 7c85561

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -43
app.py CHANGED
@@ -297,29 +297,57 @@ def build_loras_tuple(pose_strength: float, general_strength: float, motion_stre
297
  # initial strengths (you can change defaults)
298
  INITIAL_LORAS = build_loras_tuple(1.0, 1.0, 1.0)
299
 
300
- # Initialize pipeline WITH text encoder and optional audio support
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  pipeline = LTX23DistilledA2VPipeline(
302
  distilled_checkpoint_path=checkpoint_path,
303
  spatial_upsampler_path=spatial_upsampler_path,
304
  gemma_root=gemma_root,
305
  loras=INITIAL_LORAS,
306
- quantization=QuantizationPolicy.fp8_cast(),
307
  )
 
308
 
309
- # Preload all models for ZeroGPU tensor packing.
310
- print("Preloading all models (including Gemma and audio components)...")
311
  ledger = pipeline.model_ledger
312
- _transformer = ledger.transformer()
313
- _video_encoder = ledger.video_encoder()
314
- _video_decoder = ledger.video_decoder()
315
- _audio_encoder = ledger.audio_encoder()
316
- _audio_decoder = ledger.audio_decoder()
317
- _vocoder = ledger.vocoder()
318
- _spatial_upsampler = ledger.spatial_upsampler()
319
- _text_encoder = ledger.text_encoder()
320
- _embeddings_processor = ledger.gemma_embeddings_processor()
321
-
322
- print("All models preloaded (including Gemma text encoder and audio encoder)!")
 
 
 
 
 
 
 
 
 
 
 
323
 
324
  print("=" * 80)
325
  print("Pipeline ready!")
@@ -327,11 +355,23 @@ print("=" * 80)
327
 
328
 
329
  def log_memory(tag: str):
330
- if torch.cuda.is_available():
331
- allocated = torch.cuda.memory_allocated() / 1024**3
332
- peak = torch.cuda.max_memory_allocated() / 1024**3
333
- free, total = torch.cuda.mem_get_info()
334
- print(f"[VRAM {tag}] allocated={allocated:.2f}GB peak={peak:.2f}GB free={free / 1024**3:.2f}GB total={total / 1024**3:.2f}GB")
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
 
337
  def detect_aspect_ratio(image) -> str:
@@ -397,31 +437,49 @@ def generate_video(
397
 
398
  requested_strengths = (float(pose_lora_strength), float(general_lora_strength), float(motion_lora_strength))
399
  if _get_current_strengths(current_ledger) != requested_strengths:
400
- # build new tuple and replace ledger.loras
401
  current_ledger.loras = build_loras_tuple(*requested_strengths)
402
- # clear cached model instances so new models are constructed with the new LoRAs
403
- # (ModelLedger builds models on first access using its configured `loras`)
404
- try:
405
- current_ledger.clear_vram()
406
- except Exception:
407
- # `clear_vram` should exist; if it doesn't, fall back to deleting cached attrs
408
- for k in list(vars(current_ledger).keys()):
409
- if k in ("_transformer", "_video_encoder", "_video_decoder", "_audio_encoder", "_audio_decoder", "_vocoder", "_spatial_upsampler", "_text_encoder", "_gemma_embeddings_processor"):
410
- vars(current_ledger).pop(k, None)
411
- # Now pre-load the models again (ensures they are on-device before pipeline call)
412
- _ = current_ledger.transformer()
413
- _ = current_ledger.video_encoder()
414
- _ = current_ledger.video_decoder()
415
- _ = current_ledger.audio_encoder()
416
- _ = current_ledger.audio_decoder()
417
- _ = current_ledger.vocoder()
418
- _ = current_ledger.spatial_upsampler()
419
- _ = current_ledger.text_encoder()
420
- _ = current_ledger.gemma_embeddings_processor()
421
- torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  except Exception as e:
423
- # if this fails, we still proceed with the existing pipeline (safer to continue than to crash)
424
- print(f"[LoRA rebuild warning] Could not update LoRA strengths in-place: {e}")
425
  # --- end LoRA update ---
426
 
427
  frame_rate = DEFAULT_FRAME_RATE
 
297
  # initial strengths (you can change defaults)
298
  INITIAL_LORAS = build_loras_tuple(1.0, 1.0, 1.0)
299
 
300
+ # --- REPLACE pipeline creation with CUDA-aware quantization ---
301
+ use_cuda = torch.cuda.is_available()
302
+ print(f"[INFO] torch.cuda.is_available() = {use_cuda}")
303
+
304
+ # Only enable FP8 quantization if CUDA is present (FP8 uses Triton/CUDA kernels).
305
+ # If QuantizationPolicy defines a no-op or 'none' option, use it; otherwise omit the arg.
306
+ quant = None
307
+ if use_cuda:
308
+ quant = QuantizationPolicy.fp8_cast()
309
+ else:
310
+ # try to use a 'none' policy if available; otherwise we'll omit quantization
311
+ quant = getattr(QuantizationPolicy, "none", None)
312
+
313
+ quant_kwargs = {}
314
+ if quant is not None:
315
+ quant_kwargs["quantization"] = quant
316
+
317
  pipeline = LTX23DistilledA2VPipeline(
318
  distilled_checkpoint_path=checkpoint_path,
319
  spatial_upsampler_path=spatial_upsampler_path,
320
  gemma_root=gemma_root,
321
  loras=INITIAL_LORAS,
322
+ **quant_kwargs,
323
  )
324
+ # --- end replace ---
325
 
326
+ # --- REPLACE preload block with CUDA-aware version ---
327
+ print("Preloading models (GPU preloads only if CUDA is available)...")
328
  ledger = pipeline.model_ledger
329
+
330
+ if torch.cuda.is_available():
331
+ try:
332
+ # Preload models (this will trigger GPU-side building; only do this when CUDA is present)
333
+ _transformer = ledger.transformer()
334
+ _video_encoder = ledger.video_encoder()
335
+ _video_decoder = ledger.video_decoder()
336
+ _audio_encoder = ledger.audio_encoder()
337
+ _audio_decoder = ledger.audio_decoder()
338
+ _vocoder = ledger.vocoder()
339
+ _spatial_upsampler = ledger.spatial_upsampler()
340
+ _text_encoder = ledger.text_encoder()
341
+ _embeddings_processor = ledger.gemma_embeddings_processor()
342
+ print("All models preloaded onto GPU (Gemma text encoder and audio encoder included).")
343
+ except Exception as e:
344
+ # If FP8/Triton or other GPU initialization fails, print warning and continue in safe (lazy) mode.
345
+ print(f"[WARNING] Failed to preload GPU models at startup: {type(e).__name__}: {e}")
346
+ print("[WARNING] Falling back to lazy model loading / reduced quantization (if possible).")
347
+ else:
348
+ # No CUDA — do not attempt GPU preloads that will invoke Triton kernels.
349
+ print("[INFO] No CUDA device detected — skipping GPU preloads. Models will be loaded lazily (CPU).")
350
+ # --- end replace ---
351
 
352
  print("=" * 80)
353
  print("Pipeline ready!")
 
355
 
356
 
357
  def log_memory(tag: str):
358
+ try:
359
+ if torch.cuda.is_available():
360
+ allocated = torch.cuda.memory_allocated() / 1024**3
361
+ peak = torch.cuda.max_memory_allocated() / 1024**3
362
+ try:
363
+ free, total = torch.cuda.mem_get_info()
364
+ free_gb = free / 1024**3
365
+ total_gb = total / 1024**3
366
+ except Exception:
367
+ free_gb = total_gb = 0.0
368
+ print(f"[VRAM {tag}] allocated={allocated:.2f}GB peak={peak:.2f}GB free={free_gb:.2f}GB total={total_gb:.2f}GB")
369
+ else:
370
+ # Basic CPU fallback logging
371
+ print(f"[VRAM {tag}] CUDA not available — running on CPU.")
372
+ except Exception as e:
373
+ # Defensive: don't let logging crash the app
374
+ print(f"[log_memory error] {type(e).__name__}: {e}")
375
 
376
 
377
  def detect_aspect_ratio(image) -> str:
 
437
 
438
  requested_strengths = (float(pose_lora_strength), float(general_lora_strength), float(motion_lora_strength))
439
  if _get_current_strengths(current_ledger) != requested_strengths:
440
+ # replace ledger.loras with a new tuple built from the requested strengths
441
  current_ledger.loras = build_loras_tuple(*requested_strengths)
442
+
443
+ if torch.cuda.is_available():
444
+ # Only try to clear VRAM and rebuild on GPU-enabled hosts
445
+ try:
446
+ current_ledger.clear_vram()
447
+ except Exception:
448
+ # Fallback: remove cached attributes to force rebuild on next access
449
+ for k in list(vars(current_ledger).keys()):
450
+ if k in (
451
+ "_transformer",
452
+ "_video_encoder",
453
+ "_video_decoder",
454
+ "_audio_encoder",
455
+ "_audio_decoder",
456
+ "_vocoder",
457
+ "_spatial_upsampler",
458
+ "_text_encoder",
459
+ "_gemma_embeddings_processor",
460
+ ):
461
+ vars(current_ledger).pop(k, None)
462
+ # Preload the models again on GPU so they're available before pipeline call
463
+ try:
464
+ _ = current_ledger.transformer()
465
+ _ = current_ledger.video_encoder()
466
+ _ = current_ledger.video_decoder()
467
+ _ = current_ledger.audio_encoder()
468
+ _ = current_ledger.audio_decoder()
469
+ _ = current_ledger.vocoder()
470
+ _ = current_ledger.spatial_upsampler()
471
+ _ = current_ledger.text_encoder()
472
+ _ = current_ledger.gemma_embeddings_processor()
473
+ torch.cuda.empty_cache()
474
+ except Exception as e:
475
+ print(f"[LoRA preload warning] Failed to preload models after LoRA change: {type(e).__name__}: {e}")
476
+ # continue — the pipeline will attempt to build when called
477
+ else:
478
+ # No CUDA: we updated the ledger.loras but won't attempt GPU preloads.
479
+ print("[INFO] LoRA strengths updated (CPU-only; models will be applied lazily).")
480
  except Exception as e:
481
+ # if this fails, proceed with the existing pipeline (safer to continue than to crash)
482
+ print(f"[LoRA rebuild warning] Could not update LoRA strengths in-place: {type(e).__name__}: {e}")
483
  # --- end LoRA update ---
484
 
485
  frame_rate = DEFAULT_FRAME_RATE