dagloop5 committed
Commit 4b32720 · verified · 1 Parent(s): 1e66590

Update app.py

Files changed (1)
  1. app.py +70 -6
app.py CHANGED
@@ -297,9 +297,46 @@ def build_loras_tuple(pose_strength: float, general_strength: float, motion_stre
 # initial strengths (you can change defaults)
 INITIAL_LORAS = build_loras_tuple(1.0, 1.0, 1.0)

-# --- REPLACE pipeline creation with CUDA-aware quantization ---
-use_cuda = torch.cuda.is_available()
-print(f"[INFO] torch.cuda.is_available() = {use_cuda}")
+# --- START robust CUDA detection and quant selection ---
+def _probe_cuda_ready() -> bool:
+    """
+    Return True if a CUDA-capable device is actually available and can be initialized.
+    Uses multiple checks and a tiny safe probe to avoid later surprise RuntimeError.
+    """
+    try:
+        # First quick checks
+        if not torch.cuda.is_available():
+            return False
+        if torch.cuda.device_count() <= 0:
+            return False
+        # Try a tiny CUDA probe (safe): allocate a tiny tensor on CUDA and free it.
+        try:
+            t = torch.tensor([0], device="cuda")
+            del t
+        except Exception:
+            return False
+        # If we reached here, CUDA seems usable.
+        return True
+    except Exception:
+        return False
+
+
+use_cuda = _probe_cuda_ready()
+print(f"[INFO] cuda probe -> use_cuda = {use_cuda}")
+
+# Only enable FP8 quantization if a usable CUDA device is present.
+quant = None
+if use_cuda:
+    # Safe to enable FP8 (Triton-backed) quantization.
+    quant = QuantizationPolicy.fp8_cast()
+else:
+    # Fallback to no quantization (if available) to avoid Triton paths.
+    quant = getattr(QuantizationPolicy, "none", None)
+
+quant_kwargs = {}
+if quant is not None:
+    quant_kwargs["quantization"] = quant
+# --- END robust CUDA detection and quant selection ---

 # Only enable FP8 quantization if CUDA is present (FP8 uses Triton/CUDA kernels).
 # If QuantizationPolicy defines a no-op or 'none' option, use it; otherwise omit the arg.
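Note: the hunk above builds quant_kwargs, but the pipeline constructor it feeds is outside this diff. Below is a minimal, self-contained sketch of how such a kwargs dict is typically splatted into pipeline creation; create_pipeline, the checkpoint id, and the string standing in for the real QuantizationPolicy object are all hypothetical, not code from this commit.

import torch

def create_pipeline(checkpoint, device, **kwargs):
    # Stand-in factory: the real app.py builds the LTX pipeline here.
    print(f"[sketch] building {checkpoint} on {device} with options {kwargs}")

use_cuda = torch.cuda.is_available()                             # simplified stand-in for _probe_cuda_ready()
quant_kwargs = {"quantization": "fp8_cast"} if use_cuda else {}  # placeholder for QuantizationPolicy.fp8_cast()
create_pipeline(
    "ltx-video-distilled",                                       # hypothetical checkpoint id
    device="cuda" if use_cuda else "cpu",
    **quant_kwargs,                                              # empty on CPU, so no FP8/Triton path is touched
)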
@@ -403,8 +440,6 @@ def on_highres_toggle(first_image, last_image, high_res):
     w, h = RESOLUTIONS[tier][aspect]
     return gr.update(value=w), gr.update(value=h)

-
-@spaces.GPU(duration=80)
 @torch.inference_mode()
 def generate_video(
     first_image,
@@ -423,7 +458,11 @@ def generate_video(
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
-        torch.cuda.reset_peak_memory_stats()
+        if use_cuda:
+            try:
+                torch.cuda.reset_peak_memory_stats()
+            except Exception:
+                pass
         log_memory("start")

         current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
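The guard above exists because torch.cuda.reset_peak_memory_stats() raises at runtime when no CUDA device can be initialized. The same idea can be factored into a tiny reusable helper; this is a sketch only, not part of the commit, and cuda_call is a made-up name.

import torch

def cuda_call(fn, *args, **kwargs):
    # Run a torch.cuda.* call only when CUDA is usable; swallow failures on odd runtimes.
    if not torch.cuda.is_available():
        return None
    try:
        return fn(*args, **kwargs)
    except Exception:
        return None

cuda_call(torch.cuda.reset_peak_memory_stats)  # silently a no-op on CPU-only machines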
@@ -525,6 +564,17 @@ def generate_video(
             tiling_config=tiling_config,
             enhance_prompt=enhance_prompt,
         )
+    except Exception as e:
+        # If this looks like a CUDA / Triton / worker GPU init error, handle gracefully.
+        msg = str(e).lower()
+        if "no cuda" in msg or "cuda error" in msg or "triton" in msg or "no cuda-capable" in msg:
+            print(f"[ERROR] GPU initialization failed during pipeline call: {type(e).__name__}: {e}")
+            print("[ERROR] This environment reports CUDA availability but failed to initialize a GPU.")
+            print("[SUGGESTION] Try running the Space on a GPU-enabled runner, or ensure CUDA drivers are installed.")
+            # return None (failure) and the seed so the UI doesn't hang.
+            return None, current_seed
+        # Otherwise re-raise so other programming errors bubble up
+        raise

         log_memory("after pipeline call")
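The new handler classifies GPU and Triton initialization failures by substring-matching the exception message, swallows only those, and re-raises everything else. Here is a self-contained sketch of that classification idea; the helper name and the simulated error are illustrative only and not part of the commit.

GPU_INIT_HINTS = ("no cuda", "cuda error", "triton", "no cuda-capable")

def looks_like_gpu_init_error(exc: Exception) -> bool:
    msg = str(exc).lower()
    return any(hint in msg for hint in GPU_INIT_HINTS)

try:
    raise RuntimeError("no CUDA-capable device is detected")  # simulated driver failure
except Exception as e:
    if looks_like_gpu_init_error(e):
        print(f"[ERROR] handled GPU init failure gracefully: {e}")
    else:
        raise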
 
@@ -546,6 +596,20 @@ def generate_video(
         print(f"Error: {str(e)}\n{traceback.format_exc()}")
         return None, current_seed

+# Attach spaces GPU decorator only if the CUDA probe succeeded.
+try:
+    if use_cuda:
+        try:
+            generate_video = spaces.GPU(duration=80)(generate_video)
+            print("[INFO] generate_video wrapped with spaces.GPU decorator.")
+        except Exception as e:
+            print(f"[WARNING] could not attach spaces.GPU decorator: {type(e).__name__}: {e}")
+    else:
+        print("[INFO] Not attaching spaces.GPU decorator (CPU-only environment).")
+except Exception as e:
+    # Defensive logging
+    print(f"[WARNING] Error while attaching GPU decorator: {type(e).__name__}: {e}")
+

 with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
     gr.Markdown("# LTX-2.3 F2LF:Heretic with Fast Audio-Video Generation with Frame Conditioning")
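Rebinding generate_video through spaces.GPU(duration=80)(generate_video) is the imperative form of the @spaces.GPU(duration=80) decorator that the earlier hunk removed, and since it runs before the gr.Blocks section, the wrapped callable is the one Gradio's event handlers receive. Below is a small stand-alone sketch of that conditional-decoration pattern; gpu_decorator is a stand-in for spaces.GPU, not the real API.

def gpu_decorator(duration):                 # stand-in for spaces.GPU(duration=...)
    def wrap(fn):
        def inner(*args, **kwargs):
            print(f"[sketch] would hold a GPU for up to {duration}s")
            return fn(*args, **kwargs)
        return inner
    return wrap

def generate_video(prompt):
    return f"video for: {prompt}"

use_cuda = False                             # pretend this is a CPU-only Space
if use_cuda:
    # Same effect as writing @gpu_decorator(80) above the def.
    generate_video = gpu_decorator(80)(generate_video)

print(generate_video("a cat surfing"))       # undecorated on CPU, decorated when use_cuda is True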
 