dagloop5 committed
Commit 4b32720 · verified · 1 Parent(s): 1e66590

Update app.py

Files changed (1)
  1. app.py +70 -6
app.py CHANGED
@@ -297,9 +297,46 @@ def build_loras_tuple(pose_strength: float, general_strength: float, motion_stre
 # initial strengths (you can change defaults)
 INITIAL_LORAS = build_loras_tuple(1.0, 1.0, 1.0)

-# --- REPLACE pipeline creation with CUDA-aware quantization ---
-use_cuda = torch.cuda.is_available()
-print(f"[INFO] torch.cuda.is_available() = {use_cuda}")
+# --- START robust CUDA detection and quant selection ---
+def _probe_cuda_ready() -> bool:
+    """
+    Return True if a CUDA-capable device is actually available and can be initialized.
+    Uses multiple checks and a tiny safe probe to avoid later surprise RuntimeError.
+    """
+    try:
+        # First quick checks
+        if not torch.cuda.is_available():
+            return False
+        if torch.cuda.device_count() <= 0:
+            return False
+        # Try a tiny CUDA probe (safe): allocate a tiny tensor on CUDA and free it.
+        try:
+            t = torch.tensor([0], device="cuda")
+            del t
+        except Exception:
+            return False
+        # If we reached here, CUDA seems usable.
+        return True
+    except Exception:
+        return False
+
+
+use_cuda = _probe_cuda_ready()
+print(f"[INFO] cuda probe -> use_cuda = {use_cuda}")
+
+# Only enable FP8 quantization if a usable CUDA device is present.
+quant = None
+if use_cuda:
+    # Safe to enable FP8 (Triton-backed) quantization.
+    quant = QuantizationPolicy.fp8_cast()
+else:
+    # Fallback to no quantization (if available) to avoid Triton paths.
+    quant = getattr(QuantizationPolicy, "none", None)
+
+quant_kwargs = {}
+if quant is not None:
+    quant_kwargs["quantization"] = quant
+# --- END robust CUDA detection and quant selection ---

 # Only enable FP8 quantization if CUDA is present (FP8 uses Triton/CUDA kernels).
 # If QuantizationPolicy defines a no-op or 'none' option, use it; otherwise omit the arg.
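Note: the hunk above builds quant_kwargs, but the pipeline constructor it feeds is outside this diff. Below is a minimal, self-contained sketch of how such a kwargs dict is typically splatted into pipeline creation; create_pipeline, the checkpoint id, and the string standing in for the real QuantizationPolicy object are all hypothetical, not code from this commit.

import torch

def create_pipeline(checkpoint, device, **kwargs):
    # Stand-in factory: the real app.py builds the LTX pipeline here.
    print(f"[sketch] building {checkpoint} on {device} with options {kwargs}")

use_cuda = torch.cuda.is_available()                             # simplified stand-in for _probe_cuda_ready()
quant_kwargs = {"quantization": "fp8_cast"} if use_cuda else {}  # placeholder for QuantizationPolicy.fp8_cast()
create_pipeline(
    "ltx-video-distilled",                                       # hypothetical checkpoint id
    device="cuda" if use_cuda else "cpu",
    **quant_kwargs,                                              # empty on CPU, so no FP8/Triton path is touched
)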
@@ -403,8 +440,6 @@ def on_highres_toggle(first_image, last_image, high_res):
     w, h = RESOLUTIONS[tier][aspect]
     return gr.update(value=w), gr.update(value=h)

-
-@spaces.GPU(duration=80)
 @torch.inference_mode()
 def generate_video(
     first_image,
@@ -423,7 +458,11 @@ def generate_video(
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
-        torch.cuda.reset_peak_memory_stats()
+        if use_cuda:
+            try:
+                torch.cuda.reset_peak_memory_stats()
+            except Exception:
+                pass
         log_memory("start")

         current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
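The guard above exists because torch.cuda.reset_peak_memory_stats() raises at runtime when no CUDA device can be initialized. The same idea can be factored into a tiny reusable helper; this is a sketch only, not part of the commit, and cuda_call is a made-up name.

import torch

def cuda_call(fn, *args, **kwargs):
    # Run a torch.cuda.* call only when CUDA is usable; swallow failures on odd runtimes.
    if not torch.cuda.is_available():
        return None
    try:
        return fn(*args, **kwargs)
    except Exception:
        return None

cuda_call(torch.cuda.reset_peak_memory_stats)  # silently a no-op on CPU-only machines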
@@ -525,6 +564,17 @@ def generate_video(
             tiling_config=tiling_config,
             enhance_prompt=enhance_prompt,
         )
+    except Exception as e:
+        # If this looks like a CUDA / Triton / worker GPU init error, handle gracefully.
+        msg = str(e).lower()
+        if "no cuda" in msg or "cuda error" in msg or "triton" in msg or "no cuda-capable" in msg:
+            print(f"[ERROR] GPU initialization failed during pipeline call: {type(e).__name__}: {e}")
+            print("[ERROR] This environment reports CUDA availability but failed to initialize a GPU.")
+            print("[SUGGESTION] Try running the Space on a GPU-enabled runner, or ensure CUDA drivers are installed.")
+            # return None (failure) and the seed so the UI doesn't hang.
+            return None, current_seed
+        # Otherwise re-raise so other programming errors bubble up
+        raise

         log_memory("after pipeline call")
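The new handler classifies GPU and Triton initialization failures by substring-matching the exception message, swallows only those, and re-raises everything else. Here is a self-contained sketch of that classification idea; the helper name and the simulated error are illustrative only and not part of the commit.

GPU_INIT_HINTS = ("no cuda", "cuda error", "triton", "no cuda-capable")

def looks_like_gpu_init_error(exc: Exception) -> bool:
    msg = str(exc).lower()
    return any(hint in msg for hint in GPU_INIT_HINTS)

try:
    raise RuntimeError("no CUDA-capable device is detected")  # simulated driver failure
except Exception as e:
    if looks_like_gpu_init_error(e):
        print(f"[ERROR] handled GPU init failure gracefully: {e}")
    else:
        raise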
 
@@ -546,6 +596,20 @@ def generate_video(
         print(f"Error: {str(e)}\n{traceback.format_exc()}")
         return None, current_seed

+# Attach spaces GPU decorator only if the CUDA probe succeeded.
+try:
+    if use_cuda:
+        try:
+            generate_video = spaces.GPU(duration=80)(generate_video)
+            print("[INFO] generate_video wrapped with spaces.GPU decorator.")
+        except Exception as e:
+            print(f"[WARNING] could not attach spaces.GPU decorator: {type(e).__name__}: {e}")
+    else:
+        print("[INFO] Not attaching spaces.GPU decorator (CPU-only environment).")
+except Exception as e:
+    # Defensive logging
+    print(f"[WARNING] Error while attaching GPU decorator: {type(e).__name__}: {e}")
+

 with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
     gr.Markdown("# LTX-2.3 F2LF:Heretic with Fast Audio-Video Generation with Frame Conditioning")
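Rebinding generate_video through spaces.GPU(duration=80)(generate_video) is the imperative form of the @spaces.GPU(duration=80) decorator that the earlier hunk removed, and since it runs before the gr.Blocks section, the wrapped callable is the one Gradio's event handlers receive. Below is a small stand-alone sketch of that conditional-decoration pattern; gpu_decorator is a stand-in for spaces.GPU, not the real API.

def gpu_decorator(duration):                 # stand-in for spaces.GPU(duration=...)
    def wrap(fn):
        def inner(*args, **kwargs):
            print(f"[sketch] would hold a GPU for up to {duration}s")
            return fn(*args, **kwargs)
        return inner
    return wrap

def generate_video(prompt):
    return f"video for: {prompt}"

use_cuda = False                             # pretend this is a CPU-only Space
if use_cuda:
    # Same effect as writing @gpu_decorator(80) above the def.
    generate_video = gpu_decorator(80)(generate_video)

print(generate_video("a cat surfing"))       # undecorated on CPU, decorated when use_cuda is True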
 