Update app.py
app.py
CHANGED
@@ -297,9 +297,46 @@ def build_loras_tuple(pose_strength: float, general_strength: float, motion_strength: float):
 # initial strengths (you can change defaults)
 INITIAL_LORAS = build_loras_tuple(1.0, 1.0, 1.0)
 
-# ---
-
-
+# --- START robust CUDA detection and quant selection ---
+def _probe_cuda_ready() -> bool:
+    """
+    Return True if a CUDA-capable device is actually available and can be initialized.
+    Uses multiple checks and a tiny safe probe to avoid later surprise RuntimeError.
+    """
+    try:
+        # First quick checks
+        if not torch.cuda.is_available():
+            return False
+        if torch.cuda.device_count() <= 0:
+            return False
+        # Try a tiny CUDA probe (safe): allocate a tiny tensor on CUDA and free it.
+        try:
+            t = torch.tensor([0], device="cuda")
+            del t
+        except Exception:
+            return False
+        # If we reached here, CUDA seems usable.
+        return True
+    except Exception:
+        return False
+
+
+use_cuda = _probe_cuda_ready()
+print(f"[INFO] cuda probe -> use_cuda = {use_cuda}")
+
+# Only enable FP8 quantization if a usable CUDA device is present.
+quant = None
+if use_cuda:
+    # Safe to enable FP8 (Triton-backed) quantization.
+    quant = QuantizationPolicy.fp8_cast()
+else:
+    # Fallback to no quantization (if available) to avoid Triton paths.
+    quant = getattr(QuantizationPolicy, "none", None)
+
+quant_kwargs = {}
+if quant is not None:
+    quant_kwargs["quantization"] = quant
+# --- END robust CUDA detection and quant selection ---
 
 # Only enable FP8 quantization if CUDA is present (FP8 uses Triton/CUDA kernels).
 # If QuantizationPolicy defines a no-op or 'none' option, use it; otherwise omit the arg.
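
Note on the quant selection above: the code that consumes quant_kwargs lies outside the hunk context, so how it is forwarded is not visible here. The pattern itself is simply "omit the keyword entirely when no policy applies", and getattr(QuantizationPolicy, "none", None) yields None when the class has no such attribute, which keeps the argument out of the call. A minimal, self-contained sketch of that pattern (create_pipeline, its signature, and the model id are hypothetical stand-ins, not the real factory in app.py):

def create_pipeline(model_id, quantization=None):
    # Hypothetical stand-in for the real pipeline factory; app.py's actual call is not shown in this diff.
    return {"model": model_id, "quantization": quantization}

quant = None  # what the branch above produces on a CPU-only machine (or when no "none" policy exists)
quant_kwargs = {}
if quant is not None:
    quant_kwargs["quantization"] = quant

# With an empty dict, **quant_kwargs expands to nothing, so `quantization` keeps its default.
pipe = create_pipeline("ltx-video-distilled", **quant_kwargs)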

@@ -403,8 +440,6 @@ def on_highres_toggle(first_image, last_image, high_res):
     w, h = RESOLUTIONS[tier][aspect]
     return gr.update(value=w), gr.update(value=h)
 
-
-@spaces.GPU(duration=80)
 @torch.inference_mode()
 def generate_video(
     first_image,

@@ -423,7 +458,11 @@ def generate_video(
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
-
+        if use_cuda:
+            try:
+                torch.cuda.reset_peak_memory_stats()
+            except Exception:
+                pass
         log_memory("start")
 
         current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
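
The guard added at the top of generate_video resets CUDA's peak-memory counter (only when a GPU is actually usable), presumably so that later log_memory readings reflect just the current invocation. log_memory itself is defined elsewhere in app.py and is not part of this diff; a plausible sketch of such a helper, assuming it reports torch.cuda statistics (the real implementation may differ):

import torch

def log_memory(tag: str) -> None:
    # Plausible sketch only; not the log_memory actually defined in app.py.
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**2
        peak = torch.cuda.max_memory_allocated() / 1024**2
        print(f"[MEM] {tag}: allocated={allocated:.1f} MiB, peak={peak:.1f} MiB")
    else:
        print(f"[MEM] {tag}: CUDA not available")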

@@ -525,6 +564,17 @@ def generate_video(
                 tiling_config=tiling_config,
                 enhance_prompt=enhance_prompt,
             )
+        except Exception as e:
+            # If this looks like a CUDA / Triton / worker GPU init error, handle gracefully.
+            msg = str(e).lower()
+            if "no cuda" in msg or "cuda error" in msg or "triton" in msg or "no cuda-capable" in msg:
+                print(f"[ERROR] GPU initialization failed during pipeline call: {type(e).__name__}: {e}")
+                print("[ERROR] This environment reports CUDA availability but failed to initialize a GPU.")
+                print("[SUGGESTION] Try running the Space on a GPU-enabled runner, or ensure CUDA drivers are installed.")
+                # return None (failure) and the seed so the UI doesn't hang.
+                return None, current_seed
+            # Otherwise re-raise so other programming errors bubble up
+            raise
 
         log_memory("after pipeline call")
 

@@ -546,6 +596,20 @@ def generate_video(
         print(f"Error: {str(e)}\n{traceback.format_exc()}")
         return None, current_seed
 
+# Attach spaces GPU decorator only if the CUDA probe succeeded.
+try:
+    if use_cuda:
+        try:
+            generate_video = spaces.GPU(duration=80)(generate_video)
+            print("[INFO] generate_video wrapped with spaces.GPU decorator.")
+        except Exception as e:
+            print(f"[WARNING] could not attach spaces.GPU decorator: {type(e).__name__}: {e}")
+    else:
+        print("[INFO] Not attaching spaces.GPU decorator (CPU-only environment).")
+except Exception as e:
+    # Defensive logging
+    print(f"[WARNING] Error while attaching GPU decorator: {type(e).__name__}: {e}")
+
 
 with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
     gr.Markdown("# LTX-2.3 F2LF:Heretic with Fast Audio-Video Generation with Frame Conditioning")
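
The manual wrapping generate_video = spaces.GPU(duration=80)(generate_video) reproduces the @spaces.GPU(duration=80) decorator that the earlier hunk removed, but only when the CUDA probe succeeded, and because it runs at module import time the decision is made once per process. decorator(args)(func) is exactly what the @decorator(args) syntax expands to, illustrated here with a toy decorator (gpu below is a stand-in, not the real spaces.GPU):

def gpu(duration):
    # Toy parameterized decorator, standing in for spaces.GPU.
    def wrap(fn):
        def inner(*args, **kwargs):
            print(f"[demo] would hold a GPU for up to {duration}s")
            return fn(*args, **kwargs)
        return inner
    return wrap

@gpu(duration=80)
def decorated():
    return "via @ syntax"

def wrapped():
    return "via manual wrapping"

wrapped = gpu(duration=80)(wrapped)  # same effect as the decorator syntax used before this commit

print(decorated(), wrapped())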