Update app.py
Browse files
app.py
CHANGED
|
@@ -338,19 +338,6 @@ if quant is not None:
|
|
| 338 |
quant_kwargs["quantization"] = quant
|
| 339 |
# --- END robust CUDA detection and quant selection ---
|
| 340 |
|
| 341 |
-
# Only enable FP8 quantization if CUDA is present (FP8 uses Triton/CUDA kernels).
|
| 342 |
-
# If QuantizationPolicy defines a no-op or 'none' option, use it; otherwise omit the arg.
|
| 343 |
-
quant = None
|
| 344 |
-
if use_cuda:
|
| 345 |
-
quant = QuantizationPolicy.fp8_cast()
|
| 346 |
-
else:
|
| 347 |
-
# try to use a 'none' policy if available; otherwise we'll omit quantization
|
| 348 |
-
quant = getattr(QuantizationPolicy, "none", None)
|
| 349 |
-
|
| 350 |
-
quant_kwargs = {}
|
| 351 |
-
if quant is not None:
|
| 352 |
-
quant_kwargs["quantization"] = quant
|
| 353 |
-
|
| 354 |
pipeline = LTX23DistilledA2VPipeline(
|
| 355 |
distilled_checkpoint_path=checkpoint_path,
|
| 356 |
spatial_upsampler_path=spatial_upsampler_path,
|
|
@@ -364,7 +351,7 @@ pipeline = LTX23DistilledA2VPipeline(
|
|
| 364 |
print("Preloading models (GPU preloads only if CUDA is available)...")
|
| 365 |
ledger = pipeline.model_ledger
|
| 366 |
|
| 367 |
-
if
|
| 368 |
try:
|
| 369 |
# Preload models (this will trigger GPU-side building; only do this when CUDA is present)
|
| 370 |
_transformer = ledger.transformer()
|
|
|
|
| 338 |
quant_kwargs["quantization"] = quant
|
| 339 |
# --- END robust CUDA detection and quant selection ---
|
| 340 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
pipeline = LTX23DistilledA2VPipeline(
|
| 342 |
distilled_checkpoint_path=checkpoint_path,
|
| 343 |
spatial_upsampler_path=spatial_upsampler_path,
|
|
|
|
| 351 |
print("Preloading models (GPU preloads only if CUDA is available)...")
|
| 352 |
ledger = pipeline.model_ledger
|
| 353 |
|
| 354 |
+
if use_cuda:
|
| 355 |
try:
|
| 356 |
# Preload models (this will trigger GPU-side building; only do this when CUDA is present)
|
| 357 |
_transformer = ledger.transformer()
|