Update app.py
app.py CHANGED
@@ -47,6 +47,8 @@ from ltx_core.components.noisers import GaussianNoiser
 from ltx_core.model.audio_vae import encode_audio as vae_encode_audio
 from ltx_core.model.upsampler import upsample_video
 from ltx_core.model.video_vae import TilingConfig, get_video_chunks_number, decode_video as vae_decode_video
+# >>> ADD these imports (place immediately after your video_vae import)
+from ltx_core.loader import LoraPathStrengthAndSDOps, LTXV_LORA_COMFY_RENAMING_MAP
 from ltx_core.quantization import QuantizationPolicy
 from ltx_core.types import Audio, AudioLatentShape, VideoPixelShape
 from ltx_pipelines.distilled import DistilledPipeline
@@ -267,6 +269,15 @@ checkpoint_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-22b-
 spatial_upsampler_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-spatial-upscaler-x2-1.0.safetensors")
 gemma_root = snapshot_download(repo_id=GEMMA_REPO)
 
+# >>> ADD: download and prepare LoRA descriptor
+print("Downloading LoRA for this Space (dagloop5/LoRA:LoRA.safetensors)...")
+lora_path = hf_hub_download(repo_id="dagloop5/LoRA", filename="LoRA.safetensors")
+# Create a descriptor object that the LTX loader expects.
+# Initial strength is set to 1.0; we'll mutate `.strength` at runtime from the UI slider.
+lora_descriptor = LoraPathStrengthAndSDOps(lora_path, 1.0, LTXV_LORA_COMFY_RENAMING_MAP)
+
+print(f"LoRA: {lora_path}")
+
 print(f"Checkpoint: {checkpoint_path}")
 print(f"Spatial upsampler: {spatial_upsampler_path}")
 print(f"Gemma root: {gemma_root}")
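
The descriptor above is built directly from the hf_hub_download return value. A small guard between the download and the pipeline construction can fail fast on a missing or truncated file; this is a sketch, not part of the commit, reusing only the lora_path variable from the hunk above.

    # Optional sanity check (sketch, not in the commit): fail fast if the
    # downloaded LoRA file is missing or truncated before building the pipeline.
    import os

    assert os.path.isfile(lora_path), f"LoRA not found at {lora_path}"
    print(f"LoRA file size: {os.path.getsize(lora_path) / 1e6:.1f} MB")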
@@ -276,14 +287,18 @@ pipeline = LTX23DistilledA2VPipeline(
     distilled_checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
     gemma_root=gemma_root,
-    loras=[],
+    loras=[lora_descriptor],
     quantization=QuantizationPolicy.fp8_cast(),
 )
 
 # Preload all models for ZeroGPU tensor packing.
-
+# >>> REPLACE the "Preload all models" block with this one:
+print("Preloading models (pinning decoders/encoders but leaving transformer dynamic)...")
 ledger = pipeline.model_ledger
-_transformer = ledger.transformer()
+
+# NOTE: do NOT call ledger.transformer() here. We keep the transformer's construction dynamic
+# so that changes to lora_descriptor.strength (made at runtime) are applied when the transformer
+# is built. We DO preload other components that are safe to pin.
 _video_encoder = ledger.video_encoder()
 _video_decoder = ledger.video_decoder()
 _audio_encoder = ledger.audio_encoder()
@@ -293,7 +308,8 @@ _spatial_upsampler = ledger.spatial_upsampler()
 _text_encoder = ledger.text_encoder()
 _embeddings_processor = ledger.gemma_embeddings_processor()
 
-ledger.transformer = lambda: _transformer
+# Replace ledger methods to return the pinned objects for those components.
+# Intentionally do NOT override ledger.transformer so transformer is built when needed.
 ledger.video_encoder = lambda: _video_encoder
 ledger.video_decoder = lambda: _video_decoder
 ledger.audio_encoder = lambda: _audio_encoder
@@ -302,7 +318,9 @@ ledger.vocoder = lambda: _vocoder
 ledger.spatial_upsampler = lambda: _spatial_upsampler
 ledger.text_encoder = lambda: _text_encoder
 ledger.gemma_embeddings_processor = lambda: _embeddings_processor
-
+
+print("Selected models pinned. Transformer remains dynamic to reflect runtime LoRA strength.")
+print("Preload complete.")
 
 print("=" * 80)
 print("Pipeline ready!")
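
The pinning scheme in the hunks above is plain memoization: the ledger's factory methods are overwritten with lambdas returning already-built objects, while any factory left untouched rebuilds its model on each call and therefore re-reads mutable state such as lora_descriptor.strength. A minimal self-contained sketch of the pattern (hypothetical names, not the ltx_core API):

    # Hypothetical sketch of the ledger-pinning pattern; no LTX internals.
    class Descriptor:
        def __init__(self, strength):
            self.strength = strength

    class Ledger:
        def __init__(self, descriptor):
            self.descriptor = descriptor

        def transformer(self):
            # Rebuilt on every call, so it sees the current descriptor.strength.
            return {"strength": self.descriptor.strength}

    desc = Descriptor(1.0)
    ledger = Ledger(desc)

    _pinned_encoder = object()                       # built once; expensive in reality
    ledger.video_encoder = lambda: _pinned_encoder   # pinned: same object every call
    # ledger.transformer is left alone: it stays dynamic.

    desc.strength = 0.25
    print(ledger.transformer())                        # {'strength': 0.25}
    print(ledger.video_encoder() is _pinned_encoder)   # True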
@@ -360,6 +378,7 @@ def generate_video(
     randomize_seed: bool = True,
     height: int = 1024,
     width: int = 1536,
+    lora_strength: float = 1.0,
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
@@ -397,6 +416,15 @@ def generate_video(
         tiling_config = TilingConfig.default()
         video_chunks_number = get_video_chunks_number(num_frames, tiling_config)
 
+        # >>> SET the LoRA descriptor strength at runtime
+        try:
+            # lora_descriptor was created at module scope during init
+            lora_descriptor.strength = float(lora_strength)
+            print(f"[LoRA] set strength = {lora_descriptor.strength}")
+        except Exception as e:
+            # Defensive: if the descriptor isn't present, continue quietly
+            print(f"[LoRA] could not set strength: {e}")
+
         log_memory("before pipeline call")
 
         video, audio = pipeline(
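
The write to lora_descriptor.strength only takes effect because the transformer factory was left dynamic: the mutation must land before pipeline(...) triggers the transformer build. A call-order sketch, with variable names from the diff and pipeline_kwargs as a stand-in for the real argument list:

    # Order matters: mutate the shared descriptor first, then invoke the
    # pipeline, whose un-pinned transformer factory reads the new strength.
    lora_descriptor.strength = float(lora_strength)   # 1) update shared state
    video, audio = pipeline(**pipeline_kwargs)        # 2) transformer built
                                                      #    here sees the value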
@@ -464,9 +492,18 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
         with gr.Row():
             enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=False)
             high_res = gr.Checkbox(label="High Resolution", value=True)
+        # >>> ADD this line inside the Advanced Settings (e.g., after randomize_seed)
+        lora_strength = gr.Slider(
+            label="LoRA Strength",
+            info="Scale for the LoRA weights (0.0 = off). Set near 1.0 for full effect.",
+            minimum=0.0,
+            maximum=1.5,
+            value=1.0,
+            step=0.01,
+        )
 
     with gr.Column():
-        output_video = gr.Video(label="Generated Video", autoplay=True)
+        output_video = gr.Video(label="Generated Video", autoplay=False)
 
     gr.Examples(
         examples=[
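
The slider's info text ("0.0 = off", near 1.0 for full effect, up to 1.5 to overdrive) matches the usual LoRA formulation, where strength linearly scales a low-rank delta added to each base weight matrix. A generic sketch of that math, not LTX internals:

    # Conceptual LoRA-strength sketch (assumed standard formulation).
    import numpy as np

    rng = np.random.default_rng(0)
    W = rng.standard_normal((8, 8))   # base weight matrix
    A = rng.standard_normal((4, 8))   # LoRA down-projection
    B = rng.standard_normal((8, 4))   # LoRA up-projection

    def apply_lora(W, A, B, strength):
        # strength scales the low-rank update: 0.0 = base model,
        # 1.0 = full LoRA effect, >1.0 overdrives it.
        return W + strength * (B @ A)

    print(np.allclose(apply_lora(W, A, B, 0.0), W))   # True: slider at 0 is off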
@@ -486,11 +523,12 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
                 True,
                 1024,
                 1024,
+                1,
             ],
         ],
         inputs=[
             first_image, last_image, input_audio, prompt, duration,
-            enhance_prompt, seed, randomize_seed, height, width,
+            enhance_prompt, seed, randomize_seed, height, width, lora_strength
         ],
     )
 
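
The lone 1 appended to the example row is the value for the new lora_strength slider: gr.Examples pairs each row entry with the component at the same position in inputs, so every row must grow by one entry when a component is added. A minimal illustration with toy components:

    import gradio as gr

    with gr.Blocks() as demo:
        prompt = gr.Textbox(label="Prompt")
        strength = gr.Slider(minimum=0.0, maximum=1.5, value=1.0, label="LoRA Strength")
        # One entry per component, in the same order as `inputs`:
        gr.Examples(examples=[["a red fox at dawn", 1]], inputs=[prompt, strength])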
@@ -516,7 +554,7 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
         fn=generate_video,
         inputs=[
             first_image, last_image, input_audio, prompt, duration, enhance_prompt,
-            seed, randomize_seed, height, width,
+            seed, randomize_seed, height, width, lora_strength
         ],
         outputs=[output_video, seed],
     )
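
The click wiring relies on the same positional contract: the k-th component in inputs becomes the k-th argument of generate_video, which is why lora_strength was added to the signature just before the Gradio-injected progress parameter. A self-contained sketch of that contract (hypothetical minimal app, not this Space):

    import gradio as gr

    def fn(prompt, strength=1.0):
        return f"{prompt} @ strength={strength}"

    with gr.Blocks() as demo:
        p = gr.Textbox(label="Prompt")
        s = gr.Slider(minimum=0.0, maximum=1.5, value=1.0, step=0.01, label="LoRA Strength")
        out = gr.Textbox(label="Result")
        # inputs=[p, s] maps positionally onto fn(prompt, strength).
        gr.Button("Run").click(fn=fn, inputs=[p, s], outputs=out)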