dagloop5 committed
Commit fb49839 · verified · 1 Parent(s): 14a0485

Update app.py: add the dagloop5/LoRA adapter with a runtime-adjustable LoRA Strength slider

Files changed (1):
  1. app.py +46 -8
app.py CHANGED
@@ -47,6 +47,8 @@ from ltx_core.components.noisers import GaussianNoiser
 from ltx_core.model.audio_vae import encode_audio as vae_encode_audio
 from ltx_core.model.upsampler import upsample_video
 from ltx_core.model.video_vae import TilingConfig, get_video_chunks_number, decode_video as vae_decode_video
+# >>> ADD these imports (place immediately after your video_vae import)
+from ltx_core.loader import LoraPathStrengthAndSDOps, LTXV_LORA_COMFY_RENAMING_MAP
 from ltx_core.quantization import QuantizationPolicy
 from ltx_core.types import Audio, AudioLatentShape, VideoPixelShape
 from ltx_pipelines.distilled import DistilledPipeline
@@ -267,6 +269,15 @@ checkpoint_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-22b-
 spatial_upsampler_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-spatial-upscaler-x2-1.0.safetensors")
 gemma_root = snapshot_download(repo_id=GEMMA_REPO)
 
+# >>> ADD: download and prepare LoRA descriptor
+print("Downloading LoRA for this Space (dagloop5/LoRA:LoRA.safetensors)...")
+lora_path = hf_hub_download(repo_id="dagloop5/LoRA", filename="LoRA.safetensors")
+# Create a descriptor object that the LTX loader expects.
+# initial strength is set to 1.0; we'll mutate `.strength` at runtime from the UI slider.
+lora_descriptor = LoraPathStrengthAndSDOps(lora_path, 1.0, LTXV_LORA_COMFY_RENAMING_MAP)
+
+print(f"LoRA: {lora_path}")
+
 print(f"Checkpoint: {checkpoint_path}")
 print(f"Spatial upsampler: {spatial_upsampler_path}")
 print(f"Gemma root: {gemma_root}")
@@ -276,14 +287,18 @@ pipeline = LTX23DistilledA2VPipeline(
     distilled_checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
     gemma_root=gemma_root,
-    loras=[],
+    loras=[lora_descriptor],
     quantization=QuantizationPolicy.fp8_cast(),
 )
 
 # Preload all models for ZeroGPU tensor packing.
-print("Preloading all models (including Gemma and audio components)...")
+# >>> REPLACE the "Preload all models" block with this one:
+print("Preloading models (pinning decoders/encoders but leaving transformer dynamic)...")
 ledger = pipeline.model_ledger
-_transformer = ledger.transformer()
+
+# NOTE: do NOT call ledger.transformer() here. We keep the transformer's construction dynamic
+# so that changes to lora_descriptor.strength (made at runtime) are applied when the transformer
+# is built. We DO preload other components that are safe to pin.
 _video_encoder = ledger.video_encoder()
 _video_decoder = ledger.video_decoder()
 _audio_encoder = ledger.audio_encoder()
@@ -293,7 +308,8 @@ _spatial_upsampler = ledger.spatial_upsampler()
 _text_encoder = ledger.text_encoder()
 _embeddings_processor = ledger.gemma_embeddings_processor()
 
-ledger.transformer = lambda: _transformer
+# Replace ledger methods to return the pinned objects for those components.
+# Intentionally do NOT override ledger.transformer so transformer is built when needed.
 ledger.video_encoder = lambda: _video_encoder
 ledger.video_decoder = lambda: _video_decoder
 ledger.audio_encoder = lambda: _audio_encoder
@@ -302,7 +318,9 @@ ledger.vocoder = lambda: _vocoder
 ledger.spatial_upsampler = lambda: _spatial_upsampler
 ledger.text_encoder = lambda: _text_encoder
 ledger.gemma_embeddings_processor = lambda: _embeddings_processor
-print("All models preloaded (including Gemma text encoder and audio encoder)!")
+
+print("Selected models pinned. Transformer remains dynamic to reflect runtime LoRA strength.")
+print("Preload complete.")
 
 print("=" * 80)
 print("Pipeline ready!")
@@ -360,6 +378,7 @@ def generate_video(
     randomize_seed: bool = True,
     height: int = 1024,
     width: int = 1536,
+    lora_strength: float = 1.0,
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
@@ -397,6 +416,15 @@ def generate_video(
         tiling_config = TilingConfig.default()
         video_chunks_number = get_video_chunks_number(num_frames, tiling_config)
 
+        # >>> SET the LoRA descriptor strength at runtime
+        try:
+            # lora_descriptor was created at module scope during init
+            lora_descriptor.strength = float(lora_strength)
+            print(f"[LoRA] set strength = {lora_descriptor.strength}")
+        except Exception as e:
+            # Defensive: if the descriptor isn't present, continue quietly
+            print(f"[LoRA] could not set strength: {e}")
+
         log_memory("before pipeline call")
 
         video, audio = pipeline(
@@ -464,9 +492,18 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
         with gr.Row():
             enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=False)
             high_res = gr.Checkbox(label="High Resolution", value=True)
+        # >>> ADD this line inside the Advanced Settings (e.g., after randomize_seed)
+        lora_strength = gr.Slider(
+            label="LoRA Strength",
+            info="Scale for the LoRA weights (0.0 = off). Set near 1.0 for full effect.",
+            minimum=0.0,
+            maximum=1.5,
+            value=1.0,
+            step=0.01,
+        )
 
         with gr.Column():
-            output_video = gr.Video(label="Generated Video", autoplay=True)
+            output_video = gr.Video(label="Generated Video", autoplay=False)
 
         gr.Examples(
             examples=[
@@ -486,11 +523,12 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
                     True,
                     1024,
                     1024,
+                    1,
                 ],
             ],
             inputs=[
                 first_image, last_image, input_audio, prompt, duration,
-                enhance_prompt, seed, randomize_seed, height, width,
+                enhance_prompt, seed, randomize_seed, height, width, lora_strength
            ],
        )
@@ -516,7 +554,7 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
         fn=generate_video,
         inputs=[
             first_image, last_image, input_audio, prompt, duration, enhance_prompt,
-            seed, randomize_seed, height, width,
+            seed, randomize_seed, height, width, lora_strength
         ],
         outputs=[output_video, seed],
     )
 
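The core of this commit is the switch from pinning the transformer to leaving it dynamic: once a ledger factory is replaced with `lambda: _transformer`, the returned object has its LoRA strength baked in at build time, and later writes to `lora_descriptor.strength` are silently ignored. Below is a minimal, self-contained sketch of that failure mode and the fix; `ToyLedger` and the dict descriptor are hypothetical stand-ins, not the real ltx_core API.

# Toy illustration of the pin-vs-dynamic distinction (hypothetical, not ltx_core).
class ToyLedger:
    def __init__(self, descriptor):
        self.descriptor = descriptor

    def transformer(self):
        # The build step reads the descriptor *now*; an object built earlier
        # never sees later mutations.
        return {"lora_strength": self.descriptor["strength"]}

descriptor = {"strength": 1.0}
ledger = ToyLedger(descriptor)

# Old behavior: pin the built object. Strength is frozen at 1.0 forever.
_transformer = ledger.transformer()
pinned_factory = lambda: _transformer

# New behavior: leave the factory alone; each call rebuilds with the current strength.
descriptor["strength"] = 0.3
assert pinned_factory()["lora_strength"] == 1.0       # stale
assert ledger.transformer()["lora_strength"] == 0.3   # reflects the slider

The trade-off, which the new log lines call out, is that the transformer is now rebuilt on demand rather than packed once up front for ZeroGPU.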
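A wiring detail the last three hunks depend on: Gradio binds `inputs` to function arguments positionally, so `lora_strength` must occupy the same position in the `generate_video` signature, in both `inputs=[...]` lists, and in every `gr.Examples` row. A stripped-down sketch of that contract, with illustrative component names rather than the Space's full UI:

import gradio as gr

def generate(prompt: str, lora_strength: float = 1.0) -> str:
    # Gradio maps inputs positionally: inputs[0] -> prompt, inputs[1] -> lora_strength.
    return f"{prompt} (LoRA strength {lora_strength:.2f})"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    lora_strength = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=1.5, value=1.0, step=0.01)
    result = gr.Textbox(label="Result")
    run = gr.Button("Run")

    # Example rows must list values in the same order as `inputs`.
    gr.Examples(examples=[["a red fox at dusk", 1.0]], inputs=[prompt, lora_strength])
    run.click(fn=generate, inputs=[prompt, lora_strength], outputs=[result])

if __name__ == "__main__":
    demo.launch()

Dropping the example row's trailing value, or appending it at a different position, would silently shift every later argument, which is why the commit touches the examples list and both inputs lists together.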