ltx-video-distilled

Running on Zero

App Files Community

linoyts HF Staff committed on May 13

Commit

ddd3c88

verified ·

1 Parent(s): 9f55bc7

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -5

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
 from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
 from diffusers.utils import export_to_video, load_video
-pipe = LTXConditionPipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.7-diffusers", torch_dtype=torch.bfloat16)
 pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.7-Latent-Spatial-Upsampler-diffusers", vae=pipe.vae, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 pipe_upsample.to("cuda")
@@ -20,9 +20,68 @@ def round_to_nearest_resolution_acceptable_by_vae(height, width):
 @spaces.GPU
 def generate(prompt,
              negative_prompt,
              steps,
-             seed):
-    return
 css="""
@@ -64,8 +123,9 @@ with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
       randomize_seed = gr.Checkbox(label="randomize seed")
      with gr.Row():
       steps = gr.Slider(label="Steps", minimum=1, maximum=30, value=8, step=1)
-      num_frames = gr.Slider(label="# frames", minimum=1, maximum=30, value=8, step=1)

 from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
 from diffusers.utils import export_to_video, load_video
+pipe = LTXConditionPipeline.from_pretrained("linoyts/LTX-Video-0.9.7-distilled-diffusers", torch_dtype=torch.bfloat16)
 pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.7-Latent-Spatial-Upsampler-diffusers", vae=pipe.vae, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 pipe_upsample.to("cuda")
 @spaces.GPU
 def generate(prompt,
              negative_prompt,
+             image,
              steps,
+             num_frames,
+             seed,
+             randomize_seed):
+    expected_height, expected_width = 768, 1152
+    downscale_factor = 2 / 3
+    if image is not None:
+        condition1 = LTXVideoCondition(video=image, frame_index=0)
+    else:
+        condition1 = None
+    # Part 1. Generate video at smaller resolution
+    # Text-only conditioning is also supported without the need to pass `conditions`
+    downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
+    downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
+    latents = pipe(
+            conditions=condition1,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            width=downscaled_width,
+            height=downscaled_height,
+            num_frames=num_frames,
+            num_inference_steps=steps,
+            decode_timestep = 0.05,
+            decode_noise_scale = 0.025,
+            generator=torch.Generator().manual_seed(seed),
+            output_type="latent",
+        ).frames
+    # Part 2. Upscale generated video using latent upsampler with fewer inference steps
+    # The available latent upsampler upscales the height/width by 2x
+    upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
+    upscaled_latents = pipe_upsample(
+        latents=latents,
+        output_type="latent"
+    ).frames
+    # Part 3. Denoise the upscaled video with few steps to improve texture (optional, but recommended)
+    video = pipe(
+        conditions=condition1,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        width=upscaled_width,
+        height=upscaled_height,
+        num_frames=num_frames,
+        denoise_strength=0.4,  # Effectively, 4 inference steps out of 10
+        num_inference_steps=10,
+        latents=upscaled_latents,
+        decode_timestep=0.05,
+        image_cond_noise_scale=0.025,
+        generator=torch.Generator().manual_seed(seed),
+        output_type="pil",
+    ).frames[0]
+    # Part 4. Downscale the video to the expected resolution
+    video = [frame.resize((expected_width, expected_height)) for frame in video]
+    return video
 css="""
       randomize_seed = gr.Checkbox(label="randomize seed")
      with gr.Row():
       steps = gr.Slider(label="Steps", minimum=1, maximum=30, value=8, step=1)
+      num_frames = gr.Slider(label="# frames", minimum=1, maximum=200, value=161, step=1)