ford442 committed · verified · Commit 4bdb840 · Parent: 8df2c50

Update app.py

Files changed (1)
  1. app.py (+25, -15)
app.py CHANGED
@@ -131,12 +131,10 @@ def use_last_frame_as_input(video_filepath):
         return None, gr.update()
     try:
         with imageio.get_reader(video_filepath) as reader:
-            # A robust way to get the last frame
             frame_list = list(reader)
             if not frame_list:
                 raise ValueError("Video file is empty or unreadable.")
             last_frame_np = frame_list[-1]
-
             pil_image = Image.fromarray(last_frame_np)
             output_image_path = os.path.join(tempfile.mkdtemp(), f"last_frame_{random.randint(10000,99999)}.png")
             pil_image.save(output_image_path)
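The hunk above extracts the last frame by materializing every frame with `list(reader)`, which holds the whole video in memory. A minimal sketch of a constant-memory alternative, assuming the same `imageio` reader; the helper name `read_last_frame` is hypothetical, not part of app.py:

```python
import imageio

def read_last_frame(video_filepath):
    # Hypothetical helper: iterate the reader instead of calling list(reader),
    # so only one decoded frame is held in memory at a time.
    last_frame = None
    with imageio.get_reader(video_filepath) as reader:
        for frame in reader:      # decodes frames one by one
            last_frame = frame    # keep only the most recent frame
    if last_frame is None:
        raise ValueError("Video file is empty or unreadable.")
    return last_frame
```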
@@ -165,8 +163,6 @@ def stitch_videos(clips_list):
 def clear_clips():
     return [], "Clips created: 0", None, None
 
-# --- MODIFIED ---
-# The function signature has been reordered to match the way the inputs are assembled later.
 @spaces.GPU(duration=get_duration)
 def generate(prompt, negative_prompt, clips_list, input_image_filepath, input_video_filepath,
              height_ui, width_ui, mode, duration_ui, ui_frames_to_use,
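The deleted comments point at a real constraint: Gradio passes component values to a callback positionally, in the order of the `inputs=` list, so the list must mirror the function signature (the last hunk keeps the two in sync). A minimal sketch with hypothetical components, not the real app.py wiring:

```python
import gradio as gr

def generate(prompt, height, width):
    return f"{prompt} @ {int(width)}x{int(height)}"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    height = gr.Number(label="Height", value=512)
    width = gr.Number(label="Width", value=704)
    out = gr.Textbox(label="Result")
    # The inputs list is ordered exactly like generate(prompt, height, width).
    gr.Button("Run").click(generate, inputs=[prompt, height, width], outputs=[out])
```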
@@ -192,17 +188,33 @@ def generate(prompt, negative_prompt, clips_list, input_image_filepath, input_vi
     height_padded = ((actual_height - 1) // 32 + 1) * 32
     width_padded = ((actual_width - 1) // 32 + 1) * 32
     padding_values = calculate_padding(actual_height, actual_width, height_padded, width_padded)
-
+    num_frames_padded = max(9, ((actual_num_frames - 2) // 8 + 1) * 8 + 1)
+
+    # --- MODIFIED ---
+    # Reverted to the explicit dictionary construction to ensure all required keys are present.
     call_kwargs = {
-        "prompt": prompt, "negative_prompt": negative_prompt, "height": height_padded, "width": width_padded,
-        "num_frames": max(9, ((actual_num_frames - 2) // 8 + 1) * 8 + 1), "num_inference_steps": num_steps, "frame_rate": int(fps),
-        "generator": torch.Generator(device=target_inference_device).manual_seed(int(seed_ui)), "output_type": "pt",
-        **PIPELINE_CONFIG_YAML,
-        "is_video": True, "mixed_precision": (PIPELINE_CONFIG_YAML["precision"] == "mixed_precision"),
-        "offload_to_cpu": False, "enhance_prompt": False,
+        "prompt": prompt,
+        "negative_prompt": negative_prompt,
+        "height": height_padded,
+        "width": width_padded,
+        "num_frames": num_frames_padded,
+        "num_inference_steps": num_steps,
+        "frame_rate": int(fps),
+        "generator": torch.Generator(device=target_inference_device).manual_seed(int(seed_ui)),
+        "output_type": "pt",
+        "conditioning_items": None,
+        "media_items": None,
+        "decode_timestep": PIPELINE_CONFIG_YAML["decode_timestep"],
+        "decode_noise_scale": PIPELINE_CONFIG_YAML["decode_noise_scale"],
+        "stochastic_sampling": PIPELINE_CONFIG_YAML["stochastic_sampling"],
+        "image_cond_noise_scale": 0.15,
+        "is_video": True,
+        "vae_per_channel_normalize": True,
+        "mixed_precision": (PIPELINE_CONFIG_YAML["precision"] == "mixed_precision"),
+        "offload_to_cpu": False,
+        "enhance_prompt": False,
     }
 
-    # This logic can be simplified and organized
     stg_mode_str = PIPELINE_CONFIG_YAML.get("stg_mode", "attention_values").lower()
     stg_map = {
         "stg_av": SkipLayerStrategy.AttentionValues, "attention_values": SkipLayerStrategy.AttentionValues,
@@ -316,9 +328,7 @@ with gr.Blocks(css=css) as demo:
     image_tab.select(update_task_image, outputs=[mode])
     text_tab.select(update_task_text, outputs=[mode])
     video_tab.select(update_task_video, outputs=[mode])
-
-    # --- MODIFIED ---
-    # The input lists are now defined explicitly and in the correct order to match the `generate` function signature.
+
     common_params = [
         height_input, width_input, mode, duration_input, frames_to_use,
         seed_input, randomize_seed_input, guidance_scale_input, improve_texture, num_steps, fps
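The core change of this commit, in the third hunk, replaces `**PIPELINE_CONFIG_YAML` inside `call_kwargs` with explicitly listed keys. A small illustration of why the splat is fragile; the values below are made up, not the real config:

```python
cfg = {"is_video": False, "precision": "mixed_precision", "sampler": "euler"}

# Splatting forwards every key, and literals placed after the splat silently
# override splatted values.
merged = {"prompt": "a cat", **cfg, "is_video": True}
assert merged["is_video"] is True  # the later literal wins

def pipeline(prompt, is_video, precision):
    return prompt, is_video, precision

# pipeline(**merged) would raise TypeError here: 'sampler' is not a parameter
# of pipeline(). Listing the keys explicitly avoids passing unexpected kwargs.
```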
 