Update app.py
app.py CHANGED
@@ -1,44 +1,40 @@
 import os
-
-if os.getcwd() != '/home/user/app':
-    os.chdir('/home/user/app')
-
 import sys
-import spaces
 import subprocess
 import asyncio
+import uuid
 from typing import Sequence, Mapping, Any, Union
 
+# --- 2. Let ComfyUI's main.py handle all initial setup ---
 print("Importing ComfyUI's main.py for setup...")
 import main
 print("ComfyUI main imported.")
 
-
+# --- 3. Now we can import the rest of the necessary modules ---
 import torch
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from comfy import model_management
+import spaces
 from PIL import Image
 import random
-import nodes
-
+import nodes
 
-# --- Manually trigger the node initialization ---
-# This step is normally done inside main.start_comfyui(), but we do it here.
-# It loads all built-in, extra, and custom nodes into the NODE_CLASS_MAPPINGS.
+# --- 4. Manually trigger the node initialization ---
 print("Initializing ComfyUI nodes...")
 loop = asyncio.new_event_loop()
 asyncio.set_event_loop(loop)
 loop.run_until_complete(nodes.init_extra_nodes())
 print("Nodes initialized.")
 
-# --- Helper function
+# --- Helper function ---
 def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
     try:
         return obj[index]
     except KeyError:
         return obj["result"][index]
 
+
 # --- Model Downloads ---
 print("Downloading models from Hugging Face Hub...")
 hf_hub_download(repo_id="Comfy-Org/Wan_2.1_ComfyUI_repackaged", filename="split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", local_dir="models/text_encoders")
@@ -52,7 +48,6 @@ print("Downloads complete.")
 
 
 # --- ZeroGPU: Pre-load models and instantiate nodes globally ---
-# This part will now work because NODE_CLASS_MAPPINGS is correctly populated.
 cliploader = nodes.NODE_CLASS_MAPPINGS["CLIPLoader"]()
 cliptextencode = nodes.NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
 unetloader = nodes.NODE_CLASS_MAPPINGS["UNETLoader"]()
@@ -68,7 +63,6 @@ ksampleradvanced = nodes.NODE_CLASS_MAPPINGS["KSamplerAdvanced"]()
 vaedecode = nodes.NODE_CLASS_MAPPINGS["VAEDecode"]()
 createvideo = nodes.NODE_CLASS_MAPPINGS["CreateVideo"]()
 savevideo = nodes.NODE_CLASS_MAPPINGS["SaveVideo"]()
-imageresize = nodes.NODE_CLASS_MAPPINGS["ImageResize+"]()
 
 cliploader_38 = cliploader.load_clip(clip_name="umt5_xxl_fp8_e4m3fn_scaled.safetensors", type="wan", device="cpu")
 unetloader_37_low_noise = unetloader.load_unet(unet_name="wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", weight_dtype="default")
@@ -88,8 +82,7 @@ valid_models = [getattr(loader[0], 'patcher', loader[0]) for loader in model_loa
 model_management.load_models_gpu(valid_models)
 
 # --- App Logic ---
-def calculate_dimensions(image_path):
-    with Image.open(image_path) as img: width, height = img.size
+def calculate_dimensions(width, height):
     if width == height: return 480, 480
     if width > height: new_width, new_height = 832, int(height * (832 / width))
     else: new_height, new_width = 832, int(width * (832 / height))
@@ -97,23 +90,46 @@ def calculate_dimensions(image_path):
 
 @spaces.GPU(duration=120)
 def generate_video(prompt, first_image_path, last_image_path, duration_seconds):
+    # Create a temporary directory for resized images
+    temp_dir = f"temp_resized_{uuid.uuid4().hex}"
+    os.makedirs(temp_dir, exist_ok=True)
+
     with torch.inference_mode():
+        # --- Python Image Preprocessing using Pillow ---
+        print("Preprocessing images with Pillow...")
+        with Image.open(first_image_path) as img:
+            orig_width, orig_height = img.size
+
+        target_width, target_height = calculate_dimensions(orig_width, orig_height)
+
+        # Resize first image
+        with Image.open(first_image_path) as img:
+            img_resized = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
+            resized_first_path = os.path.join(temp_dir, "first_frame_resized.png")
+            img_resized.save(resized_first_path)
+
+        # Resize second image to match the target dimensions
+        with Image.open(last_image_path) as img:
+            img_resized = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
+            resized_last_path = os.path.join(temp_dir, "last_frame_resized.png")
+            img_resized.save(resized_last_path)
+        print(f"Images resized to {target_width}x{target_height} and saved temporarily.")
+        # --- End Preprocessing ---
+
         FPS, MAX_FRAMES = 16, 81
         length_in_frames = max(1, min(int(duration_seconds * FPS), MAX_FRAMES))
         print(f"Requested duration: {duration_seconds}s. Calculated frames: {length_in_frames}")
-
-
-        loaded_first_image = loadimage.load_image(image=
-
-
-        resized_last_image = imageresize.execute(width=target_width, height=target_height, interpolation="bicubic", method="stretch", image=get_value_at_index(loaded_last_image, 0))
-
+
+        # Load the pre-processed images into ComfyUI
+        loaded_first_image = loadimage.load_image(image=os.path.basename(resized_first_path))
+        loaded_last_image = loadimage.load_image(image=os.path.basename(resized_last_path))
+
         cliptextencode_6 = cliptextencode.encode(text=prompt, clip=get_value_at_index(cliploader_38, 0))
         cliptextencode_7_negative = cliptextencode.encode(text="low quality, worst quality, jpeg artifacts, ugly, deformed, blurry", clip=get_value_at_index(cliploader_38, 0))
-        clipvisionencode_51 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(
-        clipvisionencode_87 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(
+        clipvisionencode_51 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(loaded_first_image, 0))
+        clipvisionencode_87 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(loaded_last_image, 0))
 
-        wanfirstlastframetovideo_83 = wanfirstlastframetovideo.EXECUTE_NORMALIZED(width=target_width, height=target_height, length=length_in_frames, batch_size=1, positive=get_value_at_index(cliptextencode_6, 0), negative=get_value_at_index(cliptextencode_7_negative, 0), vae=get_value_at_index(vaeloader_39, 0), clip_vision_start_image=get_value_at_index(clipvisionencode_51, 0), clip_vision_end_image=get_value_at_index(clipvisionencode_87, 0), start_image=get_value_at_index(
+        wanfirstlastframetovideo_83 = wanfirstlastframetovideo.EXECUTE_NORMALIZED(width=target_width, height=target_height, length=length_in_frames, batch_size=1, positive=get_value_at_index(cliptextencode_6, 0), negative=get_value_at_index(cliptextencode_7_negative, 0), vae=get_value_at_index(vaeloader_39, 0), clip_vision_start_image=get_value_at_index(clipvisionencode_51, 0), clip_vision_end_image=get_value_at_index(clipvisionencode_87, 0), start_image=get_value_at_index(loaded_first_image, 0), end_image=get_value_at_index(loaded_last_image, 0))
 
         ksampler_positive = get_value_at_index(wanfirstlastframetovideo_83, 0)
         ksampler_negative = get_value_at_index(wanfirstlastframetovideo_83, 1)
@@ -128,7 +144,7 @@ def generate_video(prompt, first_image_path, last_image_path, duration_seconds):
 
     return f"output/{savevideo_103['ui']['videos'][0]['filename']}"
 
-# --- Gradio Interface
+# --- Gradio Interface ---
 with gr.Blocks() as app:
     gr.Markdown("# Wan 2.2 First/Last Frame to Video")
     gr.Markdown("Provide a starting image, an ending image, a text prompt, and a desired duration to generate a video transitioning between them.")
@@ -149,4 +165,7 @@ if __name__ == "__main__":
     if not os.path.exists("examples"): os.makedirs("examples")
     if not os.path.exists("examples/start.png"): Image.new('RGB', (512, 512), color='red').save('examples/start.png')
     if not os.path.exists("examples/end.png"): Image.new('RGB', (512, 512), color='blue').save('examples/end.png')
+    # Set the input directory for LoadImage to find the temp files
+    import folder_paths
+    folder_paths.add_model_folder_path("input", "temp_resized")
     app.launch()
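
For reference, a minimal sketch of the duration clamp that the new generate_video applies, using the constants from the diff (16 fps, 81-frame cap, so roughly a five-second ceiling). The frames_for name is hypothetical and used only for illustration.

FPS, MAX_FRAMES = 16, 81

def frames_for(duration_seconds: float) -> int:
    # Same arithmetic as in the diff: never below 1 frame, never above 81.
    return max(1, min(int(duration_seconds * FPS), MAX_FRAMES))

assert frames_for(2) == 32    # 2 s -> 32 frames
assert frames_for(10) == 81   # clamped to the 81-frame cap (~5 s)
assert frames_for(0) == 1     # at least one frame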
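
Similarly, a small illustration of the get_value_at_index helper kept by this change. It assumes, as is common for ComfyUI node calls, that a node method returns either a plain tuple of outputs or a dict carrying them under "result".

from typing import Any, Mapping, Sequence, Union

def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    # Same helper as in app.py: try positional indexing first, fall back to
    # the "result" entry that some node outputs are wrapped in.
    try:
        return obj[index]
    except KeyError:
        return obj["result"][index]

print(get_value_at_index(("latent", "mask"), 0))              # -> latent
print(get_value_at_index({"result": ("latent", "mask")}, 0))  # -> latent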