Update app.py
app.py
CHANGED
@@ -3,77 +3,37 @@ import gradio as gr
 import torch
 import numpy as np
 from diffusers import WanImageToVideoPipeline
 from diffusers.utils import export_to_video
 from transformers import CLIPVisionModel
 
-
-
-## Loading Encoder
-
-
-
 model_id = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 
 print(f"Using video Model: {model_id}")
+
 dtype = torch.bfloat16
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-
-    pipe.
-
-
-
-    print(f"Model Loaded in {device}")
-except:
-    print(f"Model loading on {device} failed as trying alternate method")
-    try:
-        pipe.to("cuda")
-        print("Model Loaded in cuda")
-    except:
-        print(f"Model loading on cuda also failed")
-
-try:
-    pipe.enable_model_cpu_offload()
-    print("Model CPU Offload Completed")
-except:
-    print("Model CPU Offload failed")
-
-
-try:
-    print("Enabling Attention Slicing ")
-    pipe.enable_attention_slicing()
-    print("Attention Slicing Enabled")
-except Exception as e:
-    print("Attention Slicing Failed")
-
-#
-
+# Load pipeline
+pipe = WanImageToVideoPipeline.from_pretrained(
+    model_id,
+    torch_dtype=dtype
+)
+
+pipe.to(device)
+print(f"Model Loaded in {device}")
+
+# Memory optimizations
+pipe.enable_model_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_sequential_cpu_offload()
+print("Optimizations Enabled")
 
 # ================================
 # Image Preparation
 # ================================
 
-def prepare_vertical_image(pipe, image_path, base_width=384, base_height=672):
-    """
-    Loads and resizes an image for Wan I2V vertical video generation.
-
-    Args:
-        pipe: WanImageToVideoPipeline (already loaded)
-        image_path (str): Path or URL to image
-        base_width (int): Desired width before adjustment
-        base_height (int): Desired height before adjustment
-
-    Returns:
-        resized_image (PIL.Image)
-        final_width (int)
-        final_height (int)
-    """
-
-    # Load image
-    image = load_image(image_path)
+def prepare_vertical_image(pipe, image, base_width=384, base_height=672):
 
-    # Ensure compatibility with Wan spatial constraints
     mod_value = (
        pipe.vae_scale_factor_spatial *
        pipe.transformer.config.patch_size[1]
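Note on the new loading block: in diffusers, calling pipe.to(device) and then both enable_model_cpu_offload() and enable_sequential_cpu_offload() is normally redundant, since each offload helper installs hooks that manage device placement on its own, and sequential offload is the more aggressive of the two. A minimal sketch of the more common pattern, reusing the model_id and dtype above (a sketch, not the Space's actual code):

    import torch
    from diffusers import WanImageToVideoPipeline

    pipe = WanImageToVideoPipeline.from_pretrained(
        "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
        torch_dtype=torch.bfloat16,
    )
    # Pick ONE memory strategy: the offload hooks move modules to the GPU
    # on demand, so no explicit pipe.to("cuda") is needed first.
    pipe.enable_model_cpu_offload()          # faster, moderate VRAM savings
    # pipe.enable_sequential_cpu_offload()   # slowest option, lowest VRAM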
@@ -87,7 +47,9 @@ def prepare_vertical_image(pipe, image_path, base_width=384, base_height=672):
     return resized_image, final_width, final_height
 
 
-
+# ================================
+# Video Generation
+# ================================
 
 @spaces.GPU(size="xlarge", duration=180)
 def generate_video(input_image, prompt, negative_prompt):
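The diff elides the body of prepare_vertical_image between the mod_value computation and the return. For orientation: the usual Wan recipe snaps both dimensions down to multiples of mod_value (vae_scale_factor_spatial times the transformer patch size, typically 16) before resizing. A hypothetical reconstruction, consistent with the visible signature and return values but not necessarily the actual elided code:

    # Hypothetical body; the elided code may differ.
    final_width = (base_width // mod_value) * mod_value    # e.g. 384 stays 384
    final_height = (base_height // mod_value) * mod_value  # e.g. 672 stays 672
    resized_image = image.resize((final_width, final_height))
    return resized_image, final_width, final_height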
@@ -95,55 +57,65 @@ def generate_video(input_image, prompt, negative_prompt):
     if input_image is None:
         return None
 
-    image = input_image
-
-    # Prepare 9:16 vertical reduced resolution
-    image, width, height = prepare_vertical_image(pipe, image)
+    image, width, height = prepare_vertical_image(pipe, input_image)
 
-    print(f"Generating
+    print(f"Generating vertical video {width}x{height}")
 
-    # 10 seconds at 16 FPS = 160 frames
     video_frames = pipe(
         image=image,
         prompt=prompt,
         negative_prompt=negative_prompt,
         height=height,
         width=width,
-        num_frames=
+        num_frames=161,  # FIXED
         guidance_scale=4.5,
         num_inference_steps=25
     ).frames[0]
 
     output_path = "vertical_output.mp4"
+
     export_to_video(video_frames, output_path, fps=16)
 
     return output_path
 
 
+# ================================
 # Gradio UI
 # ================================
 
-with gr.Blocks(title="Wan 14B Vertical I2V") as demo:
+with gr.Blocks(title="Wan 2.2 Vertical I2V") as demo:
 
-    gr.Markdown("#
-    gr.Markdown("Generate 10-second Vertical (9:16) AI Videos")
+    gr.Markdown("# 🎬 Wan 2.2 Image → Video Generator")
+    gr.Markdown("Generate **10-second Vertical (9:16) AI Videos**")
 
     with gr.Row():
-        input_image = gr.Image(type="pil", label="Upload Image")
-
-        prompt = gr.Textbox(
-            label="Prompt",
-            placeholder="Describe motion, camera movement, cinematic effect..."
-        )
-
-
-            value="blurry, low quality, distorted, static",
-        )
-
-
+        # LEFT SIDE (INPUTS)
+        with gr.Column(scale=1):
+            input_image = gr.Image(
+                type="pil",
+                label="Upload Image"
+            )
+
+            prompt = gr.Textbox(
+                label="Prompt",
+                placeholder="Describe motion, camera movement..."
+            )
+
+            negative_prompt = gr.Textbox(
+                label="Negative Prompt",
+                value="blurry, low quality, distorted, static"
+            )
+
+            generate_btn = gr.Button("Generate Video", variant="primary")
+
+        # RIGHT SIDE (OUTPUT)
+        with gr.Column(scale=1):
+            output_video = gr.Video(
+                label="Generated Video"
+            )
 
     generate_btn.click(
         generate_video,
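On the num_frames change: Wan's temporal VAE compresses frames in groups of 4, so the checkpoints expect frame counts of the form 4k + 1. The old comment's "10 seconds at 16 FPS = 160 frames" lands on an invalid count, which is presumably what the FIXED marker refers to; 161 = 4 * 40 + 1 gives just over 10 seconds at 16 fps. The arithmetic as a check:

    fps, seconds = 16, 10
    num_frames = seconds * fps + 1        # 161
    assert (num_frames - 1) % 4 == 0      # Wan frame-count constraint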
@@ -151,7 +123,4 @@ with gr.Blocks(title="Wan 14B Vertical I2V") as demo:
         outputs=output_video
     )
 
-demo.launch(server_name="0.0.0.0", server_port=7860)
-
-
-
+demo.launch(server_name="0.0.0.0", server_port=7860)
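A quick local smoke test of the updated generate_video, with a hypothetical image path (the spaces.GPU decorator is effectively a no-op outside ZeroGPU hardware, though the 14B model still needs a large GPU):

    from PIL import Image

    img = Image.open("test.jpg")   # hypothetical input photo
    out = generate_video(img, "slow cinematic push-in", "blurry, low quality")
    print(out)                     # -> vertical_output.mp4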