Update app.py
app.py CHANGED
@@ -6,6 +6,7 @@ import gc
 import tempfile
 import numpy as np
 from PIL import Image
+import os

 import gradio as gr
 from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
@@ -18,9 +19,9 @@ from torchao.quantization import Int8WeightOnlyConfig

 import aoti

-#
-# -------------------------- CONFIG
-#
+# ------------------------------------------------------------------
+# -------------------------- CONFIG -------------------------------
+# ------------------------------------------------------------------
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

 MAX_DIM = 832
@@ -44,9 +45,9 @@ default_negative_prompt = (
     "形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
 )

-#
-# ----------------------- MODEL LOADING
-#
+# ------------------------------------------------------------------
+# ----------------------- MODEL LOADING ---------------------------
+# ------------------------------------------------------------------
 pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
     transformer=WanTransformer3DModel.from_pretrained(
@@ -90,9 +91,9 @@ quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
 aoti.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/Wan2", variant="fp8da")
 aoti.aoti_blocks_load(pipe.transformer_2, "zerogpu-aoti/Wan2", variant="fp8da")

-#
-# -------------------------- HELPERS
-#
+# ------------------------------------------------------------------
+# -------------------------- HELPERS -----------------------------
+# ------------------------------------------------------------------
 def resize_image(image: Image.Image) -> Image.Image:
     """Resize / crop the input image so the model receives a valid size."""
     width, height = image.size
@@ -107,20 +108,18 @@ def resize_image(image: Image.Image) -> Image.Image:
     img = image

     if aspect_ratio > MAX_ASPECT_RATIO:
-        # Very wide → crop width
         crop_w = int(round(height * MAX_ASPECT_RATIO))
         left = (width - crop_w) // 2
         img = image.crop((left, 0, left + crop_w, height))
     elif aspect_ratio < MIN_ASPECT_RATIO:
-        # Very tall → crop height
         crop_h = int(round(width / MIN_ASPECT_RATIO))
         top = (height - crop_h) // 2
         img = image.crop((0, top, width, top + crop_h))
     else:
-        if width > height:
+        if width > height:
             target_w = MAX_DIM
             target_h = int(round(target_w / aspect_ratio))
-        else:
+        else:
             target_h = MAX_DIM
             target_w = int(round(target_h * aspect_ratio))
         img = image
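Note: the three branches above implement a clamp-then-scale rule: over-wide inputs are centre-cropped in width, over-tall inputs in height, and everything else is only rescaled so its long side becomes MAX_DIM. A standalone sketch of the cropping arithmetic (the MIN_ASPECT_RATIO / MAX_ASPECT_RATIO values are not visible in this hunk, so the constants below are assumptions):

    from PIL import Image

    MAX_ASPECT_RATIO = 832 / 480   # assumed; the real constants live in the CONFIG block
    MIN_ASPECT_RATIO = 480 / 832   # assumed

    def clamp_aspect(image: Image.Image) -> Image.Image:
        width, height = image.size
        aspect_ratio = width / height
        if aspect_ratio > MAX_ASPECT_RATIO:        # too wide: crop width, keep height
            crop_w = int(round(height * MAX_ASPECT_RATIO))
            left = (width - crop_w) // 2
            return image.crop((left, 0, left + crop_w, height))
        if aspect_ratio < MIN_ASPECT_RATIO:        # too tall: crop height, keep width
            crop_h = int(round(width / MIN_ASPECT_RATIO))
            top = (height - crop_h) // 2
            return image.crop((0, top, width, top + crop_h))
        return image                               # in range: only rescaled afterwards

    # e.g. a 4000x1000 input (ratio 4.0) is centre-cropped to about 1733x1000 first.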
@@ -155,7 +154,7 @@ def get_duration(
     randomize_seed,
     progress,
 ):
-    """
+    """GPU‑time estimator used by @spaces.GPU."""
     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
     BASE_STEP_DURATION = 15

@@ -165,13 +164,13 @@ def get_duration(
     step_duration = BASE_STEP_DURATION * factor ** 1.5
     est = 10 + int(steps) * step_duration

-    #
+    # never block the GPU for >30 s (feel free to raise while debugging)
     return min(est, 30)


 @spaces.GPU
 def translate_albanian_to_english(text):
-    """
+    """Helper – kept unchanged (not used in the UI)."""
     if not text.strip():
         raise gr.Error("Please enter a description.")
     for attempt in range(2):
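Note: get_duration is handed to @spaces.GPU(duration=...) below, where ZeroGPU treats a callable as a per-request estimator that receives the same arguments as the wrapped function and returns the number of seconds to reserve. A quick check of the arithmetic, assuming the size factor works out to 1.0 for a base-sized job (the factor computation itself sits in lines this capture does not show):

    steps = 4
    factor = 1.0                                  # assumed: frames*height*width equals the base workload
    step_duration = 15 * factor ** 1.5
    print(min(10 + steps * step_duration, 30))    # -> 30: even 4 steps hits the 30 s cap

With the cap in place the estimate is effectively constant, so raising the cap is the knob that matters for longer jobs.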
@@ -190,9 +189,9 @@ def translate_albanian_to_english(text):
     raise gr.Error("Translation failed. Please try again.")


-#
-# -------------------------- MAIN FUNCTION
-#
+# ------------------------------------------------------------------
+# -------------------------- MAIN FUNCTION -------------------------
+# ------------------------------------------------------------------
 @spaces.GPU(duration=get_duration)
 def generate_video(
     input_image,
@@ -204,56 +203,93 @@ def generate_video(
     guidance_scale_2=1.5,
     seed=42,
     randomize_seed=False,
-    progress=None, #
+    progress=None,  # optional – no UI impact
 ):
-    """Generate a video from an image + prompt."""
-    [… 24 lines of the old function body, not preserved in this page capture …]
+    """Generate a video from an image + prompt – now wrapped in robust try/except."""
+    try:
+        if input_image is None:
+            raise gr.Error("Please upload an input image.")
+
+        # --------------------------------------------------------------
+        # 1️⃣ Compute number of frames & seed
+        # --------------------------------------------------------------
+        num_frames = get_num_frames(duration_seconds)
+        current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+
+        # --------------------------------------------------------------
+        # 2️⃣ Resize image to model‑compatible dimensions
+        # --------------------------------------------------------------
+        resized = resize_image(input_image)
+
+        # --------------------------------------------------------------
+        # 3️⃣ Model inference
+        # --------------------------------------------------------------
+        out = pipe(
+            image=resized,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=resized.height,
+            width=resized.width,
+            num_frames=num_frames,
+            guidance_scale=float(guidance_scale),
+            guidance_scale_2=float(guidance_scale_2),
+            num_inference_steps=int(steps),
+            generator=torch.Generator(device="cuda").manual_seed(current_seed),
+        )
+        # `out.frames` is a list of batches → we want the first batch
+        output_frames = out.frames[0]
+
+        if not output_frames or len(output_frames) == 0:
+            raise RuntimeError("Pipeline returned an empty frame list.")
+
+        # --------------------------------------------------------------
+        # 4️⃣ Write temporary MP4 (requires ffmpeg)
+        # --------------------------------------------------------------
+        # Ensure ffmpeg is present – the Space image usually has it, but just in case:
+        if not any(
+            os.access(os.path.join(p, "ffmpeg"), os.X_OK) for p in os.getenv("PATH", "").split(":")
+        ):
+            # If ffmpeg is missing we raise a clear error; you can install it via
+            # `!apt-get update && apt-get install -y ffmpeg` in a startup cell.
+            raise FileNotFoundError(
+                "ffmpeg binary not found. Install it in the Space with `apt-get install -y ffmpeg`."
+            )

-
-    # -----------------------------------------------------------------
-    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
-        video_path = tmp.name
-    export_to_video(output_frames, video_path, fps=FIXED_FPS)
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
+            video_path = tmp.name

-    gc.collect()
-    torch.cuda.empty_cache()
+        export_to_video(output_frames, video_path, fps=FIXED_FPS)

+        # --------------------------------------------------------------
+        # 5️⃣ Clean‑up GPU memory before returning (helps repeated calls)
+        # --------------------------------------------------------------
+        gc.collect()
+        torch.cuda.empty_cache()

+        return video_path, current_seed

-    # --------------------------- UI -------------------------------
-    # ------------------------------------------------------------
-    def create_demo():
-        with gr.Blocks(css="", title="Fast Image to Video") as demo:
+    except Exception as exc:
         # -----------------------------------------------------------------
-        #
+        # Log the full traceback to the Space console – this is what you’ll see
+        # in the “View logs” tab. Gradio will display a nice red error box.
         # -----------------------------------------------------------------
+        import traceback
+
+        tb = traceback.format_exc()
+        print("\n--- VIDEO‑GENERATION ERROR ------------------------------------------------")
+        print(tb)
+        print("----------------------------------------------------------------------------\n")
+
+        # Re‑raise as a Gradio‑friendly error (the message will appear in the UI)
+        raise gr.Error(f"Video generation failed: {str(exc)}")
+
+
+# ------------------------------------------------------------------
+# --------------------------- UI -----------------------------------
+# ------------------------------------------------------------------
+def create_demo():
+    with gr.Blocks(css="", title="Fast Image to Video") as demo:
+        # ------------------- 500‑error guard (unchanged) -------------------
         gr.HTML(
             """
             <script>
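Note: the PATH loop above is a hand-rolled existence check for the ffmpeg binary. The standard library expresses the same test in one call; a minimal equivalent sketch (an alternative, not what the commit uses):

    import shutil

    # shutil.which scans PATH just like the os.access loop, returning the
    # executable's full path or None when it is missing.
    if shutil.which("ffmpeg") is None:
        raise FileNotFoundError("ffmpeg binary not found; install it with `apt-get install -y ffmpeg`.")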
@@ -265,9 +301,7 @@ def create_demo():
             """
         )

-        #
-        # All your custom CSS / visual theme – **unaltered**
-        # -----------------------------------------------------------------
+        # ------------------- All your custom CSS (exact copy) -------------
         gr.HTML(
             """
             <style>
@@ -295,7 +329,7 @@ def create_demo():
             body::before{
               content:"";
               display:block;
-              height:600px; /* <--
+              height:600px; /* <-- the 600 px top gap you requested */
               background:#000 !important;
             }
             .gr-blocks,.container{
@@ -378,7 +412,7 @@ def create_demo():
               box-sizing:border-box !important;
               display:block !important;
             }
-            /*
+            /* ---- hide all Gradio progress UI ---- */
             .image-container[aria-label="Generated Video"] .progress-text,
             .image-container[aria-label="Generated Video"] .gr-progress,
             .image-container[aria-label="Generated Video"] .gr-progress-bar,
@@ -429,7 +463,7 @@ def create_demo():
               pointer-events:none!important;
               overflow:hidden!important;
             }
-            /*
+            /* ---- hide toolbar buttons ---- */
             .image-container[aria-label="Input Image"] .file-upload,
             .image-container[aria-label="Input Image"] .file-preview,
             .image-container[aria-label="Input Image"] .image-actions,
@@ -523,9 +557,7 @@ def create_demo():
               animation:slide 4s ease-in-out infinite,glow-hover 3s ease-in-out infinite;
               transform:scale(1.05);
             }
-            button[aria-label="Fullscreen"],button[aria-label="Share"]{
-              display:none!important;
-            }
+            button[aria-label="Fullscreen"],button[aria-label="Share"]{display:none!important;}
             button[aria-label="Download"]{
               transform:scale(3);
               transform-origin:top right;
@@ -541,9 +573,7 @@ def create_demo():
             button[aria-label="Download"]:hover{
               box-shadow:0 0 12px rgba(255,255,255,0.5)!important;
             }
-            footer,.gr-button-secondary{
-              display:none!important;
-            }
+            footer,.gr-button-secondary{display:none!important;}
             .gr-group{
               background:#000!important;
               border:none!important;
@@ -573,9 +603,7 @@ def create_demo():
             """
         )

-        #
-        # UI layout – **exactly the same structure you built**
-        # -----------------------------------------------------------------
+        # ------------------- UI layout (unchanged) --------------------
         with gr.Row(elem_id="general_items"):
             gr.Markdown("# ")
             gr.Markdown(
@@ -613,9 +641,7 @@ def create_demo():
                 elem_classes=["gradio-component", "image-container"],
             )

-        #
-        # Wiring – unchanged component order (matches generate_video signature)
-        # -----------------------------------------------------------------
+        # ------------------- Wiring (exact order) --------------------
         generate_button.click(
             fn=generate_video,
             inputs=[
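Note: the inputs list is truncated in this capture, but the component order must match the generate_video signature positionally, and since the function returns (video_path, current_seed) the wiring needs two outputs in that order. A generic sketch of the pattern (every component name below is hypothetical; the Space's real ones are elided above):

    generate_button.click(
        fn=generate_video,
        inputs=[input_image, prompt_box, steps_slider, duration_slider,
                guidance_slider, guidance_2_slider, seed_number, randomize_checkbox],
        outputs=[output_video, seed_number],   # (video_path, current_seed)
    )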
@@ -637,5 +663,5 @@ def create_demo():

 if __name__ == "__main__":
     demo = create_demo()
-    #
+    # Keep the same launch flags you originally used
     demo.queue().launch(share=True)
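Note: on Spaces the queue is what serializes requests into the GPU worker; share=True only matters for local runs and is typically ignored on a hosted Space. A common variant that also bounds the waiting line (a sketch: max_size is a standard gradio queue argument, the value here is illustrative):

    if __name__ == "__main__":
        demo = create_demo()
        # Cap how many requests may wait so overload fails fast instead of piling up.
        demo.queue(max_size=20).launch()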