Update app.py
app.py
CHANGED
@@ -9,6 +9,7 @@ import tempfile
 import numpy as np
 from PIL import Image
 import os
+import traceback
 
 import gradio as gr
 from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
@@ -21,7 +22,7 @@ from torchao.quantization import Int8WeightOnlyConfig
 
 import aoti
 
-#
+# ------------------- constants -------------------
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 
 MAX_DIM = 832
@@ -45,7 +46,7 @@ default_negative_prompt = (
     "形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
 )
 
-#
+# ------------------- load pipeline -------------------
 pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
     transformer=WanTransformer3DModel.from_pretrained(
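The hunk above cuts off inside the `from_pretrained` call, so here is a hedged sketch of how a two-transformer Wan 2.2 image-to-video pipeline is typically assembled with diffusers. The dtype, the `subfolder` names, the `transformer_2` keyword, and the `.to("cuda")` placement are assumptions for illustration, not taken from this commit.

```python
import torch
from diffusers import WanTransformer3DModel
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline

MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

# Load the two expert transformers explicitly, then hand them to the pipeline.
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=WanTransformer3DModel.from_pretrained(
        MODEL_ID, subfolder="transformer", torch_dtype=torch.bfloat16
    ),
    transformer_2=WanTransformer3DModel.from_pretrained(
        MODEL_ID, subfolder="transformer_2", torch_dtype=torch.bfloat16
    ),
    torch_dtype=torch.bfloat16,
).to("cuda")
```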
@@ -90,13 +91,12 @@ aoti.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/Wan2", variant="fp8da")
 aoti.aoti_blocks_load(pipe.transformer_2, "zerogpu-aoti/Wan2", variant="fp8da")
 
 # ------------------------------------------------------------
-#
+# HELPERS
 # ------------------------------------------------------------
 def resize_image(image: Image.Image) -> Image.Image:
     """Resize / crop the input image to a size the model accepts."""
     w, h = image.size
 
-    # square shortcut
     if w == h:
         return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
 
@@ -105,19 +105,19 @@ def resize_image(image: Image.Image) -> Image.Image:
     MIN_AR = MIN_DIM / MAX_DIM
     img = image
 
-    if aspect > MAX_AR:
+    if aspect > MAX_AR:  # very wide
         crop_w = int(round(h * MAX_AR))
         left = (w - crop_w) // 2
         img = image.crop((left, 0, left + crop_w, h))
-    elif aspect < MIN_AR:
+    elif aspect < MIN_AR:  # very tall
         crop_h = int(round(w / MIN_AR))
         top = (h - crop_h) // 2
         img = image.crop((0, top, w, top + crop_h))
     else:
-        if w > h:
+        if w > h:  # landscape
            target_w = MAX_DIM
            target_h = int(round(target_w / aspect))
-        else:
+        else:  # portrait
            target_h = MAX_DIM
            target_w = int(round(target_h * aspect))
         img = image
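The branch comments added here distinguish very wide, very tall, landscape, and portrait inputs. For readers skimming the diff, below is a minimal, self-contained sketch of the same clamp-then-center-crop idea; the function name `clamp_aspect` is illustrative, and since the `MIN_DIM` and `SQUARE_DIM` values are not visible in this hunk the limits are taken as parameters.

```python
from PIL import Image

def clamp_aspect(image: Image.Image, max_dim: int, min_dim: int) -> Image.Image:
    """Center-crop images whose aspect ratio falls outside the model's limits."""
    w, h = image.size
    aspect = w / h
    max_ar = max_dim / min_dim   # widest ratio the model accepts
    min_ar = min_dim / max_dim   # tallest ratio the model accepts
    if aspect > max_ar:          # very wide: crop width, keep full height
        crop_w = int(round(h * max_ar))
        left = (w - crop_w) // 2
        return image.crop((left, 0, left + crop_w, h))
    if aspect < min_ar:          # very tall: crop height, keep full width
        crop_h = int(round(w / min_ar))
        top = (h - crop_h) // 2
        return image.crop((0, top, w, top + crop_h))
    return image                 # aspect already within limits
```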
@@ -152,7 +152,7 @@ def get_duration(
     randomize_seed,
     progress,
 ):
-    """GPU‑time estimator
+    """GPU‑time estimator for @spaces.GPU."""
     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
     BASE_STEP_DURATION = 15
 
@@ -162,12 +162,11 @@ def get_duration(
     step_duration = BASE_STEP_DURATION * factor ** 1.5
     est = 10 + int(steps) * step_duration
 
-
-    return min(est, 30)
+    return min(est, 30)  # safety cap
 
 
 # ------------------------------------------------------------
-# MAIN GENERATION FUNCTION
+# MAIN GENERATION FUNCTION – now with error logging
 # ------------------------------------------------------------
 @spaces.GPU(duration=get_duration)
 def generate_video(
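The estimator that feeds `@spaces.GPU(duration=get_duration)` is easier to read outside the diff. The sketch below assumes that `factor` (computed on lines this hunk does not show) is the frames × height × width workload relative to the 81 × 832 × 624 baseline; the function name `estimate_gpu_seconds` is illustrative.

```python
BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624   # reference workload
BASE_STEP_DURATION = 15                     # seconds per step at the reference workload

def estimate_gpu_seconds(steps: int, frames: int, height: int, width: int) -> float:
    # Assumed definition of `factor`; the commit only shows how it is used.
    factor = (frames * height * width) / BASE_FRAMES_HEIGHT_WIDTH
    step_duration = BASE_STEP_DURATION * factor ** 1.5
    est = 10 + int(steps) * step_duration   # 10 s fixed overhead plus per-step cost
    return min(est, 30)                     # hard cap, as in the commit

# At the reference workload, 6 steps would estimate 10 + 6 * 15 = 100 s,
# so the 30-second cap is what actually gets requested.
print(estimate_gpu_seconds(steps=6, frames=81, height=832, width=624))
```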
@@ -180,56 +179,70 @@ def generate_video(
     guidance_scale_2=1.5,
     seed=42,
     randomize_seed=False,
-    progress=None,
+    progress=None,  # optional – Gradio will inject if needed
 ):
-    """
-    [old docstring and body of generate_video not preserved in this view]
-    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
-        video_path = tmp.name
-    export_to_video(frames, video_path, fps=FIXED_FPS)
-
-    # Clean up GPU memory for the next request
-    # -----------------------------------------------------------------
-    gc.collect()
-    torch.cuda.empty_cache()
+    """
+    Run the Wan‑2.2 pipeline and return an MP4 file.
+    Any exception is caught, printed to the Space logs, and re‑raised as a Gradio error.
+    """
+    try:
+        if input_image is None:
+            raise gr.Error("Please upload an input image.")
+
+        num_frames = get_num_frames(duration_seconds)
+        current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+
+        resized = resize_image(input_image)
+
+        # -------------------- model inference --------------------
+        out = pipe(
+            image=resized,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=resized.height,
+            width=resized.width,
+            num_frames=num_frames,
+            guidance_scale=float(guidance_scale),
+            guidance_scale_2=float(guidance_scale_2),
+            num_inference_steps=int(steps),
+            generator=torch.Generator(device="cuda").manual_seed(current_seed),
+        )
+        frames = out.frames[0]
+
+        if not frames or len(frames) == 0:
+            raise RuntimeError("Pipeline returned an empty frame list.")
+
+        # -------------------- write MP4 --------------------
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
+            video_path = tmp.name
+        export_to_video(frames, video_path, fps=FIXED_FPS)
+
+        # -------------------- clean up --------------------
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        return video_path, current_seed
+
+    except Exception as exc:
+        # -----------------------------------------------------------------
+        # Print a full traceback to the Space console – you’ll see it in the
+        # “Logs” tab. After you identify the problem you can simply delete
+        # this whole try/except block.
+        # -----------------------------------------------------------------
+        tb = traceback.format_exc()
+        print("\n=== VIDEO‑GENERATION ERROR =================================================")
+        print(tb)
+        print("============================================================================\n")
+        # Re‑raise as a user‑friendly Gradio error
+        raise gr.Error(f"Video generation failed: {type(exc).__name__}: {exc}")
 
 
 # ------------------------------------------------------------
-# UI –
+# UI – unchanged visual theme (all CSS, 500‑error guard, gap, etc.)
 # ------------------------------------------------------------
 def create_demo():
     with gr.Blocks(css="", title="Fast Image to Video") as demo:
-        #
+        # ----- 500‑error guard (exact copy) -----
         gr.HTML(
             """
             <script>
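The new error handling boils down to one reusable pattern: log the full traceback so it lands in the Space's Logs tab, then re-raise a short `gr.Error` that Gradio shows in the UI. A stripped-down sketch of that pattern, with an illustrative helper name:

```python
import traceback

import gradio as gr

def run_safely(fn, *args, **kwargs):
    """Call fn, logging the full traceback and surfacing a short message to Gradio."""
    try:
        return fn(*args, **kwargs)
    except Exception as exc:
        print(traceback.format_exc())   # full details end up in the Space logs
        raise gr.Error(f"Operation failed: {type(exc).__name__}: {exc}")
```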
@@ -241,7 +254,7 @@ def create_demo():
             """
         )
 
-        #
+        # ----- all custom CSS (exactly as you posted) -----
         gr.HTML(
             """
             <style>
@@ -268,7 +281,7 @@ def create_demo():
            body::before{
              content:"";
              display:block;
-              height:600px;   /*
+              height:600px;   /* top gap */
              background:#000 !important;
            }
            .gr-blocks,.container{
@@ -351,7 +364,7 @@ def create_demo():
              box-sizing:border-box !important;
              display:block !important;
            }
-            /* ---- hide
+            /* ---- hide all Gradio progress UI ---- */
            .image-container[aria-label="Generated Video"] .progress-text,
            .image-container[aria-label="Generated Video"] .gr-progress,
            .image-container[aria-label="Generated Video"] .gr-progress-bar,
@@ -378,7 +391,7 @@ def create_demo():
            .image-container[aria-label="Generated Video"] *[class*="progress"],
            .image-container[aria-label="Generated Video"] *[class*="loading"],
            .image-container[aria-label="Generated Video"] *[class*="status"],
-            .image-container[aria-label="Generated Video"] *[class*="spinner
+            .image-container[aria-label="Generated Video"] *[class*="spinner"],
            .progress-text,.gr-progress,.gr-progress-bar,.progress-bar,
            [data-testid="progress"],.status,.loading,.spinner,.gr-spinner,
            .gr-loading,.gr-status,.gpu-init,.initializing,.queue,
@@ -542,7 +555,7 @@ def create_demo():
             """
         )
 
-        # ------------------- UI
+        # ------------------- UI components (same layout as original) -------------------
         with gr.Row(elem_id="general_items"):
             gr.Markdown("# ")
             gr.Markdown(
@@ -566,8 +579,6 @@ def create_demo():
                 placeholder="Describe the desired animation or motion",
                 elem_classes=["gradio-component"],
             )
-            # (the rest of the advanced sliders you had in the “original” UI are omitted
-            # because your current functional code only expects the arguments below)
             generate_button = gr.Button(
                 "Generate Video",
                 variant="primary",
@@ -582,21 +593,21 @@ def create_demo():
                 elem_classes=["gradio-component", "image-container"],
             )
 
-            # -------------------
+            # ------------------- wiring -------------------
             generate_button.click(
                 fn=generate_video,
                 inputs=[
-                    input_image,
-                    prompt,
-                    gr.State(value=6),
+                    input_image,
+                    prompt,
+                    gr.State(value=6),                        # steps
                     gr.State(value=default_negative_prompt),  # negative_prompt
-                    gr.State(value=3.2),
-                    gr.State(value=1.5),
-                    gr.State(value=1.5),
-                    gr.State(value=42),
-                    gr.State(value=True),
+                    gr.State(value=3.2),                      # duration_seconds
+                    gr.State(value=1.5),                      # guidance_scale
+                    gr.State(value=1.5),                      # guidance_scale_2
+                    gr.State(value=42),                       # seed
+                    gr.State(value=True),                     # randomize_seed
                 ],
-                outputs=[output_video, gr.State(value=42)],
+                outputs=[output_video, gr.State(value=42)],
             )
 
     return demo
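A note on this wiring: the inline `gr.State(value=...)` components act as fixed inputs for parameters the UI no longer exposes, and the second return value (the seed) is written into a throw-away `gr.State` output that nothing reads back. A minimal sketch of the same pattern, with illustrative component and function names:

```python
import gradio as gr

def run(image_path, prompt, steps):
    # Stand-in for generate_video: returns a result plus the seed that was used.
    return f"{prompt!r} rendered in {steps} steps from {image_path}", 42

with gr.Blocks() as demo:
    image = gr.Image(type="filepath", label="Input Image")
    prompt = gr.Textbox(label="Prompt")
    result = gr.Textbox(label="Result")
    go = gr.Button("Generate")
    go.click(
        fn=run,
        inputs=[image, prompt, gr.State(value=6)],  # gr.State supplies a fixed steps value
        outputs=[result, gr.State()],               # the returned seed is stored but never read
    )

if __name__ == "__main__":
    demo.launch()
```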