Sushantkas committed on
Commit
2f27afc
·
verified ·
1 Parent(s): bc73034

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -83
app.py CHANGED
@@ -3,77 +3,37 @@ import gradio as gr
3
  import torch
4
  import numpy as np
5
  from diffusers import WanImageToVideoPipeline
6
- from diffusers.utils import export_to_video, load_image
7
  from transformers import CLIPVisionModel
8
 
9
-
10
-
11
- ## Loading Encoder
12
-
13
-
14
-
15
  model_id = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
16
 
17
  print(f"Using video Model: {model_id}")
 
18
  dtype = torch.bfloat16
19
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
 
21
- pipe = WanImageToVideoPipeline.from_pretrained(model_id, torch_dtype=dtype)
22
- pipe.to(device)
23
-
24
- try:
25
- pipe.to(device)
26
- print(f"Model Loaded in {device}")
27
- except:
28
- print(f"Model loading on {device} failed as trying alternate method")
29
- try:
30
- pipe.to("cuda")
31
- print("Model Loaded in cuda")
32
- except:
33
- print(f"Model loading on cuda also failed")
34
-
35
- try:
36
- pipe.enable_model_cpu_offload()
37
- print("Model CPU Offload Completed")
38
- except:
39
- print("Model CPU Offload failed")
40
-
41
-
42
- try:
43
- print("Enabling Attention Slicing ")
44
- pipe.enable_attention_slicing()
45
- print("Attention Slicing Enabled")
46
- except Exception as e:
47
- print("Attention Slicing Failed")
48
 
 
 
49
 
50
- # Loading function for Image
51
- from diffusers.utils import load_image
 
 
 
52
 
53
  # ================================
54
- # Image Preparation Function
55
  # ================================
56
 
57
- def prepare_vertical_image(pipe, image_path, base_width=384, base_height=672):
58
- """
59
- Loads and resizes an image for Wan I2V vertical video generation.
60
-
61
- Args:
62
- pipe: WanImageToVideoPipeline (already loaded)
63
- image_path (str): Path or URL to image
64
- base_width (int): Desired width before adjustment
65
- base_height (int): Desired height before adjustment
66
-
67
- Returns:
68
- resized_image (PIL.Image)
69
- final_width (int)
70
- final_height (int)
71
- """
72
-
73
- # Load image
74
- image = load_image(image_path)
75
 
76
- # Ensure compatibility with Wan spatial constraints
77
  mod_value = (
78
  pipe.vae_scale_factor_spatial *
79
  pipe.transformer.config.patch_size[1]
@@ -87,7 +47,9 @@ def prepare_vertical_image(pipe, image_path, base_width=384, base_height=672):
87
  return resized_image, final_width, final_height
88
 
89
 
90
-
 
 
91
 
92
  @spaces.GPU(size="xlarge", duration=180)
93
  def generate_video(input_image, prompt, negative_prompt):
@@ -95,55 +57,65 @@ def generate_video(input_image, prompt, negative_prompt):
95
  if input_image is None:
96
  return None
97
 
98
- image = input_image
99
-
100
- # Prepare 9:16 vertical reduced resolution
101
- image, width, height = prepare_vertical_image(pipe, image)
102
 
103
- print(f"Generating 10 sec vertical video at {width}x{height}")
104
 
105
- # 10 seconds at 16 FPS = 160 frames
106
  video_frames = pipe(
107
  image=image,
108
  prompt=prompt,
109
  negative_prompt=negative_prompt,
110
  height=height,
111
  width=width,
112
- num_frames=160,
113
  guidance_scale=4.5,
114
  num_inference_steps=25
115
  ).frames[0]
116
 
117
  output_path = "vertical_output.mp4"
 
118
  export_to_video(video_frames, output_path, fps=16)
119
 
120
  return output_path
121
 
122
 
 
123
  # Gradio UI
124
  # ================================
125
 
126
- with gr.Blocks(title="Wan 14B Vertical I2V") as demo:
127
 
128
- gr.Markdown("## 🎬 Wan 14B Image-to-Video Generator")
129
- gr.Markdown("Generate 10-second Vertical (9:16) AI Videos")
130
 
131
  with gr.Row():
132
- input_image = gr.Image(type="pil", label="Upload Image")
133
-
134
- prompt = gr.Textbox(
135
- label="Prompt",
136
- placeholder="Describe motion, camera movement, cinematic effect..."
137
- )
138
 
139
- negative_prompt = gr.Textbox(
140
- label="Negative Prompt",
141
- value="blurry, low quality, distorted, static",
142
- )
143
 
144
- generate_btn = gr.Button("Generate 10 Second Video")
 
 
 
145
 
146
- output_video = gr.Video(label="Generated Video")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  generate_btn.click(
149
  generate_video,
@@ -151,7 +123,4 @@ with gr.Blocks(title="Wan 14B Vertical I2V") as demo:
151
  outputs=output_video
152
  )
153
 
154
- demo.launch(server_name="0.0.0.0", server_port=7860)
155
-
156
-
157
-
 
3
  import torch
4
  import numpy as np
5
  from diffusers import WanImageToVideoPipeline
6
+ from diffusers.utils import export_to_video
7
  from transformers import CLIPVisionModel
8
 
 
 
 
 
 
 
9
# ---------------------------------------------------------------
# Model setup: load the Wan 2.2 image-to-video pipeline once at
# import time so every Gradio request reuses the same instance.
# ---------------------------------------------------------------
model_id = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

print(f"Using video Model: {model_id}")

dtype = torch.bfloat16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pipeline
pipe = WanImageToVideoPipeline.from_pretrained(
    model_id,
    torch_dtype=dtype
)

# Memory optimizations.
# NOTE(review): the original called pipe.to(device), then
# enable_model_cpu_offload() AND enable_sequential_cpu_offload().
# In diffusers the two offload strategies are mutually exclusive,
# and both manage device placement themselves, so the explicit
# .to(device) beforehand only wastes GPU memory. Keep exactly one
# strategy: model-level offload on CUDA, plain placement otherwise.
if device.type == "cuda":
    pipe.enable_model_cpu_offload()
    print("Model CPU offload enabled (cuda)")
else:
    pipe.to(device)
    print(f"Model Loaded in {device}")

# Attention slicing trades a little speed for lower peak memory.
pipe.enable_attention_slicing()
print("Optimizations Enabled")
 
31
  # ================================
32
+ # Image Preparation
33
  # ================================
34
 
35
+ def prepare_vertical_image(pipe, image, base_width=384, base_height=672):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
 
37
  mod_value = (
38
  pipe.vae_scale_factor_spatial *
39
  pipe.transformer.config.patch_size[1]
 
47
  return resized_image, final_width, final_height
48
 
49
 
50
+ # ================================
51
+ # Video Generation
52
+ # ================================
53
 
54
@spaces.GPU(size="xlarge", duration=180)
def generate_video(input_image, prompt, negative_prompt):
    """Generate a ~10-second vertical (9:16) video from a still image.

    Args:
        input_image: PIL image from the Gradio widget, or None when the
            user clicked Generate without uploading anything.
        prompt (str): motion / camera-movement description.
        negative_prompt (str): qualities to steer the sampler away from.

    Returns:
        str | None: path to the exported MP4, or None if no image given.
    """
    import os
    import tempfile

    # Guard clause: nothing to animate.
    if input_image is None:
        return None

    # Snap the image to the pipeline's spatial constraints and obtain
    # the matching generation resolution.
    image, width, height = prepare_vertical_image(pipe, input_image)

    print(f"Generating vertical video {width}x{height}")

    # 161 frames at 16 fps ≈ 10 s. The odd count is deliberate — Wan
    # pipelines expect num_frames of the form 4k + 1 (presumably why the
    # original marked 161 as "FIXED"); confirm against diffusers docs.
    video_frames = pipe(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=161,
        guidance_scale=4.5,
        num_inference_steps=25
    ).frames[0]

    # Write to a unique temp file: the original's fixed
    # "vertical_output.mp4" name races when two Gradio sessions
    # generate concurrently, letting one user receive another's video.
    fd, output_path = tempfile.mkstemp(prefix="vertical_", suffix=".mp4")
    os.close(fd)  # export_to_video reopens the path itself

    export_to_video(video_frames, output_path, fps=16)

    return output_path
80
 
81
 
82
+ # ================================
83
  # Gradio UI
84
  # ================================
85
 
86
+ with gr.Blocks(title="Wan 2.2 Vertical I2V") as demo:
87
 
88
+ gr.Markdown("# 🎬 Wan 2.2 ImageVideo Generator")
89
+ gr.Markdown("Generate **10-second Vertical (9:16) AI Videos**")
90
 
91
  with gr.Row():
 
 
 
 
 
 
92
 
93
+ # LEFT SIDE (INPUTS)
94
+ with gr.Column(scale=1):
 
 
95
 
96
+ input_image = gr.Image(
97
+ type="pil",
98
+ label="Upload Image"
99
+ )
100
 
101
+ prompt = gr.Textbox(
102
+ label="Prompt",
103
+ placeholder="Describe motion, camera movement..."
104
+ )
105
+
106
+ negative_prompt = gr.Textbox(
107
+ label="Negative Prompt",
108
+ value="blurry, low quality, distorted, static"
109
+ )
110
+
111
+ generate_btn = gr.Button("Generate Video", variant="primary")
112
+
113
+ # RIGHT SIDE (OUTPUT)
114
+ with gr.Column(scale=1):
115
+
116
+ output_video = gr.Video(
117
+ label="Generated Video"
118
+ )
119
 
120
  generate_btn.click(
121
  generate_video,
 
123
  outputs=output_video
124
  )
125
 
126
+ demo.launch(server_name="0.0.0.0", server_port=7860)