Update app.py (#2)
- Update app.py (8ee2c9ea0139ee192904a9254cb86c15a5ce7781)
app.py
CHANGED
@@ -96,12 +96,13 @@ def construct_video_pipeline(model_id: str, lora_path: str):
     pipe.unload_lora_weights()

     return pipe
+
 def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     # Load and preprocess frames
     cond_frame1 = Image.open(frame1_path)
     cond_frame2 = Image.open(frame2_path)

-    height, width = 720,
+    height, width = 720, 1280
     cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
     cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))

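Note: the two keyframes are resized to the same bucket resolution and then dropped into an otherwise-zero conditioning clip. A minimal sketch of that preprocessing, assuming `video_transforms` is a standard [-1, 1] normalization (the transform itself is not visible in this diff):

import numpy as np
import torch
from PIL import Image
from torchvision import transforms

# Assumption: video_transforms maps 0-255 frames to [-1, 1]; the real app.py
# defines its own transform, which this diff does not show.
video_transforms = transforms.Compose([
    transforms.Lambda(lambda x: x / 255.0),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

def build_cond_video(frame1: Image.Image, frame2: Image.Image,
                     num_frames: int, height: int, width: int) -> torch.Tensor:
    # frame1/frame2 are assumed to be already resized to (width, height),
    # e.g. via resize_image_to_bucket.
    # The first and last frames carry the keyframes; every other frame stays zero.
    cond_video = np.zeros(shape=(num_frames, height, width, 3))
    cond_video[0], cond_video[-1] = np.array(frame1), np.array(frame2)
    # (F, H, W, C) -> (F, C, H, W), normalize per frame, then add a batch dimension.
    cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
    cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
    return cond_video  # (1, F, C, H, W)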
@@ -110,11 +111,6 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
     cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
     cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)

-    # Initialize pipeline
-    model_id = "hunyuanvideo-community/HunyuanVideo"
-    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-    pipe = construct_video_pipeline(model_id, lora_path)
-
     with torch.no_grad():
         image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
         image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
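With the per-call re-initialization removed, the pipeline is expected to be built once and reused across requests. A hedged sketch of the intended call pattern (prompt, file paths, and sampling values below are illustrative, not taken from the app):

from huggingface_hub import hf_hub_download

# Build the HunyuanVideo pipeline once at startup, then reuse it for every request.
model_id = "hunyuanvideo-community/HunyuanVideo"
lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")
pipe = construct_video_pipeline(model_id, lora_path)  # defined earlier in app.py

video_bytes = generate_video(
    pipe,
    prompt="a character walks across the room",  # illustrative prompt
    frame1_path="first_frame.png",               # hypothetical local files
    frame2_path="last_frame.png",
    guidance_scale=6.0,
    num_frames=49,
    num_inference_steps=30,
)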
@@ -144,54 +140,7 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
         video_bytes = video_file.read()

     return video_bytes
-
-    # # Load and preprocess frames
-    # cond_frame1 = Image.open(requests.get(frame1_url, stream=True).raw)
-    # cond_frame2 = Image.open(requests.get(frame2_url, stream=True).raw)
-
-    # height, width = 720, 1280
-    # cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
-    # cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
-
-    # cond_video = np.zeros(shape=(num_frames, height, width, 3))
-    # cond_video[0], cond_video[-1] = np.array(cond_frame1), np.array(cond_frame2)
-    # cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
-    # cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
-
-    # # Initialize pipeline
-    # model_id = "hunyuanvideo-community/HunyuanVideo"
-    # lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-    # pipe = construct_video_pipeline(model_id, lora_path)
-
-    # with torch.no_grad():
-    #     image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
-    #     image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
-    #     cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
-    #     cond_latents = cond_latents * pipe.vae.config.scaling_factor
-    #     cond_latents = cond_latents.to(dtype=pipe.dtype)
-    #     assert not torch.any(torch.isnan(cond_latents))
-
-    # # Generate video
-    # video = call_pipe(
-    #     pipe,
-    #     prompt=prompt,
-    #     num_frames=num_frames,
-    #     num_inference_steps=num_inference_steps,
-    #     image_latents=cond_latents,
-    #     width=width,
-    #     height=height,
-    #     guidance_scale=guidance_scale,
-    #     generator=torch.Generator(device="cuda").manual_seed(0),
-    # ).frames[0]
-
-    # # Export to video
-    # video_path = "output.mp4"
-    # export_to_video(video, video_path, fps=24)
-
-    # with open(video_path, "rb") as video_file:
-    #     video_bytes = video_file.read()
-
-    # return video_bytes
+

 @torch.inference_mode()
 def call_pipe(
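The deleted block was a commented-out copy of the live implementation. For reference, the surviving path inside generate_video() (condensed from lines visible elsewhere in this diff) encodes the conditioning clip with the VAE and hands the latents to call_pipe:

import torch
from diffusers.utils import export_to_video  # app.py is assumed to import these already

with torch.no_grad():
    # Move the conditioning clip to the GPU and reorder to the layout the VAE expects.
    image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
    image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
    cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
    cond_latents = cond_latents * pipe.vae.config.scaling_factor
    cond_latents = cond_latents.to(dtype=pipe.dtype)
    assert not torch.any(torch.isnan(cond_latents))

# Sample the video conditioned on the keyframe latents, then write it out as MP4.
video = call_pipe(
    pipe,
    prompt=prompt,
    num_frames=num_frames,
    num_inference_steps=num_inference_steps,
    image_latents=cond_latents,
    width=width,
    height=height,
    guidance_scale=guidance_scale,
    generator=torch.Generator(device="cuda").manual_seed(0),
).frames[0]

video_path = "output.mp4"
export_to_video(video, video_path, fps=24)
with open(video_path, "rb") as video_file:
    video_bytes = video_file.read()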
@@ -199,9 +148,9 @@ def call_pipe(
     prompt: Union[str, List[str]] = None,
     prompt_2: Union[str, List[str]] = None,
     height: int = 720,
-    width: int =
+    width: int = 1280,
     num_frames: int = 129,
-    num_inference_steps: int =
+    num_inference_steps: int = 50,
     sigmas: Optional[List[float]] = None,
     guidance_scale: float = 6.0,
     num_videos_per_prompt: Optional[int] = 1,
@@ -268,7 +217,7 @@ def call_pipe(

     # 4. Prepare timesteps
     sigmas = np.linspace(1.0, 0.0, num_inference_steps + 1)[:-1] if sigmas is None else sigmas
-    timesteps, num_inference_steps =
+    timesteps, num_inference_steps = retrieve_timesteps(
         pipe.scheduler,
         num_inference_steps,
         device,
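The timestep preparation follows the usual diffusers flow-matching pattern: a linear sigma schedule is handed to a retrieve_timesteps helper, which configures the scheduler and returns the timesteps. A sketch under that assumption (the import path and the sigmas keyword are assumptions, since the hunk cuts off mid-call):

import numpy as np
import torch
# Assumption: the module-level helper most diffusers pipelines define; the import
# path below is a guess and may differ from what app.py actually uses.
from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import retrieve_timesteps

def prepare_timesteps(scheduler, num_inference_steps: int, device: torch.device, sigmas=None):
    # Linear sigma schedule from 1.0 down to, but not including, 0.0 (one sigma per step).
    if sigmas is None:
        sigmas = np.linspace(1.0, 0.0, num_inference_steps + 1)[:-1]
    # retrieve_timesteps calls scheduler.set_timesteps(...) and returns
    # (timesteps, num_inference_steps).
    return retrieve_timesteps(scheduler, num_inference_steps, device, sigmas=sigmas)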
@@ -345,11 +294,8 @@ def call_pipe(
         return (video,)
     return HunyuanVideoPipelineOutput(frames=video)

+
 def main():
-    model_id = "hunyuanvideo-community/HunyuanVideo"
-    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-    pipe = construct_video_pipeline(model_id, lora_path)
-
     gr.Markdown(
         """
         - https://i-bacon.bunkr.ru/11b45aa7-630b-4189-996f-a6b37a697786.png
@@ -371,9 +317,7 @@ def main():
     outputs = [
         gr.Video(label="Generated Video"),
     ]
-
-        return generate_video(pipe, *args)
-
+
     # Create the Gradio interface
     iface = gr.Interface(
         fn=generate_video_wrapper,
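The Interface is wired to a generate_video_wrapper callable, and the removed `return generate_video(pipe, *args)` suggests a thin wrapper that closes over the module-level pipe. A hypothetical sketch of that wiring (the input widgets and the bytes-to-file step are assumptions, not shown in this diff):

import gradio as gr

def generate_video_wrapper(prompt, frame1_path, frame2_path,
                           guidance_scale, num_frames, num_inference_steps):
    # pipe is assumed to be constructed once at module level (see the earlier sketch).
    video_bytes = generate_video(pipe, prompt, frame1_path, frame2_path,
                                 guidance_scale, int(num_frames), int(num_inference_steps))
    out_path = "output.mp4"  # gr.Video renders a file path, so write the bytes back out
    with open(out_path, "wb") as f:
        f.write(video_bytes)
    return out_path

iface = gr.Interface(
    fn=generate_video_wrapper,
    inputs=[  # hypothetical widgets; only the output component appears in the diff
        gr.Textbox(label="Prompt"),
        gr.Image(type="filepath", label="First frame"),
        gr.Image(type="filepath", label="Last frame"),
        gr.Slider(1.0, 15.0, value=6.0, label="Guidance scale"),
        gr.Slider(9, 129, value=49, step=1, label="Number of frames"),
        gr.Slider(10, 100, value=30, step=1, label="Inference steps"),
    ],
    outputs=[gr.Video(label="Generated Video")],
)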