Update app.py (#2)
- Update app.py (8ee2c9ea0139ee192904a9254cb86c15a5ce7781)
app.py
CHANGED
@@ -96,12 +96,13 @@ def construct_video_pipeline(model_id: str, lora_path: str):
     pipe.unload_lora_weights()

     return pipe
+
 def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     # Load and preprocess frames
     cond_frame1 = Image.open(frame1_path)
     cond_frame2 = Image.open(frame2_path)

-    height, width = 720,
+    height, width = 720, 1280
     cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
     cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))

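Note: the two keyframes are resized to the same bucket resolution and then dropped into an otherwise-zero conditioning clip. A minimal sketch of that preprocessing, assuming `video_transforms` is a standard [-1, 1] normalization (the transform itself is not visible in this diff):

import numpy as np
import torch
from PIL import Image
from torchvision import transforms

# Assumption: video_transforms maps 0-255 frames to [-1, 1]; the real app.py
# defines its own transform, which this diff does not show.
video_transforms = transforms.Compose([
    transforms.Lambda(lambda x: x / 255.0),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

def build_cond_video(frame1: Image.Image, frame2: Image.Image,
                     num_frames: int, height: int, width: int) -> torch.Tensor:
    # frame1/frame2 are assumed to be already resized to (width, height),
    # e.g. via resize_image_to_bucket.
    # The first and last frames carry the keyframes; every other frame stays zero.
    cond_video = np.zeros(shape=(num_frames, height, width, 3))
    cond_video[0], cond_video[-1] = np.array(frame1), np.array(frame2)
    # (F, H, W, C) -> (F, C, H, W), normalize per frame, then add a batch dimension.
    cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
    cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
    return cond_video  # (1, F, C, H, W)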
@@ -110,11 +111,6 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
     cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
     cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)

-    # Initialize pipeline
-    model_id = "hunyuanvideo-community/HunyuanVideo"
-    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-    pipe = construct_video_pipeline(model_id, lora_path)
-
     with torch.no_grad():
         image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
         image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
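With the per-call re-initialization removed, the pipeline is expected to be built once and reused across requests. A hedged sketch of the intended call pattern (prompt, file paths, and sampling values below are illustrative, not taken from the app):

from huggingface_hub import hf_hub_download

# Build the HunyuanVideo pipeline once at startup, then reuse it for every request.
model_id = "hunyuanvideo-community/HunyuanVideo"
lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")
pipe = construct_video_pipeline(model_id, lora_path)  # defined earlier in app.py

video_bytes = generate_video(
    pipe,
    prompt="a character walks across the room",  # illustrative prompt
    frame1_path="first_frame.png",               # hypothetical local files
    frame2_path="last_frame.png",
    guidance_scale=6.0,
    num_frames=49,
    num_inference_steps=30,
)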
@@ -144,54 +140,7 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
         video_bytes = video_file.read()

     return video_bytes
-
-    # # Load and preprocess frames
-    # cond_frame1 = Image.open(requests.get(frame1_url, stream=True).raw)
-    # cond_frame2 = Image.open(requests.get(frame2_url, stream=True).raw)
-
-    # height, width = 720, 1280
-    # cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
-    # cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
-
-    # cond_video = np.zeros(shape=(num_frames, height, width, 3))
-    # cond_video[0], cond_video[-1] = np.array(cond_frame1), np.array(cond_frame2)
-    # cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
-    # cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
-
-    # # Initialize pipeline
-    # model_id = "hunyuanvideo-community/HunyuanVideo"
-    # lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-    # pipe = construct_video_pipeline(model_id, lora_path)
-
-    # with torch.no_grad():
-    #     image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
-    #     image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
-    #     cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
-    #     cond_latents = cond_latents * pipe.vae.config.scaling_factor
-    #     cond_latents = cond_latents.to(dtype=pipe.dtype)
-    #     assert not torch.any(torch.isnan(cond_latents))
-
-    # # Generate video
-    # video = call_pipe(
-    #     pipe,
-    #     prompt=prompt,
-    #     num_frames=num_frames,
-    #     num_inference_steps=num_inference_steps,
-    #     image_latents=cond_latents,
-    #     width=width,
-    #     height=height,
-    #     guidance_scale=guidance_scale,
-    #     generator=torch.Generator(device="cuda").manual_seed(0),
-    # ).frames[0]
-
-    # # Export to video
-    # video_path = "output.mp4"
-    # export_to_video(video, video_path, fps=24)
-
-    # with open(video_path, "rb") as video_file:
-    #     video_bytes = video_file.read()
-
-    # return video_bytes
+

 @torch.inference_mode()
 def call_pipe(
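The deleted block was a commented-out copy of the live implementation. For reference, the surviving path inside generate_video() (condensed from lines visible elsewhere in this diff) encodes the conditioning clip with the VAE and hands the latents to call_pipe:

import torch
from diffusers.utils import export_to_video  # app.py is assumed to import these already

with torch.no_grad():
    # Move the conditioning clip to the GPU and reorder to the layout the VAE expects.
    image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
    image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
    cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
    cond_latents = cond_latents * pipe.vae.config.scaling_factor
    cond_latents = cond_latents.to(dtype=pipe.dtype)
    assert not torch.any(torch.isnan(cond_latents))

# Sample the video conditioned on the keyframe latents, then write it out as MP4.
video = call_pipe(
    pipe,
    prompt=prompt,
    num_frames=num_frames,
    num_inference_steps=num_inference_steps,
    image_latents=cond_latents,
    width=width,
    height=height,
    guidance_scale=guidance_scale,
    generator=torch.Generator(device="cuda").manual_seed(0),
).frames[0]

video_path = "output.mp4"
export_to_video(video, video_path, fps=24)
with open(video_path, "rb") as video_file:
    video_bytes = video_file.read()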
@@ -199,9 +148,9 @@ def call_pipe(
     prompt: Union[str, List[str]] = None,
     prompt_2: Union[str, List[str]] = None,
     height: int = 720,
-    width: int =
+    width: int = 1280,
     num_frames: int = 129,
-    num_inference_steps: int =
+    num_inference_steps: int = 50,
     sigmas: Optional[List[float]] = None,
     guidance_scale: float = 6.0,
     num_videos_per_prompt: Optional[int] = 1,
@@ -268,7 +217,7 @@ def call_pipe(

     # 4. Prepare timesteps
     sigmas = np.linspace(1.0, 0.0, num_inference_steps + 1)[:-1] if sigmas is None else sigmas
-    timesteps, num_inference_steps =
+    timesteps, num_inference_steps = retrieve_timesteps(
         pipe.scheduler,
         num_inference_steps,
         device,
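The timestep preparation follows the usual diffusers flow-matching pattern: a linear sigma schedule is handed to a retrieve_timesteps helper, which configures the scheduler and returns the timesteps. A sketch under that assumption (the import path and the sigmas keyword are assumptions, since the hunk cuts off mid-call):

import numpy as np
import torch
# Assumption: the module-level helper most diffusers pipelines define; the import
# path below is a guess and may differ from what app.py actually uses.
from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import retrieve_timesteps

def prepare_timesteps(scheduler, num_inference_steps: int, device: torch.device, sigmas=None):
    # Linear sigma schedule from 1.0 down to, but not including, 0.0 (one sigma per step).
    if sigmas is None:
        sigmas = np.linspace(1.0, 0.0, num_inference_steps + 1)[:-1]
    # retrieve_timesteps calls scheduler.set_timesteps(...) and returns
    # (timesteps, num_inference_steps).
    return retrieve_timesteps(scheduler, num_inference_steps, device, sigmas=sigmas)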
@@ -345,11 +294,8 @@ def call_pipe(
         return (video,)
     return HunyuanVideoPipelineOutput(frames=video)

+
 def main():
-    model_id = "hunyuanvideo-community/HunyuanVideo"
-    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-    pipe = construct_video_pipeline(model_id, lora_path)
-
     gr.Markdown(
         """
         - https://i-bacon.bunkr.ru/11b45aa7-630b-4189-996f-a6b37a697786.png
@@ -371,9 +317,7 @@ def main():
     outputs = [
         gr.Video(label="Generated Video"),
     ]
-
-        return generate_video(pipe, *args)
-
+
     # Create the Gradio interface
     iface = gr.Interface(
         fn=generate_video_wrapper,
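The Interface is wired to a generate_video_wrapper callable, and the removed `return generate_video(pipe, *args)` suggests a thin wrapper that closes over the module-level pipe. A hypothetical sketch of that wiring (the input widgets and the bytes-to-file step are assumptions, not shown in this diff):

import gradio as gr

def generate_video_wrapper(prompt, frame1_path, frame2_path,
                           guidance_scale, num_frames, num_inference_steps):
    # pipe is assumed to be constructed once at module level (see the earlier sketch).
    video_bytes = generate_video(pipe, prompt, frame1_path, frame2_path,
                                 guidance_scale, int(num_frames), int(num_inference_steps))
    out_path = "output.mp4"  # gr.Video renders a file path, so write the bytes back out
    with open(out_path, "wb") as f:
        f.write(video_bytes)
    return out_path

iface = gr.Interface(
    fn=generate_video_wrapper,
    inputs=[  # hypothetical widgets; only the output component appears in the diff
        gr.Textbox(label="Prompt"),
        gr.Image(type="filepath", label="First frame"),
        gr.Image(type="filepath", label="Last frame"),
        gr.Slider(1.0, 15.0, value=6.0, label="Guidance scale"),
        gr.Slider(9, 129, value=49, step=1, label="Number of frames"),
        gr.Slider(10, 100, value=30, step=1, label="Inference steps"),
    ],
    outputs=[gr.Video(label="Generated Video")],
)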