Spaces:

jonluca
/

HunyuanVideo

Runtime error

App Files Files Community

jonluca committed on Jan 24

Commit

566bd68

verified ·

1 Parent(s): da8972b

Use community version

Browse files

Files changed (1) hide show

app.py +32 -66

app.py CHANGED Viewed

@@ -16,36 +16,32 @@ from hyvideo.constants import NEGATIVE_PROMPT
 from huggingface_hub import snapshot_download
-if torch.cuda.device_count() > 0:
-    snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=True)
-    snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
-    class Args:
-        def __init__(self, input_dir, output_dir):
-            self.input_dir = input_dir
-            self.output_dir = output_dir
-    # Create the object
-    args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
-    preprocess_text_encoder_tokenizer(args)
-    snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
-def initialize_model(model_path):
-    print("initialize_model: " + model_path)
-    if torch.cuda.device_count() == 0:
-        return None
-    args = parse_args()
-    models_root_path = Path(model_path)
-    if not models_root_path.exists():
-        raise ValueError(f"`models_root` not exists: {models_root_path}")
-    print(f"`models_root` exists: {models_root_path}")
-    hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
-    print("Model initialized: " + model_path)
     return hunyuan_video_sampler
-model = initialize_model("ckpts")
 def generate_video(
     prompt,
@@ -81,20 +77,6 @@ def generate_video_gpu(
     guidance_scale,
     flow_shift,
     embedded_guidance_scale
-):
-    return None
-@spaces.GPU(duration=120)
-def generate_video_gpu2(
-    model,
-    prompt,
-    resolution,
-    video_length,
-    seed,
-    num_inference_steps,
-    guidance_scale,
-    flow_shift,
-    embedded_guidance_scale
 ):
     print("generate_video_gpu (prompt: " + prompt + ")")
     if torch.cuda.device_count() == 0:
@@ -106,37 +88,21 @@ def generate_video_gpu2(
     width, height = int(width), int(height)
     negative_prompt = "" # not applicable in the inference
     print("Predicting video...")
-    outputs = model.predict(
         prompt=prompt,
         height=height,
         width=width,
-        video_length=video_length,
         seed=seed,
-        negative_prompt=negative_prompt,
-        infer_steps=num_inference_steps,
         guidance_scale=guidance_scale,
-        num_videos_per_prompt=1,
-        flow_shift=flow_shift,
-        batch_size=1,
-        embedded_guidance_scale=embedded_guidance_scale
-    )
-    print("Video predicted")
-    samples = outputs["samples"]
-    sample = samples[0].unsqueeze(0)
-    save_path = "./gradio_outputs"
-    os.makedirs(save_path, exist_ok=True)
-    time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
-    video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
-    save_videos_grid(sample, video_path, fps=24)
-    logger.info(f"Sample saved to: {video_path}")
-    print("Return the video")
     return video_path
 def create_demo(model_path):
     with gr.Blocks() as demo:
         if torch.cuda.device_count() == 0:

 from huggingface_hub import snapshot_download
+# if torch.cuda.device_count() > 0:
+#     snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=False)
+#     snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
+#     class Args:
+#         def __init__(self, input_dir, output_dir):
+#             self.input_dir = input_dir
+#             self.output_dir = output_dir
+#     # Create the object
+#     args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
+#     preprocess_text_encoder_tokenizer(args)
+#     snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
+def initialize_model():
+    model_id = "hunyuanvideo-community/HunyuanVideo"
+    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
+        model_id, subfolder="transformer", torch_dtype=torch.bfloat16
+    )
+    model = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.float16)
+    model.vae.enable_tiling()
+    model.to("cuda")
     return hunyuan_video_sampler
+model = initialize_model()
 def generate_video(
     prompt,
     guidance_scale,
     flow_shift,
     embedded_guidance_scale
 ):
     print("generate_video_gpu (prompt: " + prompt + ")")
     if torch.cuda.device_count() == 0:
     width, height = int(width), int(height)
     negative_prompt = "" # not applicable in the inference
     print("Predicting video...")
+    frames: List[PIL.Image.Image] = model(
         prompt=prompt,
         height=height,
         width=width,
+        num_frames=video_length,
         seed=seed,
+        num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
+        num_videos_per_prompt=1
+    ).frames[0]
+    output_video = export_to_video(frames, fps=15)
     return video_path
 def create_demo(model_path):
     with gr.Blocks() as demo:
         if torch.cuda.device_count() == 0: