Spaces:
Runtime error
Runtime error
Use community version
Browse files
app.py
CHANGED
|
@@ -16,36 +16,32 @@ from hyvideo.constants import NEGATIVE_PROMPT
|
|
| 16 |
|
| 17 |
from huggingface_hub import snapshot_download
|
| 18 |
|
| 19 |
-
if torch.cuda.device_count() > 0:
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
def initialize_model(
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
print(f"`models_root` exists: {models_root_path}")
|
| 44 |
-
hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
|
| 45 |
-
print("Model initialized: " + model_path)
|
| 46 |
return hunyuan_video_sampler
|
| 47 |
|
| 48 |
-
model = initialize_model(
|
| 49 |
|
| 50 |
def generate_video(
|
| 51 |
prompt,
|
|
@@ -81,20 +77,6 @@ def generate_video_gpu(
|
|
| 81 |
guidance_scale,
|
| 82 |
flow_shift,
|
| 83 |
embedded_guidance_scale
|
| 84 |
-
):
|
| 85 |
-
return None
|
| 86 |
-
|
| 87 |
-
@spaces.GPU(duration=120)
|
| 88 |
-
def generate_video_gpu2(
|
| 89 |
-
model,
|
| 90 |
-
prompt,
|
| 91 |
-
resolution,
|
| 92 |
-
video_length,
|
| 93 |
-
seed,
|
| 94 |
-
num_inference_steps,
|
| 95 |
-
guidance_scale,
|
| 96 |
-
flow_shift,
|
| 97 |
-
embedded_guidance_scale
|
| 98 |
):
|
| 99 |
print("generate_video_gpu (prompt: " + prompt + ")")
|
| 100 |
if torch.cuda.device_count() == 0:
|
|
@@ -106,37 +88,21 @@ def generate_video_gpu2(
|
|
| 106 |
width, height = int(width), int(height)
|
| 107 |
negative_prompt = "" # not applicable in the inference
|
| 108 |
print("Predicting video...")
|
| 109 |
-
|
| 110 |
-
outputs = model.predict(
|
| 111 |
prompt=prompt,
|
| 112 |
height=height,
|
| 113 |
width=width,
|
| 114 |
-
|
| 115 |
seed=seed,
|
| 116 |
-
|
| 117 |
-
infer_steps=num_inference_steps,
|
| 118 |
guidance_scale=guidance_scale,
|
| 119 |
-
num_videos_per_prompt=1
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
)
|
| 124 |
-
|
| 125 |
-
print("Video predicted")
|
| 126 |
-
samples = outputs["samples"]
|
| 127 |
-
sample = samples[0].unsqueeze(0)
|
| 128 |
-
|
| 129 |
-
save_path = "./gradio_outputs"
|
| 130 |
-
os.makedirs(save_path, exist_ok=True)
|
| 131 |
-
|
| 132 |
-
time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
|
| 133 |
-
video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
|
| 134 |
-
save_videos_grid(sample, video_path, fps=24)
|
| 135 |
-
logger.info(f"Sample saved to: {video_path}")
|
| 136 |
-
|
| 137 |
-
print("Return the video")
|
| 138 |
return video_path
|
| 139 |
|
|
|
|
| 140 |
def create_demo(model_path):
|
| 141 |
with gr.Blocks() as demo:
|
| 142 |
if torch.cuda.device_count() == 0:
|
|
|
|
| 16 |
|
| 17 |
from huggingface_hub import snapshot_download
|
| 18 |
|
| 19 |
+
# if torch.cuda.device_count() > 0:
|
| 20 |
+
# snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=False)
|
| 21 |
+
# snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
|
| 22 |
+
|
| 23 |
+
# class Args:
|
| 24 |
+
# def __init__(self, input_dir, output_dir):
|
| 25 |
+
# self.input_dir = input_dir
|
| 26 |
+
# self.output_dir = output_dir
|
| 27 |
+
|
| 28 |
+
# # Create the object
|
| 29 |
+
# args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
|
| 30 |
+
# preprocess_text_encoder_tokenizer(args)
|
| 31 |
+
# snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
|
| 32 |
+
|
| 33 |
+
def initialize_model():
|
| 34 |
+
model_id = "hunyuanvideo-community/HunyuanVideo"
|
| 35 |
+
|
| 36 |
+
transformer = HunyuanVideoTransformer3DModel.from_pretrained(
|
| 37 |
+
model_id, subfolder="transformer", torch_dtype=torch.bfloat16
|
| 38 |
+
)
|
| 39 |
+
model = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.float16)
|
| 40 |
+
model.vae.enable_tiling()
|
| 41 |
+
model.to("cuda")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
return hunyuan_video_sampler
|
| 43 |
|
| 44 |
+
model = initialize_model()
|
| 45 |
|
| 46 |
def generate_video(
|
| 47 |
prompt,
|
|
|
|
| 77 |
guidance_scale,
|
| 78 |
flow_shift,
|
| 79 |
embedded_guidance_scale
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
):
|
| 81 |
print("generate_video_gpu (prompt: " + prompt + ")")
|
| 82 |
if torch.cuda.device_count() == 0:
|
|
|
|
| 88 |
width, height = int(width), int(height)
|
| 89 |
negative_prompt = "" # not applicable in the inference
|
| 90 |
print("Predicting video...")
|
| 91 |
+
frames: List[PIL.Image.Image] = model(
|
|
|
|
| 92 |
prompt=prompt,
|
| 93 |
height=height,
|
| 94 |
width=width,
|
| 95 |
+
num_frames=video_length,
|
| 96 |
seed=seed,
|
| 97 |
+
num_inference_steps=num_inference_steps,
|
|
|
|
| 98 |
guidance_scale=guidance_scale,
|
| 99 |
+
num_videos_per_prompt=1
|
| 100 |
+
).frames[0]
|
| 101 |
+
|
| 102 |
+
output_video = export_to_video(frames, fps=15)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
return video_path
|
| 104 |
|
| 105 |
+
|
| 106 |
def create_demo(model_path):
|
| 107 |
with gr.Blocks() as demo:
|
| 108 |
if torch.cuda.device_count() == 0:
|