import base64
import json
import os
import time

from huggingface_hub import InferenceClient


def save_video(base64_video: str, output_path: str) -> None:
    """Save a base64-encoded video to a file."""
    video_bytes = base64.b64decode(base64_video)
    with open(output_path, "wb") as f:
        f.write(video_bytes)
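

# Hypothetical helper, not part of the endpoint's API: a sanity check for the
# parameter constraints noted in the comments under __main__ below (resolution
# between 64x64 and 4096x4096 in 16 px steps, frame count of the form 4k + 1).
# Adjust or drop this if the actual handler enforces different rules.
def validate_params(resolution: str, video_length: int) -> None:
    """Raise ValueError if the assumed parameter constraints are violated."""
    width, height = (int(x) for x in resolution.split("x"))
    for dim in (width, height):
        if not 64 <= dim <= 4096:
            raise ValueError(f"resolution dimension {dim} is outside 64..4096")
        if dim % 16:
            raise ValueError(f"resolution dimension {dim} is not a multiple of 16")
    # frame counts step by 4 with one extra frame (e.g. 49 or 129)
    if video_length % 4 != 1:
        raise ValueError(f"video_length {video_length} is not of the form 4k + 1")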


def generate_video(
    prompt: str,
    endpoint_url: str,
    token: str | None = None,
    resolution: str = "1280x720",
    video_length: int = 129,
    num_inference_steps: int = 50,
    seed: int = -1,
    guidance_scale: float = 1.0,
    flow_shift: float = 7.0,
    embedded_guidance_scale: float = 6.0,
) -> str:
"""Generate a video using the custom inference endpoint.
Args:
prompt: Text prompt describing the video
endpoint_url: Full URL to the inference endpoint
token: HuggingFace API token for authentication
resolution: Video resolution (default: "1280x720")
video_length: Number of frames (default: 129 for 5s)
num_inference_steps: Number of inference steps (default: 50)
seed: Random seed, -1 for random (default: -1)
guidance_scale: Guidance scale value (default: 1.0)
flow_shift: Flow shift value (default: 7.0)
embedded_guidance_scale: Embedded guidance scale (default: 6.0)
Returns:
Path to the saved video file
"""
    # Initialize the client against the custom endpoint
    client = InferenceClient(model=endpoint_url, token=token)

    # Prepare the request payload
    payload = {
        "inputs": prompt,
        "resolution": resolution,
        "video_length": video_length,
        "num_inference_steps": num_inference_steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "flow_shift": flow_shift,
        "embedded_guidance_scale": embedded_guidance_scale,
    }

    # InferenceClient.post returns the raw response bytes, so decode the JSON here
    response = client.post(json=payload)
    result = json.loads(response)

    # Save the video under a timestamped filename
    timestamp = int(time.time())
    output_path = f"generated_video_{timestamp}.mp4"
    save_video(result["video_base64"], output_path)

    print(f"Video generated with seed {result['seed']}")
    return output_path
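

# For reference, a minimal sketch of the same call over raw HTTP, assuming the
# endpoint follows the usual Hugging Face Inference Endpoints convention
# (POST with an Authorization bearer header and a JSON body). The response
# schema ("video_base64", "seed") is assumed from the custom handler above.
def post_raw(endpoint_url: str, token: str, payload: dict) -> dict:
    import requests  # local import: only needed for this variant

    response = requests.post(
        endpoint_url,
        headers={"Authorization": f"Bearer {token}"},
        json=payload,
        timeout=600,  # video generation can take minutes
    )
    response.raise_for_status()
    return response.json()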


if __name__ == "__main__":
    hf_api_token = os.environ.get("HF_API_TOKEN", "")
    endpoint_url = os.environ.get("ENDPOINT_URL", "")

    video_path = generate_video(
        endpoint_url=endpoint_url,
        token=hf_api_token,
        prompt="A cat walks on the grass, realistic style.",
        # min resolution is 64x64, max is 4096x4096 (in increments of 16 px),
        # but the model is designed for 1280x720
        resolution="1280x720",
        # number of frames plus one (max 1024?); increments of 4 frames
        video_length=49,  # 129 for the full ~5 s
        # number of denoising/sampling steps (default: 30)
        num_inference_steps=15,  # 50
        seed=-1,  # -1 to keep it random
        # not sure why we have two guidance scales
        guidance_scale=1.0,  # 3
        # strength of prompt guidance (default: 6.0)
        embedded_guidance_scale=6.0,
        # larger values result in shorter videos (default: 9.0, max: 30)
        flow_shift=9.0,
    )
    print(f"Video saved to: {video_path}")