"""Generate videos through a custom HuggingFace inference endpoint.

Reads HF_API_TOKEN and ENDPOINT_URL from the environment when run as a script.
"""

from huggingface_hub import InferenceClient
import base64
import json
import os
from pathlib import Path
import time


def save_video(base64_video: str, output_path: str) -> None:
    """Decode a base64-encoded video and write the bytes to *output_path*."""
    video_bytes = base64.b64decode(base64_video)
    with open(output_path, "wb") as f:
        f.write(video_bytes)


def generate_video(
    prompt: str,
    endpoint_url: str,
    token: str = None,
    resolution: str = "1280x720",
    video_length: int = 129,
    num_inference_steps: int = 50,
    seed: int = -1,
    guidance_scale: float = 1.0,
    flow_shift: float = 7.0,
    embedded_guidance_scale: float = 6.0,
) -> str:
    """Generate a video using the custom inference endpoint.

    Args:
        prompt: Text prompt describing the video
        endpoint_url: Full URL to the inference endpoint
        token: HuggingFace API token for authentication
        resolution: Video resolution (default: "1280x720")
        video_length: Number of frames (default: 129 for 5s)
        num_inference_steps: Number of inference steps (default: 50)
        seed: Random seed, -1 for random (default: -1)
        guidance_scale: Guidance scale value (default: 1.0)
        flow_shift: Flow shift value (default: 7.0)
        embedded_guidance_scale: Embedded guidance scale (default: 6.0)

    Returns:
        Path to the saved video file
    """
    # Initialize client pointed at the custom endpoint
    client = InferenceClient(model=endpoint_url, token=token)

    # Prepare payload expected by the endpoint's handler
    payload = {
        "inputs": prompt,
        "resolution": resolution,
        "video_length": video_length,
        "num_inference_steps": num_inference_steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "flow_shift": flow_shift,
        "embedded_guidance_scale": embedded_guidance_scale,
    }

    # InferenceClient.post() returns the raw response body as bytes,
    # not a requests.Response, so decode the JSON payload ourselves.
    response = client.post(json=payload)
    result = json.loads(response)

    # Save video under a timestamped filename in the working directory
    timestamp = int(time.time())
    output_path = f"generated_video_{timestamp}.mp4"
    save_video(result["video_base64"], output_path)

    print(f"Video generated with seed {result['seed']}")
    return output_path


if __name__ == "__main__":
    hf_api_token = os.environ.get('HF_API_TOKEN', '')
    endpoint_url = os.environ.get('ENDPOINT_URL', '')
    video_path = generate_video(
        endpoint_url=endpoint_url,
        token=hf_api_token,
        prompt="A cat walks on the grass, realistic style.",
        # min resolution is 64x64, max is 4096x4096 (increment steps are by 16px)
        # however the model is designed for 1280x720
        resolution="1280x720",
        # numbers of frames plus one (max 1024?)
        # increments by 4 frames
        video_length=49,  # 129,
        # number of denoising/sampling steps (default: 30)
        num_inference_steps=15,  # 50,
        seed=-1,  # -1 to keep it random
        # not sure why we have two guidance scales
        guidance_scale=1.0,  # 3
        # strength of prompt guidance (default: 6.0)
        embedded_guidance_scale=6.0,
        # video length (larger values result in shorter videos, default: 9.0, max: 30)
        flow_shift=9.0,
    )
    print(f"Video saved to: {video_path}")