import base64
import json
import os
import time
from typing import Optional

from huggingface_hub import InferenceClient

def save_video(base64_video: str, output_path: str):
    """Save base64 encoded video to a file"""
    video_bytes = base64.b64decode(base64_video)
    with open(output_path, "wb") as f:
        f.write(video_bytes)

def generate_video(
    prompt: str,
    endpoint_url: str,
    token: Optional[str] = None,
    resolution: str = "1280x720",
    video_length: int = 129,
    num_inference_steps: int = 50,
    seed: int = -1,
    guidance_scale: float = 1.0,
    flow_shift: float = 7.0,
    embedded_guidance_scale: float = 6.0
) -> str:
    """Generate a video using the custom inference endpoint.
    
    Args:
        prompt: Text prompt describing the video
        endpoint_url: Full URL to the inference endpoint
        token: HuggingFace API token for authentication
        resolution: Video resolution (default: "1280x720")
        video_length: Number of frames (default: 129 for 5s)
        num_inference_steps: Number of inference steps (default: 50) 
        seed: Random seed, -1 for random (default: -1)
        guidance_scale: Guidance scale value (default: 1.0)
        flow_shift: Flow shift value (default: 7.0)
        embedded_guidance_scale: Embedded guidance scale (default: 6.0)
    
    Returns:
        Path to the saved video file
    """
    # Initialize client
    client = InferenceClient(model=endpoint_url, token=token)
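    # Note: generation can take several minutes at 50 steps / 720p; InferenceClient
    # also accepts a `timeout` argument if the default proves too short.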
    
    # Prepare payload
    payload = {
        "inputs": prompt,
        "resolution": resolution,
        "video_length": video_length,
        "num_inference_steps": num_inference_steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "flow_shift": flow_shift,
        "embedded_guidance_scale": embedded_guidance_scale
    }

    # Make request; InferenceClient.post() returns the raw response body as
    # bytes, so parse the JSON explicitly
    response = client.post(json=payload)
    result = json.loads(response)
    
    # Save video
    timestamp = int(time.time())
    output_path = f"generated_video_{timestamp}.mp4"
    save_video(result["video_base64"], output_path)
    
    print(f"Video generated with seed {result['seed']}")
    return output_path
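

# A hedged alternative sketch (not part of the original flow): `InferenceClient.post`
# may be deprecated in newer huggingface_hub releases, so the same call can be made
# with plain `requests`. This assumes the endpoint accepts a JSON POST with a Bearer
# token (the usual Inference Endpoints convention) and returns the same
# {"video_base64": ..., "seed": ...} body parsed above.
def generate_video_via_requests(prompt: str, endpoint_url: str, token: str, **params) -> str:
    import requests  # local import so the main path keeps no extra dependency

    payload = {"inputs": prompt, **params}
    response = requests.post(
        endpoint_url,
        headers={"Authorization": f"Bearer {token}"},
        json=payload,
        timeout=3600,  # generation can take many minutes
    )
    response.raise_for_status()
    result = response.json()

    output_path = f"generated_video_{int(time.time())}.mp4"
    save_video(result["video_base64"], output_path)
    return output_path
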

if __name__ == "__main__":

    hf_api_token = os.environ.get('HF_API_TOKEN', '')
    endpoint_url = os.environ.get('ENDPOINT_URL', '')

    video_path = generate_video(
        endpoint_url=endpoint_url,
        token=hf_api_token,

        prompt="A cat walks on the grass, realistic style.",

        # min resolution is 64x64, max is 4096x4096 (in 16 px increments);
        # however, the model is designed for 1280x720
        resolution="1280x720",

        # number of frames: a multiple of 4 plus one (max 1024?)
        video_length=49,  # 129 for ~5 s

        # number of denoising/sampling steps (default: 30)
        num_inference_steps=15,  # 50

        # random seed; -1 keeps it random
        seed=-1,

        # not sure why the endpoint exposes two guidance scales
        guidance_scale=1.0,  # 3

        # strength of prompt guidance (default: 6.0)
        embedded_guidance_scale=6.0,

        # flow shift (larger values result in shorter videos; default: 9.0, max: 30)
        flow_shift=9.0,
    )
    print(f"Video saved to: {video_path}")