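"""Text-to-video generation backends.

Exposes a single entry point, `generate_video`, that dispatches to one of
three backends: Wan2.1 (local inference via diffusers), Google Veo on
Vertex AI, or MiniMax Hailuo (T2V-01-Director) over its REST API.
"""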
import json
import os
import time
import uuid

import requests
import torch
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
from diffusers.utils import export_to_video
from google import genai
from google.cloud import aiplatform, storage
from google.genai import types
def wan_text_to_video(prompt=None, negative_prompt=None):
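    """Generate a video locally with Wan2.1 via diffusers and return the output path."""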
    # Available models: Wan-AI/Wan2.1-T2V-14B-Diffusers, Wan-AI/Wan2.1-T2V-1.3B-Diffusers
    # model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
    model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
    vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
    flow_shift = 5.0  # 5.0 for 720P, 3.0 for 480P
    scheduler = UniPCMultistepScheduler(
        prediction_type="flow_prediction",
        use_flow_sigmas=True,
        num_train_timesteps=1000,
        flow_shift=flow_shift,
    )
    pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
    pipe.scheduler = scheduler
    # Wan2.1 inference is only available on cuda / cpu; prefer the GPU when present.
    pipe.to("cuda" if torch.cuda.is_available() else "cpu")
prompt = ("A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the "
"dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through "
"the window.")
negative_prompt = ("Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, "
"images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, "
"incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, "
"misshapen limbs, fused fingers, still picture, messy background, three legs, many people in "
"the background, walking backwards")
    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=720,
        width=1280,
        num_frames=81,
        guidance_scale=5.0,
    ).frames[0]
    export_to_video(output, "output.mp4", fps=16)
    return "output.mp4"
def gcp_veo(prompt: str = "a cat reading a book"):
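    """Generate a video with Veo on Vertex AI, download it from GCS, and return the local path."""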
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "gcp-credit-applying-to-g-suite")
    LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")
    BUCKET_NAME = "dante-test-123456-output"
    OUTPUT_GCS_PATH = f"gs://{BUCKET_NAME}/videos/output_{int(time.time())}.mp4"

    # Initialize Vertex AI
    aiplatform.init(project=PROJECT_ID, location=LOCATION)

    # Initialize the Generative AI client
    client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

    # Video generation pipeline
    video_model = "veo-2.0-generate-001"
    # video_model = "veo-3.0-generate-preview"
    aspect_ratio = "16:9"
    operation = client.models.generate_videos(
        model=video_model,
        prompt=prompt,
        config=types.GenerateVideosConfig(
            aspect_ratio=aspect_ratio,
            output_gcs_uri=OUTPUT_GCS_PATH,
            number_of_videos=1,
            duration_seconds=5,
            person_generation="allow_adult",
            enhance_prompt=True,
        ),
    )
    # Poll until the long-running operation completes
    print("Generating video...")
    while not operation.done:
        time.sleep(15)
        operation = client.operations.get(operation)
        print(f"Operation status: {operation}")

    # Error handling
    if operation.error:
        raise Exception(f"Video generation failed: {operation.error}")
    # Get the generated video URI
    if operation.response and operation.result.generated_videos:
        video_uri = operation.result.generated_videos[0].video.uri
        print(f"Video generated at: {video_uri}")

        # Download the video from GCS to a local file
        storage_client = storage.Client(project=PROJECT_ID)
        bucket = storage_client.bucket(BUCKET_NAME)
        blob_name = video_uri.replace(f"gs://{BUCKET_NAME}/", "")
        blob = bucket.blob(blob_name)
        local_output_path = f"output/sample-{uuid.uuid1()}.mp4"

        # Ensure the local directory exists
        os.makedirs(os.path.dirname(local_output_path), exist_ok=True)

        # Download the video
        blob.download_to_filename(local_output_path)
        print(f"Video downloaded to: {local_output_path}")

        # Clean up the copy in GCS
        blob.delete()
        print(f"Video deleted from GCS: {video_uri}")
        return local_output_path
    else:
        raise Exception("No video generated or response is empty")
def hailuo_text_to_video(
    prompt: str,
    model: str = "T2V-01-Director",
    output_file_name: str = "output.mp4",
    api_key: str = "",
) -> str:
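    """Generate a video with MiniMax Hailuo via its REST API and return the local path."""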
    def invoke_video_generation() -> str:
        print("-----------------Submit video generation task-----------------")
        url = "https://api.minimaxi.chat/v1/video_generation"
        payload = json.dumps({
            "prompt": prompt,
            "model": model,
        })
        headers = {
            "authorization": "Bearer " + api_key,
            "content-type": "application/json",
        }
        response = requests.post(url, headers=headers, data=payload)
        print(response.text)
        task_id = response.json()["task_id"]
        print("Video generation task submitted successfully, task ID: " + task_id)
        return task_id
    def query_video_generation(task_id: str):
        url = "https://api.minimaxi.chat/v1/query/video_generation?task_id=" + task_id
        headers = {"authorization": "Bearer " + api_key}
        response = requests.get(url, headers=headers)
        status = response.json()["status"]
        if status == "Preparing":
            print("...Preparing...")
            return "", "Preparing"
        elif status == "Queueing":
            print("...In the queue...")
            return "", "Queueing"
        elif status == "Processing":
            print("...Generating...")
            return "", "Processing"
        elif status == "Success":
            return response.json()["file_id"], "Finished"
        elif status == "Fail":
            return "", "Fail"
        else:
            return "", "Unknown"
    def fetch_video_result(file_id: str):
        print("---------------Video generated successfully, downloading now---------------")
        url = "https://api.minimaxi.chat/v1/files/retrieve?file_id=" + file_id
        headers = {"authorization": "Bearer " + api_key}
        response = requests.get(url, headers=headers)
        print(response.text)
        download_url = response.json()["file"]["download_url"]
        print("Video download link: " + download_url)
        with open(output_file_name, "wb") as f:
            f.write(requests.get(download_url).content)
        print("The video has been downloaded to: " + os.path.join(os.getcwd(), output_file_name))
    task_id = invoke_video_generation()
    print("-----------------Video generation task submitted-----------------")
    while True:
        time.sleep(10)
        file_id, status = query_video_generation(task_id)
        if file_id != "":
            fetch_video_result(file_id)
            print("---------------Successful---------------")
            break
        elif status in ("Fail", "Unknown"):
            print("---------------Failed---------------")
            break
    return os.path.join(os.getcwd(), output_file_name)
def generate_video(prompt, model_id, negative_prompt=None):
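    """Dispatch a text-to-video request to the backend selected by model_id."""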
    video_path = None
    if model_id == "Wan2.1":
        video_path = wan_text_to_video(prompt, negative_prompt)
    elif model_id == "SkyReels-V2":
        raise ValueError("SkyReels-V2 model not yet implemented.")
    elif model_id == "Veo-2":
        video_path = gcp_veo(prompt)
    elif model_id == "T2V-01-Director":
        video_path = hailuo_text_to_video(prompt)
    else:
        raise ValueError(f"Unknown model_id: {model_id}")
    return video_path
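# Example usage (a sketch; assumes credentials / API keys for the chosen
# backend are already configured in the environment):
# video_path = generate_video("a cat reading a book", model_id="Veo-2")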
# Wan2.1 local inference is only available for cuda / cpu
# wan_text_to_video()

# if __name__ == "__main__":
#     try:
#         local_path = gcp_veo(prompt="a cat reading a book")
#         print(f"Success! Video saved at: {local_path}")
#     except Exception as e:
#         print(f"Error: {e}")