docuai / app.py
IAMTFRMZA's picture
Update app.py
aae969f verified
import os
import torch
import random
import math
import time
from datetime import datetime
from typing import Union, List
from huggingface_hub import hf_hub_download
import numpy as np
import PIL.Image
from diffusers import CogVideoXPipeline, CogVideoXDDIMScheduler, CogVideoXDPMScheduler
from diffusers.utils import export_to_video
import moviepy.editor as mp
def download_file(repo_id, filename, subfolder):
return hf_hub_download(repo_id=repo_id, filename=filename, subfolder=subfolder)
def convert_to_gif(video_path):
clip = mp.VideoFileClip(video_path)
clip = clip.set_fps(8)
clip = clip.resize(height=240)
gif_path = video_path.replace(".mp4", ".gif")
clip.write_gif(gif_path, fps=8)
return gif_path
def save_video(tensor: Union[List[np.ndarray], List[PIL.Image.Image]], fps: int = 8):
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
video_path = f"{timestamp}.mp4"
export_to_video(tensor, video_path, fps=fps)
return video_path
# Downloading necessary files
scheduler_config_path = download_file("vdo/CogVideoX-5b", "scheduler_config.json", "scheduler")
text_encoder_config_path = download_file("vdo/CogVideoX-5b", "config.json", "text_encoder")
text_encoder_model_1_path = download_file("vdo/CogVideoX-5b", "model-00001-of-00002.safetensors", "text_encoder")
text_encoder_model_2_path = download_file("vdo/CogVideoX-5b", "model-00002-of-00002.safetensors", "text_encoder")
text_encoder_index_path = download_file("vdo/CogVideoX-5b", "model.safetensors.index.json", "text_encoder")
tokenizer_added_tokens_path = download_file("vdo/CogVideoX-5b", "added_tokens.json", "tokenizer")
tokenizer_special_tokens_map_path = download_file("vdo/CogVideoX-5b", "special_tokens_map.json", "tokenizer")
tokenizer_model_path = download_file("vdo/CogVideoX-5b", "spiece.model", "tokenizer")
tokenizer_config_path = download_file("vdo/CogVideoX-5b", "tokenizer_config.json", "tokenizer")
transformer_config_path = download_file("vdo/CogVideoX-5b", "config.json", "transformer")
transformer_model_1_path = download_file("vdo/CogVideoX-5b", "diffusion_pytorch_model-00001-of-00002.safetensors", "transformer")
transformer_model_2_path = download_file("vdo/CogVideoX-5b", "diffusion_pytorch_model-00002-of-00002.safetensors", "transformer")
transformer_index_path = download_file("vdo/CogVideoX-5b", "diffusion_pytorch_model.safetensors.index.json", "transformer")
vae_config_path = download_file("vdo/CogVideoX-5b", "config.json", "vae")
vae_model_path = download_file("vdo/CogVideoX-5b", "diffusion_pytorch_model.safetensors", "vae")
configuration_path = download_file("vdo/CogVideoX-5b", "configuration.json", "")
model_index_path = download_file("vdo/CogVideoX-5b", "model_index.json", "")
pipe = CogVideoXPipeline.from_pretrained("/content/CogVideoX-5b", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
prompt = "A golden retriever, sporting sleek black sunglasses, with its lengthy fur flowing in the breeze, sprints playfully across a rooftop terrace, recently refreshed by a light rain. The scene unfolds from a distance, the dog's energetic bounds growing larger as it approaches the camera, its tail wagging with unrestrained joy, while droplets of water glisten on the concrete behind it. The overcast sky provides a dramatic backdrop, emphasizing the vibrant golden coat of the canine as it dashes towards the viewer."
seed = 0
if seed == 0:
random.seed(int(time.time()))
seed = random.randint(0, 18446744073709551615)
print(seed)
with torch.inference_mode():
video_pt = pipe(
prompt=prompt,
num_videos_per_prompt=1,
num_inference_steps=50,
num_frames=49,
use_dynamic_cfg=True,
output_type="pt",
guidance_scale=7.0,
generator=torch.Generator(device="cpu").manual_seed(seed),
).frames
batch_size = video_pt.shape[0]
batch_video_frames = []
for batch_idx in range(batch_size):
pt_image = video_pt[batch_idx]
pt_image = torch.stack([pt_image[i] for i in range(pt_image.shape[0])])
image_np = VaeImageProcessor.pt_to_numpy(pt_image)
image_pil = VaeImageProcessor.numpy_to_pil(image_np)
batch_video_frames.append(image_pil)
video_path = save_video(batch_video_frames[0], fps=math.ceil((len(batch_video_frames[0]) - 1) / 6))
gif_path = convert_to_gif(video_path)