Spaces:

IAMTFRMZA
/

docuai

Runtime error

App Files Files Community

docuai / app.py

IAMTFRMZA

Update app.py

aae969f verified 3 months ago

raw

history blame contribute delete

4.42 kB

	import math
	import os
	import random
	import time
	from datetime import datetime
	from typing import Union, List

	import moviepy.editor as mp
	import numpy as np
	import PIL.Image
	import torch
	from diffusers import CogVideoXPipeline, CogVideoXDDIMScheduler, CogVideoXDPMScheduler
	from diffusers.image_processor import VaeImageProcessor
	from diffusers.utils import export_to_video
	from huggingface_hub import hf_hub_download

	def download_file(repo_id, filename, subfolder):
	    """Fetch a single file from a Hugging Face Hub repository.

	    The three arguments are forwarded unchanged, by keyword, to
	    ``hf_hub_download``; whatever that call returns is returned here.
	    """
	    fetched = hf_hub_download(
	        repo_id=repo_id,
	        filename=filename,
	        subfolder=subfolder,
	    )
	    return fetched

	def convert_to_gif(video_path):
	    """Write a small GIF preview of ``video_path`` and return the GIF's path.

	    The clip is resampled to 8 fps and resized to a height of 240 before
	    being written. The GIF is placed beside the source file, with only the
	    final extension swapped for ``.gif``.

	    Args:
	        video_path: Path of the video file to convert.

	    Returns:
	        The path of the GIF that was written.
	    """
	    # Swap only the trailing extension. A plain str.replace(".mp4", ".gif")
	    # would also rewrite ".mp4" inside directory names, and for a path
	    # without ".mp4" it would leave the name unchanged so the GIF would
	    # overwrite the source video.
	    stem, _ext = os.path.splitext(video_path)
	    gif_path = stem + ".gif"

	    source = mp.VideoFileClip(video_path)
	    try:
	        clip = source.set_fps(8)
	        clip = clip.resize(height=240)
	        clip.write_gif(gif_path, fps=8)
	    finally:
	        # Release the underlying reader even if writing the GIF fails.
	        source.close()
	    return gif_path

	def save_video(tensor: Union[List[np.ndarray], List[PIL.Image.Image]], fps: int = 8):
	    """Export a sequence of frames to an MP4 named after the current time.

	    The file name is ``<YYYYmmdd_HHMMSS>.mp4`` (relative path), built from
	    ``datetime.now()``.

	    Args:
	        tensor: The frames to write, passed straight to ``export_to_video``.
	        fps: Frame rate forwarded to ``export_to_video``.

	    Returns:
	        The file name the video was written to.
	    """
	    stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
	    video_path = stamp + ".mp4"
	    export_to_video(tensor, video_path, fps=fps)
	    return video_path

	# Fetch every file of the checkpoint from the Hub, one component at a time.
	# Each assignment keeps whatever download_file returned for that file.
	MODEL_REPO = "vdo/CogVideoX-5b"

	# Scheduler
	scheduler_config_path = download_file(MODEL_REPO, "scheduler_config.json", "scheduler")

	# Text encoder: config, two weight shards, and the shard index
	text_encoder_config_path = download_file(MODEL_REPO, "config.json", "text_encoder")
	text_encoder_model_1_path = download_file(MODEL_REPO, "model-00001-of-00002.safetensors", "text_encoder")
	text_encoder_model_2_path = download_file(MODEL_REPO, "model-00002-of-00002.safetensors", "text_encoder")
	text_encoder_index_path = download_file(MODEL_REPO, "model.safetensors.index.json", "text_encoder")

	# Tokenizer
	tokenizer_added_tokens_path = download_file(MODEL_REPO, "added_tokens.json", "tokenizer")
	tokenizer_special_tokens_map_path = download_file(MODEL_REPO, "special_tokens_map.json", "tokenizer")
	tokenizer_model_path = download_file(MODEL_REPO, "spiece.model", "tokenizer")
	tokenizer_config_path = download_file(MODEL_REPO, "tokenizer_config.json", "tokenizer")

	# Transformer: config, two weight shards, and the shard index
	transformer_config_path = download_file(MODEL_REPO, "config.json", "transformer")
	transformer_model_1_path = download_file(MODEL_REPO, "diffusion_pytorch_model-00001-of-00002.safetensors", "transformer")
	transformer_model_2_path = download_file(MODEL_REPO, "diffusion_pytorch_model-00002-of-00002.safetensors", "transformer")
	transformer_index_path = download_file(MODEL_REPO, "diffusion_pytorch_model.safetensors.index.json", "transformer")

	# VAE
	vae_config_path = download_file(MODEL_REPO, "config.json", "vae")
	vae_model_path = download_file(MODEL_REPO, "diffusion_pytorch_model.safetensors", "vae")

	# Repository-root files (empty subfolder)
	configuration_path = download_file(MODEL_REPO, "configuration.json", "")
	model_index_path = download_file(MODEL_REPO, "model_index.json", "")

	# Build the text-to-video pipeline from the same Hub repository the files
	# above were downloaded from. The previous hard-coded local directory
	# "/content/CogVideoX-5b" is never created by this script, so loading from it
	# fails at startup.
	# NOTE(review): upstream CogVideoX-5b guidance recommends bfloat16; float16 is
	# kept here to preserve existing behavior - confirm against the target GPU.
	pipe = CogVideoXPipeline.from_pretrained("vdo/CogVideoX-5b", torch_dtype=torch.float16)

	# Model-level and sequential CPU offload are alternative strategies; only one
	# should be enabled. The script previously enabled both, with sequential
	# offload called last, so sequential offload is the one kept.
	pipe.enable_sequential_cpu_offload()

	# Decode in slices / tiles to lower peak VAE memory use.
	pipe.vae.enable_slicing()
	pipe.vae.enable_tiling()

	# Text prompt describing the video to generate.
	prompt = "A golden retriever, sporting sleek black sunglasses, with its lengthy fur flowing in the breeze, sprints playfully across a rooftop terrace, recently refreshed by a light rain. The scene unfolds from a distance, the dog's energetic bounds growing larger as it approaches the camera, its tail wagging with unrestrained joy, while droplets of water glisten on the concrete behind it. The overcast sky provides a dramatic backdrop, emphasizing the vibrant golden coat of the canine as it dashes towards the viewer."

	# A seed of 0 means "pick one at random"; any other value is used as-is.
	seed = 0

	if seed == 0:
	    # Draw from OS entropy rather than reseeding the global RNG with
	    # int(time.time()): whole-second timestamps give identical seeds to runs
	    # started in the same second and overwrite global `random` state.
	    seed = random.SystemRandom().randint(0, 18446744073709551615)
	# Print the seed so the run can be reproduced.
	print(seed)

	# Run the pipeline under torch.inference_mode() and keep the returned frames.
	with torch.inference_mode():
	    # CPU generator seeded with the seed chosen above.
	    rng = torch.Generator(device="cpu").manual_seed(seed)
	    sampling_args = dict(
	        prompt=prompt,
	        num_videos_per_prompt=1,
	        num_inference_steps=50,
	        num_frames=49,
	        use_dynamic_cfg=True,
	        output_type="pt",
	        guidance_scale=7.0,
	        generator=rng,
	    )
	    result = pipe(**sampling_args)
	    video_pt = result.frames

	# Convert each generated video in the batch from a frame tensor into a list
	# of PIL images (tensor -> numpy -> PIL via VaeImageProcessor).
	# Requires `from diffusers.image_processor import VaeImageProcessor` at the
	# top of the file; the name was previously used without being imported.
	batch_size = video_pt.shape[0]
	batch_video_frames = []
	for batch_idx in range(batch_size):
	    # video_pt[batch_idx] is already the stacked frames of one video, so it
	    # is passed on directly; re-stacking its slices along dim 0 only
	    # rebuilt the same tensor.
	    pt_image = video_pt[batch_idx]
	    image_np = VaeImageProcessor.pt_to_numpy(pt_image)
	    image_pil = VaeImageProcessor.numpy_to_pil(image_np)
	    batch_video_frames.append(image_pil)

	# Export the first video of the batch as MP4, then derive a GIF from it.
	first_video_frames = batch_video_frames[0]
	# Playback rate: (frame count - 1) / 6, rounded up.
	playback_fps = math.ceil((len(first_video_frames) - 1) / 6)
	video_path = save_video(first_video_frames, fps=playback_fps)
	gif_path = convert_to_gif(video_path)