import os
import threading
import time
import gradio as gr
import torch
# from diffusers import CogVideoXPipeline
from models.pipeline import VchitectXLPipeline
from diffusers.utils import export_to_video
from datetime import datetime, timedelta
# from openai import OpenAI
import spaces
import moviepy.editor as mp
import os
from huggingface_hub import login
# Authenticate with the Hugging Face Hub only when a token is configured.
# Passing token=None makes `login` fall back to an interactive prompt, which
# cannot be answered when the app runs on a headless server.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)

# NOTE(review): `dtype` is not referenced anywhere in the visible code; it is
# kept so that any other user of this module-level name keeps working.
dtype = torch.float16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Single pipeline instance shared by every request handled by `infer`.
pipe = VchitectXLPipeline("Vchitect/Vchitect-XL-2B", device)

# Working directories: generated media goes to ./output; both directories are
# periodically purged by `delete_old_files`.
os.makedirs("./output", exist_ok=True)
os.makedirs("./gradio_tmp", exist_ok=True)
@spaces.GPU(duration=120)
def infer(prompt: str, progress=gr.Progress(track_tqdm=True)):
    """Run the module-level ``pipe`` on ``prompt`` and return its output.

    Args:
        prompt: Text description of the video to generate.
        progress: Gradio progress tracker. It is not referenced in the body;
            it is declared with ``track_tqdm=True`` so Gradio can presumably
            mirror tqdm progress from the pipeline in the UI.

    Returns:
        Whatever ``pipe(...)`` returns; the caller hands it to
        ``export_to_video``.
    """
    # Release cached CUDA blocks left over from a previous request.
    torch.cuda.empty_cache()

    # `torch.autocast(device_type="cuda", ...)` is the supported spelling of
    # the deprecated `torch.cuda.amp.autocast(...)` context manager.
    with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
        video = pipe(
            prompt,
            negative_prompt="",
            num_inference_steps=50,
            guidance_scale=7.5,
            width=768,
            height=432,  # other sizes noted by the author: 480x288 624x352 432x240 768x432
            frames=16,
        )
    return video
def save_video(tensor):
    """Export generated frames to a timestamp-named MP4 under ``./output``.

    Args:
        tensor: Frames in whatever form ``export_to_video`` accepts (the
            value returned by ``infer``).

    Returns:
        Path of the written ``.mp4`` file.
    """
    # Microsecond resolution keeps two videos saved within the same second
    # from being written to the same path and overwriting each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    video_path = f"./output/{timestamp}.mp4"

    # The directory is (re)created here as well as at import time.
    os.makedirs(os.path.dirname(video_path), exist_ok=True)
    export_to_video(tensor, video_path)
    return video_path
def convert_to_gif(video_path):
    """Write an 8 fps, 240-pixel-high GIF next to ``video_path``.

    Args:
        video_path: Path of the source video file.

    Returns:
        Path of the written GIF: ``video_path`` with its extension replaced
        by ``.gif``.
    """
    # Swap only the final extension. A plain str.replace(".mp4", ".gif")
    # would also rewrite ".mp4" occurring elsewhere in the path, and would
    # return the source path itself when the file has another extension.
    gif_path = os.path.splitext(video_path)[0] + ".gif"

    clip = mp.VideoFileClip(video_path)
    try:
        preview = clip.set_fps(8).resize(height=240)
        preview.write_gif(gif_path, fps=8)
    finally:
        # Always release the clip, even if encoding fails, so a long-running
        # server does not accumulate open readers.
        clip.close()
    return gif_path
def _remove_files_older_than(directory, cutoff):
    """Delete regular files directly inside ``directory`` modified before ``cutoff``."""
    try:
        filenames = os.listdir(directory)
    except OSError as exc:
        # A missing or unreadable directory must not stop the cleanup loop.
        print(f"delete_old_files: cannot list {directory!r}: {exc}")
        return

    for filename in filenames:
        file_path = os.path.join(directory, filename)
        try:
            if not os.path.isfile(file_path):
                continue  # sub-directories and other entries are left alone
            modified = datetime.fromtimestamp(os.path.getmtime(file_path))
            if modified < cutoff:
                os.remove(file_path)
        except FileNotFoundError:
            # The file vanished between listing and removal; nothing to do.
            continue
        except OSError as exc:
            print(f"delete_old_files: cannot remove {file_path!r}: {exc}")


def delete_old_files(
    directories=("./output", "./gradio_tmp"),
    max_age_minutes=10,
    interval_seconds=600,
    max_passes=None,
):
    """Periodically delete stale files from the working directories.

    With the default arguments this loops forever, which is how it is used as
    a background thread target.

    Args:
        directories: Directories whose direct child files are examined.
        max_age_minutes: Files last modified longer ago than this are removed.
        interval_seconds: Pause between two cleanup passes.
        max_passes: Number of passes to run before returning; ``None`` (the
            default) means run indefinitely.

    Filesystem errors are reported with ``print`` and skipped, so a single
    failing file or directory does not terminate the loop.
    """
    max_age = timedelta(minutes=max_age_minutes)
    passes_left = max_passes

    while passes_left is None or passes_left > 0:
        cutoff = datetime.now() - max_age
        for directory in directories:
            _remove_files_older_than(directory, cutoff)

        if passes_left is not None:
            passes_left -= 1
            if passes_left == 0:
                break  # do not sleep after the final requested pass
        time.sleep(interval_seconds)
# Run the cleanup loop in the background. It is a daemon thread, so it does
# not keep the interpreter alive when the main thread exits.
_cleanup_thread = threading.Thread(target=delete_old_files, daemon=True)
_cleanup_thread.start()
# Gradio UI: a prompt box and a generate button on one side, the resulting
# video and its download links on the other.
# NOTE(review): the source had lost its indentation, so the nesting of the
# Row/Column containers below is reconstructed - confirm against the intended
# layout.
with gr.Blocks() as demo:
    # Page header and usage disclaimer.
    gr.Markdown("""
Vchitect-XL 2B Huggingface Space🤗
⚠️ This demo is for academic research and experiential use only.
Users should strictly adhere to local laws and ethics.
""")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here", lines=5)
            with gr.Column():
                generate_button = gr.Button("🎬 Generate Video")

        with gr.Column():
            video_output = gr.Video(label="Vchitect-XL Generated Video", width=768, height=432)
            with gr.Row():
                # Hidden until a generation finishes and there is a file to offer.
                download_video_button = gr.File(label="📥 Download Video", visible=False)
                download_gif_button = gr.File(label="📥 Download GIF", visible=False)

    def generate(prompt, model_choice=None, progress=gr.Progress(track_tqdm=True)):
        """Generate a video for ``prompt`` and expose it as MP4 and GIF.

        Args:
            prompt: Text entered in the prompt box.
            model_choice: Unused. It defaults to ``None`` because the click
                handler only supplies ``prompt``; as a required parameter it
                made every click fail with a missing-argument error.
            progress: Gradio progress tracker forwarded to ``infer``.

        Returns:
            A 3-tuple matching the click handler's ``outputs``: the video
            path for the player, then updates that reveal the MP4 and GIF
            download components.
        """
        frames = infer(prompt, progress=progress)
        video_path = save_video(frames)
        gif_path = convert_to_gif(video_path)

        video_update = gr.update(visible=True, value=video_path)
        gif_update = gr.update(visible=True, value=gif_path)
        return video_path, video_update, gif_update

    generate_button.click(
        generate,
        inputs=[prompt],
        outputs=[video_output, download_video_button, download_gif_button],
    )
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script,
    # not when it is imported.
    demo.launch()