# Text2Video-Zero / app.py
# Hugging Face Spaces file view; last commit 714bf26 by lev1:
# "T2V, Video Pix2Pix and Pose-Guided Gen" (file size 3.42 kB).
import gradio as gr
import torch
from model import Model, ModelType
# from app_canny import create_demo as create_demo_canny
from app_pose import create_demo as create_demo_pose
from app_text_to_video import create_demo as create_demo_text_to_video
from app_pix2pix_video import create_demo as create_demo_pix2pix_video
# from app_canny_db import create_demo as create_demo_canny_db
# Build the model once at import time; the same instance is passed to every
# tab factory below.
# NOTE(review): the device is hard-coded to 'cuda' with float16 weights —
# presumably this script is only meant to run on a GPU host; confirm.
model = Model(device='cuda', dtype=torch.float16)

with gr.Blocks(css='style.css') as demo:
    # Page header: title, one-line abstract and author list.
    # NOTE(review): the arXiv and GitHub anchors below have empty hrefs;
    # fill them in once the public links are confirmed.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
        <h1 style="font-weight: 900; font-size: 3rem; margin: 0rem">
            Text2Video-Zero
        </h1>
        <h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
        We propose <b>Text2Video-Zero, the first zero-shot text-to-video synthesis framework</b>, that also natively supports Video Instruct Pix2Pix, Pose Conditional, Edge Conditional,
        and Edge Conditional and DreamBooth Specialized applications.
        </h2>
        <h3 style="font-weight: 450; font-size: 1rem; margin: 0rem">
        Levon Khachatryan, Andranik Movsisyan, Vahram Tadevosyan, Roberto Henschel, Atlas Wang, Shant Navasardyan
        and <a href="https://www.humphreyshi.com/home">Humphrey Shi</a>
        [<a href="" style="color:blue;">arXiv</a>]
        [<a href="" style="color:blue;">GitHub</a>]
        </h3>
        </div>
        """)

    # One tab per application; each factory builds its own UI inside the
    # currently open tab and receives the shared model.
    with gr.Tab('Zero-Shot Text2Video'):
        create_demo_text_to_video(model)
    with gr.Tab('Video Instruct Pix2Pix'):
        create_demo_pix2pix_video(model)
    with gr.Tab('Pose Conditional'):
        create_demo_pose(model)
    with gr.Tab('Edge Conditional'):
        # Placeholder tab: the app_canny import at the top of the file is
        # commented out, so the tab is intentionally left empty for now.
        # create_demo_canny(model)
        pass
    with gr.Tab('Edge Conditional and Dreambooth Specialized'):
        # Placeholder tab: the app_canny_db import at the top of the file is
        # commented out, so the tab is intentionally left empty for now.
        # create_demo_canny_db(model)
        pass

    # Page footer: version tag plus usage caution and bias disclaimer.
    gr.HTML(
        """
        <div style="text-align: justify; max-width: 1200px; margin: 20px auto;">
        <h3 style="font-weight: 450; font-size: 0.8rem; margin: 0rem">
        <b>Version: v1.0</b>
        </h3>
        <h3 style="font-weight: 450; font-size: 0.8rem; margin: 0rem">
        <b>Caution</b>:
        We would like to raise the awareness of users of this demo of its potential issues and concerns.
        Like previous large foundation models, Text2Video-Zero could be problematic in some cases, partially we use pretrained Stable Diffusion, therefore Text2Video-Zero can Inherit Its Imperfections.
        So far, we keep all features available for research testing both to show the great potential of the Text2Video-Zero framework and to collect important feedback to improve the model in the future.
        We welcome researchers and users to report issues with the HuggingFace community discussion feature or email the authors.
        </h3>
        <h3 style="font-weight: 450; font-size: 0.8rem; margin: 0rem">
        <b>Biases and content acknowledgement</b>:
        Beware that Text2Video-Zero may output content that reinforces or exacerbates societal biases, as well as realistic faces, pornography, and violence.
        Text2Video-Zero in this demo is meant only for research purposes.
        </h3>
        </div>
        """)

# Start the Gradio server for the assembled UI with debug output enabled.
demo.launch(debug=True)
# Alternative launch kept for reference (request queue + share=True):
# demo.queue(api_open=False).launch(file_directories=['temporal'], share=True)