"""Gradio demo page for text- and depth-conditioned video generation."""
import os

import gradio as gr

from model import Model

# True only when the SPACE_AUTHOR_NAME environment variable equals "PAIR";
# used below to hide the memory-related sliders.
on_huggingspace = os.environ.get("SPACE_AUTHOR_NAME") == "PAIR"


def create_demo(model: Model):
    """Assemble the depth-conditioned video generation demo.

    The page has a title row, a description row, and a three-column row
    holding the input video, the prompt with its advanced options, and the
    generated video. Both the example rows and the Run button are wired to
    ``model.process_controlnet_depth``.

    Args:
        model: Object providing ``process_controlnet_depth``, which is
            called with the values of the input video, prompt, chunk size,
            watermark and merging ratio components (in that order).

    Returns:
        The constructed ``gr.Blocks`` instance; it is not launched here.
    """
    asset_dir = "__assets__/depth_videos_depth"
    # Each clip is paired with the prompts offered for it in the examples.
    prompts_by_clip = (
        ("girl_dancing.mp4", (
            "A stormtrooper, masterpiece, a high-quality, detailed, and professional photo",
            "Oil painting of a catwoman, masterpiece, a high-quality, detailed, and professional photo",
            "Oil painting of a girl dancing closed eyes, masterpiece, a high-quality, detailed, and professional photo",
        )),
        ("woman.mp4", (
            "A robot is dancing in the Sahara desert, detailed, and professional photo",
            "Wonder woman is dancing, a high-quality, detailed, and professional photo",
            "Oil painting of a girl dancing close-up, masterpiece, a high-quality, detailed, and professional photo",
        )),
        ("man.mp4", (
            "An astronaut is Dancing in space, a high-quality, detailed, and professional photo",
            "Iron Man is dancing, a high-quality, detailed, and professional photo",
            "Spiderman is Dancing, a high-quality, detailed, and professional photo",
        )),
        ("halloween.mp4", (
            "Beautiful blonde girl, a high-quality, detailed, and professional photo",
            "Beautiful brunette girl, a high-quality, detailed, and professional photo",
            "Beautiful red-haired girl, a high-quality, detailed, and professional photo",
        )),
    )
    example_rows = [
        [f"{asset_dir}/{clip}", text]
        for clip, texts in prompts_by_clip
        for text in texts
    ]

    description_html = (
        "\n\n"
        "Description: For performance purposes, our current preview release"
        " supports any input videos but caps output videos after 80 frames"
        " and the input videos are scaled down before processing."
        "\n\n"
    )

    # Single handler shared by the example rows and the Run button.
    generate = model.process_controlnet_depth

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Text and Depth Conditional Video Generation')

        with gr.Row():
            gr.HTML(description_html)

        with gr.Row():
            with gr.Column():
                source_video = gr.Video(
                    label="Input Video",
                    source='upload',
                    format="mp4",
                    visible=True,
                )
                input_video = source_video.style(height="auto")

            with gr.Column():
                prompt = gr.Textbox(label='Prompt')
                run_button = gr.Button(label='Run')
                with gr.Accordion('Advanced options', open=False):
                    watermark = gr.Radio(
                        ["Picsart AI Research", "Text2Video-Zero", "None"],
                        label="Watermark",
                        value='Picsart AI Research',
                    )
                    # Both sliders are hidden when on_huggingspace is true.
                    chunk_size = gr.Slider(
                        label="Chunk size",
                        minimum=2,
                        maximum=16,
                        value=2,
                        step=1,
                        visible=not on_huggingspace,
                        info="Number of frames processed at once. Reduce for lower memory usage.",
                    )
                    merging_ratio = gr.Slider(
                        label="Merging ratio",
                        minimum=0.0,
                        maximum=0.9,
                        step=0.1,
                        value=0.0,
                        visible=not on_huggingspace,
                        info="Ratio of how many tokens are merged. The higher the more compression (less memory and faster inference).",
                    )

            with gr.Column():
                result = gr.Video(label="Generated Video").style(height="auto")

        # Order matches the positional arguments passed to the handler.
        handler_inputs = [
            input_video,
            prompt,
            chunk_size,
            watermark,
            merging_ratio,
        ]

        # Example outputs are never cached, and clicking an example only
        # fills in the inputs without running the handler.
        gr.Examples(
            examples=example_rows,
            inputs=handler_inputs,
            outputs=result,
            fn=generate,
            cache_examples=False,
            run_on_click=False,
        )

        run_button.click(
            fn=generate,
            inputs=handler_inputs,
            outputs=result,
        )

    return demo