# Gradio demo that turns a text prompt into a short video with the
# "cerspense/zeroscope_v2_576w" diffusers pipeline.

import gradio as gr
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video

# Build the pipeline once at import time so every request reuses it.
pipe = DiffusionPipeline.from_pretrained(
    "cerspense/zeroscope_v2_576w", torch_dtype=torch.float16
)
# Swap in the DPM-Solver multistep scheduler, built from the config of the
# scheduler the pipeline was loaded with.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()


def infer(prompt, num_inference_steps):
    """Generate a video for *prompt* and return the path of the exported file.

    Args:
        prompt: Text description of the video to generate.
        num_inference_steps: Number of denoising steps to run. The UI passes
            the slider value here.

    Returns:
        The path returned by ``export_to_video`` for the rendered frames.
    """
    video_frames = pipe(
        prompt,
        # Honour the caller's value instead of a hard-coded 40; the cast
        # guards against the UI delivering the slider value as a float.
        num_inference_steps=int(num_inference_steps),
        # 576x320 at 24 frames matches the training setup quoted in the
        # page description below.
        height=320,
        width=576,
        num_frames=24,
    ).frames
    video_path = export_to_video(video_frames)
    print(video_path)
    return video_path


css = """ #col-container {max-width: 510px; margin-left: auto; margin-right: auto;} a {text-decoration-line: underline; font-weight: 600;} """

# Page description shown above the inputs (kept at column 0 so the emitted
# text is unchanged).
INTRO_HTML = """

Zeroscope Text-to-Video

A watermark-free Modelscope-based video model optimized for producing high-quality 16:9 compositions and a smooth video output.
This model was trained using 9,923 clips and 29,769 tagged frames at 24 frames, 576x320 resolution.

"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(INTRO_HTML)

        prompt_in = gr.Textbox(
            label="Prompt", placeholder="Darth Vader is surfing on waves"
        )
        # Read-only slider: it always submits its initial value (40) to infer.
        inference_steps = gr.Slider(
            minimum=10, maximum=100, step=1, value=40, interactive=False
        )
        submit_btn = gr.Button("Submit")
        video_result = gr.Video(label="Video Output")

    submit_btn.click(
        fn=infer,
        inputs=[prompt_in, inference_steps],
        outputs=[video_result],
    )

# Queue requests (at most 12 waiting) and start the app.
demo.queue(max_size=12).launch()