text-to-video2

Running

text-to-video2 / app.py

Create app.py

6f16ef1 12 months ago

2.21 kB

	import gradio as gr
	import torch
	from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
	from diffusers.utils import export_to_video

	# Load the Zeroscope v2 576w checkpoint with float16 weights.
	pipe = DiffusionPipeline.from_pretrained(
	    "cerspense/zeroscope_v2_576w",
	    torch_dtype=torch.float16,
	)
	# Replace the checkpoint's scheduler with a DPM-Solver multistep scheduler
	# built from the existing scheduler's configuration.
	pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
	# Turn on diffusers' model CPU offload (see the diffusers memory docs).
	pipe.enable_model_cpu_offload()

	def infer(prompt, num_inference_steps):
	    """Generate a video for ``prompt`` and return the exported file path.

	    Args:
	        prompt: Text description of the video to generate.
	        num_inference_steps: Number of inference steps forwarded to the
	            pipeline. Coerced to ``int`` in case the UI delivers a float.

	    Returns:
	        The value returned by ``export_to_video`` for the generated frames
	        (printed before being returned).
	    """
	    # The step count used to be hard-coded to 40, silently ignoring the
	    # argument supplied by the caller; forward the caller's value instead.
	    steps = int(num_inference_steps)
	    # NOTE(review): some diffusers releases return a batch from `.frames`;
	    # confirm against the installed version whether `.frames[0]` is needed.
	    video_frames = pipe(
	        prompt,
	        num_inference_steps=steps,
	        height=320,
	        width=576,
	        num_frames=24,
	    ).frames
	    video_path = export_to_video(video_frames)
	    print(video_path)
	    return video_path

	# Custom stylesheet passed to gr.Blocks(css=...): caps the width of the
	# element with id "col-container" at 510px and centres it horizontally,
	# and renders links underlined with a 600 font weight.
	css = """
	#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
	a {text-decoration-line: underline; font-weight: 600;}
	"""

	# Build the UI: one centred column holding the header, the prompt box,
	# a read-only steps slider, the submit button and the video player.
	with gr.Blocks(css=css) as demo:
	    with gr.Column(elem_id="col-container"):
	        # Static header with the model title and a short description.
	        gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
	<div
	style="
	display: inline-flex;
	align-items: center;
	gap: 0.8rem;
	font-size: 1.75rem;
	"
	>
	<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
	Zeroscope Text-to-Video
	</h1>
	</div>
	<p style="margin-bottom: 10px; font-size: 94%">
	A watermark-free Modelscope-based video model optimized for producing high-quality 16:9 compositions and a smooth video output. <br />
	This model was trained using 9,923 clips and 29,769 tagged frames at 24 frames, 576x320 resolution.

	</p>
	</div>""")

	        prompt_in = gr.Textbox(
	            label="Prompt",
	            placeholder="Darth Vader is surfing on waves",
	        )
	        # interactive=False keeps the slider fixed at its initial value (40).
	        inference_steps = gr.Slider(
	            minimum=10,
	            maximum=100,
	            step=1,
	            value=40,
	            interactive=False,
	        )
	        submit_btn = gr.Button("Submit")
	        video_result = gr.Video(label="Video Output")

	        # Clicking the button runs infer(prompt, steps) and shows the result
	        # in the video component.
	        submit_btn.click(
	            fn=infer,
	            inputs=[prompt_in, inference_steps],
	            outputs=[video_result],
	        )

	# Enable the request queue with max_size=12, then start the app.
	demo.queue(max_size=12).launch()