# mochi-1 / app.py
# sab
# fix layout
# c3dbd3d
# raw
# history blame
# 2.28 kB
import os
if os.environ.get("SPACES_ZERO_GPU") is not None:
    # SPACES_ZERO_GPU is set: use the real decorator package.
    import spaces
else:
    class spaces:
        """Local stand-in for the ``spaces`` package when it is not imported.

        Only ``GPU`` is provided, as a no-op decorator, so the application
        code can be decorated the same way in both environments.
        """

        @staticmethod
        def GPU(func=None, **_options):
            """Return the decorated function unchanged.

            Works both as a bare decorator (``@spaces.GPU``) and in the
            parameterized form this file uses (``@spaces.GPU(duration=600)``).

            Args:
                func: The function being decorated in the bare form; ``None``
                    when the decorator is called with keyword options.
                **_options: Accepted and ignored (for example ``duration``).

            Returns:
                ``func`` itself in the bare form, otherwise a decorator that
                returns its argument unchanged.
            """
            if callable(func):
                # Bare usage: nothing to wrap, hand the function straight back
                # so its name and signature stay intact.
                return func

            def decorator(target):
                return target

            return decorator
import torch
from diffusers import MochiPipeline
from diffusers.utils import export_to_video
import gradio as gr
import config as cfg
# Load the pre-trained pipeline, requesting the bf16 weight variant and
# bfloat16 as the torch dtype.
pipe = MochiPipeline.from_pretrained(
    cfg.MODEL_PRE_TRAINED_ID,
    variant="bf16",
    torch_dtype=torch.bfloat16,
)

# Enable memory-saving optimizations: model CPU offload and VAE tiling.
pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()
@spaces.GPU(duration=600)
def generate_video(prompt, num_frames=84, fps=30, high_quality=False):
    """Generate a video from a text prompt and return the path to the file.

    Args:
        prompt: Text prompt passed to the pipeline.
        num_frames: Number of frames to generate; coerced to ``int`` before
            it is handed to the pipeline.
        fps: Frame rate used when exporting the frames to video.
        high_quality: When true, run the pipeline inside a bfloat16 CUDA
            autocast context. Refused when ``SPACES_ZERO_GPU`` is set.

    Returns:
        Path of the exported ``.mp4`` file.

    Raises:
        RuntimeError: If ``high_quality`` is requested while the
            ``SPACES_ZERO_GPU`` environment variable is set.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import tempfile

    # UI sliders may deliver numeric values as floats; the frame count must
    # be an integer.
    frame_count = int(num_frames)

    if high_quality:
        print("High quality option selected. Requires 42GB VRAM.")
        # Check if running on ZeroGPU
        if os.environ.get("SPACES_ZERO_GPU") is not None:
            raise RuntimeError("High quality option may fail on ZeroGPU environments.")
        with torch.autocast("cuda", torch.bfloat16, cache_enabled=False):
            frames = pipe(prompt, num_frames=frame_count).frames[0]
    else:
        print("Standard quality option selected.")
        frames = pipe(prompt, num_frames=frame_count).frames[0]

    # Export frames as video. Each call gets its own file: a fixed name in
    # the working directory would let concurrent requests overwrite each
    # other's output. The file is not deleted here (delete=False) because
    # the caller still has to read it.
    with tempfile.NamedTemporaryFile(
        prefix="mochi-", suffix=".mp4", delete=False
    ) as handle:
        video_path = handle.name
    export_to_video(frames, video_path, fps=fps)
    return video_path
# Build the Gradio interface. The input components are listed in the same
# order as generate_video's parameters: prompt, num_frames, fps, high_quality.
_input_components = [
    gr.Textbox(lines=2, placeholder="Enter your text prompt here... 💡"),
    gr.Slider(minimum=1, maximum=240, value=84, label="Number of frames 🎞️"),
    gr.Slider(minimum=1, maximum=60, value=30, label="FPS (Frames per second) ⏱️"),
    gr.Checkbox(label="High Quality Output (requires 42GB VRAM, may fail on ZeroGPU)"),
]

# Title, description, examples and article text all come from the config module.
interface = gr.Interface(
    fn=generate_video,
    inputs=_input_components,
    outputs=gr.Video(),
    title=cfg.TITLE,
    description=cfg.DESCRIPTION,
    examples=cfg.EXAMPLES,
    article=cfg.BUY_ME_A_COFFE,
)
# Custom CSS intended to center the title and description.
# NOTE(review): the stylesheet is attached by assigning the attribute after
# the Interface has been built. Confirm that the installed Gradio version
# honors a post-construction assignment and that the selectors below match
# elements it actually renders.
interface.css = """
.interface-title {
text-align: center;
}
.interface-description {
text-align: center;
}
"""
# Launch the application only when this file is run as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    interface.launch()