# app.py — Claymation Talking Avatar Generator (Hugging Face Space)
# NOTE(review): the original lines here were web-page scrape residue
# (repo path "za / app.py", author card, commit "21b2dc6 verified"),
# not Python source — converted to this comment so the file parses.
import gradio as gr
import torch
from diffusers import DiffusionPipeline
# ---------------------- MODEL INITIALIZATION ----------------------
# diffusers pipelines only support device_map="balanced"; passing "cpu"
# raises at load time, so on CPU-only hosts (Spaces without a GPU) we load
# without a device_map and let the pipeline default to CPU placement.
_HAS_CUDA = torch.cuda.is_available()

# fp16 only makes sense on GPU; CPU inference needs fp32.
_LOAD_KWARGS = {"torch_dtype": torch.float16 if _HAS_CUDA else torch.float32}
if _HAS_CUDA:
    _LOAD_KWARGS["device_map"] = "balanced"

# Kept for backward compatibility with any code reading this module global.
device_map = "balanced" if _HAS_CUDA else "cpu"

# Stylization pass: FLUX.1-dev (image generation / restyling).
flux_model = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    **_LOAD_KWARGS,
)
# Animation pass: OmniAvatar (audio-driven talking avatar).
omni_model = DiffusionPipeline.from_pretrained(
    "tencent/OmniAvatar",
    **_LOAD_KWARGS,
)
# ---------------------- MAIN GENERATION FUNCTION ----------------------
def generate_video(image, audio, prompt, style="claymation"):
    """Stylize *image* with FLUX, then lip-sync it to *audio* via OmniAvatar.

    Parameters
    ----------
    image : str
        Filepath of the source character image (Gradio ``type="filepath"``).
    audio : str
        Filepath of the driving voice audio (Gradio ``type="filepath"``).
    prompt : str
        Text prompt steering the FLUX stylization pass.
    style : str, optional
        Style tag forwarded to OmniAvatar (default ``"claymation"``).

    Returns
    -------
    The generated video as produced by the pipeline, or a human-readable
    "⚠️ ..." string on failure (the Gradio callback surfaces either one).
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    try:
        # BUGFIX: these .to() calls were outside the try block. Pipelines
        # loaded with a device_map refuse .to() and raise, and an exception
        # here would escape the handler and crash the Gradio worker — so
        # device placement now happens inside the guarded region.
        flux_model.to(device)
        omni_model.to(device)

        # Step 1: stylize the input image using FLUX-Kontext.
        stylized_image = flux_model(
            prompt=prompt,
            image=image,
            guidance_scale=7.5,
            num_inference_steps=30,
        ).images[0]

        # Step 2: animate the stylized image with OmniAvatar.
        result = omni_model(
            image=stylized_image,
            audio=audio,
            style=style,
        )

        # Pipelines differ in output shape; cover the known variants.
        if isinstance(result, dict) and "video" in result:
            return result["video"]
        if hasattr(result, "videos"):
            return result.videos[0]
        return f"⚠️ Unexpected output format: {type(result)}"
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        return f"⚠️ Error during generation: {str(e)}"
# ---------------------- GRADIO UI ----------------------
with gr.Blocks(title="🎭 Claymation Talking Avatar Generator") as demo:
    gr.Markdown("""
# 🎬 Claymation Talking Avatar Generator
Generate claymation-style speaking avatars using **FLUX-Kontext** for stylization
and **OmniAvatar** for lip-synced animation.
""")

    # Inputs: character image + driving audio, side by side.
    # BUGFIX: label emojis were UTF-8 mojibake ("πŸ§‘", "🎀", ...);
    # restored the intended characters.
    with gr.Row():
        image_input = gr.Image(label="🧑 Upload Character Image", type="filepath")
        audio_input = gr.Audio(label="🎤 Upload Voice Audio", type="filepath")

    prompt = gr.Textbox(
        label="📝 Prompt (Optional)",
        value="A claymation character speaking realistically",
        placeholder="Describe the style or mood...",
    )

    # Style selector and trigger button.
    # NOTE(review): source indentation was stripped; the button is placed in
    # this Row alongside the dropdown, mirroring the two-widget Row above —
    # confirm against the original layout.
    with gr.Row():
        style_dropdown = gr.Dropdown(
            choices=["claymation", "toon", "realistic"],
            value="claymation",
            label="🎨 Style",
        )
        generate_button = gr.Button("🚀 Generate Video")

    video_output = gr.Video(label="🎥 Generated Output")

    # Wire the button to the generation pipeline.
    generate_button.click(
        fn=generate_video,
        inputs=[image_input, audio_input, prompt, style_dropdown],
        outputs=video_output,
    )

# ---------------------- LAUNCH ----------------------
# queue() serializes long-running generations; share=False keeps the app
# local to the Space.
demo.queue().launch(debug=True, share=False)