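# Gradio Space: turn 3D viewport screenshots (Canny edges + depth/ZBuffer)
# into styled renders with SDXL and Multi-ControlNet.
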
import gradio as gr
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler
import torch
import numpy as np
import cv2
from PIL import Image
import spaces

# 🌟 Set device and precision
device = "cuda"
precision = torch.float16
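# `import spaces` and the @spaces.GPU decorator below indicate a ZeroGPU Space,
# which attaches the GPU only while the decorated function runs.
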
# 🏗️ Load ControlNet models for Canny and Depth
controlnet_canny = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0",
    torch_dtype=precision,
)
controlnet_depth = ControlNetModel.from_pretrained(
    "xinsir/controlnet-depth-sdxl-1.0",
    torch_dtype=precision,
)
controlnet = [controlnet_canny, controlnet_depth]
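# Passing a list of ControlNets enables Multi-ControlNet: `image` and
# `controlnet_conditioning_scale` in the pipeline call must be lists in this order.
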
# When testing with a different base model, remember to change the VAE as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)

# Euler Ancestral scheduler, loaded from the SDXL base checkpoint's scheduler config
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

# SDXL pipeline with Multi-ControlNet
pipe_canny_depth = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe_canny_depth.to(device)
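# Optional: on smaller GPUs, `pipe_canny_depth.enable_model_cpu_offload()` can
# replace `.to(device)` above to reduce peak VRAM.
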
# 🎨 Image generation function
@spaces.GPU
def generate_image(prompt, canny_input, depth_input, strength, guidance, canny_conditioning_scale, depth_conditioning_scale):
    # Generate the styled image using both ControlNets. `strength` is accepted
    # from the UI but not forwarded: StableDiffusionXLControlNetPipeline is
    # text-to-image and has no `strength` parameter (only the img2img variant
    # does), so it is not passed here.
    result = pipe_canny_depth(
        prompt=prompt,
        image=[canny_input, depth_input],
        num_inference_steps=30,
        guidance_scale=guidance,
        controlnet_conditioning_scale=[float(canny_conditioning_scale), float(depth_conditioning_scale)],
    ).images[0]
    return result
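
# 🛠️ Optional sketch (not wired into the UI): derive a Canny edge map from a raw
# RGB screenshot instead of uploading a pre-computed one. The helper name and the
# 100/200 thresholds are illustrative assumptions, not values from the original app.
def make_canny_map(image: Image.Image, low: int = 100, high: int = 200) -> Image.Image:
    gray = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low, high)
    # ControlNet expects a 3-channel image, so replicate the single edge channel
    return Image.fromarray(np.stack([edges] * 3, axis=-1))
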
# 🖥️ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🏗️ 3D Screenshot to Styled Render with ControlNet")
    with gr.Row():
        with gr.Column():
            canny_input = gr.Image(label="Upload Canny Screenshot", type="pil")
            canny_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Canny Conditioning Scale")
        with gr.Column():
            depth_input = gr.Image(label="Upload Depth (ZBuffer) Screenshot", type="pil")
            depth_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Depth Conditioning Scale")
    with gr.Row():
        prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., Futuristic building in sunset")
        generate_img_button = gr.Button("Generate from Image")
    with gr.Row():
        strength = gr.Slider(0.1, 1.0, value=0.7, label="Denoising Strength")
        guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
    with gr.Row():
        result_output = gr.Image(label="Generated Styled Image")
    # 🔗 Generate button action
    generate_img_button.click(
        fn=generate_image,
        inputs=[prompt, canny_input, depth_input, strength, guidance, canny_conditioning_scale, depth_conditioning_scale],
        outputs=[result_output],
    )

# 🚀 Launch the app (share=True is not supported on Hugging Face Spaces)
demo.launch()