import gradio as gr
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler
import torch
import numpy as np
import cv2
from PIL import Image
import spaces
# Set device and precision
device = "cuda"
precision = torch.float16
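# Optional preprocessing sketch (an addition, not part of the original app): the
# cv2 / numpy / PIL imports above suggest edge maps may be computed on the fly.
# This hypothetical helper turns a raw screenshot into the 3-channel Canny edge
# image the ControlNet expects; the 100/200 thresholds are assumed defaults.
def make_canny(image: Image.Image, low: int = 100, high: int = 200) -> Image.Image:
    gray = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low, high)  # single-channel uint8 edge map
    return Image.fromarray(np.stack([edges] * 3, axis=-1))  # replicate to 3 channels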
# Load the ControlNet models for Canny and Depth conditioning
controlnet_canny = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0",
    torch_dtype=precision
)
controlnet_depth = ControlNetModel.from_pretrained(
    "xinsir/controlnet-depth-sdxl-1.0",
    torch_dtype=precision
)
# A list of ControlNets is treated as a MultiControlNet: the pipeline then expects
# one conditioning image and one conditioning scale per model.
controlnet = [controlnet_canny, controlnet_depth]
# NOTE: if you swap in a different base model, swap the VAE to match. The fp16-fix
# VAE avoids the NaN/black-image artifacts the stock SDXL VAE can produce in float16.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)
# Euler Ancestral scheduler, initialized from the SDXL base config
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
# SDXL base pipeline wired up with both ControlNets
pipe_canny_depth = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe_canny_depth.to(device)
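# Optional (an assumption, not part of the original app): if VRAM is tight, the
# standard diffusers memory levers apply here, e.g.:
# pipe_canny_depth.enable_attention_slicing()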
# Image generation function. The Space runs on ZeroGPU, so the GPU must be
# requested per call via the `spaces` decorator (the import above was otherwise unused).
@spaces.GPU
def generate_image(prompt, canny_input, depth_input, guidance, canny_conditioning_scale, depth_conditioning_scale):
    # Generate the styled image: one conditioning image and one scale per ControlNet.
    # NOTE: the original also passed `strength`, but the text-to-image ControlNet
    # pipeline has no such argument (it belongs to the img2img variant), so that
    # parameter is dropped here instead of being silently ignored.
    result = pipe_canny_depth(
        prompt=prompt,
        image=[canny_input, depth_input],
        num_inference_steps=30,
        guidance_scale=guidance,
        controlnet_conditioning_scale=[float(canny_conditioning_scale), float(depth_conditioning_scale)],
    ).images[0]
    return result
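# Optional depth sketch (an addition, not part of the original app): if no ZBuffer
# export is available, a monocular depth estimator can stand in. This assumes the
# `transformers` depth-estimation pipeline with the Intel/dpt-large checkpoint and
# loads the model lazily on first use.
_depth_estimator = None

def make_depth(image: Image.Image) -> Image.Image:
    global _depth_estimator
    if _depth_estimator is None:
        from transformers import pipeline as hf_pipeline
        _depth_estimator = hf_pipeline("depth-estimation", model="Intel/dpt-large")
    return _depth_estimator(image)["depth"].convert("RGB")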
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 3D Screenshot to Styled Render with ControlNet")
    with gr.Row():
        with gr.Column():
            canny_input = gr.Image(label="Upload Canny Screenshot", type="pil")
            canny_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Canny Conditioning Scale")
        with gr.Column():
            depth_input = gr.Image(label="Upload Depth (ZBuffer) Screenshot", type="pil")
            depth_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Depth Conditioning Scale")
    with gr.Row():
        prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., Futuristic building in sunset")
        generate_img_button = gr.Button("Generate from Image")
    with gr.Row():
        # The original "Denoising Strength" slider is removed: the text-to-image
        # ControlNet pipeline does not consume a `strength` value, so it had no effect.
        guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
    with gr.Row():
        result_output = gr.Image(label="Generated Styled Image")
    # Wire the button to the generation function
    generate_img_button.click(
        fn=generate_image,
        inputs=[prompt, canny_input, depth_input, guidance, canny_conditioning_scale, depth_conditioning_scale],
        outputs=[result_output]
    )
# Launch the app (share links are unnecessary on Spaces, where the app is already hosted publicly)
demo.launch()
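# To run this outside Spaces (an assumption; the original pins no requirements),
# the dependencies are roughly:
#   pip install gradio spaces diffusers transformers accelerate opencv-python-headless torch
# Off ZeroGPU hardware the @spaces.GPU decorator should degrade to a pass-through,
# so the function runs on whatever device `device` points at.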