"""Gradio web UI for Stable Diffusion 2 depth-to-image generation.

Loads ``stabilityai/stable-diffusion-2-depth`` once at import time and exposes
a small Blocks interface that produces variations of an uploaded image.
"""
import os
from pathlib import Path

import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionDepth2ImgPipeline
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
is_gpu_associated = torch.cuda.is_available()

# The pipeline is created once at import time and shared by every request.
dept2img = StableDiffusionDepth2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-depth",
    torch_dtype=torch.float16,
).to(device)

# NOTE(review): this stylesheet is defined but never passed to gr.Blocks().
css = '''
        .instruction{position: absolute; top: 0;right: 0;margin-top: 0px !important}
        .arrow{position: absolute;top: 0;right: -110px;margin-top: -8px !important}
        #component-4, #component-3, #component-10{min-height: 0}
        .duplicate-button img{margin: 0}
    '''


def pad_image(input_image):
    """Pad a PIL image to a square whose sides are multiples of 64.

    The image is first extended on its right and bottom edges (replicating the
    edge pixels) so that each side becomes a multiple of 64, with a minimum of
    128. If the result is not square, it is pasted onto a black square canvas,
    centred along the shorter axis.

    Args:
        input_image: PIL image. The padding spec covers three array axes, so
            the image must have a channel axis (e.g. RGB).

    Returns:
        A square PIL image in the same mode as the padded input.
    """
    size = np.array(input_image.size)
    # Round every side up to the next multiple of 64, never below 2 * 64.
    target = np.max(((2, 2), np.ceil(size / 64).astype(int)), axis=0) * 64
    pad_w, pad_h = target - size
    im_padded = Image.fromarray(
        np.pad(np.array(input_image),
               ((0, pad_h), (0, pad_w), (0, 0)), mode='edge'))

    w, h = im_padded.size
    if w == h:
        return im_padded

    side = max(w, h)
    new_image = Image.new(im_padded.mode, (side, side), (0, 0, 0))
    # Centre along the shorter axis; the offset on the longer axis is zero.
    new_image.paste(im_padded, ((side - w) // 2, (side - h) // 2))
    return new_image


def predict(input_image, prompt, negative_prompt, steps, num_samples, scale,
            seed, strength, depth_image=None):
    """Generate depth-conditioned variations of ``input_image``.

    Args:
        input_image: PIL image uploaded by the user.
        prompt: Text prompt guiding the generation.
        negative_prompt: Text describing what to steer away from.
        steps: Number of inference steps.
        num_samples: Number of images to generate for the prompt.
        scale: Guidance scale.
        seed: Seed for the random generator, so results are reproducible.
        strength: Strength value forwarded to the pipeline.
        depth_image: Optional PIL depth image. When omitted, no depth map is
            passed and the pipeline is left to obtain one itself.

    Returns:
        The ``images`` entry of the pipeline output.

    Raises:
        gr.Error: If no GPU is available or no input image was provided.
    """
    if not is_gpu_associated:
        raise gr.Error("Please associate a T4 GPU for this Space")
    if input_image is None:
        raise gr.Error("Please upload an input image")
    torch.cuda.empty_cache()

    depth = None
    if depth_image is not None:
        depth_image = pad_image(depth_image)
        depth_image = depth_image.resize((512, 512))
        # Grayscale values scaled to [0, 1].
        depth = np.array(depth_image.convert("L"))
        depth = depth.astype(np.float32) / 255.0
        # NOTE(review): this yields a (1, 1, 512, 512) tensor; confirm the
        # rank the pipeline expects for ``depth_map`` before enabling the
        # depth upload in the UI.
        depth = depth[None, None]
        depth = torch.from_numpy(depth)

    init_image = input_image.convert("RGB")
    # Pad to a square with sides that are multiples of 64, then scale to the
    # fixed 512x512 working resolution.
    image = pad_image(init_image)
    image = image.resize((512, 512))

    # The pipeline has no ``seed`` argument; reproducibility is obtained by
    # handing it a seeded generator instead.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    result = dept2img(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        depth_map=depth,
        generator=generator,
        strength=strength,
        num_inference_steps=int(steps),
        guidance_scale=scale,
        num_images_per_prompt=int(num_samples),
    )
    return result['images']


block = gr.Blocks().queue()
with block:
    # NOTE(review): the Box is assumed to wrap only the header; confirm the
    # intended nesting of the rows below.
    with gr.Box():
        if is_gpu_associated:
            top_description = gr.HTML(f'''

Depth2Img Web UI

Create variations of an image while preserving shape and depth!

''')
        else:
            top_description = gr.HTML(f'''

Depth2Img Web UI

Create variations of an image while preserving shape and depth!

There's only one step left before you can run the app: attribute a T4 GPU to it (via the Settings tab) and run the app below. You will be billed by the minute from when you activate the GPU until it is turned off.

''')
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(source='upload', type="pil")
            # The optional depth-image upload is currently disabled:
            # depth_image = gr.Image(
            #     source='upload', type="pil", label="Depth image Optional", value=None)
            depth_image = None
            prompt = gr.Textbox(label="Prompt")
            negative_prompt = gr.Textbox(label="Negative Prompt")

            run_button = gr.Button(label="Run")
            with gr.Accordion("Advanced options", open=False):
                num_samples = gr.Slider(
                    label="Images", minimum=1, maximum=4, value=1, step=1)
                steps = gr.Slider(label="Steps", minimum=1,
                                  maximum=100, value=50, step=1)
                scale = gr.Slider(
                    label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1
                )
                strength = gr.Slider(
                    label="Strength", minimum=0.0, maximum=1.0, value=0.9, step=0.01
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=2147483647,
                    step=1,
                    randomize=True,
                )
        with gr.Column():
            gallery = gr.Gallery(label="Generated images", show_label=False).style(
                grid=[2], height="auto")

    # Cached examples invoke ``predict`` when they are built, so they are only
    # registered when a GPU is available.
    if is_gpu_associated:
        gr.Examples(
            examples=[
                ["./examples/original_iso.png", "hogwarts castle",
                 "", 50, 4, 10.0, 123123123, 0.8],
                ["./examples/original_sword.png", "flaming sword",
                 "", 50, 4, 9.0, 1734133747, 0.8],
            ],
            inputs=[input_image, prompt, negative_prompt, steps,
                    num_samples, scale, seed, strength],
            outputs=[gallery],
            fn=predict,
            cache_examples=True,
        )
    run_button.click(fn=predict, inputs=[input_image, prompt, negative_prompt,
                                         steps, num_samples, scale, seed, strength], outputs=[gallery])

block.launch(show_api=False)