import os

import cv2
import gradio as gr
import numpy as np
import openai
import torch
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetPipeline,
    UniPCMultistepScheduler,
)
from diffusers.utils import load_image
from dotenv import load_dotenv
from PIL import Image

# Let's load the popular vermeer image
image = load_image(
    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
)
image = np.array(image)

# Lower / upper thresholds handed to the Canny edge detector.
low_threshold = 100
high_threshold = 200

image = cv2.Canny(image, low_threshold, high_threshold)
# Add a trailing channel axis and replicate it three times so the edge map
# can be wrapped as a 3-channel PIL image for the pipeline.
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image)

# NOTE(review): a Canny edge map is being fed to the *scribble* ControlNet
# checkpoint -- confirm this pairing is intentional.
#
# The models are intentionally NOT moved to "cuda" here: the
# `pipe.enable_model_cpu_offload()` call that follows takes over device
# placement, so an eager `.to("cuda")` would only raise peak VRAM usage
# before the weights get shuffled back to the CPU.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-scribble", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# this command loads the individual model components on GPU on-demand.
pipe.enable_model_cpu_offload()

# Example inputs that can be pasted into the demo:
# prompt = "closeup face photo of caucasian lady in black clothes, night city street, bokeh"
# negative_prompt = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"

# Number of denoising steps used for every request.
n_steps = 25

# NOTE(review): this generator object is created once and reused by every
# call to `predict`, so its state carries over between requests -- confirm
# whether per-request reseeding is wanted.
generator = torch.manual_seed(0)


def predict(prompt, negative_prompt):
    """Generate one image for the given prompts, conditioned on ``canny_image``.

    Args:
        prompt: Text describing what the image should contain.
        negative_prompt: Text describing what the image should avoid.

    Returns:
        The first entry of the pipeline output's ``images`` list.
    """
    pipeline_output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=n_steps,
        generator=generator,
        image=canny_image,
    )
    return pipeline_output.images[0]


# NOTE(review): `value=` pre-fills each textbox with this literal text (it is
# not a label) -- confirm that is the intended UI.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(value="prompt"),
        gr.Textbox(value="negative prompt"),
    ],
    outputs="image",
)

if __name__ == "__main__":
    demo.launch()