"""Gradio demo: Multi-ControlNet (OpenPose + Canny) with Stable Diffusion.

Loads two ControlNet models and a Stable Diffusion base model at import time,
then serves a Gradio interface that takes a pose image, a canny edge image and
a text prompt, and shows the conditioning images followed by the generated
images in a gallery.
"""
import gradio as gr
import torch
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetPipeline,
    UniPCMultistepScheduler,
)
from gradio.components import Markdown  # not referenced below; import retained as in the original

# Models
controlnet_pose = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16
)
controlnet_canny = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16
)

# The order of the ControlNets here must match the order of the conditioning
# images passed as `image=[...]` in generate_images (pose first, canny second).
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "dreamlike-art/dreamlike-anime-1.0",
    controlnet=[controlnet_pose, controlnet_canny],
    safety_checker=None,
    torch_dtype=torch.float16,
).to('cuda')
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# This command loads the individual model components on GPU on-demand. So, we don't
# need to explicitly call pipe.to("cuda").
#pipe.enable_model_cpu_offload()

# xformers
pipe.enable_xformers_memory_efficient_attention()

# Generator seed.
# NOTE: torch.manual_seed(0) is called once, and the generator it returns is
# shared by every request, so successive requests continue one random stream
# rather than restarting from seed 0.
generator = torch.manual_seed(0)

negative_prompt = ("worst quality, low quality, lowres, bad anatomy, bad hands, "
                   "missing fingers, extra digit, fewer digits")

# Description rendered (as markdown) above the interface.
markdown = """
## Generate controlled outputs with Mult-ControlNet and Stable Diffusion using 🤗Diffusers

This Space uses pose lines and canny edged image as the additional conditioning. Please refer to the "Examples" for what kind of images are appropriate.
The Followings are tools available to create such images. In this example, tool 1 is being used.

1. [Character bones that look like Openpose for blender Ver4.7 Depth+Canny](https://toyxyz.gumroad.com/l/ciojz)
2. [open-pose-editor](https://github.com/ZhUyU1997/open-pose-editor)
"""

# **This space using these models**:
# - ControlNet Model (canny): [lllyasviel/sd-controlnet-canny](https://hf.co/lllyasviel/sd-controlnet-canny)
# - ControlNet Model (openpose): [lllyasviel/sd-controlnet-openpose](https://hf.co/lllyasviel/sd-controlnet-openpose)
# - SD Base Model: [dreamlike-art/dreamlike-anime-1.0](https://hf.co/dreamlike-art/dreamlike-anime-1.0)


def generate_images(pose_image, canny_image, prompt):
    """Generate images conditioned on a pose image and a canny edge image.

    Args:
        pose_image: Conditioning image for the OpenPose ControlNet. The
            interface below supplies it via ``gr.Image(type="pil")``.
        canny_image: Conditioning image for the Canny ControlNet, supplied
            the same way.
        prompt: Text prompt passed to the pipeline.

    Returns:
        A list for the output gallery: the pose image, the canny image, then
        every image in the pipeline output's ``images``
        (``num_images_per_prompt=3`` is requested).

    Raises:
        gr.Error: If either conditioning image is missing.
    """
    # Fail early with a readable message instead of letting the pipeline
    # choke on a None conditioning image.
    if pose_image is None or canny_image is None:
        raise gr.Error("Please provide both a pose image and a canny edge image.")

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=[pose_image, canny_image],
        generator=generator,
        num_images_per_prompt=3,
        num_inference_steps=20,
    )

    # Show the conditioning inputs first so they can be compared with the results.
    return [pose_image, canny_image, *output.images]


gr.Interface(
    generate_images,
    inputs=[
        gr.Image(type="pil"),
        gr.Image(type="pil"),
        gr.Textbox(
            label="Enter your prompt",
            max_lines=1,
            placeholder="Enter your prompt",
        ),
    ],
    outputs=gr.Gallery().style(grid=[2], height="auto"),
    description=markdown,
    examples=[
        ["p11_clip.png",
         "c11_clip.png",
         "masterpiece, a professional portrait of woman wearing white shirts, smile, heart hand sign"
         ],
        # ["p13_clip.png",
        #  "c13_clip.png",
        #  "masterpiece, a professional portrait of woman wearing white shirts, smile"
        #  ],
        ["p12_clip.png",
         "c12_clip.png",
         "masterpiece, a professional portrait of woman wearing white shirts, smile"
         ],
    ],
    # "never" is the documented string form of the deprecated boolean False.
    allow_flagging="never",
).launch(enable_queue=True)