"""Gradio demo for LayerDiffuse (unofficial): transparent-image generation built on ComfyUI nodes."""

import sys
import os
import torch
from pathlib import Path
from huggingface_hub import hf_hub_download
from PIL import Image, ImageSequence, ImageOps
from typing import Any, List
import numpy as np

sys.path.append(os.path.dirname("./ComfyUI/"))

from ComfyUI.nodes import (
    CheckpointLoaderSimple,
    VAEDecode,
    VAEEncode,
    KSampler,
    EmptyLatentImage,
    CLIPTextEncode,
)
from ComfyUI.comfy_extras.nodes_compositing import JoinImageWithAlpha
from ComfyUI.comfy_extras.nodes_mask import InvertMask, MaskToImage
from ComfyUI.comfy import samplers
from ComfyUI.custom_nodes.layerdiffuse.layered_diffusion import (
    LayeredDiffusionFG,
    LayeredDiffusionDecode,
    LayeredDiffusionCond,
)
import gradio as gr

# Download the SDXL checkpoint and expose it to ComfyUI via a symlink.
MODEL_PATH = hf_hub_download(
    repo_id="lllyasviel/fav_models",
    subfolder="fav",
    filename="juggernautXL_v8Rundiffusion.safetensors",
)
try:
    os.symlink(
        MODEL_PATH,
        Path("./ComfyUI/models/checkpoints/juggernautXL_v8Rundiffusion.safetensors"),
    )
except FileExistsError:
    pass

# Instantiate the ComfyUI node callables once at import time.
with torch.inference_mode():
    ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint
    ckpt = ckpt_load_checkpoint(ckpt_name="juggernautXL_v8Rundiffusion.safetensors")
    cliptextencode = CLIPTextEncode().encode
    emptylatentimage_generate = EmptyLatentImage().generate
    ksampler_sample = KSampler().sample
    vae_decode = VAEDecode().decode
    vae_encode = VAEEncode().encode
    ld_fg_apply_layered_diffusion = LayeredDiffusionFG().apply_layered_diffusion
    ld_cond_apply_layered_diffusion = LayeredDiffusionCond().apply_layered_diffusion
    ld_decode = LayeredDiffusionDecode().decode
    mask_to_image = MaskToImage().mask_to_image
    invert_mask = InvertMask().invert
    join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha


def tensor_to_pil(images: torch.Tensor | List[torch.Tensor]) -> List[Image.Image]:
    """Convert ComfyUI image tensors (values in [0, 1]) to PIL images."""
    if not isinstance(images, list):
        images = [images]
    imgs = []
    for image in images:
        i = 255.0 * image.cpu().numpy()
        img = Image.fromarray(np.clip(np.squeeze(i), 0, 255).astype(np.uint8))
        imgs.append(img)
    return imgs


def pad_image(input_image):
    """Pad an image to a multiple of 64 pixels, then letterbox it onto a square canvas."""
    pad_w, pad_h = (
        np.max(((2, 2), np.ceil(np.array(input_image.size) / 64).astype(int)), axis=0)
        * 64
        - input_image.size
    )
    im_padded = Image.fromarray(
        np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode="edge")
    )
    w, h = im_padded.size
    if w == h:
        return im_padded
    elif w > h:
        new_image = Image.new(im_padded.mode, (w, w), (0, 0, 0))
        new_image.paste(im_padded, (0, (w - h) // 2))
        return new_image
    else:
        new_image = Image.new(im_padded.mode, (h, h), (0, 0, 0))
        new_image.paste(im_padded, ((h - w) // 2, 0))
        return new_image


def pil_to_tensor(image: Image.Image) -> tuple[torch.Tensor, torch.Tensor]:
    """Convert a PIL image (or image sequence) to ComfyUI image and mask tensors."""
    output_images = []
    output_masks = []
    for i in ImageSequence.Iterator(image):
        i = ImageOps.exif_transpose(i)
        if i.mode == "I":
            i = i.point(lambda i: i * (1 / 255))
        image = i.convert("RGB")
        image = np.array(image).astype(np.float32) / 255.0
        image = torch.from_numpy(image)[None,]
        if "A" in i.getbands():
            mask = np.array(i.getchannel("A")).astype(np.float32) / 255.0
            mask = 1.0 - torch.from_numpy(mask)
        else:
            mask = torch.zeros((64, 64), dtype=torch.float32, device="cpu")
        output_images.append(image)
        output_masks.append(mask.unsqueeze(0))
    if len(output_images) > 1:
        output_image = torch.cat(output_images, dim=0)
        output_mask = torch.cat(output_masks, dim=0)
    else:
        output_image = output_images[0]
        output_mask = output_masks[0]
    return (output_image, output_mask)


def predict(
    prompt: str,
    negative_prompt: str,
    input_image: Image.Image | None,
    cond_mode: str,
    seed: int,
    sampler_name: str,
    scheduler: str,
    steps: int,
    cfg: float,
    denoise: float,
):
    with torch.inference_mode():
        cliptextencode_prompt = cliptextencode(
            text=prompt,
            clip=ckpt[1],
        )
        cliptextencode_negative_prompt = cliptextencode(
            text=negative_prompt,
            clip=ckpt[1],
        )
        emptylatentimage_sample = emptylatentimage_generate(
            width=1024, height=1024, batch_size=1
        )
        if input_image is not None:
            # Conditioned path: encode the input image and use it as
            # foreground/background conditioning for LayerDiffuse.
            img_tensor = pil_to_tensor(pad_image(input_image).resize((1024, 1024)))
            img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2])
            layereddiffusionapply_sample = ld_cond_apply_layered_diffusion(
                config=cond_mode,
                weight=1,
                model=ckpt[0],
                cond=cliptextencode_prompt[0],
                uncond=cliptextencode_negative_prompt[0],
                latent=img_latent[0],
            )
            ksampler = ksampler_sample(
                steps=steps,
                cfg=cfg,
                sampler_name=sampler_name,
                scheduler=scheduler,
                seed=seed,
                model=layereddiffusionapply_sample[0],
                positive=layereddiffusionapply_sample[1],
                negative=layereddiffusionapply_sample[2],
                latent_image=emptylatentimage_sample[0],
                denoise=denoise,
            )
            vaedecode_sample = vae_decode(
                samples=ksampler[0],
                vae=ckpt[2],
            )
            layereddiffusiondecode_sample = ld_decode(
                sd_version="SDXL",
                sub_batch_size=16,
                samples=ksampler[0],
                images=vaedecode_sample[0],
            )
            rgb_img = tensor_to_pil(vaedecode_sample[0])
            return flatten([rgb_img])
        else:
            # Unconditioned path: generate a transparent foreground image.
            layereddiffusionapply_sample = ld_fg_apply_layered_diffusion(
                config="SDXL, Conv Injection", weight=1, model=ckpt[0]
            )
            ksampler = ksampler_sample(
                steps=steps,
                cfg=cfg,
                sampler_name=sampler_name,
                scheduler=scheduler,
                seed=seed,
                model=layereddiffusionapply_sample[0],
                positive=cliptextencode_prompt[0],
                negative=cliptextencode_negative_prompt[0],
                latent_image=emptylatentimage_sample[0],
                denoise=denoise,
            )
            vaedecode_sample = vae_decode(
                samples=ksampler[0],
                vae=ckpt[2],
            )
            layereddiffusiondecode_sample = ld_decode(
                sd_version="SDXL",
                sub_batch_size=16,
                samples=ksampler[0],
                images=vaedecode_sample[0],
            )
            mask = mask_to_image(mask=layereddiffusiondecode_sample[1])
            ld_image = tensor_to_pil(layereddiffusiondecode_sample[0][0])
            inverted_mask = invert_mask(mask=layereddiffusiondecode_sample[1])
            rgba_img = join_image_with_alpha(
                image=layereddiffusiondecode_sample[0], alpha=inverted_mask[0]
            )
            rgba_img = tensor_to_pil(rgba_img[0])
            mask = tensor_to_pil(mask[0])
            rgb_img = tensor_to_pil(vaedecode_sample[0])
            return flatten([rgba_img, mask, rgb_img, ld_image])


examples = [["An old man sitting on a chair looking at the sky"]]


def flatten(l: List[List[Any]]) -> List[Any]:
    return [item for sublist in l for item in sublist]


def predict_examples(prompt, negative_prompt):
    return predict(
        prompt, negative_prompt, None, None, 0, "euler", "normal", 20, 8.0, 1.0
    )


css = """
.gradio-container{
    max-width: 60rem;
}
"""

with gr.Blocks(css=css) as blocks:
    gr.Markdown("""# LayerDiffuse (unofficial)
""")
    with gr.Row():
        with gr.Column():
            prompt = gr.Text(label="Prompt")
            negative_prompt = gr.Text(label="Negative Prompt")
            button = gr.Button("Generate")
            with gr.Accordion(open=False, label="Input Images (Optional)"):
                cond_mode = gr.Radio(
                    value="SDXL, Foreground",
                    choices=["SDXL, Foreground", "SDXL, Background"],
                    info="Whether to use the input image as foreground or background",
                )
                input_image = gr.Image(label="Input Image", type="pil")
            with gr.Accordion(open=False, label="Advanced Options"):
                seed = gr.Slider(
                    label="Seed",
                    value=0,
                    minimum=-1,
                    maximum=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    randomize=True,
                )
                sampler_name = gr.Dropdown(
                    choices=samplers.KSampler.SAMPLERS,
                    label="Sampler Name",
                    value=samplers.KSampler.SAMPLERS[0],
                )
                scheduler = gr.Dropdown(
                    choices=samplers.KSampler.SCHEDULERS,
                    label="Scheduler",
                    value=samplers.KSampler.SCHEDULERS[0],
                )
                steps = gr.Number(
                    label="Steps", value=20, minimum=1, maximum=10000, step=1
                )
                cfg = gr.Number(
                    label="CFG", value=8.0, minimum=0.0, maximum=100.0, step=0.1
                )
                denoise = gr.Number(
                    label="Denoise", value=1.0, minimum=0.0, maximum=1.0, step=0.01
                )
        with gr.Column(scale=1.8):
            gallery = gr.Gallery(
                columns=[2], rows=[2], object_fit="contain", height="unset"
            )

    inputs = [
        prompt,
        negative_prompt,
        input_image,
        cond_mode,
        seed,
        sampler_name,
        scheduler,
        steps,
        cfg,
        denoise,
    ]
    outputs = [gallery]
    gr.Examples(
        fn=predict_examples,
        examples=examples,
        inputs=[prompt, negative_prompt],
        outputs=outputs,
        cache_examples=False,
    )
    button.click(fn=predict, inputs=inputs, outputs=outputs)

if __name__ == "__main__":
    blocks.launch()
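
# Minimal usage sketch (not part of the app): calling `predict` directly without
# the Gradio UI, mirroring the arguments used by `predict_examples` above. It
# assumes the checkpoint download and symlink succeeded and that ComfyUI is
# importable; the prompt strings and output filename are illustrative only.
# With no input image, the first returned image is the RGBA foreground cutout.
#
#   images = predict(
#       "a glass bottle, high quality",  # prompt (example value)
#       "bad, ugly",                     # negative_prompt (example value)
#       None,                            # input_image
#       None,                            # cond_mode (ignored without an input image)
#       0,                               # seed
#       "euler",                         # sampler_name
#       "normal",                        # scheduler
#       20,                              # steps
#       8.0,                             # cfg
#       1.0,                             # denoise
#   )
#   images[0].save("foreground_rgba.png")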