import sys
import os
import torch
from PIL import Image, ImageSequence, ImageOps
from typing import Any, List
import numpy as np
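# Make the vendored ComfyUI checkout importable: appending ./ComfyUI to sys.path
# lets ComfyUI's own absolute imports (e.g. `comfy`) resolve when the node
# modules are loaded below.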
sys.path.append(os.path.dirname("./ComfyUI/"))
from ComfyUI.nodes import (
CheckpointLoaderSimple,
VAEDecode,
VAEEncode,
KSampler,
EmptyLatentImage,
CLIPTextEncode,
)
from ComfyUI.comfy_extras.nodes_compositing import JoinImageWithAlpha
from ComfyUI.comfy_extras.nodes_mask import InvertMask, MaskToImage
from ComfyUI.comfy import samplers
from ComfyUI.custom_nodes.layerdiffuse.layered_diffusion import (
LayeredDiffusionFG,
LayeredDiffusionDecode,
LayeredDiffusionCond,
)
import gradio as gr
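# Instantiate the ComfyUI node callables once at import time. Everything below
# runs under torch.inference_mode(), and the SDXL checkpoint is loaded a single
# time; `ckpt` unpacks as (model, clip, vae) and is indexed as ckpt[0..2] below.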
with torch.inference_mode():
ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint
ckpt = ckpt_load_checkpoint(ckpt_name="juggernautXL_v8Rundiffusion.safetensors")
cliptextencode = CLIPTextEncode().encode
emptylatentimage_generate = EmptyLatentImage().generate
ksampler_sample = KSampler().sample
vae_decode = VAEDecode().decode
vae_encode = VAEEncode().encode
ld_fg_apply_layered_diffusion = LayeredDiffusionFG().apply_layered_diffusion
ld_cond_apply_layered_diffusion = LayeredDiffusionCond().apply_layered_diffusion
ld_decode = LayeredDiffusionDecode().decode
mask_to_image = MaskToImage().mask_to_image
invert_mask = InvertMask().invert
join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha
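# Convert ComfyUI image tensors (float values in [0, 1], optionally batched)
# into a list of PIL images.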
def tensor_to_pil(images: torch.Tensor | List[torch.Tensor]) -> List[Image.Image]:
if not isinstance(images, list):
images = [images]
imgs = []
for image in images:
i = 255.0 * image.cpu().numpy()
img = Image.fromarray(np.clip(np.squeeze(i), 0, 255).astype(np.uint8))
imgs.append(img)
return imgs
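# Edge-pad an image up to the next multiple of 64 on each side, then paste it
# onto a black square canvas so width and height match before it is resized
# and encoded for SDXL.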
def pad_image(input_image):
pad_w, pad_h = (
np.max(((2, 2), np.ceil(np.array(input_image.size) / 64).astype(int)), axis=0)
* 64
- input_image.size
)
im_padded = Image.fromarray(
np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode="edge")
)
w, h = im_padded.size
if w == h:
return im_padded
elif w > h:
new_image = Image.new(im_padded.mode, (w, w), (0, 0, 0))
new_image.paste(im_padded, (0, (w - h) // 2))
return new_image
else:
new_image = Image.new(im_padded.mode, (h, h), (0, 0, 0))
new_image.paste(im_padded, ((h - w) // 2, 0))
return new_image
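# PIL -> ComfyUI tensors, following ComfyUI's LoadImage convention: EXIF-rotate
# each frame, normalize RGB to float32 in [0, 1] with a leading batch dimension,
# and build an inverted-alpha mask (a zero 64x64 mask when there is no alpha).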
def pil_to_tensor(image: Image.Image) -> tuple[torch.Tensor, torch.Tensor]:
output_images = []
output_masks = []
for i in ImageSequence.Iterator(image):
i = ImageOps.exif_transpose(i)
if i.mode == "I":
i = i.point(lambda i: i * (1 / 255))
image = i.convert("RGB")
image = np.array(image).astype(np.float32) / 255.0
image = torch.from_numpy(image)[None,]
if "A" in i.getbands():
mask = np.array(i.getchannel("A")).astype(np.float32) / 255.0
mask = 1.0 - torch.from_numpy(mask)
else:
mask = torch.zeros((64, 64), dtype=torch.float32, device="cpu")
output_images.append(image)
output_masks.append(mask.unsqueeze(0))
if len(output_images) > 1:
output_image = torch.cat(output_images, dim=0)
output_mask = torch.cat(output_masks, dim=0)
else:
output_image = output_images[0]
output_mask = output_masks[0]
return (output_image, output_mask)
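# Single inference entry point for both workflows. Without an input image, the
# foreground LayerDiffuse patch generates a transparent subject from the prompt
# and returns [RGBA image, alpha mask, raw RGB, layer-decoded image]; with an
# input image, the conditional patch uses its latent as foreground/background
# context (cond_mode) and only the blended RGB render is returned.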
def predict(
prompt: str,
negative_prompt: str,
input_image: Image.Image | None,
cond_mode: str,
seed: int,
sampler_name: str,
scheduler: str,
steps: int,
cfg: float,
denoise: float,
):
with torch.inference_mode():
cliptextencode_prompt = cliptextencode(
text=prompt,
clip=ckpt[1],
)
cliptextencode_negative_prompt = cliptextencode(
text=negative_prompt,
clip=ckpt[1],
)
emptylatentimage_sample = emptylatentimage_generate(
width=1024, height=1024, batch_size=1
)
if input_image is not None:
img_tensor = pil_to_tensor(pad_image(input_image).resize((1024, 1024)))
img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2])
layereddiffusionapply_sample = ld_cond_apply_layered_diffusion(
config=cond_mode,
weight=1,
model=ckpt[0],
cond=cliptextencode_prompt[0],
uncond=cliptextencode_negative_prompt[0],
latent=img_latent[0],
)
ksampler = ksampler_sample(
steps=steps,
cfg=cfg,
sampler_name=sampler_name,
scheduler=scheduler,
seed=seed,
model=layereddiffusionapply_sample[0],
positive=layereddiffusionapply_sample[1],
negative=layereddiffusionapply_sample[2],
latent_image=emptylatentimage_sample[0],
denoise=denoise,
)
vaedecode_sample = vae_decode(
samples=ksampler[0],
vae=ckpt[2],
)
layereddiffusiondecode_sample = ld_decode(
sd_version="SDXL",
sub_batch_size=16,
samples=ksampler[0],
images=vaedecode_sample[0],
)
rgb_img = tensor_to_pil(vaedecode_sample[0])
return flatten([rgb_img])
else:
layereddiffusionapply_sample = ld_fg_apply_layered_diffusion(
config="SDXL, Conv Injection", weight=1, model=ckpt[0]
)
ksampler = ksampler_sample(
steps=steps,
cfg=cfg,
sampler_name=sampler_name,
scheduler=scheduler,
seed=seed,
model=layereddiffusionapply_sample[0],
positive=cliptextencode_prompt[0],
negative=cliptextencode_negative_prompt[0],
latent_image=emptylatentimage_sample[0],
denoise=denoise,
)
vaedecode_sample = vae_decode(
samples=ksampler[0],
vae=ckpt[2],
)
layereddiffusiondecode_sample = ld_decode(
sd_version="SDXL",
sub_batch_size=16,
samples=ksampler[0],
images=vaedecode_sample[0],
)
mask = mask_to_image(mask=layereddiffusiondecode_sample[1])
ld_image = tensor_to_pil(layereddiffusiondecode_sample[0][0])
inverted_mask = invert_mask(mask=layereddiffusiondecode_sample[1])
rgba_img = join_image_with_alpha(
image=layereddiffusiondecode_sample[0], alpha=inverted_mask[0]
)
rgba_img = tensor_to_pil(rgba_img[0])
mask = tensor_to_pil(mask[0])
rgb_img = tensor_to_pil(vaedecode_sample[0])
return flatten([rgba_img, mask, rgb_img, ld_image])
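# Example prompt for the gr.Examples widget and small helpers shared by the UI
# callbacks.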
examples = [["An old man sitting on a chair looking at the sky"]]
def flatten(l: List[List[Any]]) -> List[Any]:
return [item for sublist in l for item in sublist]
def predict_examples(prompt, negative_prompt):
return predict(
prompt, negative_prompt, None, None, 0, "euler", "normal", 20, 8.0, 1.0
)
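# Constrain the overall width of the Gradio container.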
css = """
.gradio-container{
max-width: 60rem;
}
"""
with gr.Blocks(css=css) as blocks:
gr.Markdown("""# LayerDiffuse (unofficial)
""")
with gr.Row():
with gr.Column():
prompt = gr.Text(label="Prompt")
negative_prompt = gr.Text(label="Negative Prompt")
button = gr.Button("Generate")
with gr.Accordion(open=False, label="Input Images (Optional)"):
cond_mode = gr.Radio(
value="SDXL, Foreground",
choices=["SDXL, Foreground", "SDXL, Background"],
info="Whether to use input image as foreground or background",
)
input_image = gr.Image(label="Input Image", type="pil")
with gr.Accordion(open=False, label="Advanced Options"):
seed = gr.Slider(
label="Seed",
value=0,
minimum=-1,
maximum=0xFFFFFFFFFFFFFFFF,
step=1,
randomize=True,
)
sampler_name = gr.Dropdown(
choices=samplers.KSampler.SAMPLERS,
label="Sampler Name",
value=samplers.KSampler.SAMPLERS[0],
)
scheduler = gr.Dropdown(
choices=samplers.KSampler.SCHEDULERS,
label="Scheduler",
value=samplers.KSampler.SCHEDULERS[0],
)
steps = gr.Number(
label="Steps", value=20, minimum=1, maximum=10000, step=1
)
cfg = gr.Number(
label="CFG", value=8.0, minimum=0.0, maximum=100.0, step=0.1
)
denoise = gr.Number(
label="Denoise", value=1.0, minimum=0.0, maximum=1.0, step=0.01
)
with gr.Column(scale=1.8):
gallery = gr.Gallery(
columns=[2], rows=[2], object_fit="contain", height="unset"
)
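    # The full control set feeds predict(); the examples row reuses only the
    # prompt fields through predict_examples().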
inputs = [
prompt,
negative_prompt,
input_image,
cond_mode,
seed,
sampler_name,
scheduler,
steps,
cfg,
denoise,
]
outputs = [gallery]
gr.Examples(
fn=predict_examples,
examples=examples,
inputs=[prompt, negative_prompt],
outputs=outputs,
cache_examples=False,
)
button.click(fn=predict, inputs=inputs, outputs=outputs)
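# Start the Gradio server when this file is executed directly (e.g. as the
# Space's app.py; the filename is an assumption).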
if __name__ == "__main__":
blocks.launch()