radames (HF staff) committed
Commit bb5540a
1 Parent(s): 4bf9d3c

bg removal

Files changed (3)
  1. app.py +136 -172
  2. briarmbg.py +460 -0
  3. utils.py +114 -0
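
At a glance, this commit adds an optional background-removal step to the Space: the input image is letterboxed to 1024x1024, run through briaai/RMBG-1.4 (the BriaRMBG network vendored in briarmbg.py), and the predicted matte is used to paste the subject onto a transparent canvas before it is VAE-encoded for LayerDiffuse. A minimal standalone sketch of that path, reusing the helpers added in utils.py (the remove_background wrapper itself is illustrative, not part of the commit):

import numpy as np
import torch
from PIL import Image

from briarmbg import BriaRMBG
from utils import pad_image, preprocess_image, postprocess_image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rmbg_model = BriaRMBG.from_pretrained("briaai/RMBG-1.4").to(device)

def remove_background(input_image: Image.Image) -> Image.Image:
    # Letterbox to the 1024x1024 working resolution used by app.py.
    input_image = pad_image(input_image.convert("RGB")).resize((1024, 1024))
    # Normalize and run RMBG-1.4; the first side output is the finest mask.
    image = preprocess_image(np.array(input_image), [1024, 1024]).to(device)
    with torch.no_grad():
        result = rmbg_model(image)
    mask = Image.fromarray(postprocess_image(result[0][0], input_image.size))
    # Keep only the foreground: paste through the mask onto a transparent canvas.
    no_bg = Image.new("RGBA", mask.size, (0, 0, 0, 0))
    no_bg.paste(input_image, mask=mask)
    return no_bg
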
app.py CHANGED
@@ -1,11 +1,18 @@
1
  import sys
2
  import os
3
  import torch
4
- from pathlib import Path
5
- from huggingface_hub import hf_hub_download
6
- from PIL import Image, ImageSequence, ImageOps
7
  from typing import List
8
  import numpy as np
9
 
10
  sys.path.append(os.path.dirname("./ComfyUI/"))
11
  from ComfyUI.nodes import (
@@ -27,20 +34,11 @@ from ComfyUI.custom_nodes.layerdiffuse.layered_diffusion import (
27
  LayeredDiffusionCond,
28
  )
29
  import gradio as gr
 
30
 
 
31
 
32
- MODEL_PATH = hf_hub_download(
33
- repo_id="lllyasviel/fav_models",
34
- subfolder="fav",
35
- filename="juggernautXL_v8Rundiffusion.safetensors",
36
- )
37
- try:
38
- os.symlink(
39
- MODEL_PATH,
40
- Path("./ComfyUI/models/checkpoints/juggernautXL_v8Rundiffusion.safetensors"),
41
- )
42
- except FileExistsError:
43
- pass
44
 
45
  with torch.inference_mode():
46
  ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint
@@ -58,73 +56,14 @@ ld_decode = LayeredDiffusionDecode().decode
58
  mask_to_image = MaskToImage().mask_to_image
59
  invert_mask = InvertMask().invert
60
  join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha
61
-
62
-
63
- def tensor_to_pil(images: torch.Tensor | List[torch.Tensor]) -> List[Image.Image]:
64
- if not isinstance(images, list):
65
- images = [images]
66
- imgs = []
67
- for image in images:
68
- i = 255.0 * image.cpu().numpy()
69
- img = Image.fromarray(np.clip(np.squeeze(i), 0, 255).astype(np.uint8))
70
- imgs.append(img)
71
- return imgs
72
-
73
-
74
- def pad_image(input_image):
75
- pad_w, pad_h = (
76
- np.max(((2, 2), np.ceil(np.array(input_image.size) / 64).astype(int)), axis=0)
77
- * 64
78
- - input_image.size
79
- )
80
- im_padded = Image.fromarray(
81
- np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode="edge")
82
- )
83
- w, h = im_padded.size
84
- if w == h:
85
- return im_padded
86
- elif w > h:
87
- new_image = Image.new(im_padded.mode, (w, w), (0, 0, 0))
88
- new_image.paste(im_padded, (0, (w - h) // 2))
89
- return new_image
90
- else:
91
- new_image = Image.new(im_padded.mode, (h, h), (0, 0, 0))
92
- new_image.paste(im_padded, ((h - w) // 2, 0))
93
- return new_image
94
-
95
-
96
- def pil_to_tensor(image: Image.Image) -> tuple[torch.Tensor, torch.Tensor]:
97
- output_images = []
98
- output_masks = []
99
- for i in ImageSequence.Iterator(image):
100
- i = ImageOps.exif_transpose(i)
101
- if i.mode == "I":
102
- i = i.point(lambda i: i * (1 / 255))
103
- image = i.convert("RGB")
104
- image = np.array(image).astype(np.float32) / 255.0
105
- image = torch.from_numpy(image)[None,]
106
- if "A" in i.getbands():
107
- mask = np.array(i.getchannel("A")).astype(np.float32) / 255.0
108
- mask = 1.0 - torch.from_numpy(mask)
109
- else:
110
- mask = torch.zeros((64, 64), dtype=torch.float32, device="cpu")
111
- output_images.append(image)
112
- output_masks.append(mask.unsqueeze(0))
113
-
114
- if len(output_images) > 1:
115
- output_image = torch.cat(output_images, dim=0)
116
- output_mask = torch.cat(output_masks, dim=0)
117
- else:
118
- output_image = output_images[0]
119
- output_mask = output_masks[0]
120
-
121
- return (output_image, output_mask)
122
 
123
 
124
  def predict(
125
  prompt: str,
126
  negative_prompt: str,
127
  input_image: Image.Image | None,
 
128
  cond_mode: str,
129
  seed: int,
130
  sampler_name: str,
@@ -133,95 +72,115 @@ def predict(
133
  cfg: float,
134
  denoise: float,
135
  ):
136
- with torch.inference_mode():
137
- cliptextencode_prompt = cliptextencode(
138
- text=prompt,
139
- clip=ckpt[1],
140
- )
141
- cliptextencode_negative_prompt = cliptextencode(
142
- text=negative_prompt,
143
- clip=ckpt[1],
144
- )
145
- emptylatentimage_sample = emptylatentimage_generate(
146
- width=1024, height=1024, batch_size=1
147
- )
148
-
149
- if input_image is not None:
150
- img_tensor = pil_to_tensor(pad_image(input_image).resize((1024, 1024)))
151
- img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2])
152
- layereddiffusionapply_sample = ld_cond_apply_layered_diffusion(
153
- config=cond_mode,
154
- weight=1,
155
- model=ckpt[0],
156
- cond=cliptextencode_prompt[0],
157
- uncond=cliptextencode_negative_prompt[0],
158
- latent=img_latent[0],
159
  )
160
- ksampler = ksampler_sample(
161
- steps=steps,
162
- cfg=cfg,
163
- sampler_name=sampler_name,
164
- scheduler=scheduler,
165
- seed=seed,
166
- model=layereddiffusionapply_sample[0],
167
- positive=layereddiffusionapply_sample[1],
168
- negative=layereddiffusionapply_sample[2],
169
- latent_image=emptylatentimage_sample[0],
170
- denoise=denoise,
171
- )
172
-
173
- vaedecode_sample = vae_decode(
174
- samples=ksampler[0],
175
- vae=ckpt[2],
176
  )
177
- layereddiffusiondecode_sample = ld_decode(
178
- sd_version="SDXL",
179
- sub_batch_size=16,
180
- samples=ksampler[0],
181
- images=vaedecode_sample[0],
182
  )
183
 
184
- rgb_img = tensor_to_pil(vaedecode_sample[0])
185
- return flatten([rgb_img])
186
 
187
- else:
188
- layereddiffusionapply_sample = ld_fg_apply_layered_diffusion(
189
- config="SDXL, Conv Injection", weight=1, model=ckpt[0]
190
- )
191
- ksampler = ksampler_sample(
192
- steps=steps,
193
- cfg=cfg,
194
- sampler_name=sampler_name,
195
- scheduler=scheduler,
196
- seed=seed,
197
- model=layereddiffusionapply_sample[0],
198
- positive=cliptextencode_prompt[0],
199
- negative=cliptextencode_negative_prompt[0],
200
- latent_image=emptylatentimage_sample[0],
201
- denoise=denoise,
202
- )
203
 
204
- vaedecode_sample = vae_decode(
205
- samples=ksampler[0],
206
- vae=ckpt[2],
207
- )
208
- layereddiffusiondecode_sample = ld_decode(
209
- sd_version="SDXL",
210
- sub_batch_size=16,
211
- samples=ksampler[0],
212
- images=vaedecode_sample[0],
213
- )
214
- mask = mask_to_image(mask=layereddiffusiondecode_sample[1])
215
- ld_image = tensor_to_pil(layereddiffusiondecode_sample[0][0])
216
- inverted_mask = invert_mask(mask=layereddiffusiondecode_sample[1])
217
- rgba_img = join_image_with_alpha(
218
- image=layereddiffusiondecode_sample[0], alpha=inverted_mask[0]
219
- )
220
- rgba_img = tensor_to_pil(rgba_img[0])
221
- mask = tensor_to_pil(mask[0])
222
- rgb_img = tensor_to_pil(vaedecode_sample[0])
223
 
224
- return flatten([rgba_img, mask, rgb_img, ld_image])
225
 
226
 
227
  examples = [["An old men sit on a chair looking at the sky"]]
@@ -233,18 +192,18 @@ def flatten(l: List[List[any]]) -> List[any]:
233
 
234
  def predict_examples(prompt, negative_prompt):
235
  return predict(
236
- prompt, negative_prompt, None, None, 0, "euler", "normal", 20, 8.0, 1.0
237
  )
238
 
239
 
240
  css = """
241
  .gradio-container{
242
- max-width: 60rem;
243
  }
244
  """
245
  with gr.Blocks(css=css) as blocks:
246
  gr.Markdown("""# LayerDiffuse (unofficial)
247
-
248
  """)
249
 
250
  with gr.Row():
@@ -253,12 +212,18 @@ with gr.Blocks(css=css) as blocks:
253
  negative_prompt = gr.Text(label="Negative Prompt")
254
  button = gr.Button("Generate")
255
  with gr.Accordion(open=False, label="Input Images (Optional)"):
256
- cond_mode = gr.Radio(
257
- value="SDXL, Foreground",
258
- choices=["SDXL, Foreground", "SDXL, Background"],
259
- info="Whether to use input image as foreground or background",
260
- )
261
- input_image = gr.Image(label="Input Image", type="pil")
262
  with gr.Accordion(open=False, label="Advanced Options"):
263
  seed = gr.Slider(
264
  label="Seed",
@@ -278,8 +243,8 @@ with gr.Blocks(css=css) as blocks:
278
  label="Scheduler",
279
  value=samplers.KSampler.SCHEDULERS[0],
280
  )
281
- steps = gr.Number(
282
- label="Steps", value=20, minimum=1, maximum=10000, step=1
283
  )
284
  cfg = gr.Number(
285
  label="CFG", value=8.0, minimum=0.0, maximum=100.0, step=0.1
@@ -289,14 +254,13 @@ with gr.Blocks(css=css) as blocks:
289
  )
290
 
291
  with gr.Column(scale=1.8):
292
- gallery = gr.Gallery(
293
- columns=[2], rows=[2], object_fit="contain", height="unset"
294
- )
295
 
296
  inputs = [
297
  prompt,
298
  negative_prompt,
299
  input_image,
 
300
  cond_mode,
301
  seed,
302
  sampler_name,
 
1
  import sys
2
  import os
3
  import torch
4
+
5
+ from PIL import Image
 
6
  from typing import List
7
  import numpy as np
8
+ from utils import (
9
+ tensor_to_pil,
10
+ pil_to_tensor,
11
+ pad_image,
12
+ postprocess_image,
13
+ preprocess_image,
14
+ downloadModels,
15
+ )
16
 
17
  sys.path.append(os.path.dirname("./ComfyUI/"))
18
  from ComfyUI.nodes import (
 
34
  LayeredDiffusionCond,
35
  )
36
  import gradio as gr
37
+ from briarmbg import BriaRMBG
38
 
39
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
40
 
41
+ downloadModels()
42
 
43
  with torch.inference_mode():
44
  ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint
 
56
  mask_to_image = MaskToImage().mask_to_image
57
  invert_mask = InvertMask().invert
58
  join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha
59
+ rmbg_model = BriaRMBG.from_pretrained("briaai/RMBG-1.4").to(device)
60
 
61
 
62
  def predict(
63
  prompt: str,
64
  negative_prompt: str,
65
  input_image: Image.Image | None,
66
+ remove_bg: bool,
67
  cond_mode: str,
68
  seed: int,
69
  sampler_name: str,
 
72
  cfg: float,
73
  denoise: float,
74
  ):
75
+ try:
76
+ with torch.inference_mode():
77
+ cliptextencode_prompt = cliptextencode(
78
+ text=prompt,
79
+ clip=ckpt[1],
80
  )
81
+ cliptextencode_negative_prompt = cliptextencode(
82
+ text=negative_prompt,
83
+ clip=ckpt[1],
84
  )
85
+ emptylatentimage_sample = emptylatentimage_generate(
86
+ width=1024, height=1024, batch_size=1
87
  )
88
 
89
+ if input_image is not None:
90
+ input_image = pad_image(input_image).resize((1024, 1024))
91
+ if remove_bg:
92
+ orig_im_size = input_image.size
93
+ image = preprocess_image(np.array(input_image), [1024, 1024]).to(
94
+ device
95
+ )
96
+
97
+ result = rmbg_model(image)
98
+ # post process
99
+ result_mask_image = postprocess_image(result[0][0], orig_im_size)
100
+
101
+ # save result
102
+ pil_mask = Image.fromarray(result_mask_image)
103
+ no_bg_image = Image.new("RGBA", pil_mask.size, (0, 0, 0, 0))
104
+ no_bg_image.paste(input_image, mask=pil_mask)
105
+ input_image = no_bg_image
106
+
107
+ img_tensor = pil_to_tensor(input_image)
108
+ img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2])
109
+ layereddiffusionapply_sample = ld_cond_apply_layered_diffusion(
110
+ config=cond_mode,
111
+ weight=1,
112
+ model=ckpt[0],
113
+ cond=cliptextencode_prompt[0],
114
+ uncond=cliptextencode_negative_prompt[0],
115
+ latent=img_latent[0],
116
+ )
117
+ ksampler = ksampler_sample(
118
+ steps=steps,
119
+ cfg=cfg,
120
+ sampler_name=sampler_name,
121
+ scheduler=scheduler,
122
+ seed=seed,
123
+ model=layereddiffusionapply_sample[0],
124
+ positive=layereddiffusionapply_sample[1],
125
+ negative=layereddiffusionapply_sample[2],
126
+ latent_image=emptylatentimage_sample[0],
127
+ denoise=denoise,
128
+ )
129
 
130
+ vaedecode_sample = vae_decode(
131
+ samples=ksampler[0],
132
+ vae=ckpt[2],
133
+ )
134
+ layereddiffusiondecode_sample = ld_decode(
135
+ sd_version="SDXL",
136
+ sub_batch_size=16,
137
+ samples=ksampler[0],
138
+ images=vaedecode_sample[0],
139
+ )
140
 
141
+ rgb_img = tensor_to_pil(vaedecode_sample[0])
142
+ return flatten([rgb_img])
143
+ else:
144
+ layereddiffusionapply_sample = ld_fg_apply_layered_diffusion(
145
+ config="SDXL, Conv Injection", weight=1, model=ckpt[0]
146
+ )
147
+ ksampler = ksampler_sample(
148
+ steps=steps,
149
+ cfg=cfg,
150
+ sampler_name=sampler_name,
151
+ scheduler=scheduler,
152
+ seed=seed,
153
+ model=layereddiffusionapply_sample[0],
154
+ positive=cliptextencode_prompt[0],
155
+ negative=cliptextencode_negative_prompt[0],
156
+ latent_image=emptylatentimage_sample[0],
157
+ denoise=denoise,
158
+ )
 
159
 
160
+ vaedecode_sample = vae_decode(
161
+ samples=ksampler[0],
162
+ vae=ckpt[2],
163
+ )
164
+ layereddiffusiondecode_sample = ld_decode(
165
+ sd_version="SDXL",
166
+ sub_batch_size=16,
167
+ samples=ksampler[0],
168
+ images=vaedecode_sample[0],
169
+ )
170
+ mask = mask_to_image(mask=layereddiffusiondecode_sample[1])
171
+ ld_image = tensor_to_pil(layereddiffusiondecode_sample[0][0])
172
+ inverted_mask = invert_mask(mask=layereddiffusiondecode_sample[1])
173
+ rgba_img = join_image_with_alpha(
174
+ image=layereddiffusiondecode_sample[0], alpha=inverted_mask[0]
175
+ )
176
+ rgba_img = tensor_to_pil(rgba_img[0])
177
+ mask = tensor_to_pil(mask[0])
178
+ rgb_img = tensor_to_pil(vaedecode_sample[0])
179
+
180
+ return flatten([rgba_img, mask])
181
+ # return flatten([rgba_img, mask, rgb_img, ld_image])
182
+ except Exception as e:
183
+ raise gr.Error(e)
184
 
185
 
186
  examples = [["An old men sit on a chair looking at the sky"]]
 
192
 
193
  def predict_examples(prompt, negative_prompt):
194
  return predict(
195
+ prompt, negative_prompt, None, False, None, 0, "euler", "normal", 20, 8.0, 1.0
196
  )
197
 
198
 
199
  css = """
200
  .gradio-container{
201
+ max-width: 50rem;
202
  }
203
  """
204
  with gr.Blocks(css=css) as blocks:
205
  gr.Markdown("""# LayerDiffuse (unofficial)
206
+ Using ComfyUI building blocks with custom node by [huchenlei](https://github.com/huchenlei/ComfyUI-layerdiffuse)
207
  """)
208
 
209
  with gr.Row():
 
212
  negative_prompt = gr.Text(label="Negative Prompt")
213
  button = gr.Button("Generate")
214
  with gr.Accordion(open=False, label="Input Images (Optional)"):
215
+ with gr.Group():
216
+ cond_mode = gr.Radio(
217
+ value="SDXL, Foreground",
218
+ choices=["SDXL, Foreground", "SDXL, Background"],
219
+ info="Whether to use input image as foreground or background",
220
+ )
221
+ remove_bg = gr.Checkbox(
222
+ info="Remove background using BriaRMBG",
223
+ label="Remove Background",
224
+ value=False,
225
+ )
226
+ input_image = gr.Image(label="Input Image", type="pil")
227
  with gr.Accordion(open=False, label="Advanced Options"):
228
  seed = gr.Slider(
229
  label="Seed",
 
243
  label="Scheduler",
244
  value=samplers.KSampler.SCHEDULERS[0],
245
  )
246
+ steps = gr.Slider(
247
+ label="Steps", value=20, minimum=1, maximum=30, step=1
248
  )
249
  cfg = gr.Number(
250
  label="CFG", value=8.0, minimum=0.0, maximum=100.0, step=0.1
 
254
  )
255
 
256
  with gr.Column(scale=1.8):
257
+ gallery = gr.Gallery(columns=[2], object_fit="contain", height="unset")
258
 
259
  inputs = [
260
  prompt,
261
  negative_prompt,
262
  input_image,
263
+ remove_bg,
264
  cond_mode,
265
  seed,
266
  sampler_name,
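
For reference, predict() now takes a remove_bg flag between input_image and cond_mode. The example row exercises the text-to-image path; written as a direct call with the same values as the new predict_examples (the keyword form and the empty negative prompt are illustrative only):

predict(
    prompt="An old men sit on a chair looking at the sky",
    negative_prompt="",
    input_image=None,    # no conditioning image, so the transparent-foreground branch runs
    remove_bg=False,     # background removal only applies when an input image is given
    cond_mode=None,
    seed=0,
    sampler_name="euler",
    scheduler="normal",
    steps=20,
    cfg=8.0,
    denoise=1.0,
)
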
briarmbg.py ADDED
@@ -0,0 +1,460 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from huggingface_hub import PyTorchModelHubMixin
5
+
6
+
7
+ class REBNCONV(nn.Module):
8
+ def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
9
+ super(REBNCONV, self).__init__()
10
+
11
+ self.conv_s1 = nn.Conv2d(
12
+ in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate, stride=stride
13
+ )
14
+ self.bn_s1 = nn.BatchNorm2d(out_ch)
15
+ self.relu_s1 = nn.ReLU(inplace=True)
16
+
17
+ def forward(self, x):
18
+ hx = x
19
+ xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
20
+
21
+ return xout
22
+
23
+
24
+ ## upsample tensor 'src' to have the same spatial size with tensor 'tar'
25
+ def _upsample_like(src, tar):
26
+ src = F.interpolate(src, size=tar.shape[2:], mode="bilinear")
27
+
28
+ return src
29
+
30
+
31
+ ### RSU-7 ###
32
+ class RSU7(nn.Module):
33
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
34
+ super(RSU7, self).__init__()
35
+
36
+ self.in_ch = in_ch
37
+ self.mid_ch = mid_ch
38
+ self.out_ch = out_ch
39
+
40
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1) ## 1 -> 1/2
41
+
42
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
43
+ self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
44
+
45
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
46
+ self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
47
+
48
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
49
+ self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
50
+
51
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
52
+ self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
53
+
54
+ self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
55
+ self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
56
+
57
+ self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
58
+
59
+ self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
60
+
61
+ self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
62
+ self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
63
+ self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
64
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
65
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
66
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
67
+
68
+ def forward(self, x):
69
+ b, c, h, w = x.shape
70
+
71
+ hx = x
72
+ hxin = self.rebnconvin(hx)
73
+
74
+ hx1 = self.rebnconv1(hxin)
75
+ hx = self.pool1(hx1)
76
+
77
+ hx2 = self.rebnconv2(hx)
78
+ hx = self.pool2(hx2)
79
+
80
+ hx3 = self.rebnconv3(hx)
81
+ hx = self.pool3(hx3)
82
+
83
+ hx4 = self.rebnconv4(hx)
84
+ hx = self.pool4(hx4)
85
+
86
+ hx5 = self.rebnconv5(hx)
87
+ hx = self.pool5(hx5)
88
+
89
+ hx6 = self.rebnconv6(hx)
90
+
91
+ hx7 = self.rebnconv7(hx6)
92
+
93
+ hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
94
+ hx6dup = _upsample_like(hx6d, hx5)
95
+
96
+ hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
97
+ hx5dup = _upsample_like(hx5d, hx4)
98
+
99
+ hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
100
+ hx4dup = _upsample_like(hx4d, hx3)
101
+
102
+ hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
103
+ hx3dup = _upsample_like(hx3d, hx2)
104
+
105
+ hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
106
+ hx2dup = _upsample_like(hx2d, hx1)
107
+
108
+ hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
109
+
110
+ return hx1d + hxin
111
+
112
+
113
+ ### RSU-6 ###
114
+ class RSU6(nn.Module):
115
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
116
+ super(RSU6, self).__init__()
117
+
118
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
119
+
120
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
121
+ self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
122
+
123
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
124
+ self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
125
+
126
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
127
+ self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
128
+
129
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
130
+ self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
131
+
132
+ self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
133
+
134
+ self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
135
+
136
+ self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
137
+ self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
138
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
139
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
140
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
141
+
142
+ def forward(self, x):
143
+ hx = x
144
+
145
+ hxin = self.rebnconvin(hx)
146
+
147
+ hx1 = self.rebnconv1(hxin)
148
+ hx = self.pool1(hx1)
149
+
150
+ hx2 = self.rebnconv2(hx)
151
+ hx = self.pool2(hx2)
152
+
153
+ hx3 = self.rebnconv3(hx)
154
+ hx = self.pool3(hx3)
155
+
156
+ hx4 = self.rebnconv4(hx)
157
+ hx = self.pool4(hx4)
158
+
159
+ hx5 = self.rebnconv5(hx)
160
+
161
+ hx6 = self.rebnconv6(hx5)
162
+
163
+ hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
164
+ hx5dup = _upsample_like(hx5d, hx4)
165
+
166
+ hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
167
+ hx4dup = _upsample_like(hx4d, hx3)
168
+
169
+ hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
170
+ hx3dup = _upsample_like(hx3d, hx2)
171
+
172
+ hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
173
+ hx2dup = _upsample_like(hx2d, hx1)
174
+
175
+ hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
176
+
177
+ return hx1d + hxin
178
+
179
+
180
+ ### RSU-5 ###
181
+ class RSU5(nn.Module):
182
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
183
+ super(RSU5, self).__init__()
184
+
185
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
186
+
187
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
188
+ self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
189
+
190
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
191
+ self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
192
+
193
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
194
+ self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
195
+
196
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
197
+
198
+ self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
199
+
200
+ self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
201
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
202
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
203
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
204
+
205
+ def forward(self, x):
206
+ hx = x
207
+
208
+ hxin = self.rebnconvin(hx)
209
+
210
+ hx1 = self.rebnconv1(hxin)
211
+ hx = self.pool1(hx1)
212
+
213
+ hx2 = self.rebnconv2(hx)
214
+ hx = self.pool2(hx2)
215
+
216
+ hx3 = self.rebnconv3(hx)
217
+ hx = self.pool3(hx3)
218
+
219
+ hx4 = self.rebnconv4(hx)
220
+
221
+ hx5 = self.rebnconv5(hx4)
222
+
223
+ hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
224
+ hx4dup = _upsample_like(hx4d, hx3)
225
+
226
+ hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
227
+ hx3dup = _upsample_like(hx3d, hx2)
228
+
229
+ hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
230
+ hx2dup = _upsample_like(hx2d, hx1)
231
+
232
+ hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
233
+
234
+ return hx1d + hxin
235
+
236
+
237
+ ### RSU-4 ###
238
+ class RSU4(nn.Module):
239
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
240
+ super(RSU4, self).__init__()
241
+
242
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
243
+
244
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
245
+ self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
246
+
247
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
248
+ self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
249
+
250
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
251
+
252
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
253
+
254
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
255
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
256
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
257
+
258
+ def forward(self, x):
259
+ hx = x
260
+
261
+ hxin = self.rebnconvin(hx)
262
+
263
+ hx1 = self.rebnconv1(hxin)
264
+ hx = self.pool1(hx1)
265
+
266
+ hx2 = self.rebnconv2(hx)
267
+ hx = self.pool2(hx2)
268
+
269
+ hx3 = self.rebnconv3(hx)
270
+
271
+ hx4 = self.rebnconv4(hx3)
272
+
273
+ hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
274
+ hx3dup = _upsample_like(hx3d, hx2)
275
+
276
+ hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
277
+ hx2dup = _upsample_like(hx2d, hx1)
278
+
279
+ hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
280
+
281
+ return hx1d + hxin
282
+
283
+
284
+ ### RSU-4F ###
285
+ class RSU4F(nn.Module):
286
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
287
+ super(RSU4F, self).__init__()
288
+
289
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
290
+
291
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
292
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
293
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
294
+
295
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
296
+
297
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
298
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
299
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
300
+
301
+ def forward(self, x):
302
+ hx = x
303
+
304
+ hxin = self.rebnconvin(hx)
305
+
306
+ hx1 = self.rebnconv1(hxin)
307
+ hx2 = self.rebnconv2(hx1)
308
+ hx3 = self.rebnconv3(hx2)
309
+
310
+ hx4 = self.rebnconv4(hx3)
311
+
312
+ hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
313
+ hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
314
+ hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
315
+
316
+ return hx1d + hxin
317
+
318
+
319
+ class myrebnconv(nn.Module):
320
+ def __init__(
321
+ self,
322
+ in_ch=3,
323
+ out_ch=1,
324
+ kernel_size=3,
325
+ stride=1,
326
+ padding=1,
327
+ dilation=1,
328
+ groups=1,
329
+ ):
330
+ super(myrebnconv, self).__init__()
331
+
332
+ self.conv = nn.Conv2d(
333
+ in_ch,
334
+ out_ch,
335
+ kernel_size=kernel_size,
336
+ stride=stride,
337
+ padding=padding,
338
+ dilation=dilation,
339
+ groups=groups,
340
+ )
341
+ self.bn = nn.BatchNorm2d(out_ch)
342
+ self.rl = nn.ReLU(inplace=True)
343
+
344
+ def forward(self, x):
345
+ return self.rl(self.bn(self.conv(x)))
346
+
347
+
348
+ class BriaRMBG(nn.Module, PyTorchModelHubMixin):
349
+ def __init__(self, config: dict = {"in_ch": 3, "out_ch": 1}):
350
+ super(BriaRMBG, self).__init__()
351
+ in_ch = config["in_ch"]
352
+ out_ch = config["out_ch"]
353
+ self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
354
+ self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
355
+
356
+ self.stage1 = RSU7(64, 32, 64)
357
+ self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
358
+
359
+ self.stage2 = RSU6(64, 32, 128)
360
+ self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
361
+
362
+ self.stage3 = RSU5(128, 64, 256)
363
+ self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
364
+
365
+ self.stage4 = RSU4(256, 128, 512)
366
+ self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
367
+
368
+ self.stage5 = RSU4F(512, 256, 512)
369
+ self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
370
+
371
+ self.stage6 = RSU4F(512, 256, 512)
372
+
373
+ # decoder
374
+ self.stage5d = RSU4F(1024, 256, 512)
375
+ self.stage4d = RSU4(1024, 128, 256)
376
+ self.stage3d = RSU5(512, 64, 128)
377
+ self.stage2d = RSU6(256, 32, 64)
378
+ self.stage1d = RSU7(128, 16, 64)
379
+
380
+ self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
381
+ self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
382
+ self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
383
+ self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
384
+ self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
385
+ self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
386
+
387
+ # self.outconv = nn.Conv2d(6*out_ch,out_ch,1)
388
+
389
+ def forward(self, x):
390
+ hx = x
391
+
392
+ hxin = self.conv_in(hx)
393
+ # hx = self.pool_in(hxin)
394
+
395
+ # stage 1
396
+ hx1 = self.stage1(hxin)
397
+ hx = self.pool12(hx1)
398
+
399
+ # stage 2
400
+ hx2 = self.stage2(hx)
401
+ hx = self.pool23(hx2)
402
+
403
+ # stage 3
404
+ hx3 = self.stage3(hx)
405
+ hx = self.pool34(hx3)
406
+
407
+ # stage 4
408
+ hx4 = self.stage4(hx)
409
+ hx = self.pool45(hx4)
410
+
411
+ # stage 5
412
+ hx5 = self.stage5(hx)
413
+ hx = self.pool56(hx5)
414
+
415
+ # stage 6
416
+ hx6 = self.stage6(hx)
417
+ hx6up = _upsample_like(hx6, hx5)
418
+
419
+ # -------------------- decoder --------------------
420
+ hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
421
+ hx5dup = _upsample_like(hx5d, hx4)
422
+
423
+ hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
424
+ hx4dup = _upsample_like(hx4d, hx3)
425
+
426
+ hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
427
+ hx3dup = _upsample_like(hx3d, hx2)
428
+
429
+ hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
430
+ hx2dup = _upsample_like(hx2d, hx1)
431
+
432
+ hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
433
+
434
+ # side output
435
+ d1 = self.side1(hx1d)
436
+ d1 = _upsample_like(d1, x)
437
+
438
+ d2 = self.side2(hx2d)
439
+ d2 = _upsample_like(d2, x)
440
+
441
+ d3 = self.side3(hx3d)
442
+ d3 = _upsample_like(d3, x)
443
+
444
+ d4 = self.side4(hx4d)
445
+ d4 = _upsample_like(d4, x)
446
+
447
+ d5 = self.side5(hx5d)
448
+ d5 = _upsample_like(d5, x)
449
+
450
+ d6 = self.side6(hx6)
451
+ d6 = _upsample_like(d6, x)
452
+
453
+ return [
454
+ F.sigmoid(d1),
455
+ F.sigmoid(d2),
456
+ F.sigmoid(d3),
457
+ F.sigmoid(d4),
458
+ F.sigmoid(d5),
459
+ F.sigmoid(d6),
460
+ ], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6]
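
briarmbg.py is a self-contained copy of the RMBG-1.4 architecture, a U²-Net-style encoder/decoder built from the RSU blocks above. forward() returns two lists: six sigmoid side outputs from the finest (d1, upsampled back to the input resolution and used as the matte in app.py) to the coarsest (d6), plus the corresponding decoder feature maps. A quick shape-check sketch with randomly initialized weights (the app instead loads the pretrained weights via BriaRMBG.from_pretrained("briaai/RMBG-1.4")):

import torch
from briarmbg import BriaRMBG

model = BriaRMBG().eval()          # random weights, enough for a shape check
x = torch.rand(1, 3, 1024, 1024)   # normalized RGB batch at the app's working resolution
with torch.no_grad():
    side_outputs, features = model(x)

print(len(side_outputs), side_outputs[0].shape)  # 6 torch.Size([1, 1, 1024, 1024])
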
utils.py ADDED
@@ -0,0 +1,114 @@
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from torchvision.transforms.functional import normalize
6
+ from PIL import Image, ImageOps, ImageSequence
7
+ from typing import List
8
+ from pathlib import Path
9
+ from huggingface_hub import snapshot_download, hf_hub_download
10
+
11
+
12
+ def tensor_to_pil(images: torch.Tensor | List[torch.Tensor]) -> List[Image.Image]:
13
+ if not isinstance(images, list):
14
+ images = [images]
15
+ imgs = []
16
+ for image in images:
17
+ i = 255.0 * image.cpu().numpy()
18
+ img = Image.fromarray(np.clip(np.squeeze(i), 0, 255).astype(np.uint8))
19
+ imgs.append(img)
20
+ return imgs
21
+
22
+
23
+ def pad_image(input_image):
24
+ pad_w, pad_h = (
25
+ np.max(((2, 2), np.ceil(np.array(input_image.size) / 64).astype(int)), axis=0)
26
+ * 64
27
+ - input_image.size
28
+ )
29
+ im_padded = Image.fromarray(
30
+ np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode="edge")
31
+ )
32
+ w, h = im_padded.size
33
+ if w == h:
34
+ return im_padded
35
+ elif w > h:
36
+ new_image = Image.new(im_padded.mode, (w, w), (0, 0, 0))
37
+ new_image.paste(im_padded, (0, (w - h) // 2))
38
+ return new_image
39
+ else:
40
+ new_image = Image.new(im_padded.mode, (h, h), (0, 0, 0))
41
+ new_image.paste(im_padded, ((h - w) // 2, 0))
42
+ return new_image
43
+
44
+
45
+ def pil_to_tensor(image: Image.Image) -> tuple[torch.Tensor, torch.Tensor]:
46
+ output_images = []
47
+ output_masks = []
48
+ for i in ImageSequence.Iterator(image):
49
+ i = ImageOps.exif_transpose(i)
50
+ if i.mode == "I":
51
+ i = i.point(lambda i: i * (1 / 255))
52
+ image = i.convert("RGB")
53
+ image = np.array(image).astype(np.float32) / 255.0
54
+ image = torch.from_numpy(image)[None,]
55
+ if "A" in i.getbands():
56
+ mask = np.array(i.getchannel("A")).astype(np.float32) / 255.0
57
+ mask = 1.0 - torch.from_numpy(mask)
58
+ else:
59
+ mask = torch.zeros((64, 64), dtype=torch.float32, device="cpu")
60
+ output_images.append(image)
61
+ output_masks.append(mask.unsqueeze(0))
62
+
63
+ if len(output_images) > 1:
64
+ output_image = torch.cat(output_images, dim=0)
65
+ output_mask = torch.cat(output_masks, dim=0)
66
+ else:
67
+ output_image = output_images[0]
68
+ output_mask = output_masks[0]
69
+
70
+ return (output_image, output_mask)
71
+
72
+
73
+ def preprocess_image(im: np.ndarray, model_input_size: list) -> torch.Tensor:
74
+ if len(im.shape) < 3:
75
+ im = im[:, :, np.newaxis]
76
+ # orig_im_size=im.shape[0:2]
77
+ im_tensor = torch.tensor(im, dtype=torch.float32).permute(2, 0, 1)
78
+ im_tensor = F.interpolate(
79
+ torch.unsqueeze(im_tensor, 0), size=model_input_size, mode="bilinear"
80
+ ).type(torch.uint8)
81
+ image = torch.divide(im_tensor, 255.0)
82
+ image = normalize(image, [0.5, 0.5, 0.5], [1.0, 1.0, 1.0])
83
+ return image
84
+
85
+
86
+ def postprocess_image(result: torch.Tensor, im_size: list) -> np.ndarray:
87
+ result = torch.squeeze(F.interpolate(result, size=im_size, mode="bilinear"), 0)
88
+ ma = torch.max(result)
89
+ mi = torch.min(result)
90
+ result = (result - mi) / (ma - mi)
91
+ im_array = (result * 255).permute(1, 2, 0).cpu().data.numpy().astype(np.uint8)
92
+ im_array = np.squeeze(im_array)
93
+ return im_array
94
+
95
+
96
+ def downloadModels():
97
+ MODEL_PATH = hf_hub_download(
98
+ repo_id="lllyasviel/fav_models",
99
+ subfolder="fav",
100
+ filename="juggernautXL_v8Rundiffusion.safetensors",
101
+ )
102
+ LAYERS_PATH = snapshot_download(
103
+ repo_id="LayerDiffusion/layerdiffusion-v1", allow_patterns="*.safetensors"
104
+ )
105
+ for file in Path(LAYERS_PATH).glob("*.safetensors"):
106
+ target_path = Path(f"./ComfyUI/models/layer_model/{file.name}")
107
+ if not target_path.exists():
108
+ os.symlink(file, target_path)
109
+
110
+ model_target_path = Path(
111
+ "./ComfyUI/models/checkpoints/juggernautXL_v8Rundiffusion.safetensors"
112
+ )
113
+ if not model_target_path.exists():
114
+ os.symlink(MODEL_PATH, model_target_path)
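
Two details in utils.py are easy to miss: pad_image edge-pads each dimension up to the next multiple of 64 (at least 128) and then letterboxes the result onto a black square canvas, while downloadModels only creates the ComfyUI symlinks when the target paths do not already exist, so repeated startups are harmless. A small worked example of the padding behavior (the 600x400 size is arbitrary):

from PIL import Image
from utils import pad_image

im = Image.new("RGB", (600, 400))
# 600x400 -> edge-padded to 640x448 (next multiples of 64) -> centered on a 640x640 black square
print(pad_image(im).size)  # (640, 640)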