Towsif7 committed
Commit 59e40e1
1 Parent(s): ccfae17

firrst commit

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. app.py +36 -0
  2. carvekit/__init__.py +1 -0
  3. carvekit/__main__.py +149 -0
  4. carvekit/__pycache__/__init__.cpython-38.pyc +0 -0
  5. carvekit/api/__init__.py +0 -0
  6. carvekit/api/__pycache__/__init__.cpython-38.pyc +0 -0
  7. carvekit/api/__pycache__/high.cpython-38.pyc +0 -0
  8. carvekit/api/__pycache__/interface.cpython-38.pyc +0 -0
  9. carvekit/api/high.py +100 -0
  10. carvekit/api/interface.py +77 -0
  11. carvekit/ml/__init__.py +4 -0
  12. carvekit/ml/__pycache__/__init__.cpython-38.pyc +0 -0
  13. carvekit/ml/arch/__init__.py +0 -0
  14. carvekit/ml/arch/__pycache__/__init__.cpython-38.pyc +0 -0
  15. carvekit/ml/arch/basnet/__init__.py +0 -0
  16. carvekit/ml/arch/basnet/__pycache__/__init__.cpython-38.pyc +0 -0
  17. carvekit/ml/arch/basnet/__pycache__/basnet.cpython-38.pyc +0 -0
  18. carvekit/ml/arch/basnet/basnet.py +478 -0
  19. carvekit/ml/arch/fba_matting/__init__.py +0 -0
  20. carvekit/ml/arch/fba_matting/__pycache__/__init__.cpython-38.pyc +0 -0
  21. carvekit/ml/arch/fba_matting/__pycache__/layers_WS.cpython-38.pyc +0 -0
  22. carvekit/ml/arch/fba_matting/__pycache__/models.cpython-38.pyc +0 -0
  23. carvekit/ml/arch/fba_matting/__pycache__/resnet_GN_WS.cpython-38.pyc +0 -0
  24. carvekit/ml/arch/fba_matting/__pycache__/resnet_bn.cpython-38.pyc +0 -0
  25. carvekit/ml/arch/fba_matting/__pycache__/transforms.cpython-38.pyc +0 -0
  26. carvekit/ml/arch/fba_matting/layers_WS.py +57 -0
  27. carvekit/ml/arch/fba_matting/models.py +341 -0
  28. carvekit/ml/arch/fba_matting/resnet_GN_WS.py +151 -0
  29. carvekit/ml/arch/fba_matting/resnet_bn.py +169 -0
  30. carvekit/ml/arch/fba_matting/transforms.py +45 -0
  31. carvekit/ml/arch/tracerb7/__init__.py +0 -0
  32. carvekit/ml/arch/tracerb7/__pycache__/__init__.cpython-38.pyc +0 -0
  33. carvekit/ml/arch/tracerb7/__pycache__/att_modules.cpython-38.pyc +0 -0
  34. carvekit/ml/arch/tracerb7/__pycache__/conv_modules.cpython-38.pyc +0 -0
  35. carvekit/ml/arch/tracerb7/__pycache__/effi_utils.cpython-38.pyc +0 -0
  36. carvekit/ml/arch/tracerb7/__pycache__/efficientnet.cpython-38.pyc +0 -0
  37. carvekit/ml/arch/tracerb7/__pycache__/tracer.cpython-38.pyc +0 -0
  38. carvekit/ml/arch/tracerb7/att_modules.py +290 -0
  39. carvekit/ml/arch/tracerb7/conv_modules.py +88 -0
  40. carvekit/ml/arch/tracerb7/effi_utils.py +579 -0
  41. carvekit/ml/arch/tracerb7/efficientnet.py +325 -0
  42. carvekit/ml/arch/tracerb7/tracer.py +97 -0
  43. carvekit/ml/arch/u2net/__init__.py +0 -0
  44. carvekit/ml/arch/u2net/__pycache__/__init__.cpython-38.pyc +0 -0
  45. carvekit/ml/arch/u2net/__pycache__/u2net.cpython-38.pyc +0 -0
  46. carvekit/ml/arch/u2net/u2net.py +172 -0
  47. carvekit/ml/files/__init__.py +7 -0
  48. carvekit/ml/files/__pycache__/__init__.cpython-38.pyc +0 -0
  49. carvekit/ml/files/__pycache__/models_loc.cpython-38.pyc +0 -0
  50. carvekit/ml/files/models_loc.py +70 -0
app.py ADDED
@@ -0,0 +1,36 @@
+ import streamlit as st
+ from carvekit.api.interface import Interface
+ from carvekit.ml.wrap.fba_matting import FBAMatting
+ from carvekit.ml.wrap.tracer_b7 import TracerUniversalB7
+ from carvekit.pipelines.postprocessing import MattingMethod
+ from carvekit.pipelines.preprocessing import PreprocessingStub
+ from carvekit.trimap.generator import TrimapGenerator
+ from PIL import Image
+
+ # Create Streamlit app title
+ st.title("Image Background Remover")
+
+ # Create a file uploader
+ uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"])
+
+ if uploaded_file is not None:
+     # Load the image
+     image = Image.open(uploaded_file)
+
+     # Set up ML pipeline
+     seg_net = TracerUniversalB7(device='cpu', batch_size=1)
+     fba = FBAMatting(device='cpu', input_tensor_size=2048, batch_size=1)
+     trimap = TrimapGenerator()
+     preprocessing = PreprocessingStub()
+     postprocessing = MattingMethod(matting_module=fba, trimap_generator=trimap, device='cpu')
+     interface = Interface(pre_pipe=preprocessing, post_pipe=postprocessing, seg_pipe=seg_net)
+
+     # Process the image
+     processed_bg = interface([image])[0]
+
+     # Display original and processed images
+     col1, col2 = st.columns(2)
+     with col1:
+         st.image(image, caption='Original Image', use_column_width=True)
+     with col2:
+         st.image(processed_bg, caption='Background Removed', use_column_width=True)
carvekit/__init__.py ADDED
@@ -0,0 +1 @@
+ version = "4.1.0"
carvekit/__main__.py ADDED
@@ -0,0 +1,149 @@
+ from pathlib import Path
+
+ import click
+ import tqdm
+
+ from carvekit.utils.image_utils import ALLOWED_SUFFIXES
+ from carvekit.utils.pool_utils import batch_generator, thread_pool_processing
+ from carvekit.web.schemas.config import MLConfig
+ from carvekit.web.utils.init_utils import init_interface
+ from carvekit.utils.fs_utils import save_file
+
+
+ @click.command(
+     "removebg",
+     help="Performs background removal on specified photos using console interface.",
+ )
+ @click.option("-i", required=True, type=str, help="Path to input file or dir")
+ @click.option("-o", default="none", type=str, help="Path to output file or dir")
+ @click.option("--pre", default="none", type=str, help="Preprocessing method")
+ @click.option("--post", default="fba", type=str, help="Postprocessing method.")
+ @click.option("--net", default="tracer_b7", type=str, help="Segmentation Network")
+ @click.option(
+     "--recursive",
+     default=False,
+     type=bool,
+     help="Enables recursive search for images in a folder",
+ )
+ @click.option(
+     "--batch_size",
+     default=10,
+     type=int,
+     help="Batch Size for list of images to be loaded to RAM",
+ )
+ @click.option(
+     "--batch_size_seg",
+     default=5,
+     type=int,
+     help="Batch size for list of images to be processed by segmentation " "network",
+ )
+ @click.option(
+     "--batch_size_mat",
+     default=1,
+     type=int,
+     help="Batch size for list of images to be processed by matting " "network",
+ )
+ @click.option(
+     "--seg_mask_size",
+     default=640,
+     type=int,
+     help="The size of the input image for the segmentation neural network.",
+ )
+ @click.option(
+     "--matting_mask_size",
+     default=2048,
+     type=int,
+     help="The size of the input image for the matting neural network.",
+ )
+ @click.option(
+     "--trimap_dilation",
+     default=30,
+     type=int,
+     help="The size of the offset radius from the object mask in "
+     "pixels when forming an unknown area",
+ )
+ @click.option(
+     "--trimap_erosion",
+     default=5,
+     type=int,
+     help="The number of iterations of erosion that the object's "
+     "mask will be subjected to before forming an unknown area",
+ )
+ @click.option(
+     "--trimap_prob_threshold",
+     default=231,
+     type=int,
+     help="Probability threshold at which the prob_filter "
+     "and prob_as_unknown_area operations will be "
+     "applied",
+ )
+ @click.option("--device", default="cpu", type=str, help="Processing Device.")
+ @click.option(
+     "--fp16", default=False, type=bool, help="Enables mixed precision processing."
+ )
+ def removebg(
+     i: str,
+     o: str,
+     pre: str,
+     post: str,
+     net: str,
+     recursive: bool,
+     batch_size: int,
+     batch_size_seg: int,
+     batch_size_mat: int,
+     seg_mask_size: int,
+     matting_mask_size: int,
+     device: str,
+     fp16: bool,
+     trimap_dilation: int,
+     trimap_erosion: int,
+     trimap_prob_threshold: int,
+ ):
+     out_path = Path(o)
+     input_path = Path(i)
+     if input_path.is_dir():
+         if recursive:
+             all_images = input_path.rglob("*.*")
+         else:
+             all_images = input_path.glob("*.*")
+         all_images = [
+             i
+             for i in all_images
+             if i.suffix.lower() in ALLOWED_SUFFIXES and "_bg_removed" not in i.name
+         ]
+     else:
+         all_images = [input_path]
+
+     interface_config = MLConfig(
+         segmentation_network=net,
+         preprocessing_method=pre,
+         postprocessing_method=post,
+         device=device,
+         batch_size_seg=batch_size_seg,
+         batch_size_matting=batch_size_mat,
+         seg_mask_size=seg_mask_size,
+         matting_mask_size=matting_mask_size,
+         fp16=fp16,
+         trimap_dilation=trimap_dilation,
+         trimap_erosion=trimap_erosion,
+         trimap_prob_threshold=trimap_prob_threshold,
+     )
+
+     interface = init_interface(interface_config)
+
+     for image_batch in tqdm.tqdm(
+         batch_generator(all_images, n=batch_size),
+         total=int(len(all_images) / batch_size),
+         desc="Removing background",
+         unit=" image batch",
+         colour="blue",
+     ):
+         images_without_background = interface(image_batch)  # Remove background
+         thread_pool_processing(
+             lambda x: save_file(out_path, image_batch[x], images_without_background[x]),
+             range(len(image_batch)),
+         )  # Drop images to fs
+
+
+ if __name__ == "__main__":
+     removebg()
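The removebg command above is a thin wrapper around MLConfig and init_interface, which it imports at the top. A rough programmatic equivalent, sketched here for illustration only: the input path is an example, and the MLConfig fields omitted below are assumed to have defaults.

# Hypothetical sketch, not part of this commit: drive the same pipeline
# the CLI builds, but from Python.
from carvekit.web.schemas.config import MLConfig
from carvekit.web.utils.init_utils import init_interface

config = MLConfig(
    segmentation_network="tracer_b7",  # same default as the --net option
    preprocessing_method="none",
    postprocessing_method="fba",
    device="cpu",
    seg_mask_size=640,
    matting_mask_size=2048,
)
interface = init_interface(config)
result = interface(["photo.jpg"])[0]   # "photo.jpg" is an example path
result.save("photo_bg_removed.png")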
carvekit/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (187 Bytes).
carvekit/api/__init__.py ADDED
File without changes
carvekit/api/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (174 Bytes).
carvekit/api/__pycache__/high.cpython-38.pyc ADDED
Binary file (3.71 kB).
carvekit/api/__pycache__/interface.cpython-38.pyc ADDED
Binary file (2.87 kB).
carvekit/api/high.py ADDED
@@ -0,0 +1,100 @@
+ """
+ Source url: https://github.com/OPHoperHPO/image-background-remove-tool
+ Author: Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
+ License: Apache License 2.0
+ """
+ import warnings
+
+ from carvekit.api.interface import Interface
+ from carvekit.ml.wrap.fba_matting import FBAMatting
+ from carvekit.ml.wrap.tracer_b7 import TracerUniversalB7
+ from carvekit.ml.wrap.u2net import U2NET
+ from carvekit.pipelines.postprocessing import MattingMethod
+ from carvekit.trimap.generator import TrimapGenerator
+
+
+ class HiInterface(Interface):
+     def __init__(
+         self,
+         object_type: str = "object",
+         batch_size_seg=2,
+         batch_size_matting=1,
+         device="cpu",
+         seg_mask_size=640,
+         matting_mask_size=2048,
+         trimap_prob_threshold=231,
+         trimap_dilation=30,
+         trimap_erosion_iters=5,
+         fp16=False,
+     ):
+         """
+         Initializes the high-level interface.
+
+         Args:
+             object_type: Interest object type. Can be "object" or "hairs-like".
+             matting_mask_size: The size of the input image for the matting neural network.
+             seg_mask_size: The size of the input image for the segmentation neural network.
+             batch_size_seg: Number of images processed per segmentation neural network call.
+             batch_size_matting: Number of images processed per matting neural network call.
+             device: Processing device.
+             fp16: Use half precision to reduce memory usage and increase speed. Experimental support.
+             trimap_prob_threshold: Probability threshold at which the prob_filter and prob_as_unknown_area operations will be applied.
+             trimap_dilation: The size of the offset radius from the object mask in pixels when forming an unknown area.
+             trimap_erosion_iters: The number of iterations of erosion applied to the object's mask before forming an unknown area.
+
+         Notes:
+             1. Increasing seg_mask_size may cause an out-of-memory error if the value is too large, and it may also
+             reduce precision, so changing it is not recommended. You can change matting_mask_size in the
+             range 1024 to 4096 to improve object edge refinement quality, but this increases RAM and
+             video memory consumption. You can also increase the batch sizes to speed up background removal,
+             but this too increases video memory consumption if the values are too large.
+
+             2. Changing trimap_prob_threshold, trimap_dilation, and trimap_erosion_iters may improve object edge
+             refinement quality.
+         """
+         if object_type == "object":
+             self.u2net = TracerUniversalB7(
+                 device=device,
+                 batch_size=batch_size_seg,
+                 input_image_size=seg_mask_size,
+                 fp16=fp16,
+             )
+         elif object_type == "hairs-like":
+             self.u2net = U2NET(
+                 device=device,
+                 batch_size=batch_size_seg,
+                 input_image_size=seg_mask_size,
+                 fp16=fp16,
+             )
+         else:
+             warnings.warn(
+                 f"Unknown object type: {object_type}. Using default object type: object"
+             )
+             self.u2net = TracerUniversalB7(
+                 device=device,
+                 batch_size=batch_size_seg,
+                 input_image_size=seg_mask_size,
+                 fp16=fp16,
+             )
+
+         self.fba = FBAMatting(
+             batch_size=batch_size_matting,
+             device=device,
+             input_tensor_size=matting_mask_size,
+             fp16=fp16,
+         )
+         self.trimap_generator = TrimapGenerator(
+             prob_threshold=trimap_prob_threshold,
+             kernel_size=trimap_dilation,
+             erosion_iters=trimap_erosion_iters,
+         )
+         super(HiInterface, self).__init__(
+             pre_pipe=None,
+             seg_pipe=self.u2net,
+             post_pipe=MattingMethod(
+                 matting_module=self.fba,
+                 trimap_generator=self.trimap_generator,
+                 device=device,
+             ),
+             device=device,
+         )
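HiInterface bundles the segmentation network, trimap generator, and FBA matting behind one constructor. A hedged usage sketch follows; the image path is an example and the snippet is not part of the commit.

# Hypothetical usage of the high-level API added in carvekit/api/high.py.
from carvekit.api.high import HiInterface

interface = HiInterface(object_type="object", device="cpu", fp16=False)
# Interface.__call__ accepts paths or PIL images and returns PIL images
# with the background removed (see carvekit/api/interface.py below).
images = interface(["./example.jpg"])
images[0].save("example_no_bg.png")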
carvekit/api/interface.py ADDED
@@ -0,0 +1,77 @@
+ """
+ Source url: https://github.com/OPHoperHPO/image-background-remove-tool
+ Author: Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
+ License: Apache License 2.0
+ """
+ from pathlib import Path
+ from typing import Union, List, Optional
+
+ from PIL import Image
+
+ from carvekit.ml.wrap.basnet import BASNET
+ from carvekit.ml.wrap.deeplab_v3 import DeepLabV3
+ from carvekit.ml.wrap.u2net import U2NET
+ from carvekit.ml.wrap.tracer_b7 import TracerUniversalB7
+ from carvekit.pipelines.preprocessing import PreprocessingStub
+ from carvekit.pipelines.postprocessing import MattingMethod
+ from carvekit.utils.image_utils import load_image
+ from carvekit.utils.mask_utils import apply_mask
+ from carvekit.utils.pool_utils import thread_pool_processing
+
+
+ class Interface:
+     def __init__(
+         self,
+         seg_pipe: Union[U2NET, BASNET, DeepLabV3, TracerUniversalB7],
+         pre_pipe: Optional[Union[PreprocessingStub]] = None,
+         post_pipe: Optional[Union[MattingMethod]] = None,
+         device="cpu",
+     ):
+         """
+         Initializes an object for interacting with pipelines and other components of the CarveKit framework.
+
+         Args:
+             pre_pipe: Initialized pre-processing pipeline object
+             seg_pipe: Initialized segmentation network object
+             post_pipe: Initialized postprocessing pipeline object
+             device: The processing device that will be used to apply the masks to the images.
+         """
+         self.device = device
+         self.preprocessing_pipeline = pre_pipe
+         self.segmentation_pipeline = seg_pipe
+         self.postprocessing_pipeline = post_pipe
+
+     def __call__(
+         self, images: List[Union[str, Path, Image.Image]]
+     ) -> List[Image.Image]:
+         """
+         Removes the background from the specified images.
+
+         Args:
+             images: list of input images
+
+         Returns:
+             List of images without background as PIL.Image.Image instances
+         """
+         images = thread_pool_processing(load_image, images)
+         if self.preprocessing_pipeline is not None:
+             masks: List[Image.Image] = self.preprocessing_pipeline(
+                 interface=self, images=images
+             )
+         else:
+             masks: List[Image.Image] = self.segmentation_pipeline(images=images)
+
+         if self.postprocessing_pipeline is not None:
+             images: List[Image.Image] = self.postprocessing_pipeline(
+                 images=images, masks=masks
+             )
+         else:
+             images = list(
+                 map(
+                     lambda x: apply_mask(
+                         image=images[x], mask=masks[x], device=self.device
+                     ),
+                     range(len(images)),
+                 )
+             )
+         return images
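When post_pipe is None, Interface.__call__ skips matting and simply applies the raw segmentation mask via apply_mask. A minimal sketch of that configuration, using only the Tracer segmentation network; paths are examples and the snippet is not part of the commit.

# Hypothetical segmentation-only setup: exercises the else-branch that calls apply_mask.
from carvekit.api.interface import Interface
from carvekit.ml.wrap.tracer_b7 import TracerUniversalB7

seg_net = TracerUniversalB7(device="cpu", batch_size=1)
interface = Interface(seg_pipe=seg_net, pre_pipe=None, post_pipe=None, device="cpu")
cutout = interface(["photo.jpg"])[0]  # example input path
cutout.save("photo_cutout.png")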
carvekit/ml/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from carvekit.utils.models_utils import fix_seed, suppress_warnings
+
+ fix_seed()
+ suppress_warnings()
carvekit/ml/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (277 Bytes).
carvekit/ml/arch/__init__.py ADDED
File without changes
carvekit/ml/arch/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (178 Bytes).
carvekit/ml/arch/basnet/__init__.py ADDED
File without changes
carvekit/ml/arch/basnet/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (185 Bytes).
carvekit/ml/arch/basnet/__pycache__/basnet.cpython-38.pyc ADDED
Binary file (10 kB).
carvekit/ml/arch/basnet/basnet.py ADDED
@@ -0,0 +1,478 @@
1
+ """
2
+ Source url: https://github.com/NathanUA/BASNet
3
+ Modified by Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
4
+ License: MIT License
5
+ """
6
+ import torch
7
+ import torch.nn as nn
8
+ from torchvision import models
9
+
10
+
11
+ def conv3x3(in_planes, out_planes, stride=1):
12
+ """3x3 convolution with padding"""
13
+ return nn.Conv2d(
14
+ in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
15
+ )
16
+
17
+
18
+ class BasicBlock(nn.Module):
19
+ expansion = 1
20
+
21
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
22
+ super(BasicBlock, self).__init__()
23
+ self.conv1 = conv3x3(inplanes, planes, stride)
24
+ self.bn1 = nn.BatchNorm2d(planes)
25
+ self.relu = nn.ReLU(inplace=True)
26
+ self.conv2 = conv3x3(planes, planes)
27
+ self.bn2 = nn.BatchNorm2d(planes)
28
+ self.downsample = downsample
29
+ self.stride = stride
30
+
31
+ def forward(self, x):
32
+ residual = x
33
+
34
+ out = self.conv1(x)
35
+ out = self.bn1(out)
36
+ out = self.relu(out)
37
+
38
+ out = self.conv2(out)
39
+ out = self.bn2(out)
40
+
41
+ if self.downsample is not None:
42
+ residual = self.downsample(x)
43
+
44
+ out += residual
45
+ out = self.relu(out)
46
+
47
+ return out
48
+
49
+
50
+ class BasicBlockDe(nn.Module):
51
+ expansion = 1
52
+
53
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
54
+ super(BasicBlockDe, self).__init__()
55
+
56
+ self.convRes = conv3x3(inplanes, planes, stride)
57
+ self.bnRes = nn.BatchNorm2d(planes)
58
+ self.reluRes = nn.ReLU(inplace=True)
59
+
60
+ self.conv1 = conv3x3(inplanes, planes, stride)
61
+ self.bn1 = nn.BatchNorm2d(planes)
62
+ self.relu = nn.ReLU(inplace=True)
63
+ self.conv2 = conv3x3(planes, planes)
64
+ self.bn2 = nn.BatchNorm2d(planes)
65
+ self.downsample = downsample
66
+ self.stride = stride
67
+
68
+ def forward(self, x):
69
+ residual = self.convRes(x)
70
+ residual = self.bnRes(residual)
71
+ residual = self.reluRes(residual)
72
+
73
+ out = self.conv1(x)
74
+ out = self.bn1(out)
75
+ out = self.relu(out)
76
+
77
+ out = self.conv2(out)
78
+ out = self.bn2(out)
79
+
80
+ if self.downsample is not None:
81
+ residual = self.downsample(x)
82
+
83
+ out += residual
84
+ out = self.relu(out)
85
+
86
+ return out
87
+
88
+
89
+ class Bottleneck(nn.Module):
90
+ expansion = 4
91
+
92
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
93
+ super(Bottleneck, self).__init__()
94
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
95
+ self.bn1 = nn.BatchNorm2d(planes)
96
+ self.conv2 = nn.Conv2d(
97
+ planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
98
+ )
99
+ self.bn2 = nn.BatchNorm2d(planes)
100
+ self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
101
+ self.bn3 = nn.BatchNorm2d(planes * 4)
102
+ self.relu = nn.ReLU(inplace=True)
103
+ self.downsample = downsample
104
+ self.stride = stride
105
+
106
+ def forward(self, x):
107
+ residual = x
108
+
109
+ out = self.conv1(x)
110
+ out = self.bn1(out)
111
+ out = self.relu(out)
112
+
113
+ out = self.conv2(out)
114
+ out = self.bn2(out)
115
+ out = self.relu(out)
116
+
117
+ out = self.conv3(out)
118
+ out = self.bn3(out)
119
+
120
+ if self.downsample is not None:
121
+ residual = self.downsample(x)
122
+
123
+ out += residual
124
+ out = self.relu(out)
125
+
126
+ return out
127
+
128
+
129
+ class RefUnet(nn.Module):
130
+ def __init__(self, in_ch, inc_ch):
131
+ super(RefUnet, self).__init__()
132
+
133
+ self.conv0 = nn.Conv2d(in_ch, inc_ch, 3, padding=1)
134
+
135
+ self.conv1 = nn.Conv2d(inc_ch, 64, 3, padding=1)
136
+ self.bn1 = nn.BatchNorm2d(64)
137
+ self.relu1 = nn.ReLU(inplace=True)
138
+
139
+ self.pool1 = nn.MaxPool2d(2, 2, ceil_mode=True)
140
+
141
+ self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
142
+ self.bn2 = nn.BatchNorm2d(64)
143
+ self.relu2 = nn.ReLU(inplace=True)
144
+
145
+ self.pool2 = nn.MaxPool2d(2, 2, ceil_mode=True)
146
+
147
+ self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
148
+ self.bn3 = nn.BatchNorm2d(64)
149
+ self.relu3 = nn.ReLU(inplace=True)
150
+
151
+ self.pool3 = nn.MaxPool2d(2, 2, ceil_mode=True)
152
+
153
+ self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
154
+ self.bn4 = nn.BatchNorm2d(64)
155
+ self.relu4 = nn.ReLU(inplace=True)
156
+
157
+ self.pool4 = nn.MaxPool2d(2, 2, ceil_mode=True)
158
+
159
+ self.conv5 = nn.Conv2d(64, 64, 3, padding=1)
160
+ self.bn5 = nn.BatchNorm2d(64)
161
+ self.relu5 = nn.ReLU(inplace=True)
162
+
163
+ self.conv_d4 = nn.Conv2d(128, 64, 3, padding=1)
164
+ self.bn_d4 = nn.BatchNorm2d(64)
165
+ self.relu_d4 = nn.ReLU(inplace=True)
166
+
167
+ self.conv_d3 = nn.Conv2d(128, 64, 3, padding=1)
168
+ self.bn_d3 = nn.BatchNorm2d(64)
169
+ self.relu_d3 = nn.ReLU(inplace=True)
170
+
171
+ self.conv_d2 = nn.Conv2d(128, 64, 3, padding=1)
172
+ self.bn_d2 = nn.BatchNorm2d(64)
173
+ self.relu_d2 = nn.ReLU(inplace=True)
174
+
175
+ self.conv_d1 = nn.Conv2d(128, 64, 3, padding=1)
176
+ self.bn_d1 = nn.BatchNorm2d(64)
177
+ self.relu_d1 = nn.ReLU(inplace=True)
178
+
179
+ self.conv_d0 = nn.Conv2d(64, 1, 3, padding=1)
180
+
181
+ self.upscore2 = nn.Upsample(
182
+ scale_factor=2, mode="bilinear", align_corners=False
183
+ )
184
+
185
+ def forward(self, x):
186
+ hx = x
187
+ hx = self.conv0(hx)
188
+
189
+ hx1 = self.relu1(self.bn1(self.conv1(hx)))
190
+ hx = self.pool1(hx1)
191
+
192
+ hx2 = self.relu2(self.bn2(self.conv2(hx)))
193
+ hx = self.pool2(hx2)
194
+
195
+ hx3 = self.relu3(self.bn3(self.conv3(hx)))
196
+ hx = self.pool3(hx3)
197
+
198
+ hx4 = self.relu4(self.bn4(self.conv4(hx)))
199
+ hx = self.pool4(hx4)
200
+
201
+ hx5 = self.relu5(self.bn5(self.conv5(hx)))
202
+
203
+ hx = self.upscore2(hx5)
204
+
205
+ d4 = self.relu_d4(self.bn_d4(self.conv_d4(torch.cat((hx, hx4), 1))))
206
+ hx = self.upscore2(d4)
207
+
208
+ d3 = self.relu_d3(self.bn_d3(self.conv_d3(torch.cat((hx, hx3), 1))))
209
+ hx = self.upscore2(d3)
210
+
211
+ d2 = self.relu_d2(self.bn_d2(self.conv_d2(torch.cat((hx, hx2), 1))))
212
+ hx = self.upscore2(d2)
213
+
214
+ d1 = self.relu_d1(self.bn_d1(self.conv_d1(torch.cat((hx, hx1), 1))))
215
+
216
+ residual = self.conv_d0(d1)
217
+
218
+ return x + residual
219
+
220
+
221
+ class BASNet(nn.Module):
222
+ def __init__(self, n_channels, n_classes):
223
+ super(BASNet, self).__init__()
224
+
225
+ resnet = models.resnet34(pretrained=False)
226
+
227
+ # -------------Encoder--------------
228
+
229
+ self.inconv = nn.Conv2d(n_channels, 64, 3, padding=1)
230
+ self.inbn = nn.BatchNorm2d(64)
231
+ self.inrelu = nn.ReLU(inplace=True)
232
+
233
+ # stage 1
234
+ self.encoder1 = resnet.layer1 # 224
235
+ # stage 2
236
+ self.encoder2 = resnet.layer2 # 112
237
+ # stage 3
238
+ self.encoder3 = resnet.layer3 # 56
239
+ # stage 4
240
+ self.encoder4 = resnet.layer4 # 28
241
+
242
+ self.pool4 = nn.MaxPool2d(2, 2, ceil_mode=True)
243
+
244
+ # stage 5
245
+ self.resb5_1 = BasicBlock(512, 512)
246
+ self.resb5_2 = BasicBlock(512, 512)
247
+ self.resb5_3 = BasicBlock(512, 512) # 14
248
+
249
+ self.pool5 = nn.MaxPool2d(2, 2, ceil_mode=True)
250
+
251
+ # stage 6
252
+ self.resb6_1 = BasicBlock(512, 512)
253
+ self.resb6_2 = BasicBlock(512, 512)
254
+ self.resb6_3 = BasicBlock(512, 512) # 7
255
+
256
+ # -------------Bridge--------------
257
+
258
+ # stage Bridge
259
+ self.convbg_1 = nn.Conv2d(512, 512, 3, dilation=2, padding=2) # 7
260
+ self.bnbg_1 = nn.BatchNorm2d(512)
261
+ self.relubg_1 = nn.ReLU(inplace=True)
262
+ self.convbg_m = nn.Conv2d(512, 512, 3, dilation=2, padding=2)
263
+ self.bnbg_m = nn.BatchNorm2d(512)
264
+ self.relubg_m = nn.ReLU(inplace=True)
265
+ self.convbg_2 = nn.Conv2d(512, 512, 3, dilation=2, padding=2)
266
+ self.bnbg_2 = nn.BatchNorm2d(512)
267
+ self.relubg_2 = nn.ReLU(inplace=True)
268
+
269
+ # -------------Decoder--------------
270
+
271
+ # stage 6d
272
+ self.conv6d_1 = nn.Conv2d(1024, 512, 3, padding=1) # 16
273
+ self.bn6d_1 = nn.BatchNorm2d(512)
274
+ self.relu6d_1 = nn.ReLU(inplace=True)
275
+
276
+ self.conv6d_m = nn.Conv2d(512, 512, 3, dilation=2, padding=2)
277
+ self.bn6d_m = nn.BatchNorm2d(512)
278
+ self.relu6d_m = nn.ReLU(inplace=True)
279
+
280
+ self.conv6d_2 = nn.Conv2d(512, 512, 3, dilation=2, padding=2)
281
+ self.bn6d_2 = nn.BatchNorm2d(512)
282
+ self.relu6d_2 = nn.ReLU(inplace=True)
283
+
284
+ # stage 5d
285
+ self.conv5d_1 = nn.Conv2d(1024, 512, 3, padding=1) # 16
286
+ self.bn5d_1 = nn.BatchNorm2d(512)
287
+ self.relu5d_1 = nn.ReLU(inplace=True)
288
+
289
+ self.conv5d_m = nn.Conv2d(512, 512, 3, padding=1)
290
+ self.bn5d_m = nn.BatchNorm2d(512)
291
+ self.relu5d_m = nn.ReLU(inplace=True)
292
+
293
+ self.conv5d_2 = nn.Conv2d(512, 512, 3, padding=1)
294
+ self.bn5d_2 = nn.BatchNorm2d(512)
295
+ self.relu5d_2 = nn.ReLU(inplace=True)
296
+
297
+ # stage 4d
298
+ self.conv4d_1 = nn.Conv2d(1024, 512, 3, padding=1) # 32
299
+ self.bn4d_1 = nn.BatchNorm2d(512)
300
+ self.relu4d_1 = nn.ReLU(inplace=True)
301
+
302
+ self.conv4d_m = nn.Conv2d(512, 512, 3, padding=1)
303
+ self.bn4d_m = nn.BatchNorm2d(512)
304
+ self.relu4d_m = nn.ReLU(inplace=True)
305
+
306
+ self.conv4d_2 = nn.Conv2d(512, 256, 3, padding=1)
307
+ self.bn4d_2 = nn.BatchNorm2d(256)
308
+ self.relu4d_2 = nn.ReLU(inplace=True)
309
+
310
+ # stage 3d
311
+ self.conv3d_1 = nn.Conv2d(512, 256, 3, padding=1) # 64
312
+ self.bn3d_1 = nn.BatchNorm2d(256)
313
+ self.relu3d_1 = nn.ReLU(inplace=True)
314
+
315
+ self.conv3d_m = nn.Conv2d(256, 256, 3, padding=1)
316
+ self.bn3d_m = nn.BatchNorm2d(256)
317
+ self.relu3d_m = nn.ReLU(inplace=True)
318
+
319
+ self.conv3d_2 = nn.Conv2d(256, 128, 3, padding=1)
320
+ self.bn3d_2 = nn.BatchNorm2d(128)
321
+ self.relu3d_2 = nn.ReLU(inplace=True)
322
+
323
+ # stage 2d
324
+
325
+ self.conv2d_1 = nn.Conv2d(256, 128, 3, padding=1) # 128
326
+ self.bn2d_1 = nn.BatchNorm2d(128)
327
+ self.relu2d_1 = nn.ReLU(inplace=True)
328
+
329
+ self.conv2d_m = nn.Conv2d(128, 128, 3, padding=1)
330
+ self.bn2d_m = nn.BatchNorm2d(128)
331
+ self.relu2d_m = nn.ReLU(inplace=True)
332
+
333
+ self.conv2d_2 = nn.Conv2d(128, 64, 3, padding=1)
334
+ self.bn2d_2 = nn.BatchNorm2d(64)
335
+ self.relu2d_2 = nn.ReLU(inplace=True)
336
+
337
+ # stage 1d
338
+ self.conv1d_1 = nn.Conv2d(128, 64, 3, padding=1) # 256
339
+ self.bn1d_1 = nn.BatchNorm2d(64)
340
+ self.relu1d_1 = nn.ReLU(inplace=True)
341
+
342
+ self.conv1d_m = nn.Conv2d(64, 64, 3, padding=1)
343
+ self.bn1d_m = nn.BatchNorm2d(64)
344
+ self.relu1d_m = nn.ReLU(inplace=True)
345
+
346
+ self.conv1d_2 = nn.Conv2d(64, 64, 3, padding=1)
347
+ self.bn1d_2 = nn.BatchNorm2d(64)
348
+ self.relu1d_2 = nn.ReLU(inplace=True)
349
+
350
+ # -------------Bilinear Upsampling--------------
351
+ self.upscore6 = nn.Upsample(
352
+ scale_factor=32, mode="bilinear", align_corners=False
353
+ )
354
+ self.upscore5 = nn.Upsample(
355
+ scale_factor=16, mode="bilinear", align_corners=False
356
+ )
357
+ self.upscore4 = nn.Upsample(
358
+ scale_factor=8, mode="bilinear", align_corners=False
359
+ )
360
+ self.upscore3 = nn.Upsample(
361
+ scale_factor=4, mode="bilinear", align_corners=False
362
+ )
363
+ self.upscore2 = nn.Upsample(
364
+ scale_factor=2, mode="bilinear", align_corners=False
365
+ )
366
+
367
+ # -------------Side Output--------------
368
+ self.outconvb = nn.Conv2d(512, 1, 3, padding=1)
369
+ self.outconv6 = nn.Conv2d(512, 1, 3, padding=1)
370
+ self.outconv5 = nn.Conv2d(512, 1, 3, padding=1)
371
+ self.outconv4 = nn.Conv2d(256, 1, 3, padding=1)
372
+ self.outconv3 = nn.Conv2d(128, 1, 3, padding=1)
373
+ self.outconv2 = nn.Conv2d(64, 1, 3, padding=1)
374
+ self.outconv1 = nn.Conv2d(64, 1, 3, padding=1)
375
+
376
+ # -------------Refine Module-------------
377
+ self.refunet = RefUnet(1, 64)
378
+
379
+ def forward(self, x):
380
+ hx = x
381
+
382
+ # -------------Encoder-------------
383
+ hx = self.inconv(hx)
384
+ hx = self.inbn(hx)
385
+ hx = self.inrelu(hx)
386
+
387
+ h1 = self.encoder1(hx) # 256
388
+ h2 = self.encoder2(h1) # 128
389
+ h3 = self.encoder3(h2) # 64
390
+ h4 = self.encoder4(h3) # 32
391
+
392
+ hx = self.pool4(h4) # 16
393
+
394
+ hx = self.resb5_1(hx)
395
+ hx = self.resb5_2(hx)
396
+ h5 = self.resb5_3(hx)
397
+
398
+ hx = self.pool5(h5) # 8
399
+
400
+ hx = self.resb6_1(hx)
401
+ hx = self.resb6_2(hx)
402
+ h6 = self.resb6_3(hx)
403
+
404
+ # -------------Bridge-------------
405
+ hx = self.relubg_1(self.bnbg_1(self.convbg_1(h6))) # 8
406
+ hx = self.relubg_m(self.bnbg_m(self.convbg_m(hx)))
407
+ hbg = self.relubg_2(self.bnbg_2(self.convbg_2(hx)))
408
+
409
+ # -------------Decoder-------------
410
+
411
+ hx = self.relu6d_1(self.bn6d_1(self.conv6d_1(torch.cat((hbg, h6), 1))))
412
+ hx = self.relu6d_m(self.bn6d_m(self.conv6d_m(hx)))
413
+ hd6 = self.relu6d_2(self.bn6d_2(self.conv6d_2(hx)))
414
+
415
+ hx = self.upscore2(hd6) # 8 -> 16
416
+
417
+ hx = self.relu5d_1(self.bn5d_1(self.conv5d_1(torch.cat((hx, h5), 1))))
418
+ hx = self.relu5d_m(self.bn5d_m(self.conv5d_m(hx)))
419
+ hd5 = self.relu5d_2(self.bn5d_2(self.conv5d_2(hx)))
420
+
421
+ hx = self.upscore2(hd5) # 16 -> 32
422
+
423
+ hx = self.relu4d_1(self.bn4d_1(self.conv4d_1(torch.cat((hx, h4), 1))))
424
+ hx = self.relu4d_m(self.bn4d_m(self.conv4d_m(hx)))
425
+ hd4 = self.relu4d_2(self.bn4d_2(self.conv4d_2(hx)))
426
+
427
+ hx = self.upscore2(hd4) # 32 -> 64
428
+
429
+ hx = self.relu3d_1(self.bn3d_1(self.conv3d_1(torch.cat((hx, h3), 1))))
430
+ hx = self.relu3d_m(self.bn3d_m(self.conv3d_m(hx)))
431
+ hd3 = self.relu3d_2(self.bn3d_2(self.conv3d_2(hx)))
432
+
433
+ hx = self.upscore2(hd3) # 64 -> 128
434
+
435
+ hx = self.relu2d_1(self.bn2d_1(self.conv2d_1(torch.cat((hx, h2), 1))))
436
+ hx = self.relu2d_m(self.bn2d_m(self.conv2d_m(hx)))
437
+ hd2 = self.relu2d_2(self.bn2d_2(self.conv2d_2(hx)))
438
+
439
+ hx = self.upscore2(hd2) # 128 -> 256
440
+
441
+ hx = self.relu1d_1(self.bn1d_1(self.conv1d_1(torch.cat((hx, h1), 1))))
442
+ hx = self.relu1d_m(self.bn1d_m(self.conv1d_m(hx)))
443
+ hd1 = self.relu1d_2(self.bn1d_2(self.conv1d_2(hx)))
444
+
445
+ # -------------Side Output-------------
446
+ db = self.outconvb(hbg)
447
+ db = self.upscore6(db) # 8->256
448
+
449
+ d6 = self.outconv6(hd6)
450
+ d6 = self.upscore6(d6) # 8->256
451
+
452
+ d5 = self.outconv5(hd5)
453
+ d5 = self.upscore5(d5) # 16->256
454
+
455
+ d4 = self.outconv4(hd4)
456
+ d4 = self.upscore4(d4) # 32->256
457
+
458
+ d3 = self.outconv3(hd3)
459
+ d3 = self.upscore3(d3) # 64->256
460
+
461
+ d2 = self.outconv2(hd2)
462
+ d2 = self.upscore2(d2) # 128->256
463
+
464
+ d1 = self.outconv1(hd1) # 256
465
+
466
+ # -------------Refine Module-------------
467
+ dout = self.refunet(d1) # 256
468
+
469
+ return (
470
+ torch.sigmoid(dout),
471
+ torch.sigmoid(d1),
472
+ torch.sigmoid(d2),
473
+ torch.sigmoid(d3),
474
+ torch.sigmoid(d4),
475
+ torch.sigmoid(d5),
476
+ torch.sigmoid(d6),
477
+ torch.sigmoid(db),
478
+ )
carvekit/ml/arch/fba_matting/__init__.py ADDED
File without changes
carvekit/ml/arch/fba_matting/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (190 Bytes).
carvekit/ml/arch/fba_matting/__pycache__/layers_WS.cpython-38.pyc ADDED
Binary file (1.6 kB).
carvekit/ml/arch/fba_matting/__pycache__/models.cpython-38.pyc ADDED
Binary file (8.24 kB).
carvekit/ml/arch/fba_matting/__pycache__/resnet_GN_WS.cpython-38.pyc ADDED
Binary file (4.45 kB).
carvekit/ml/arch/fba_matting/__pycache__/resnet_bn.cpython-38.pyc ADDED
Binary file (4.69 kB).
carvekit/ml/arch/fba_matting/__pycache__/transforms.cpython-38.pyc ADDED
Binary file (1.58 kB).
carvekit/ml/arch/fba_matting/layers_WS.py ADDED
@@ -0,0 +1,57 @@
+ """
+ Modified by Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
+ Source url: https://github.com/MarcoForte/FBA_Matting
+ License: MIT License
+ """
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+
+ class Conv2d(nn.Conv2d):
+     def __init__(
+         self,
+         in_channels,
+         out_channels,
+         kernel_size,
+         stride=1,
+         padding=0,
+         dilation=1,
+         groups=1,
+         bias=True,
+     ):
+         super(Conv2d, self).__init__(
+             in_channels,
+             out_channels,
+             kernel_size,
+             stride,
+             padding,
+             dilation,
+             groups,
+             bias,
+         )
+
+     def forward(self, x):
+         # return super(Conv2d, self).forward(x)
+         weight = self.weight
+         weight_mean = (
+             weight.mean(dim=1, keepdim=True)
+             .mean(dim=2, keepdim=True)
+             .mean(dim=3, keepdim=True)
+         )
+         weight = weight - weight_mean
+         # std = (weight).view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5
+         std = (
+             torch.sqrt(torch.var(weight.view(weight.size(0), -1), dim=1) + 1e-12).view(
+                 -1, 1, 1, 1
+             )
+             + 1e-5
+         )
+         weight = weight / std.expand_as(weight)
+         return F.conv2d(
+             x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups
+         )
+
+
+ def BatchNorm2d(num_features):
+     return nn.GroupNorm(num_channels=num_features, num_groups=32)
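The Conv2d subclass above implements weight standardization (kernel weights are re-centered and rescaled before each convolution), and the BatchNorm2d helper is in fact 32-group GroupNorm. A small sanity-check sketch, assuming torch is installed; this snippet is illustrative and not part of the commit.

# Illustrative only: shapes pass through the weight-standardized conv + GroupNorm.
import torch
from carvekit.ml.arch.fba_matting.layers_WS import Conv2d, BatchNorm2d

conv = Conv2d(3, 64, kernel_size=3, padding=1)
norm = BatchNorm2d(64)          # GroupNorm(32, 64) under the hood
x = torch.randn(1, 3, 32, 32)
print(norm(conv(x)).shape)      # torch.Size([1, 64, 32, 32])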
carvekit/ml/arch/fba_matting/models.py ADDED
@@ -0,0 +1,341 @@
1
+ """
2
+ Modified by Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
3
+ Source url: https://github.com/MarcoForte/FBA_Matting
4
+ License: MIT License
5
+ """
6
+ import torch
7
+ import torch.nn as nn
8
+ import carvekit.ml.arch.fba_matting.resnet_GN_WS as resnet_GN_WS
9
+ import carvekit.ml.arch.fba_matting.layers_WS as L
10
+ import carvekit.ml.arch.fba_matting.resnet_bn as resnet_bn
11
+ from functools import partial
12
+
13
+
14
+ class FBA(nn.Module):
15
+ def __init__(self, encoder: str):
16
+ super(FBA, self).__init__()
17
+ self.encoder = build_encoder(arch=encoder)
18
+ self.decoder = fba_decoder(batch_norm=True if "BN" in encoder else False)
19
+
20
+ def forward(self, image, two_chan_trimap, image_n, trimap_transformed):
21
+ resnet_input = torch.cat((image_n, trimap_transformed, two_chan_trimap), 1)
22
+ conv_out, indices = self.encoder(resnet_input, return_feature_maps=True)
23
+ return self.decoder(conv_out, image, indices, two_chan_trimap)
24
+
25
+
26
+ class ResnetDilatedBN(nn.Module):
27
+ def __init__(self, orig_resnet, dilate_scale=8):
28
+ super(ResnetDilatedBN, self).__init__()
29
+
30
+ if dilate_scale == 8:
31
+ orig_resnet.layer3.apply(partial(self._nostride_dilate, dilate=2))
32
+ orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=4))
33
+ elif dilate_scale == 16:
34
+ orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=2))
35
+
36
+ # take pretrained resnet, except AvgPool and FC
37
+ self.conv1 = orig_resnet.conv1
38
+ self.bn1 = orig_resnet.bn1
39
+ self.relu1 = orig_resnet.relu1
40
+ self.conv2 = orig_resnet.conv2
41
+ self.bn2 = orig_resnet.bn2
42
+ self.relu2 = orig_resnet.relu2
43
+ self.conv3 = orig_resnet.conv3
44
+ self.bn3 = orig_resnet.bn3
45
+ self.relu3 = orig_resnet.relu3
46
+ self.maxpool = orig_resnet.maxpool
47
+ self.layer1 = orig_resnet.layer1
48
+ self.layer2 = orig_resnet.layer2
49
+ self.layer3 = orig_resnet.layer3
50
+ self.layer4 = orig_resnet.layer4
51
+
52
+ def _nostride_dilate(self, m, dilate):
53
+ classname = m.__class__.__name__
54
+ if classname.find("Conv") != -1:
55
+ # the convolution with stride
56
+ if m.stride == (2, 2):
57
+ m.stride = (1, 1)
58
+ if m.kernel_size == (3, 3):
59
+ m.dilation = (dilate // 2, dilate // 2)
60
+ m.padding = (dilate // 2, dilate // 2)
61
+ # other convoluions
62
+ else:
63
+ if m.kernel_size == (3, 3):
64
+ m.dilation = (dilate, dilate)
65
+ m.padding = (dilate, dilate)
66
+
67
+ def forward(self, x, return_feature_maps=False):
68
+ conv_out = [x]
69
+ x = self.relu1(self.bn1(self.conv1(x)))
70
+ x = self.relu2(self.bn2(self.conv2(x)))
71
+ x = self.relu3(self.bn3(self.conv3(x)))
72
+ conv_out.append(x)
73
+ x, indices = self.maxpool(x)
74
+ x = self.layer1(x)
75
+ conv_out.append(x)
76
+ x = self.layer2(x)
77
+ conv_out.append(x)
78
+ x = self.layer3(x)
79
+ conv_out.append(x)
80
+ x = self.layer4(x)
81
+ conv_out.append(x)
82
+
83
+ if return_feature_maps:
84
+ return conv_out, indices
85
+ return [x]
86
+
87
+
88
+ class Resnet(nn.Module):
89
+ def __init__(self, orig_resnet):
90
+ super(Resnet, self).__init__()
91
+
92
+ # take pretrained resnet, except AvgPool and FC
93
+ self.conv1 = orig_resnet.conv1
94
+ self.bn1 = orig_resnet.bn1
95
+ self.relu1 = orig_resnet.relu1
96
+ self.conv2 = orig_resnet.conv2
97
+ self.bn2 = orig_resnet.bn2
98
+ self.relu2 = orig_resnet.relu2
99
+ self.conv3 = orig_resnet.conv3
100
+ self.bn3 = orig_resnet.bn3
101
+ self.relu3 = orig_resnet.relu3
102
+ self.maxpool = orig_resnet.maxpool
103
+ self.layer1 = orig_resnet.layer1
104
+ self.layer2 = orig_resnet.layer2
105
+ self.layer3 = orig_resnet.layer3
106
+ self.layer4 = orig_resnet.layer4
107
+
108
+ def forward(self, x, return_feature_maps=False):
109
+ conv_out = []
110
+
111
+ x = self.relu1(self.bn1(self.conv1(x)))
112
+ x = self.relu2(self.bn2(self.conv2(x)))
113
+ x = self.relu3(self.bn3(self.conv3(x)))
114
+ conv_out.append(x)
115
+ x, indices = self.maxpool(x)
116
+
117
+ x = self.layer1(x)
118
+ conv_out.append(x)
119
+ x = self.layer2(x)
120
+ conv_out.append(x)
121
+ x = self.layer3(x)
122
+ conv_out.append(x)
123
+ x = self.layer4(x)
124
+ conv_out.append(x)
125
+
126
+ if return_feature_maps:
127
+ return conv_out
128
+ return [x]
129
+
130
+
131
+ class ResnetDilated(nn.Module):
132
+ def __init__(self, orig_resnet, dilate_scale=8):
133
+ super(ResnetDilated, self).__init__()
134
+
135
+ if dilate_scale == 8:
136
+ orig_resnet.layer3.apply(partial(self._nostride_dilate, dilate=2))
137
+ orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=4))
138
+ elif dilate_scale == 16:
139
+ orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=2))
140
+
141
+ # take pretrained resnet, except AvgPool and FC
142
+ self.conv1 = orig_resnet.conv1
143
+ self.bn1 = orig_resnet.bn1
144
+ self.relu = orig_resnet.relu
145
+ self.maxpool = orig_resnet.maxpool
146
+ self.layer1 = orig_resnet.layer1
147
+ self.layer2 = orig_resnet.layer2
148
+ self.layer3 = orig_resnet.layer3
149
+ self.layer4 = orig_resnet.layer4
150
+
151
+ def _nostride_dilate(self, m, dilate):
152
+ classname = m.__class__.__name__
153
+ if classname.find("Conv") != -1:
154
+ # the convolution with stride
155
+ if m.stride == (2, 2):
156
+ m.stride = (1, 1)
157
+ if m.kernel_size == (3, 3):
158
+ m.dilation = (dilate // 2, dilate // 2)
159
+ m.padding = (dilate // 2, dilate // 2)
160
+ # other convoluions
161
+ else:
162
+ if m.kernel_size == (3, 3):
163
+ m.dilation = (dilate, dilate)
164
+ m.padding = (dilate, dilate)
165
+
166
+ def forward(self, x, return_feature_maps=False):
167
+ conv_out = [x]
168
+ x = self.relu(self.bn1(self.conv1(x)))
169
+ conv_out.append(x)
170
+ x, indices = self.maxpool(x)
171
+ x = self.layer1(x)
172
+ conv_out.append(x)
173
+ x = self.layer2(x)
174
+ conv_out.append(x)
175
+ x = self.layer3(x)
176
+ conv_out.append(x)
177
+ x = self.layer4(x)
178
+ conv_out.append(x)
179
+
180
+ if return_feature_maps:
181
+ return conv_out, indices
182
+ return [x]
183
+
184
+
185
+ def norm(dim, bn=False):
186
+ if bn is False:
187
+ return nn.GroupNorm(32, dim)
188
+ else:
189
+ return nn.BatchNorm2d(dim)
190
+
191
+
192
+ def fba_fusion(alpha, img, F, B):
193
+ F = alpha * img + (1 - alpha**2) * F - alpha * (1 - alpha) * B
194
+ B = (1 - alpha) * img + (2 * alpha - alpha**2) * B - alpha * (1 - alpha) * F
195
+
196
+ F = torch.clamp(F, 0, 1)
197
+ B = torch.clamp(B, 0, 1)
198
+ la = 0.1
199
+ alpha = (alpha * la + torch.sum((img - B) * (F - B), 1, keepdim=True)) / (
200
+ torch.sum((F - B) * (F - B), 1, keepdim=True) + la
201
+ )
202
+ alpha = torch.clamp(alpha, 0, 1)
203
+ return alpha, F, B
204
+
205
+
206
+ class fba_decoder(nn.Module):
207
+ def __init__(self, batch_norm=False):
208
+ super(fba_decoder, self).__init__()
209
+ pool_scales = (1, 2, 3, 6)
210
+ self.batch_norm = batch_norm
211
+
212
+ self.ppm = []
213
+
214
+ for scale in pool_scales:
215
+ self.ppm.append(
216
+ nn.Sequential(
217
+ nn.AdaptiveAvgPool2d(scale),
218
+ L.Conv2d(2048, 256, kernel_size=1, bias=True),
219
+ norm(256, self.batch_norm),
220
+ nn.LeakyReLU(),
221
+ )
222
+ )
223
+ self.ppm = nn.ModuleList(self.ppm)
224
+
225
+ self.conv_up1 = nn.Sequential(
226
+ L.Conv2d(
227
+ 2048 + len(pool_scales) * 256, 256, kernel_size=3, padding=1, bias=True
228
+ ),
229
+ norm(256, self.batch_norm),
230
+ nn.LeakyReLU(),
231
+ L.Conv2d(256, 256, kernel_size=3, padding=1),
232
+ norm(256, self.batch_norm),
233
+ nn.LeakyReLU(),
234
+ )
235
+
236
+ self.conv_up2 = nn.Sequential(
237
+ L.Conv2d(256 + 256, 256, kernel_size=3, padding=1, bias=True),
238
+ norm(256, self.batch_norm),
239
+ nn.LeakyReLU(),
240
+ )
241
+ if self.batch_norm:
242
+ d_up3 = 128
243
+ else:
244
+ d_up3 = 64
245
+ self.conv_up3 = nn.Sequential(
246
+ L.Conv2d(256 + d_up3, 64, kernel_size=3, padding=1, bias=True),
247
+ norm(64, self.batch_norm),
248
+ nn.LeakyReLU(),
249
+ )
250
+
251
+ self.unpool = nn.MaxUnpool2d(2, stride=2)
252
+
253
+ self.conv_up4 = nn.Sequential(
254
+ nn.Conv2d(64 + 3 + 3 + 2, 32, kernel_size=3, padding=1, bias=True),
255
+ nn.LeakyReLU(),
256
+ nn.Conv2d(32, 16, kernel_size=3, padding=1, bias=True),
257
+ nn.LeakyReLU(),
258
+ nn.Conv2d(16, 7, kernel_size=1, padding=0, bias=True),
259
+ )
260
+
261
+ def forward(self, conv_out, img, indices, two_chan_trimap):
262
+ conv5 = conv_out[-1]
263
+
264
+ input_size = conv5.size()
265
+ ppm_out = [conv5]
266
+ for pool_scale in self.ppm:
267
+ ppm_out.append(
268
+ nn.functional.interpolate(
269
+ pool_scale(conv5),
270
+ (input_size[2], input_size[3]),
271
+ mode="bilinear",
272
+ align_corners=False,
273
+ )
274
+ )
275
+ ppm_out = torch.cat(ppm_out, 1)
276
+ x = self.conv_up1(ppm_out)
277
+
278
+ x = torch.nn.functional.interpolate(
279
+ x, scale_factor=2, mode="bilinear", align_corners=False
280
+ )
281
+
282
+ x = torch.cat((x, conv_out[-4]), 1)
283
+
284
+ x = self.conv_up2(x)
285
+ x = torch.nn.functional.interpolate(
286
+ x, scale_factor=2, mode="bilinear", align_corners=False
287
+ )
288
+
289
+ x = torch.cat((x, conv_out[-5]), 1)
290
+ x = self.conv_up3(x)
291
+
292
+ x = torch.nn.functional.interpolate(
293
+ x, scale_factor=2, mode="bilinear", align_corners=False
294
+ )
295
+ x = torch.cat((x, conv_out[-6][:, :3], img, two_chan_trimap), 1)
296
+
297
+ output = self.conv_up4(x)
298
+
299
+ alpha = torch.clamp(output[:, 0][:, None], 0, 1)
300
+ F = torch.sigmoid(output[:, 1:4])
301
+ B = torch.sigmoid(output[:, 4:7])
302
+
303
+ # FBA Fusion
304
+ alpha, F, B = fba_fusion(alpha, img, F, B)
305
+
306
+ output = torch.cat((alpha, F, B), 1)
307
+
308
+ return output
309
+
310
+
311
+ def build_encoder(arch="resnet50_GN"):
312
+ if arch == "resnet50_GN_WS":
313
+ orig_resnet = resnet_GN_WS.__dict__["l_resnet50"]()
314
+ net_encoder = ResnetDilated(orig_resnet, dilate_scale=8)
315
+ elif arch == "resnet50_BN":
316
+ orig_resnet = resnet_bn.__dict__["l_resnet50"]()
317
+ net_encoder = ResnetDilatedBN(orig_resnet, dilate_scale=8)
318
+
319
+ else:
320
+ raise ValueError("Architecture undefined!")
321
+
322
+ num_channels = 3 + 6 + 2
323
+
324
+ if num_channels > 3:
325
+ net_encoder_sd = net_encoder.state_dict()
326
+ conv1_weights = net_encoder_sd["conv1.weight"]
327
+
328
+ c_out, c_in, h, w = conv1_weights.size()
329
+ conv1_mod = torch.zeros(c_out, num_channels, h, w)
330
+ conv1_mod[:, :3, :, :] = conv1_weights
331
+
332
+ conv1 = net_encoder.conv1
333
+ conv1.in_channels = num_channels
334
+ conv1.weight = torch.nn.Parameter(conv1_mod)
335
+
336
+ net_encoder.conv1 = conv1
337
+
338
+ net_encoder_sd["conv1.weight"] = conv1_mod
339
+
340
+ net_encoder.load_state_dict(net_encoder_sd)
341
+ return net_encoder
carvekit/ml/arch/fba_matting/resnet_GN_WS.py ADDED
@@ -0,0 +1,151 @@
1
+ """
2
+ Modified by Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
3
+ Source url: https://github.com/MarcoForte/FBA_Matting
4
+ License: MIT License
5
+ """
6
+ import torch.nn as nn
7
+ import carvekit.ml.arch.fba_matting.layers_WS as L
8
+
9
+ __all__ = ["ResNet", "l_resnet50"]
10
+
11
+
12
+ def conv3x3(in_planes, out_planes, stride=1):
13
+ """3x3 convolution with padding"""
14
+ return L.Conv2d(
15
+ in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
16
+ )
17
+
18
+
19
+ def conv1x1(in_planes, out_planes, stride=1):
20
+ """1x1 convolution"""
21
+ return L.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
22
+
23
+
24
+ class BasicBlock(nn.Module):
25
+ expansion = 1
26
+
27
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
28
+ super(BasicBlock, self).__init__()
29
+ self.conv1 = conv3x3(inplanes, planes, stride)
30
+ self.bn1 = L.BatchNorm2d(planes)
31
+ self.relu = nn.ReLU(inplace=True)
32
+ self.conv2 = conv3x3(planes, planes)
33
+ self.bn2 = L.BatchNorm2d(planes)
34
+ self.downsample = downsample
35
+ self.stride = stride
36
+
37
+ def forward(self, x):
38
+ identity = x
39
+
40
+ out = self.conv1(x)
41
+ out = self.bn1(out)
42
+ out = self.relu(out)
43
+
44
+ out = self.conv2(out)
45
+ out = self.bn2(out)
46
+
47
+ if self.downsample is not None:
48
+ identity = self.downsample(x)
49
+
50
+ out += identity
51
+ out = self.relu(out)
52
+
53
+ return out
54
+
55
+
56
+ class Bottleneck(nn.Module):
57
+ expansion = 4
58
+
59
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
60
+ super(Bottleneck, self).__init__()
61
+ self.conv1 = conv1x1(inplanes, planes)
62
+ self.bn1 = L.BatchNorm2d(planes)
63
+ self.conv2 = conv3x3(planes, planes, stride)
64
+ self.bn2 = L.BatchNorm2d(planes)
65
+ self.conv3 = conv1x1(planes, planes * self.expansion)
66
+ self.bn3 = L.BatchNorm2d(planes * self.expansion)
67
+ self.relu = nn.ReLU(inplace=True)
68
+ self.downsample = downsample
69
+ self.stride = stride
70
+
71
+ def forward(self, x):
72
+ identity = x
73
+
74
+ out = self.conv1(x)
75
+ out = self.bn1(out)
76
+ out = self.relu(out)
77
+
78
+ out = self.conv2(out)
79
+ out = self.bn2(out)
80
+ out = self.relu(out)
81
+
82
+ out = self.conv3(out)
83
+ out = self.bn3(out)
84
+
85
+ if self.downsample is not None:
86
+ identity = self.downsample(x)
87
+
88
+ out += identity
89
+ out = self.relu(out)
90
+
91
+ return out
92
+
93
+
94
+ class ResNet(nn.Module):
95
+ def __init__(self, block, layers, num_classes=1000):
96
+ super(ResNet, self).__init__()
97
+ self.inplanes = 64
98
+ self.conv1 = L.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
99
+ self.bn1 = L.BatchNorm2d(64)
100
+ self.relu = nn.ReLU(inplace=True)
101
+ self.maxpool = nn.MaxPool2d(
102
+ kernel_size=3, stride=2, padding=1, return_indices=True
103
+ )
104
+ self.layer1 = self._make_layer(block, 64, layers[0])
105
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
106
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
107
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
108
+ self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
109
+ self.fc = nn.Linear(512 * block.expansion, num_classes)
110
+
111
+ def _make_layer(self, block, planes, blocks, stride=1):
112
+ downsample = None
113
+ if stride != 1 or self.inplanes != planes * block.expansion:
114
+ downsample = nn.Sequential(
115
+ conv1x1(self.inplanes, planes * block.expansion, stride),
116
+ L.BatchNorm2d(planes * block.expansion),
117
+ )
118
+
119
+ layers = []
120
+ layers.append(block(self.inplanes, planes, stride, downsample))
121
+ self.inplanes = planes * block.expansion
122
+ for _ in range(1, blocks):
123
+ layers.append(block(self.inplanes, planes))
124
+
125
+ return nn.Sequential(*layers)
126
+
127
+ def forward(self, x):
128
+ x = self.conv1(x)
129
+ x = self.bn1(x)
130
+ x = self.relu(x)
131
+ x = self.maxpool(x)
132
+
133
+ x = self.layer1(x)
134
+ x = self.layer2(x)
135
+ x = self.layer3(x)
136
+ x = self.layer4(x)
137
+
138
+ x = self.avgpool(x)
139
+ x = x.view(x.size(0), -1)
140
+ x = self.fc(x)
141
+
142
+ return x
143
+
144
+
145
+ def l_resnet50(pretrained=False, **kwargs):
146
+ """Constructs a ResNet-50 model.
147
+ Args:
148
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
149
+ """
150
+ model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
151
+ return model
carvekit/ml/arch/fba_matting/resnet_bn.py ADDED
@@ -0,0 +1,169 @@
1
+ """
2
+ Modified by Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
3
+ Source url: https://github.com/MarcoForte/FBA_Matting
4
+ License: MIT License
5
+ """
6
+ import torch.nn as nn
7
+ import math
8
+ from torch.nn import BatchNorm2d
9
+
10
+ __all__ = ["ResNet"]
11
+
12
+
13
+ def conv3x3(in_planes, out_planes, stride=1):
14
+ "3x3 convolution with padding"
15
+ return nn.Conv2d(
16
+ in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
17
+ )
18
+
19
+
20
+ class BasicBlock(nn.Module):
21
+ expansion = 1
22
+
23
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
24
+ super(BasicBlock, self).__init__()
25
+ self.conv1 = conv3x3(inplanes, planes, stride)
26
+ self.bn1 = BatchNorm2d(planes)
27
+ self.relu = nn.ReLU(inplace=True)
28
+ self.conv2 = conv3x3(planes, planes)
29
+ self.bn2 = BatchNorm2d(planes)
30
+ self.downsample = downsample
31
+ self.stride = stride
32
+
33
+ def forward(self, x):
34
+ residual = x
35
+
36
+ out = self.conv1(x)
37
+ out = self.bn1(out)
38
+ out = self.relu(out)
39
+
40
+ out = self.conv2(out)
41
+ out = self.bn2(out)
42
+
43
+ if self.downsample is not None:
44
+ residual = self.downsample(x)
45
+
46
+ out += residual
47
+ out = self.relu(out)
48
+
49
+ return out
50
+
51
+
52
+ class Bottleneck(nn.Module):
53
+ expansion = 4
54
+
55
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
56
+ super(Bottleneck, self).__init__()
57
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
58
+ self.bn1 = BatchNorm2d(planes)
59
+ self.conv2 = nn.Conv2d(
60
+ planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
61
+ )
62
+ self.bn2 = BatchNorm2d(planes, momentum=0.01)
63
+ self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
64
+ self.bn3 = BatchNorm2d(planes * 4)
65
+ self.relu = nn.ReLU(inplace=True)
66
+ self.downsample = downsample
67
+ self.stride = stride
68
+
69
+ def forward(self, x):
70
+ residual = x
71
+
72
+ out = self.conv1(x)
73
+ out = self.bn1(out)
74
+ out = self.relu(out)
75
+
76
+ out = self.conv2(out)
77
+ out = self.bn2(out)
78
+ out = self.relu(out)
79
+
80
+ out = self.conv3(out)
81
+ out = self.bn3(out)
82
+
83
+ if self.downsample is not None:
84
+ residual = self.downsample(x)
85
+
86
+ out += residual
87
+ out = self.relu(out)
88
+
89
+ return out
90
+
91
+
92
+ class ResNet(nn.Module):
93
+ def __init__(self, block, layers, num_classes=1000):
94
+ self.inplanes = 128
95
+ super(ResNet, self).__init__()
96
+ self.conv1 = conv3x3(3, 64, stride=2)
97
+ self.bn1 = BatchNorm2d(64)
98
+ self.relu1 = nn.ReLU(inplace=True)
99
+ self.conv2 = conv3x3(64, 64)
100
+ self.bn2 = BatchNorm2d(64)
101
+ self.relu2 = nn.ReLU(inplace=True)
102
+ self.conv3 = conv3x3(64, 128)
103
+ self.bn3 = BatchNorm2d(128)
104
+ self.relu3 = nn.ReLU(inplace=True)
105
+ self.maxpool = nn.MaxPool2d(
106
+ kernel_size=3, stride=2, padding=1, return_indices=True
107
+ )
108
+
109
+ self.layer1 = self._make_layer(block, 64, layers[0])
110
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
111
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
112
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
113
+ self.avgpool = nn.AvgPool2d(7, stride=1)
114
+ self.fc = nn.Linear(512 * block.expansion, num_classes)
115
+
116
+ for m in self.modules():
117
+ if isinstance(m, nn.Conv2d):
118
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
119
+ m.weight.data.normal_(0, math.sqrt(2.0 / n))
120
+ elif isinstance(m, BatchNorm2d):
121
+ m.weight.data.fill_(1)
122
+ m.bias.data.zero_()
123
+
124
+ def _make_layer(self, block, planes, blocks, stride=1):
125
+ downsample = None
126
+ if stride != 1 or self.inplanes != planes * block.expansion:
127
+ downsample = nn.Sequential(
128
+ nn.Conv2d(
129
+ self.inplanes,
130
+ planes * block.expansion,
131
+ kernel_size=1,
132
+ stride=stride,
133
+ bias=False,
134
+ ),
135
+ BatchNorm2d(planes * block.expansion),
136
+ )
137
+
138
+ layers = []
139
+ layers.append(block(self.inplanes, planes, stride, downsample))
140
+ self.inplanes = planes * block.expansion
141
+ for i in range(1, blocks):
142
+ layers.append(block(self.inplanes, planes))
143
+
144
+ return nn.Sequential(*layers)
145
+
146
+ def forward(self, x):
147
+ x = self.relu1(self.bn1(self.conv1(x)))
148
+ x = self.relu2(self.bn2(self.conv2(x)))
149
+ x = self.relu3(self.bn3(self.conv3(x)))
150
+ x, indices = self.maxpool(x)
151
+
152
+ x = self.layer1(x)
153
+ x = self.layer2(x)
154
+ x = self.layer3(x)
155
+ x = self.layer4(x)
156
+
157
+ x = self.avgpool(x)
158
+ x = x.view(x.size(0), -1)
159
+ x = self.fc(x)
160
+ return x
161
+
162
+
163
+ def l_resnet50():
164
+ """Constructs a ResNet-50 model.
165
+ Returns a randomly initialized model; no pretrained weights are loaded here.
167
+ """
168
+ model = ResNet(Bottleneck, [3, 4, 6, 3])
169
+ return model
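As a quick sanity check for this backbone (a sketch; it assumes only what this commit adds under carvekit/ml/arch/fba_matting/resnet_bn.py and a working torch install):

```python
import torch
from carvekit.ml.arch.fba_matting.resnet_bn import l_resnet50

model = l_resnet50().eval()                      # randomly initialized ResNet-50
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # stem + 7x7 avgpool are sized for 224x224 inputs
print(logits.shape)                              # torch.Size([1, 1000])
```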
carvekit/ml/arch/fba_matting/transforms.py ADDED
@@ -0,0 +1,45 @@
1
+ """
2
+ Modified by Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
3
+ Source url: https://github.com/MarcoForte/FBA_Matting
4
+ License: MIT License
5
+ """
6
+ import cv2
7
+ import numpy as np
8
+
9
+ group_norm_std = [0.229, 0.224, 0.225]
10
+ group_norm_mean = [0.485, 0.456, 0.406]
11
+
12
+
13
+ def dt(a):
14
+ return cv2.distanceTransform((a * 255).astype(np.uint8), cv2.DIST_L2, 0)
15
+
16
+
17
+ def trimap_transform(trimap):
18
+ h, w = trimap.shape[0], trimap.shape[1]
19
+
20
+ clicks = np.zeros((h, w, 6))
21
+ for k in range(2):
22
+ if np.count_nonzero(trimap[:, :, k]) > 0:
23
+ dt_mask = -dt(1 - trimap[:, :, k]) ** 2
24
+ L = 320
25
+ clicks[:, :, 3 * k] = np.exp(dt_mask / (2 * ((0.02 * L) ** 2)))
26
+ clicks[:, :, 3 * k + 1] = np.exp(dt_mask / (2 * ((0.08 * L) ** 2)))
27
+ clicks[:, :, 3 * k + 2] = np.exp(dt_mask / (2 * ((0.16 * L) ** 2)))
28
+
29
+ return clicks
30
+
31
+
32
+ def groupnorm_normalise_image(img, format="nhwc"):
33
+ """
34
+ Accepts an RGB image with values in [0, 1]; normalises channels in place using the means/stds above.
35
+ """
36
+ if format == "nhwc":
37
+ for i in range(3):
38
+ img[..., i] = (img[..., i] - group_norm_mean[i]) / group_norm_std[i]
39
+ else:
40
+ for i in range(3):
41
+ img[..., i, :, :] = (
42
+ img[..., i, :, :] - group_norm_mean[i]
43
+ ) / group_norm_std[i]
44
+
45
+ return img
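A minimal usage sketch for the two helpers above (the 2-channel trimap layout and the 320x320 size are illustrative assumptions, not values taken from this commit):

```python
import numpy as np
from carvekit.ml.arch.fba_matting.transforms import trimap_transform, groupnorm_normalise_image

h, w = 320, 320
trimap = np.zeros((h, w, 2), dtype=np.float32)   # channel 0: background region, channel 1: foreground region
trimap[:40, :, 0] = 1.0
trimap[-40:, :, 1] = 1.0

clicks = trimap_transform(trimap)                    # (h, w, 6) distance-transform encoding
image = np.random.rand(h, w, 3).astype(np.float32)   # RGB in [0, 1]
image = groupnorm_normalise_image(image, format="nhwc")
print(clicks.shape, image.shape)                     # (320, 320, 6) (320, 320, 3)
```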
carvekit/ml/arch/tracerb7/__init__.py ADDED
File without changes
carvekit/ml/arch/tracerb7/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (187 Bytes). View file
 
carvekit/ml/arch/tracerb7/__pycache__/att_modules.cpython-38.pyc ADDED
Binary file (7.42 kB). View file
 
carvekit/ml/arch/tracerb7/__pycache__/conv_modules.cpython-38.pyc ADDED
Binary file (2.44 kB). View file
 
carvekit/ml/arch/tracerb7/__pycache__/effi_utils.cpython-38.pyc ADDED
Binary file (14.9 kB). View file
 
carvekit/ml/arch/tracerb7/__pycache__/efficientnet.cpython-38.pyc ADDED
Binary file (8.02 kB). View file
 
carvekit/ml/arch/tracerb7/__pycache__/tracer.cpython-38.pyc ADDED
Binary file (2.83 kB). View file
 
carvekit/ml/arch/tracerb7/att_modules.py ADDED
@@ -0,0 +1,290 @@
1
+ """
2
+ Source url: https://github.com/Karel911/TRACER
3
+ Author: Min Seok Lee and Wooseok Shin
4
+ License: Apache License 2.0
5
+ """
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+
10
+ from carvekit.ml.arch.tracerb7.conv_modules import BasicConv2d, DWConv, DWSConv
11
+
12
+
13
+ class RFB_Block(nn.Module):
14
+ def __init__(self, in_channel, out_channel):
15
+ super(RFB_Block, self).__init__()
16
+ self.relu = nn.ReLU(True)
17
+ self.branch0 = nn.Sequential(
18
+ BasicConv2d(in_channel, out_channel, 1),
19
+ )
20
+ self.branch1 = nn.Sequential(
21
+ BasicConv2d(in_channel, out_channel, 1),
22
+ BasicConv2d(out_channel, out_channel, kernel_size=(1, 3), padding=(0, 1)),
23
+ BasicConv2d(out_channel, out_channel, kernel_size=(3, 1), padding=(1, 0)),
24
+ BasicConv2d(out_channel, out_channel, 3, padding=3, dilation=3),
25
+ )
26
+ self.branch2 = nn.Sequential(
27
+ BasicConv2d(in_channel, out_channel, 1),
28
+ BasicConv2d(out_channel, out_channel, kernel_size=(1, 5), padding=(0, 2)),
29
+ BasicConv2d(out_channel, out_channel, kernel_size=(5, 1), padding=(2, 0)),
30
+ BasicConv2d(out_channel, out_channel, 3, padding=5, dilation=5),
31
+ )
32
+ self.branch3 = nn.Sequential(
33
+ BasicConv2d(in_channel, out_channel, 1),
34
+ BasicConv2d(out_channel, out_channel, kernel_size=(1, 7), padding=(0, 3)),
35
+ BasicConv2d(out_channel, out_channel, kernel_size=(7, 1), padding=(3, 0)),
36
+ BasicConv2d(out_channel, out_channel, 3, padding=7, dilation=7),
37
+ )
38
+ self.conv_cat = BasicConv2d(4 * out_channel, out_channel, 3, padding=1)
39
+ self.conv_res = BasicConv2d(in_channel, out_channel, 1)
40
+
41
+ def forward(self, x):
42
+ x0 = self.branch0(x)
43
+ x1 = self.branch1(x)
44
+ x2 = self.branch2(x)
45
+ x3 = self.branch3(x)
46
+ x_cat = torch.cat((x0, x1, x2, x3), 1)
47
+ x_cat = self.conv_cat(x_cat)
48
+
49
+ x = self.relu(x_cat + self.conv_res(x))
50
+ return x
51
+
52
+
53
+ class GlobalAvgPool(nn.Module):
54
+ def __init__(self, flatten=False):
55
+ super(GlobalAvgPool, self).__init__()
56
+ self.flatten = flatten
57
+
58
+ def forward(self, x):
59
+ if self.flatten:
60
+ in_size = x.size()
61
+ return x.view((in_size[0], in_size[1], -1)).mean(dim=2)
62
+ else:
63
+ return (
64
+ x.view(x.size(0), x.size(1), -1)
65
+ .mean(-1)
66
+ .view(x.size(0), x.size(1), 1, 1)
67
+ )
68
+
69
+
70
+ class UnionAttentionModule(nn.Module):
71
+ def __init__(self, n_channels, only_channel_tracing=False):
72
+ super(UnionAttentionModule, self).__init__()
73
+ self.GAP = GlobalAvgPool()
74
+ self.confidence_ratio = 0.1
75
+ self.bn = nn.BatchNorm2d(n_channels)
76
+ self.norm = nn.Sequential(
77
+ nn.BatchNorm2d(n_channels), nn.Dropout3d(self.confidence_ratio)
78
+ )
79
+ self.channel_q = nn.Conv2d(
80
+ in_channels=n_channels,
81
+ out_channels=n_channels,
82
+ kernel_size=1,
83
+ stride=1,
84
+ padding=0,
85
+ bias=False,
86
+ )
87
+ self.channel_k = nn.Conv2d(
88
+ in_channels=n_channels,
89
+ out_channels=n_channels,
90
+ kernel_size=1,
91
+ stride=1,
92
+ padding=0,
93
+ bias=False,
94
+ )
95
+ self.channel_v = nn.Conv2d(
96
+ in_channels=n_channels,
97
+ out_channels=n_channels,
98
+ kernel_size=1,
99
+ stride=1,
100
+ padding=0,
101
+ bias=False,
102
+ )
103
+
104
+ self.fc = nn.Conv2d(
105
+ in_channels=n_channels,
106
+ out_channels=n_channels,
107
+ kernel_size=1,
108
+ stride=1,
109
+ padding=0,
110
+ bias=False,
111
+ )
112
+
113
+ if only_channel_tracing is False:
114
+ self.spatial_q = nn.Conv2d(
115
+ in_channels=n_channels,
116
+ out_channels=1,
117
+ kernel_size=1,
118
+ stride=1,
119
+ padding=0,
120
+ bias=False,
121
+ )
122
+ self.spatial_k = nn.Conv2d(
123
+ in_channels=n_channels,
124
+ out_channels=1,
125
+ kernel_size=1,
126
+ stride=1,
127
+ padding=0,
128
+ bias=False,
129
+ )
130
+ self.spatial_v = nn.Conv2d(
131
+ in_channels=n_channels,
132
+ out_channels=1,
133
+ kernel_size=1,
134
+ stride=1,
135
+ padding=0,
136
+ bias=False,
137
+ )
138
+ self.sigmoid = nn.Sigmoid()
139
+
140
+ def masking(self, x, mask):
141
+ mask = mask.squeeze(3).squeeze(2)
142
+ threshold = torch.quantile(
143
+ mask.float(), self.confidence_ratio, dim=-1, keepdim=True
144
+ )
145
+ mask[mask <= threshold] = 0.0
146
+ mask = mask.unsqueeze(2).unsqueeze(3)
147
+ mask = mask.expand(-1, x.shape[1], x.shape[2], x.shape[3]).contiguous()
148
+ masked_x = x * mask
149
+
150
+ return masked_x
151
+
152
+ def Channel_Tracer(self, x):
153
+ avg_pool = self.GAP(x)
154
+ x_norm = self.norm(avg_pool)
155
+
156
+ q = self.channel_q(x_norm).squeeze(-1)
157
+ k = self.channel_k(x_norm).squeeze(-1)
158
+ v = self.channel_v(x_norm).squeeze(-1)
159
+
160
+ # softmax(Q*K^T)
161
+ QK_T = torch.matmul(q, k.transpose(1, 2))
162
+ alpha = F.softmax(QK_T, dim=-1)
163
+
164
+ # a*v
165
+ att = torch.matmul(alpha, v).unsqueeze(-1)
166
+ att = self.fc(att)
167
+ att = self.sigmoid(att)
168
+
169
+ output = (x * att) + x
170
+ alpha_mask = att.clone()
171
+
172
+ return output, alpha_mask
173
+
174
+ def forward(self, x):
175
+ X_c, alpha_mask = self.Channel_Tracer(x)
176
+ X_c = self.bn(X_c)
177
+ x_drop = self.masking(X_c, alpha_mask)
178
+
179
+ q = self.spatial_q(x_drop).squeeze(1)
180
+ k = self.spatial_k(x_drop).squeeze(1)
181
+ v = self.spatial_v(x_drop).squeeze(1)
182
+
183
+ # softmax(Q*K^T)
184
+ QK_T = torch.matmul(q, k.transpose(1, 2))
185
+ alpha = F.softmax(QK_T, dim=-1)
186
+
187
+ output = torch.matmul(alpha, v).unsqueeze(1) + v.unsqueeze(1)
188
+
189
+ return output
190
+
191
+
192
+ class aggregation(nn.Module):
193
+ def __init__(self, channel):
194
+ super(aggregation, self).__init__()
195
+ self.relu = nn.ReLU(True)
196
+
197
+ self.upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)
198
+ self.conv_upsample1 = BasicConv2d(channel[2], channel[1], 3, padding=1)
199
+ self.conv_upsample2 = BasicConv2d(channel[2], channel[0], 3, padding=1)
200
+ self.conv_upsample3 = BasicConv2d(channel[1], channel[0], 3, padding=1)
201
+ self.conv_upsample4 = BasicConv2d(channel[2], channel[2], 3, padding=1)
202
+ self.conv_upsample5 = BasicConv2d(
203
+ channel[2] + channel[1], channel[2] + channel[1], 3, padding=1
204
+ )
205
+
206
+ self.conv_concat2 = BasicConv2d(
207
+ (channel[2] + channel[1]), (channel[2] + channel[1]), 3, padding=1
208
+ )
209
+ self.conv_concat3 = BasicConv2d(
210
+ (channel[0] + channel[1] + channel[2]),
211
+ (channel[0] + channel[1] + channel[2]),
212
+ 3,
213
+ padding=1,
214
+ )
215
+
216
+ self.UAM = UnionAttentionModule(channel[0] + channel[1] + channel[2])
217
+
218
+ def forward(self, e4, e3, e2):
219
+ e4_1 = e4
220
+ e3_1 = self.conv_upsample1(self.upsample(e4)) * e3
221
+ e2_1 = (
222
+ self.conv_upsample2(self.upsample(self.upsample(e4)))
223
+ * self.conv_upsample3(self.upsample(e3))
224
+ * e2
225
+ )
226
+
227
+ e3_2 = torch.cat((e3_1, self.conv_upsample4(self.upsample(e4_1))), 1)
228
+ e3_2 = self.conv_concat2(e3_2)
229
+
230
+ e2_2 = torch.cat((e2_1, self.conv_upsample5(self.upsample(e3_2))), 1)
231
+ x = self.conv_concat3(e2_2)
232
+
233
+ output = self.UAM(x)
234
+
235
+ return output
236
+
237
+
238
+ class ObjectAttention(nn.Module):
239
+ def __init__(self, channel, kernel_size):
240
+ super(ObjectAttention, self).__init__()
241
+ self.channel = channel
242
+ self.DWSConv = DWSConv(
243
+ channel, channel // 2, kernel=kernel_size, padding=1, kernels_per_layer=1
244
+ )
245
+ self.DWConv1 = nn.Sequential(
246
+ DWConv(channel // 2, channel // 2, kernel=1, padding=0, dilation=1),
247
+ BasicConv2d(channel // 2, channel // 8, 1),
248
+ )
249
+ self.DWConv2 = nn.Sequential(
250
+ DWConv(channel // 2, channel // 2, kernel=3, padding=1, dilation=1),
251
+ BasicConv2d(channel // 2, channel // 8, 1),
252
+ )
253
+ self.DWConv3 = nn.Sequential(
254
+ DWConv(channel // 2, channel // 2, kernel=3, padding=3, dilation=3),
255
+ BasicConv2d(channel // 2, channel // 8, 1),
256
+ )
257
+ self.DWConv4 = nn.Sequential(
258
+ DWConv(channel // 2, channel // 2, kernel=3, padding=5, dilation=5),
259
+ BasicConv2d(channel // 2, channel // 8, 1),
260
+ )
261
+ self.conv1 = BasicConv2d(channel // 2, 1, 1)
262
+
263
+ def forward(self, decoder_map, encoder_map):
264
+ """
265
+ Args:
266
+ decoder_map: decoder representation (B, 1, H, W).
267
+ encoder_map: encoder block output (B, C, H, W).
268
+ Returns:
269
+ decoder representation: (B, 1, H, W)
270
+ """
271
+ mask_bg = -1 * torch.sigmoid(decoder_map) + 1 # Sigmoid & Reverse
272
+ mask_ob = torch.sigmoid(decoder_map) # object attention
273
+ x = mask_ob.expand(-1, self.channel, -1, -1).mul(encoder_map)
274
+
275
+ edge = mask_bg.clone()
276
+ edge[edge > 0.93] = 0
277
+ x = x + (edge * encoder_map)
278
+
279
+ x = self.DWSConv(x)
280
+ skip = x.clone()
281
+ x = (
282
+ torch.cat(
283
+ [self.DWConv1(x), self.DWConv2(x), self.DWConv3(x), self.DWConv4(x)],
284
+ dim=1,
285
+ )
286
+ + skip
287
+ )
288
+ x = torch.relu(self.conv1(x))
289
+
290
+ return x + decoder_map
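A rough shape check for the attention blocks defined above (dummy tensors; the channel counts are illustrative, not the ones used at every TRACER stage):

```python
import torch
from carvekit.ml.arch.tracerb7.att_modules import RFB_Block, ObjectAttention

feat = torch.randn(1, 80, 40, 40)
rfb = RFB_Block(in_channel=80, out_channel=32)
print(rfb(feat).shape)                        # torch.Size([1, 32, 40, 40])

oa = ObjectAttention(channel=80, kernel_size=3)
decoder_map = torch.randn(1, 1, 40, 40)       # single-channel decoder output
print(oa(decoder_map, feat).shape)            # torch.Size([1, 1, 40, 40])
```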
carvekit/ml/arch/tracerb7/conv_modules.py ADDED
@@ -0,0 +1,88 @@
1
+ """
2
+ Source url: https://github.com/Karel911/TRACER
3
+ Author: Min Seok Lee and Wooseok Shin
4
+ License: Apache License 2.0
5
+ """
6
+ import torch.nn as nn
7
+
8
+
9
+ class BasicConv2d(nn.Module):
10
+ def __init__(
11
+ self,
12
+ in_channel,
13
+ out_channel,
14
+ kernel_size,
15
+ stride=(1, 1),
16
+ padding=(0, 0),
17
+ dilation=(1, 1),
18
+ ):
19
+ super(BasicConv2d, self).__init__()
20
+ self.conv = nn.Conv2d(
21
+ in_channel,
22
+ out_channel,
23
+ kernel_size=kernel_size,
24
+ stride=stride,
25
+ padding=padding,
26
+ dilation=dilation,
27
+ bias=False,
28
+ )
29
+ self.bn = nn.BatchNorm2d(out_channel)
30
+ self.selu = nn.SELU()
31
+
32
+ def forward(self, x):
33
+ x = self.conv(x)
34
+ x = self.bn(x)
35
+ x = self.selu(x)
36
+
37
+ return x
38
+
39
+
40
+ class DWConv(nn.Module):
41
+ def __init__(self, in_channel, out_channel, kernel, dilation, padding):
42
+ super(DWConv, self).__init__()
43
+ self.out_channel = out_channel
44
+ self.DWConv = nn.Conv2d(
45
+ in_channel,
46
+ out_channel,
47
+ kernel_size=kernel,
48
+ padding=padding,
49
+ groups=in_channel,
50
+ dilation=dilation,
51
+ bias=False,
52
+ )
53
+ self.bn = nn.BatchNorm2d(out_channel)
54
+ self.selu = nn.SELU()
55
+
56
+ def forward(self, x):
57
+ x = self.DWConv(x)
58
+ out = self.selu(self.bn(x))
59
+
60
+ return out
61
+
62
+
63
+ class DWSConv(nn.Module):
64
+ def __init__(self, in_channel, out_channel, kernel, padding, kernels_per_layer):
65
+ super(DWSConv, self).__init__()
66
+ self.out_channel = out_channel
67
+ self.DWConv = nn.Conv2d(
68
+ in_channel,
69
+ in_channel * kernels_per_layer,
70
+ kernel_size=kernel,
71
+ padding=padding,
72
+ groups=in_channel,
73
+ bias=False,
74
+ )
75
+ self.bn = nn.BatchNorm2d(in_channel * kernels_per_layer)
76
+ self.selu = nn.SELU()
77
+ self.PWConv = nn.Conv2d(
78
+ in_channel * kernels_per_layer, out_channel, kernel_size=1, bias=False
79
+ )
80
+ self.bn2 = nn.BatchNorm2d(out_channel)
81
+
82
+ def forward(self, x):
83
+ x = self.DWConv(x)
84
+ x = self.selu(self.bn(x))
85
+ out = self.PWConv(x)
86
+ out = self.selu(self.bn2(out))
87
+
88
+ return out
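The shape contract of these blocks, shown with dummy sizes (illustrative only):

```python
import torch
from carvekit.ml.arch.tracerb7.conv_modules import BasicConv2d, DWSConv

x = torch.randn(2, 64, 56, 56)
print(BasicConv2d(64, 32, kernel_size=3, padding=1)(x).shape)                # torch.Size([2, 32, 56, 56])
print(DWSConv(64, 128, kernel=3, padding=1, kernels_per_layer=1)(x).shape)   # torch.Size([2, 128, 56, 56])
```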
carvekit/ml/arch/tracerb7/effi_utils.py ADDED
@@ -0,0 +1,579 @@
1
+ """
2
+ Original author: lukemelas (github username)
3
+ Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
4
+ With adjustments and added comments by workingcoder (github username).
5
+ License: Apache License 2.0
6
+ Reimplemented: Min Seok Lee and Wooseok Shin
7
+ """
8
+
9
+ import collections
10
+ import re
11
+ from functools import partial
12
+
13
+ import math
14
+ import torch
15
+ from torch import nn
16
+ from torch.nn import functional as F
17
+
18
+ # Parameters for the entire model (stem, all blocks, and head)
19
+ GlobalParams = collections.namedtuple(
20
+ "GlobalParams",
21
+ [
22
+ "width_coefficient",
23
+ "depth_coefficient",
24
+ "image_size",
25
+ "dropout_rate",
26
+ "num_classes",
27
+ "batch_norm_momentum",
28
+ "batch_norm_epsilon",
29
+ "drop_connect_rate",
30
+ "depth_divisor",
31
+ "min_depth",
32
+ "include_top",
33
+ ],
34
+ )
35
+
36
+ # Parameters for an individual model block
37
+ BlockArgs = collections.namedtuple(
38
+ "BlockArgs",
39
+ [
40
+ "num_repeat",
41
+ "kernel_size",
42
+ "stride",
43
+ "expand_ratio",
44
+ "input_filters",
45
+ "output_filters",
46
+ "se_ratio",
47
+ "id_skip",
48
+ ],
49
+ )
50
+
51
+ # Set GlobalParams and BlockArgs's defaults
52
+ GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
53
+ BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
54
+
55
+
56
+ # An ordinary implementation of Swish function
57
+ class Swish(nn.Module):
58
+ def forward(self, x):
59
+ return x * torch.sigmoid(x)
60
+
61
+
62
+ # A memory-efficient implementation of Swish function
63
+ class SwishImplementation(torch.autograd.Function):
64
+ @staticmethod
65
+ def forward(ctx, i):
66
+ result = i * torch.sigmoid(i)
67
+ ctx.save_for_backward(i)
68
+ return result
69
+
70
+ @staticmethod
71
+ def backward(ctx, grad_output):
72
+ i = ctx.saved_tensors[0]
73
+ sigmoid_i = torch.sigmoid(i)
74
+ return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
75
+
76
+
77
+ class MemoryEfficientSwish(nn.Module):
78
+ def forward(self, x):
79
+ return SwishImplementation.apply(x)
80
+
81
+
82
+ def round_filters(filters, global_params):
83
+ """Calculate and round number of filters based on width multiplier.
84
+ Use width_coefficient, depth_divisor and min_depth of global_params.
85
+
86
+ Args:
87
+ filters (int): Filters number to be calculated.
88
+ global_params (namedtuple): Global params of the model.
89
+
90
+ Returns:
91
+ new_filters: The rounded number of filters.
92
+ """
93
+ multiplier = global_params.width_coefficient
94
+ if not multiplier:
95
+ return filters
96
+ divisor = global_params.depth_divisor
97
+ min_depth = global_params.min_depth
98
+ filters *= multiplier
99
+ min_depth = min_depth or divisor # pay attention to this line when using min_depth
100
+ # follow the formula transferred from official TensorFlow implementation
101
+ new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
102
+ if new_filters < 0.9 * filters: # prevent rounding by more than 10%
103
+ new_filters += divisor
104
+ return int(new_filters)
105
+
106
+
107
+ def round_repeats(repeats, global_params):
108
+ """Calculate module's repeat number of a block based on depth multiplier.
109
+ Use depth_coefficient of global_params.
110
+
111
+ Args:
112
+ repeats (int): num_repeat to be calculated.
113
+ global_params (namedtuple): Global params of the model.
114
+
115
+ Returns:
116
+ new_repeat: The rounded number of repeats.
117
+ """
118
+ multiplier = global_params.depth_coefficient
119
+ if not multiplier:
120
+ return repeats
121
+ # follow the formula transferred from official TensorFlow implementation
122
+ return int(math.ceil(multiplier * repeats))
123
+
124
+
125
+ def drop_connect(inputs, p, training):
126
+ """Drop connect.
127
+
128
+ Args:
129
+ input (tensor: BCWH): Input of this structure.
130
+ p (float: 0.0~1.0): Probability of drop connection.
131
+ training (bool): The running mode.
132
+
133
+ Returns:
134
+ output: Output after drop connection.
135
+ """
136
+ assert 0 <= p <= 1, "p must be in range of [0,1]"
137
+
138
+ if not training:
139
+ return inputs
140
+
141
+ batch_size = inputs.shape[0]
142
+ keep_prob = 1 - p
143
+
144
+ # generate binary_tensor mask according to probability (p for 0, 1-p for 1)
145
+ random_tensor = keep_prob
146
+ random_tensor += torch.rand(
147
+ [batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device
148
+ )
149
+ binary_tensor = torch.floor(random_tensor)
150
+
151
+ output = inputs / keep_prob * binary_tensor
152
+ return output
153
+
154
+
155
+ def get_width_and_height_from_size(x):
156
+ """Obtain height and width from x.
157
+
158
+ Args:
159
+ x (int, tuple or list): Data size.
160
+
161
+ Returns:
162
+ size: A tuple or list (H,W).
163
+ """
164
+ if isinstance(x, int):
165
+ return x, x
166
+ if isinstance(x, list) or isinstance(x, tuple):
167
+ return x
168
+ else:
169
+ raise TypeError()
170
+
171
+
172
+ def calculate_output_image_size(input_image_size, stride):
173
+ """Calculates the output image size when using Conv2dSamePadding with a stride.
174
+ Necessary for static padding. Thanks to mannatsingh for pointing this out.
175
+
176
+ Args:
177
+ input_image_size (int, tuple or list): Size of input image.
178
+ stride (int, tuple or list): Conv2d operation's stride.
179
+
180
+ Returns:
181
+ output_image_size: A list [H,W].
182
+ """
183
+ if input_image_size is None:
184
+ return None
185
+ image_height, image_width = get_width_and_height_from_size(input_image_size)
186
+ stride = stride if isinstance(stride, int) else stride[0]
187
+ image_height = int(math.ceil(image_height / stride))
188
+ image_width = int(math.ceil(image_width / stride))
189
+ return [image_height, image_width]
190
+
191
+
192
+ # Note:
193
+ # The following 'SamePadding' functions make output size equal ceil(input size/stride).
194
+ # Only when stride equals 1 can the output size match the input size.
195
+ # Don't be confused by their function names!
196
+
197
+
198
+ def get_same_padding_conv2d(image_size=None):
199
+ """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
200
+ Static padding is necessary for ONNX exporting of models.
201
+
202
+ Args:
203
+ image_size (int or tuple): Size of the image.
204
+
205
+ Returns:
206
+ Conv2dDynamicSamePadding or Conv2dStaticSamePadding.
207
+ """
208
+ if image_size is None:
209
+ return Conv2dDynamicSamePadding
210
+ else:
211
+ return partial(Conv2dStaticSamePadding, image_size=image_size)
212
+
213
+
214
+ class Conv2dDynamicSamePadding(nn.Conv2d):
215
+ """2D Convolutions like TensorFlow, for a dynamic image size.
216
+ The padding is operated in forward function by calculating dynamically.
217
+ """
218
+
219
+ # Tips for 'SAME' mode padding.
220
+ # Given the following:
221
+ # i: width or height
222
+ # s: stride
223
+ # k: kernel size
224
+ # d: dilation
225
+ # p: padding
226
+ # Output after Conv2d:
227
+ # o = floor((i+p-((k-1)*d+1))/s+1)
228
+ # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1),
229
+ # => p = (i-1)*s+((k-1)*d+1)-i
230
+
231
+ def __init__(
232
+ self,
233
+ in_channels,
234
+ out_channels,
235
+ kernel_size,
236
+ stride=1,
237
+ dilation=1,
238
+ groups=1,
239
+ bias=True,
240
+ ):
241
+ super().__init__(
242
+ in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias
243
+ )
244
+ self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
245
+
246
+ def forward(self, x):
247
+ ih, iw = x.size()[-2:]
248
+ kh, kw = self.weight.size()[-2:]
249
+ sh, sw = self.stride
250
+ oh, ow = math.ceil(ih / sh), math.ceil(
251
+ iw / sw
252
+ ) # change the output size according to stride ! ! !
253
+ pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
254
+ pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
255
+ if pad_h > 0 or pad_w > 0:
256
+ x = F.pad(
257
+ x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]
258
+ )
259
+ return F.conv2d(
260
+ x,
261
+ self.weight,
262
+ self.bias,
263
+ self.stride,
264
+ self.padding,
265
+ self.dilation,
266
+ self.groups,
267
+ )
268
+
269
+
270
+ class Conv2dStaticSamePadding(nn.Conv2d):
271
+ """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size.
272
+ The padding module is calculated in the constructor, then applied in forward.
273
+ """
274
+
275
+ # With the same calculation as Conv2dDynamicSamePadding
276
+
277
+ def __init__(
278
+ self,
279
+ in_channels,
280
+ out_channels,
281
+ kernel_size,
282
+ stride=1,
283
+ image_size=None,
284
+ **kwargs
285
+ ):
286
+ super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
287
+ self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
288
+
289
+ # Calculate padding based on image size and save it
290
+ assert image_size is not None
291
+ ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
292
+ kh, kw = self.weight.size()[-2:]
293
+ sh, sw = self.stride
294
+ oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
295
+ pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
296
+ pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
297
+ if pad_h > 0 or pad_w > 0:
298
+ self.static_padding = nn.ZeroPad2d(
299
+ (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)
300
+ )
301
+ else:
302
+ self.static_padding = nn.Identity()
303
+
304
+ def forward(self, x):
305
+ x = self.static_padding(x)
306
+ x = F.conv2d(
307
+ x,
308
+ self.weight,
309
+ self.bias,
310
+ self.stride,
311
+ self.padding,
312
+ self.dilation,
313
+ self.groups,
314
+ )
315
+ return x
316
+
317
+
318
+ def get_same_padding_maxPool2d(image_size=None):
319
+ """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
320
+ Static padding is necessary for ONNX exporting of models.
321
+
322
+ Args:
323
+ image_size (int or tuple): Size of the image.
324
+
325
+ Returns:
326
+ MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding.
327
+ """
328
+ if image_size is None:
329
+ return MaxPool2dDynamicSamePadding
330
+ else:
331
+ return partial(MaxPool2dStaticSamePadding, image_size=image_size)
332
+
333
+
334
+ class MaxPool2dDynamicSamePadding(nn.MaxPool2d):
335
+ """2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size.
336
+ The padding is operated in forward function by calculating dynamically.
337
+ """
338
+
339
+ def __init__(
340
+ self,
341
+ kernel_size,
342
+ stride,
343
+ padding=0,
344
+ dilation=1,
345
+ return_indices=False,
346
+ ceil_mode=False,
347
+ ):
348
+ super().__init__(
349
+ kernel_size, stride, padding, dilation, return_indices, ceil_mode
350
+ )
351
+ self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride
352
+ self.kernel_size = (
353
+ [self.kernel_size] * 2
354
+ if isinstance(self.kernel_size, int)
355
+ else self.kernel_size
356
+ )
357
+ self.dilation = (
358
+ [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation
359
+ )
360
+
361
+ def forward(self, x):
362
+ ih, iw = x.size()[-2:]
363
+ kh, kw = self.kernel_size
364
+ sh, sw = self.stride
365
+ oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
366
+ pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
367
+ pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
368
+ if pad_h > 0 or pad_w > 0:
369
+ x = F.pad(
370
+ x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]
371
+ )
372
+ return F.max_pool2d(
373
+ x,
374
+ self.kernel_size,
375
+ self.stride,
376
+ self.padding,
377
+ self.dilation,
378
+ self.ceil_mode,
379
+ self.return_indices,
380
+ )
381
+
382
+
383
+ class MaxPool2dStaticSamePadding(nn.MaxPool2d):
384
+ """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size.
385
+ The padding module is calculated in the constructor, then applied in forward.
386
+ """
387
+
388
+ def __init__(self, kernel_size, stride, image_size=None, **kwargs):
389
+ super().__init__(kernel_size, stride, **kwargs)
390
+ self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride
391
+ self.kernel_size = (
392
+ [self.kernel_size] * 2
393
+ if isinstance(self.kernel_size, int)
394
+ else self.kernel_size
395
+ )
396
+ self.dilation = (
397
+ [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation
398
+ )
399
+
400
+ # Calculate padding based on image size and save it
401
+ assert image_size is not None
402
+ ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
403
+ kh, kw = self.kernel_size
404
+ sh, sw = self.stride
405
+ oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
406
+ pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
407
+ pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
408
+ if pad_h > 0 or pad_w > 0:
409
+ self.static_padding = nn.ZeroPad2d(
410
+ (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)
411
+ )
412
+ else:
413
+ self.static_padding = nn.Identity()
414
+
415
+ def forward(self, x):
416
+ x = self.static_padding(x)
417
+ x = F.max_pool2d(
418
+ x,
419
+ self.kernel_size,
420
+ self.stride,
421
+ self.padding,
422
+ self.dilation,
423
+ self.ceil_mode,
424
+ self.return_indices,
425
+ )
426
+ return x
427
+
428
+
429
+ class BlockDecoder(object):
430
+ """Block Decoder for readability,
431
+ straight from the official TensorFlow repository.
432
+ """
433
+
434
+ @staticmethod
435
+ def _decode_block_string(block_string):
436
+ """Get a block through a string notation of arguments.
437
+
438
+ Args:
439
+ block_string (str): A string notation of arguments.
440
+ Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'.
441
+
442
+ Returns:
443
+ BlockArgs: The namedtuple defined at the top of this file.
444
+ """
445
+ assert isinstance(block_string, str)
446
+
447
+ ops = block_string.split("_")
448
+ options = {}
449
+ for op in ops:
450
+ splits = re.split(r"(\d.*)", op)
451
+ if len(splits) >= 2:
452
+ key, value = splits[:2]
453
+ options[key] = value
454
+
455
+ # Check stride
456
+ assert ("s" in options and len(options["s"]) == 1) or (
457
+ len(options["s"]) == 2 and options["s"][0] == options["s"][1]
458
+ )
459
+
460
+ return BlockArgs(
461
+ num_repeat=int(options["r"]),
462
+ kernel_size=int(options["k"]),
463
+ stride=[int(options["s"][0])],
464
+ expand_ratio=int(options["e"]),
465
+ input_filters=int(options["i"]),
466
+ output_filters=int(options["o"]),
467
+ se_ratio=float(options["se"]) if "se" in options else None,
468
+ id_skip=("noskip" not in block_string),
469
+ )
470
+
471
+ @staticmethod
472
+ def _encode_block_string(block):
473
+ """Encode a block to a string.
474
+
475
+ Args:
476
+ block (namedtuple): A BlockArgs type argument.
477
+
478
+ Returns:
479
+ block_string: A String form of BlockArgs.
480
+ """
481
+ args = [
482
+ "r%d" % block.num_repeat,
483
+ "k%d" % block.kernel_size,
484
+ "s%d%d" % (block.strides[0], block.strides[1]),
485
+ "e%s" % block.expand_ratio,
486
+ "i%d" % block.input_filters,
487
+ "o%d" % block.output_filters,
488
+ ]
489
+ if 0 < block.se_ratio <= 1:
490
+ args.append("se%s" % block.se_ratio)
491
+ if block.id_skip is False:
492
+ args.append("noskip")
493
+ return "_".join(args)
494
+
495
+ @staticmethod
496
+ def decode(string_list):
497
+ """Decode a list of string notations to specify blocks inside the network.
498
+
499
+ Args:
500
+ string_list (list[str]): A list of strings, each string is a notation of block.
501
+
502
+ Returns:
503
+ blocks_args: A list of BlockArgs namedtuples of block args.
504
+ """
505
+ assert isinstance(string_list, list)
506
+ blocks_args = []
507
+ for block_string in string_list:
508
+ blocks_args.append(BlockDecoder._decode_block_string(block_string))
509
+ return blocks_args
510
+
511
+ @staticmethod
512
+ def encode(blocks_args):
513
+ """Encode a list of BlockArgs to a list of strings.
514
+
515
+ Args:
516
+ blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args.
517
+
518
+ Returns:
519
+ block_strings: A list of strings, each string is a notation of block.
520
+ """
521
+ block_strings = []
522
+ for block in blocks_args:
523
+ block_strings.append(BlockDecoder._encode_block_string(block))
524
+ return block_strings
525
+
526
+
527
+ def create_block_args(
528
+ width_coefficient=None,
529
+ depth_coefficient=None,
530
+ image_size=None,
531
+ dropout_rate=0.2,
532
+ drop_connect_rate=0.2,
533
+ num_classes=1000,
534
+ include_top=True,
535
+ ):
536
+ """Create BlockArgs and GlobalParams for efficientnet model.
537
+
538
+ Args:
539
+ width_coefficient (float)
540
+ depth_coefficient (float)
541
+ image_size (int)
542
+ dropout_rate (float)
543
+ drop_connect_rate (float)
544
+ num_classes (int)
545
+
546
+ Meaning as the name suggests.
547
+
548
+ Returns:
549
+ blocks_args, global_params.
550
+ """
551
+
552
+ # Blocks args for the whole model(efficientnet-b0 by default)
553
+ # It will be modified in the construction of EfficientNet Class according to model
554
+ blocks_args = [
555
+ "r1_k3_s11_e1_i32_o16_se0.25",
556
+ "r2_k3_s22_e6_i16_o24_se0.25",
557
+ "r2_k5_s22_e6_i24_o40_se0.25",
558
+ "r3_k3_s22_e6_i40_o80_se0.25",
559
+ "r3_k5_s11_e6_i80_o112_se0.25",
560
+ "r4_k5_s22_e6_i112_o192_se0.25",
561
+ "r1_k3_s11_e6_i192_o320_se0.25",
562
+ ]
563
+ blocks_args = BlockDecoder.decode(blocks_args)
564
+
565
+ global_params = GlobalParams(
566
+ width_coefficient=width_coefficient,
567
+ depth_coefficient=depth_coefficient,
568
+ image_size=image_size,
569
+ dropout_rate=dropout_rate,
570
+ num_classes=num_classes,
571
+ batch_norm_momentum=0.99,
572
+ batch_norm_epsilon=1e-3,
573
+ drop_connect_rate=drop_connect_rate,
574
+ depth_divisor=8,
575
+ min_depth=None,
576
+ include_top=include_top,
577
+ )
578
+
579
+ return blocks_args, global_params
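The block-string notation is easiest to see with a quick round trip through the helpers in this file (a sketch, no model is built; the coefficients match the B7-style scaling used by the encoder defined later in this commit):

```python
from carvekit.ml.arch.tracerb7.effi_utils import create_block_args, round_filters

blocks_args, global_params = create_block_args(
    width_coefficient=2.0, depth_coefficient=3.1, image_size=600
)
first = blocks_args[0]   # parsed from 'r1_k3_s11_e1_i32_o16_se0.25'
print(first.kernel_size, first.stride, first.input_filters, first.output_filters)  # 3 [1] 32 16

# filters are re-scaled by the width multiplier before blocks are built
print(round_filters(32, global_params))   # 64
```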
carvekit/ml/arch/tracerb7/efficientnet.py ADDED
@@ -0,0 +1,325 @@
1
+ """
2
+ Source url: https://github.com/lukemelas/EfficientNet-PyTorch
3
+ Modified by Min Seok Lee, Wooseok Shin, Nikita Selin
4
+ License: Apache License 2.0
5
+ Changes:
6
+ - Added support for extracting edge features
7
+ - Added support for extracting object features at different levels
8
+ - Refactored the code
9
+ """
10
+ from typing import Any, List
11
+
12
+ import torch
13
+ from torch import nn
14
+ from torch.nn import functional as F
15
+
16
+ from carvekit.ml.arch.tracerb7.effi_utils import (
17
+ get_same_padding_conv2d,
18
+ calculate_output_image_size,
19
+ MemoryEfficientSwish,
20
+ drop_connect,
21
+ round_filters,
22
+ round_repeats,
23
+ Swish,
24
+ create_block_args,
25
+ )
26
+
27
+
28
+ class MBConvBlock(nn.Module):
29
+ """Mobile Inverted Residual Bottleneck Block.
30
+
31
+ Args:
32
+ block_args (namedtuple): BlockArgs, defined in utils.py.
33
+ global_params (namedtuple): GlobalParam, defined in utils.py.
34
+ image_size (tuple or list): [image_height, image_width].
35
+
36
+ References:
37
+ [1] https://arxiv.org/abs/1704.04861 (MobileNet v1)
38
+ [2] https://arxiv.org/abs/1801.04381 (MobileNet v2)
39
+ [3] https://arxiv.org/abs/1905.02244 (MobileNet v3)
40
+ """
41
+
42
+ def __init__(self, block_args, global_params, image_size=None):
43
+ super().__init__()
44
+ self._block_args = block_args
45
+ self._bn_mom = (
46
+ 1 - global_params.batch_norm_momentum
47
+ ) # pytorch's difference from tensorflow
48
+ self._bn_eps = global_params.batch_norm_epsilon
49
+ self.has_se = (self._block_args.se_ratio is not None) and (
50
+ 0 < self._block_args.se_ratio <= 1
51
+ )
52
+ self.id_skip = (
53
+ block_args.id_skip
54
+ ) # whether to use skip connection and drop connect
55
+
56
+ # Expansion phase (Inverted Bottleneck)
57
+ inp = self._block_args.input_filters # number of input channels
58
+ oup = (
59
+ self._block_args.input_filters * self._block_args.expand_ratio
60
+ ) # number of output channels
61
+ if self._block_args.expand_ratio != 1:
62
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
63
+ self._expand_conv = Conv2d(
64
+ in_channels=inp, out_channels=oup, kernel_size=1, bias=False
65
+ )
66
+ self._bn0 = nn.BatchNorm2d(
67
+ num_features=oup, momentum=self._bn_mom, eps=self._bn_eps
68
+ )
69
+ # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size
70
+
71
+ # Depthwise convolution phase
72
+ k = self._block_args.kernel_size
73
+ s = self._block_args.stride
74
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
75
+ self._depthwise_conv = Conv2d(
76
+ in_channels=oup,
77
+ out_channels=oup,
78
+ groups=oup, # groups makes it depthwise
79
+ kernel_size=k,
80
+ stride=s,
81
+ bias=False,
82
+ )
83
+ self._bn1 = nn.BatchNorm2d(
84
+ num_features=oup, momentum=self._bn_mom, eps=self._bn_eps
85
+ )
86
+ image_size = calculate_output_image_size(image_size, s)
87
+
88
+ # Squeeze and Excitation layer, if desired
89
+ if self.has_se:
90
+ Conv2d = get_same_padding_conv2d(image_size=(1, 1))
91
+ num_squeezed_channels = max(
92
+ 1, int(self._block_args.input_filters * self._block_args.se_ratio)
93
+ )
94
+ self._se_reduce = Conv2d(
95
+ in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1
96
+ )
97
+ self._se_expand = Conv2d(
98
+ in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1
99
+ )
100
+
101
+ # Pointwise convolution phase
102
+ final_oup = self._block_args.output_filters
103
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
104
+ self._project_conv = Conv2d(
105
+ in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False
106
+ )
107
+ self._bn2 = nn.BatchNorm2d(
108
+ num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps
109
+ )
110
+ self._swish = MemoryEfficientSwish()
111
+
112
+ def forward(self, inputs, drop_connect_rate=None):
113
+ """MBConvBlock's forward function.
114
+
115
+ Args:
116
+ inputs (tensor): Input tensor.
117
+ drop_connect_rate (float): Drop connect rate, between 0 and 1.
118
+
119
+ Returns:
120
+ Output of this block after processing.
121
+ """
122
+
123
+ # Expansion and Depthwise Convolution
124
+ x = inputs
125
+ if self._block_args.expand_ratio != 1:
126
+ x = self._expand_conv(inputs)
127
+ x = self._bn0(x)
128
+ x = self._swish(x)
129
+
130
+ x = self._depthwise_conv(x)
131
+ x = self._bn1(x)
132
+ x = self._swish(x)
133
+
134
+ # Squeeze and Excitation
135
+ if self.has_se:
136
+ x_squeezed = F.adaptive_avg_pool2d(x, 1)
137
+ x_squeezed = self._se_reduce(x_squeezed)
138
+ x_squeezed = self._swish(x_squeezed)
139
+ x_squeezed = self._se_expand(x_squeezed)
140
+ x = torch.sigmoid(x_squeezed) * x
141
+
142
+ # Pointwise Convolution
143
+ x = self._project_conv(x)
144
+ x = self._bn2(x)
145
+
146
+ # Skip connection and drop connect
147
+ input_filters, output_filters = (
148
+ self._block_args.input_filters,
149
+ self._block_args.output_filters,
150
+ )
151
+ if (
152
+ self.id_skip
153
+ and self._block_args.stride == 1
154
+ and input_filters == output_filters
155
+ ):
156
+ # The combination of skip connection and drop connect brings about stochastic depth.
157
+ if drop_connect_rate:
158
+ x = drop_connect(x, p=drop_connect_rate, training=self.training)
159
+ x = x + inputs # skip connection
160
+ return x
161
+
162
+ def set_swish(self, memory_efficient=True):
163
+ """Sets swish function as memory efficient (for training) or standard (for export).
164
+
165
+ Args:
166
+ memory_efficient (bool): Whether to use memory-efficient version of swish.
167
+ """
168
+ self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
169
+
170
+
171
+ class EfficientNet(nn.Module):
172
+ def __init__(self, blocks_args=None, global_params=None):
173
+ super().__init__()
174
+ assert isinstance(blocks_args, list), "blocks_args should be a list"
175
+ assert len(blocks_args) > 0, "blocks_args must not be empty"
176
+ self._global_params = global_params
177
+ self._blocks_args = blocks_args
178
+
179
+ # Batch norm parameters
180
+ bn_mom = 1 - self._global_params.batch_norm_momentum
181
+ bn_eps = self._global_params.batch_norm_epsilon
182
+
183
+ # Get stem static or dynamic convolution depending on image size
184
+ image_size = global_params.image_size
185
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
186
+
187
+ # Stem
188
+ in_channels = 3 # rgb
189
+ out_channels = round_filters(
190
+ 32, self._global_params
191
+ ) # number of output channels
192
+ self._conv_stem = Conv2d(
193
+ in_channels, out_channels, kernel_size=3, stride=2, bias=False
194
+ )
195
+ self._bn0 = nn.BatchNorm2d(
196
+ num_features=out_channels, momentum=bn_mom, eps=bn_eps
197
+ )
198
+ image_size = calculate_output_image_size(image_size, 2)
199
+
200
+ # Build blocks
201
+ self._blocks = nn.ModuleList([])
202
+ for block_args in self._blocks_args:
203
+
204
+ # Update block input and output filters based on depth multiplier.
205
+ block_args = block_args._replace(
206
+ input_filters=round_filters(
207
+ block_args.input_filters, self._global_params
208
+ ),
209
+ output_filters=round_filters(
210
+ block_args.output_filters, self._global_params
211
+ ),
212
+ num_repeat=round_repeats(block_args.num_repeat, self._global_params),
213
+ )
214
+
215
+ # The first block needs to take care of stride and filter size increase.
216
+ self._blocks.append(
217
+ MBConvBlock(block_args, self._global_params, image_size=image_size)
218
+ )
219
+ image_size = calculate_output_image_size(image_size, block_args.stride)
220
+ if block_args.num_repeat > 1: # modify block_args to keep same output size
221
+ block_args = block_args._replace(
222
+ input_filters=block_args.output_filters, stride=1
223
+ )
224
+ for _ in range(block_args.num_repeat - 1):
225
+ self._blocks.append(
226
+ MBConvBlock(block_args, self._global_params, image_size=image_size)
227
+ )
228
+ # image_size = calculate_output_image_size(image_size, block_args.stride) # stride = 1
229
+
230
+ self._swish = MemoryEfficientSwish()
231
+
232
+ def set_swish(self, memory_efficient=True):
233
+ """Sets swish function as memory efficient (for training) or standard (for export).
234
+
235
+ Args:
236
+ memory_efficient (bool): Whether to use memory-efficient version of swish.
237
+
238
+ """
239
+ self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
240
+ for block in self._blocks:
241
+ block.set_swish(memory_efficient)
242
+
243
+ def extract_endpoints(self, inputs):
244
+ endpoints = dict()
245
+
246
+ # Stem
247
+ x = self._swish(self._bn0(self._conv_stem(inputs)))
248
+ prev_x = x
249
+
250
+ # Blocks
251
+ for idx, block in enumerate(self._blocks):
252
+ drop_connect_rate = self._global_params.drop_connect_rate
253
+ if drop_connect_rate:
254
+ drop_connect_rate *= float(idx) / len(
255
+ self._blocks
256
+ ) # scale drop connect_rate
257
+ x = block(x, drop_connect_rate=drop_connect_rate)
258
+ if prev_x.size(2) > x.size(2):
259
+ endpoints["reduction_{}".format(len(endpoints) + 1)] = prev_x
260
+ prev_x = x
261
+
262
+ # Head
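+ # NOTE: this trimmed-down class never defines _conv_head / _bn1 in __init__,
+ # so extract_endpoints cannot run as written; the B7 encoder below uses
+ # initial_conv() and get_blocks() instead.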
263
+ x = self._swish(self._bn1(self._conv_head(x)))
264
+ endpoints["reduction_{}".format(len(endpoints) + 1)] = x
265
+
266
+ return endpoints
267
+
268
+ def _change_in_channels(self, in_channels):
269
+ """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.
270
+
271
+ Args:
272
+ in_channels (int): Input data's channel number.
273
+ """
274
+ if in_channels != 3:
275
+ Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size)
276
+ out_channels = round_filters(32, self._global_params)
277
+ self._conv_stem = Conv2d(
278
+ in_channels, out_channels, kernel_size=3, stride=2, bias=False
279
+ )
280
+
281
+
282
+ class EfficientEncoderB7(EfficientNet):
283
+ def __init__(self):
284
+ super().__init__(
285
+ *create_block_args(
286
+ width_coefficient=2.0,
287
+ depth_coefficient=3.1,
288
+ dropout_rate=0.5,
289
+ image_size=600,
290
+ )
291
+ )
292
+ self._change_in_channels(3)
293
+ self.block_idx = [10, 17, 37, 54]
294
+ self.channels = [48, 80, 224, 640]
295
+
296
+ def initial_conv(self, inputs):
297
+ x = self._swish(self._bn0(self._conv_stem(inputs)))
298
+ return x
299
+
300
+ def get_blocks(self, x, H, W, block_idx):
301
+ features = []
302
+ for idx, block in enumerate(self._blocks):
303
+ drop_connect_rate = self._global_params.drop_connect_rate
304
+ if drop_connect_rate:
305
+ drop_connect_rate *= float(idx) / len(
306
+ self._blocks
307
+ ) # scale drop connect_rate
308
+ x = block(x, drop_connect_rate=drop_connect_rate)
309
+ if idx == block_idx[0]:
310
+ features.append(x.clone())
311
+ if idx == block_idx[1]:
312
+ features.append(x.clone())
313
+ if idx == block_idx[2]:
314
+ features.append(x.clone())
315
+ if idx == block_idx[3]:
316
+ features.append(x.clone())
317
+
318
+ return features
319
+
320
+ def forward(self, inputs: torch.Tensor) -> List[Any]:
321
+ B, C, H, W = inputs.size()
322
+ x = self.initial_conv(inputs) # Prepare input for the backbone
323
+ return self.get_blocks(
324
+ x, H, W, block_idx=self.block_idx
325
+ ) # Get backbone features and edge maps
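A smoke test for the encoder as defined in this commit (random weights; a sketch only, the input size here is arbitrary):

```python
import torch
from carvekit.ml.arch.tracerb7.efficientnet import EfficientEncoderB7

encoder = EfficientEncoderB7().eval()
with torch.no_grad():
    features = encoder(torch.randn(1, 3, 320, 320))
print([tuple(f.shape) for f in features])
# four feature maps at strides 4/8/16/32 with channels [48, 80, 224, 640]
```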
carvekit/ml/arch/tracerb7/tracer.py ADDED
@@ -0,0 +1,97 @@
1
+ """
2
+ Source url: https://github.com/Karel911/TRACER
3
+ Author: Min Seok Lee and Wooseok Shin
4
+ Modified by Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
5
+ License: Apache License 2.0
6
+ Changes:
7
+ - Refactored code
8
+ - Removed unused code
9
+ - Added comments
10
+ """
11
+
12
+ import torch
13
+ import torch.nn as nn
14
+ import torch.nn.functional as F
15
+ from typing import List, Optional, Tuple
16
+
17
+ from torch import Tensor
18
+
19
+ from carvekit.ml.arch.tracerb7.efficientnet import EfficientEncoderB7
20
+ from carvekit.ml.arch.tracerb7.att_modules import (
21
+ RFB_Block,
22
+ aggregation,
23
+ ObjectAttention,
24
+ )
25
+
26
+
27
+ class TracerDecoder(nn.Module):
28
+ """Tracer Decoder"""
29
+
30
+ def __init__(
31
+ self,
32
+ encoder: EfficientEncoderB7,
33
+ features_channels: Optional[List[int]] = None,
34
+ rfb_channel: Optional[List[int]] = None,
35
+ ):
36
+ """
37
+ Initialize the tracer decoder.
38
+
39
+ Args:
40
+ encoder: The encoder to use.
41
+ features_channels: The channels of the backbone features at different stages. default: [48, 80, 224, 640]
42
+ rfb_channel: The channels of the RFB features. default: [32, 64, 128]
43
+ """
44
+ super().__init__()
45
+ if rfb_channel is None:
46
+ rfb_channel = [32, 64, 128]
47
+ if features_channels is None:
48
+ features_channels = [48, 80, 224, 640]
49
+ self.encoder = encoder
50
+ self.features_channels = features_channels
51
+
52
+ # Receptive Field Blocks
53
+ features_channels = rfb_channel
54
+ self.rfb2 = RFB_Block(self.features_channels[1], features_channels[0])
55
+ self.rfb3 = RFB_Block(self.features_channels[2], features_channels[1])
56
+ self.rfb4 = RFB_Block(self.features_channels[3], features_channels[2])
57
+
58
+ # Multi-level aggregation
59
+ self.agg = aggregation(features_channels)
60
+
61
+ # Object Attention
62
+ self.ObjectAttention2 = ObjectAttention(
63
+ channel=self.features_channels[1], kernel_size=3
64
+ )
65
+ self.ObjectAttention1 = ObjectAttention(
66
+ channel=self.features_channels[0], kernel_size=3
67
+ )
68
+
69
+ def forward(self, inputs: torch.Tensor) -> Tensor:
70
+ """
71
+ Forward pass of the tracer decoder.
72
+
73
+ Args:
74
+ inputs: Preprocessed images.
75
+
76
+ Returns:
77
+ Tensors of segmentation masks and mask of object edges.
78
+ """
79
+ features = self.encoder(inputs)
80
+ x3_rfb = self.rfb2(features[1])
81
+ x4_rfb = self.rfb3(features[2])
82
+ x5_rfb = self.rfb4(features[3])
83
+
84
+ D_0 = self.agg(x5_rfb, x4_rfb, x3_rfb)
85
+
86
+ ds_map0 = F.interpolate(D_0, scale_factor=8, mode="bilinear")
87
+
88
+ D_1 = self.ObjectAttention2(D_0, features[1])
89
+ ds_map1 = F.interpolate(D_1, scale_factor=8, mode="bilinear")
90
+
91
+ ds_map = F.interpolate(D_1, scale_factor=2, mode="bilinear")
92
+ D_2 = self.ObjectAttention1(ds_map, features[0])
93
+ ds_map2 = F.interpolate(D_2, scale_factor=4, mode="bilinear")
94
+
95
+ final_map = (ds_map2 + ds_map1 + ds_map0) / 3
96
+
97
+ return torch.sigmoid(final_map)
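Wiring the two TRACER pieces together (again a structural sketch with random weights; in practice a pretrained checkpoint such as tracer_b7.pth is loaded on top of this):

```python
import torch
from carvekit.ml.arch.tracerb7.efficientnet import EfficientEncoderB7
from carvekit.ml.arch.tracerb7.tracer import TracerDecoder

model = TracerDecoder(encoder=EfficientEncoderB7()).eval()
with torch.no_grad():
    mask = model(torch.randn(1, 3, 320, 320))
print(mask.shape, float(mask.min()), float(mask.max()))   # (1, 1, 320, 320), values in (0, 1)
```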
carvekit/ml/arch/u2net/__init__.py ADDED
File without changes
carvekit/ml/arch/u2net/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (184 Bytes). View file
 
carvekit/ml/arch/u2net/__pycache__/u2net.cpython-38.pyc ADDED
Binary file (6.13 kB). View file
 
carvekit/ml/arch/u2net/u2net.py ADDED
@@ -0,0 +1,172 @@
1
+ """
2
+ Modified by Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
3
+ Source url: https://github.com/xuebinqin/U-2-Net
4
+ License: Apache License 2.0
5
+ """
6
+ from typing import Union
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+
11
+ import math
12
+
13
+ __all__ = ["U2NETArchitecture"]
14
+
15
+
16
+ def _upsample_like(x, size):
17
+ return nn.Upsample(size=size, mode="bilinear", align_corners=False)(x)
18
+
19
+
20
+ def _size_map(x, height):
21
+ # {height: size} for Upsample
22
+ size = list(x.shape[-2:])
23
+ sizes = {}
24
+ for h in range(1, height):
25
+ sizes[h] = size
26
+ size = [math.ceil(w / 2) for w in size]
27
+ return sizes
28
+
29
+
30
+ class REBNCONV(nn.Module):
31
+ def __init__(self, in_ch=3, out_ch=3, dilate=1):
32
+ super(REBNCONV, self).__init__()
33
+
34
+ self.conv_s1 = nn.Conv2d(
35
+ in_ch, out_ch, 3, padding=1 * dilate, dilation=1 * dilate
36
+ )
37
+ self.bn_s1 = nn.BatchNorm2d(out_ch)
38
+ self.relu_s1 = nn.ReLU(inplace=True)
39
+
40
+ def forward(self, x):
41
+ return self.relu_s1(self.bn_s1(self.conv_s1(x)))
42
+
43
+
44
+ class RSU(nn.Module):
45
+ def __init__(self, name, height, in_ch, mid_ch, out_ch, dilated=False):
46
+ super(RSU, self).__init__()
47
+ self.name = name
48
+ self.height = height
49
+ self.dilated = dilated
50
+ self._make_layers(height, in_ch, mid_ch, out_ch, dilated)
51
+
52
+ def forward(self, x):
53
+ sizes = _size_map(x, self.height)
54
+ x = self.rebnconvin(x)
55
+
56
+ # U-Net like symmetric encoder-decoder structure
57
+ def unet(x, height=1):
58
+ if height < self.height:
59
+ x1 = getattr(self, f"rebnconv{height}")(x)
60
+ if not self.dilated and height < self.height - 1:
61
+ x2 = unet(getattr(self, "downsample")(x1), height + 1)
62
+ else:
63
+ x2 = unet(x1, height + 1)
64
+
65
+ x = getattr(self, f"rebnconv{height}d")(torch.cat((x2, x1), 1))
66
+ return (
67
+ _upsample_like(x, sizes[height - 1])
68
+ if not self.dilated and height > 1
69
+ else x
70
+ )
71
+ else:
72
+ return getattr(self, f"rebnconv{height}")(x)
73
+
74
+ return x + unet(x)
75
+
76
+ def _make_layers(self, height, in_ch, mid_ch, out_ch, dilated=False):
77
+ self.add_module("rebnconvin", REBNCONV(in_ch, out_ch))
78
+ self.add_module("downsample", nn.MaxPool2d(2, stride=2, ceil_mode=True))
79
+
80
+ self.add_module("rebnconv1", REBNCONV(out_ch, mid_ch))
81
+ self.add_module("rebnconv1d", REBNCONV(mid_ch * 2, out_ch))
82
+
83
+ for i in range(2, height):
84
+ dilate = 1 if not dilated else 2 ** (i - 1)
85
+ self.add_module(f"rebnconv{i}", REBNCONV(mid_ch, mid_ch, dilate=dilate))
86
+ self.add_module(
87
+ f"rebnconv{i}d", REBNCONV(mid_ch * 2, mid_ch, dilate=dilate)
88
+ )
89
+
90
+ dilate = 2 if not dilated else 2 ** (height - 1)
91
+ self.add_module(f"rebnconv{height}", REBNCONV(mid_ch, mid_ch, dilate=dilate))
92
+
93
+
94
+ class U2NETArchitecture(nn.Module):
95
+ def __init__(self, cfg_type: Union[dict, str] = "full", out_ch: int = 1):
96
+ super(U2NETArchitecture, self).__init__()
97
+ if isinstance(cfg_type, str):
98
+ if cfg_type == "full":
99
+ layers_cfgs = {
100
+ # cfgs for building RSUs and sides
101
+ # {stage : [name, (height(L), in_ch, mid_ch, out_ch, dilated), side]}
102
+ "stage1": ["En_1", (7, 3, 32, 64), -1],
103
+ "stage2": ["En_2", (6, 64, 32, 128), -1],
104
+ "stage3": ["En_3", (5, 128, 64, 256), -1],
105
+ "stage4": ["En_4", (4, 256, 128, 512), -1],
106
+ "stage5": ["En_5", (4, 512, 256, 512, True), -1],
107
+ "stage6": ["En_6", (4, 512, 256, 512, True), 512],
108
+ "stage5d": ["De_5", (4, 1024, 256, 512, True), 512],
109
+ "stage4d": ["De_4", (4, 1024, 128, 256), 256],
110
+ "stage3d": ["De_3", (5, 512, 64, 128), 128],
111
+ "stage2d": ["De_2", (6, 256, 32, 64), 64],
112
+ "stage1d": ["De_1", (7, 128, 16, 64), 64],
113
+ }
114
+ else:
115
+ raise ValueError("Unknown U^2-Net architecture conf. name")
116
+ elif isinstance(cfg_type, dict):
117
+ layers_cfgs = cfg_type
118
+ else:
119
+ raise ValueError("Unknown U^2-Net architecture conf. type")
120
+ self.out_ch = out_ch
121
+ self._make_layers(layers_cfgs)
122
+
123
+ def forward(self, x):
124
+ sizes = _size_map(x, self.height)
125
+ maps = [] # storage for maps
126
+
127
+ # side saliency map
128
+ def unet(x, height=1):
129
+ if height < 6:
130
+ x1 = getattr(self, f"stage{height}")(x)
131
+ x2 = unet(getattr(self, "downsample")(x1), height + 1)
132
+ x = getattr(self, f"stage{height}d")(torch.cat((x2, x1), 1))
133
+ side(x, height)
134
+ return _upsample_like(x, sizes[height - 1]) if height > 1 else x
135
+ else:
136
+ x = getattr(self, f"stage{height}")(x)
137
+ side(x, height)
138
+ return _upsample_like(x, sizes[height - 1])
139
+
140
+ def side(x, h):
141
+ # side output saliency map (before sigmoid)
142
+ x = getattr(self, f"side{h}")(x)
143
+ x = _upsample_like(x, sizes[1])
144
+ maps.append(x)
145
+
146
+ def fuse():
147
+ # fuse saliency probability maps
148
+ maps.reverse()
149
+ x = torch.cat(maps, 1)
150
+ x = getattr(self, "outconv")(x)
151
+ maps.insert(0, x)
152
+ return [torch.sigmoid(x) for x in maps]
153
+
154
+ unet(x)
155
+ maps = fuse()
156
+ return maps
157
+
158
+ def _make_layers(self, cfgs):
159
+ self.height = int((len(cfgs) + 1) / 2)
160
+ self.add_module("downsample", nn.MaxPool2d(2, stride=2, ceil_mode=True))
161
+ for k, v in cfgs.items():
162
+ # build rsu block
163
+ self.add_module(k, RSU(v[0], *v[1]))
164
+ if v[2] > 0:
165
+ # build side layer
166
+ self.add_module(
167
+ f"side{v[0][-1]}", nn.Conv2d(v[2], self.out_ch, 3, padding=1)
168
+ )
169
+ # build fuse layer
170
+ self.add_module(
171
+ "outconv", nn.Conv2d(int(self.height * self.out_ch), self.out_ch, 1)
172
+ )
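A quick check of the network built from the "full" config (random weights; the fused map comes first, followed by the six side outputs):

```python
import torch
from carvekit.ml.arch.u2net.u2net import U2NETArchitecture

net = U2NETArchitecture(cfg_type="full", out_ch=1).eval()
with torch.no_grad():
    maps = net(torch.randn(1, 3, 320, 320))
print(len(maps), maps[0].shape)   # 7 torch.Size([1, 1, 320, 320])
```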
carvekit/ml/files/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ from pathlib import Path
2
+
3
+ carvekit_dir = Path.home().joinpath(".cache/carvekit")
4
+
5
+ carvekit_dir.mkdir(parents=True, exist_ok=True)
6
+
7
+ checkpoints_dir = carvekit_dir.joinpath("checkpoints")
carvekit/ml/files/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (365 Bytes). View file
 
carvekit/ml/files/__pycache__/models_loc.cpython-38.pyc ADDED
Binary file (2.01 kB). View file
 
carvekit/ml/files/models_loc.py ADDED
@@ -0,0 +1,70 @@
1
+ """
2
+ Source url: https://github.com/OPHoperHPO/image-background-remove-tool
3
+ Author: Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
4
+ License: Apache License 2.0
5
+ """
6
+ import pathlib
7
+ from carvekit.ml.files import checkpoints_dir
8
+ from carvekit.utils.download_models import downloader
9
+
10
+
11
+ def u2net_full_pretrained() -> pathlib.Path:
12
+ """Returns u2net pretrained model location
13
+
14
+ Returns:
15
+ pathlib.Path to model location
16
+ """
17
+ return downloader("u2net.pth")
18
+
19
+
20
+ def basnet_pretrained() -> pathlib.Path:
21
+ """Returns basnet pretrained model location
22
+
23
+ Returns:
24
+ pathlib.Path to model location
25
+ """
26
+ return downloader("basnet.pth")
27
+
28
+
29
+ def deeplab_pretrained() -> pathlib.Path:
30
+ """Returns basnet pretrained model location
31
+
32
+ Returns:
33
+ pathlib.Path to model location
34
+ """
35
+ return downloader("deeplab.pth")
36
+
37
+
38
+ def fba_pretrained() -> pathlib.Path:
39
+ """Returns basnet pretrained model location
40
+
41
+ Returns:
42
+ pathlib.Path to model location
43
+ """
44
+ return downloader("fba_matting.pth")
45
+
46
+
47
+ def tracer_b7_pretrained() -> pathlib.Path:
48
+ """Returns TRACER with EfficientNet v1 b7 encoder pretrained model location
49
+
50
+ Returns:
51
+ pathlib.Path to model location
52
+ """
53
+ return downloader("tracer_b7.pth")
54
+
55
+
56
+ def tracer_hair_pretrained() -> pathlib.Path:
57
+ """Returns TRACER with EfficientNet v1 b7 encoder model for hair segmentation location
58
+
59
+ Returns:
60
+ pathlib.Path to model location
61
+ """
62
+ return downloader("tracer_hair.pth")
63
+
64
+
65
+ def download_all():
66
+ u2net_full_pretrained()
67
+ fba_pretrained()
68
+ deeplab_pretrained()
69
+ basnet_pretrained()
70
+ tracer_b7_pretrained()
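Each helper above resolves a checkpoint path through the shared downloader, presumably caching it under the checkpoints directory defined in carvekit/ml/files/__init__.py; a usage sketch (the first call may trigger a download):

```python
from carvekit.ml.files.models_loc import fba_pretrained, tracer_b7_pretrained

tracer_ckpt = tracer_b7_pretrained()   # pathlib.Path to tracer_b7.pth
fba_ckpt = fba_pretrained()            # pathlib.Path to fba_matting.pth
print(tracer_ckpt, fba_ckpt)
```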