schirrmacher committed on
Commit
150d962
1 Parent(s): 5503d80

Upload folder using huggingface_hub

README.md CHANGED
@@ -1,3 +1,74 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: apache-2.0
+ tags:
+ - art
+ pretty_name: Open Remove Background Model
+ ---
+
+ # Open Remove Background Model (ormbg)
+
+ This model is a **fully open-source background remover** optimized for images with humans. It is based on the [Highly Accurate Dichotomous Image Segmentation research](https://github.com/xuebinqin/DIS).
+
+ This model is similar to [RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4), but its training data and training process are fully open, and it is free for commercial use.
+
+ ## Inference
+
+ Below is a minimal inference sketch using [ONNX Runtime](https://onnxruntime.ai/) with the exported `models/ormbg.onnx` (the file paths and the postprocessing are assumptions; adjust them to your setup):
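+
+ ```
+ import numpy as np
+ import onnxruntime as ort
+ from PIL import Image
+
+ # Load the exported model (see "Export to ONNX" below).
+ session = ort.InferenceSession("models/ormbg.onnx")
+
+ # Preprocess: the model is trained without normalization (see dis-repo.patch),
+ # so only resize to the 1024x1024 training resolution and scale to [0, 1].
+ image = Image.open("input.png").convert("RGB")
+ im = np.asarray(image.resize((1024, 1024)), dtype=np.float32) / 255.0
+ im = im.transpose(2, 0, 1)[np.newaxis, ...]  # HWC -> NCHW
+
+ # The first output is the highest-resolution side output of ISNet.
+ mask = session.run(None, {"input": im})[0][0, 0]
+
+ # Postprocess: use the predicted mask as an alpha channel.
+ alpha = Image.fromarray((mask * 255).astype(np.uint8)).resize(image.size)
+ image.putalpha(alpha)
+ image.save("output.png")
+ ```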
+
+ ## Training
+
+ The model was trained with the [Human Segmentation Dataset](https://huggingface.co/datasets/schirrmacher/humans).
+
+ After 10,000 iterations on a single NVIDIA GeForce RTX 4090, training produced the following results:
+
+ - Training time: 8 hours
+ - Training loss: 0.1179
+ - Validation loss: 0.1284
+ - Maximum F1 score: 0.9928
+ - Mean Absolute Error: 0.005
+
+ Output model: `/models/ormbg.pth`.
+
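+ To sanity-check the trained weights, you can load them the same way `utils/pth_to_onnx.py` does (a sketch; the import and file paths assume you run from the repository root):
+
+ ```
+ import torch
+
+ from utils.isnet import ISNetDIS
+
+ net = ISNetDIS()
+ net.load_state_dict(torch.load("models/ormbg.pth", map_location="cpu"))
+ net.eval()  # put the batch-norm layers into inference mode
+ ```
+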
+ ## Want to train your own model?
+
+ Check out the _Highly Accurate Dichotomous Image Segmentation_ code:
+
+ ```
+ git clone https://github.com/xuebinqin/DIS.git
+ cd DIS
+ ```
+
+ Follow the installation instructions at https://github.com/xuebinqin/DIS?tab=readme-ov-file#1-clone-this-repo.
+ Download or create some data ([like this](https://huggingface.co/datasets/schirrmacher/humans)) and place it into the DIS project folder.
+
+ I use the following folder structure (a short snippet to create it follows the list):
+
+ - training/im (images)
+ - training/gt (ground truth)
+ - validation/im (images)
+ - validation/gt (ground truth)
+
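+ A sketch to create that layout (run inside the DIS project folder):
+
+ ```
+ import os
+
+ for split in ("training", "validation"):
+     for sub in ("im", "gt"):
+         os.makedirs(os.path.join(split, sub), exist_ok=True)
+ ```
+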
+ Apply this git patch (`dis-repo.patch`, included below) to set the dataset paths and remove image normalization:
+
+ ```
+ git apply dis-repo.patch
+ ```
+
+ Start training:
+
+ ```
+ cd IS-Net
+ python train_valid_inference_main.py
+ ```
+
+ Export to ONNX (modify paths if needed; the defaults are defined in `utils/pth_to_onnx.py`):
+
+ ```
+ python utils/pth_to_onnx.py --model_path ./models/ormbg.pth --onnx_path ./models/ormbg.onnx
+ ```
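+
+ To verify the export, a quick check (assuming the `onnx` package is installed):
+
+ ```
+ import onnx
+
+ onnx.checker.check_model(onnx.load("models/ormbg.onnx"))  # raises if the model is invalid
+ ```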
+
+ ## Support
+
+ If you identify edge cases or issues with the model, please contact me!
dis-repo.patch ADDED
@@ -0,0 +1,87 @@
+ diff --git a/IS-Net/Inference.py b/IS-Net/Inference.py
+ index 0b2907d..ca8484b 100644
+ --- a/IS-Net/Inference.py
+ +++ b/IS-Net/Inference.py
+ @@ -40,7 +40,7 @@ if __name__ == "__main__":
+ im_tensor = torch.tensor(im, dtype=torch.float32).permute(2,0,1)
+ im_tensor = F.upsample(torch.unsqueeze(im_tensor,0), input_size, mode="bilinear").type(torch.uint8)
+ image = torch.divide(im_tensor,255.0)
+ - image = normalize(image,[0.5,0.5,0.5],[1.0,1.0,1.0])
+ + #image = normalize(image,[0.5,0.5,0.5],[1.0,1.0,1.0])
+
+ if torch.cuda.is_available():
+ image=image.cuda()
+ diff --git a/IS-Net/train_valid_inference_main.py b/IS-Net/train_valid_inference_main.py
+ index 375bb26..ad9043c 100644
+ --- a/IS-Net/train_valid_inference_main.py
+ +++ b/IS-Net/train_valid_inference_main.py
+ @@ -536,10 +536,10 @@ def main(train_datasets,
+ cache_size = hypar["cache_size"],
+ cache_boost = hypar["cache_boost_train"],
+ my_transforms = [
+ - GOSRandomHFlip(), ## this line can be uncommented for horizontal flip augmetation
+ + #GOSRandomHFlip(), ## this line can be uncommented for horizontal flip augmetation
+ # GOSResize(hypar["input_size"]),
+ # GOSRandomCrop(hypar["crop_size"]), ## this line can be uncommented for randomcrop augmentation
+ - GOSNormalize([0.5,0.5,0.5],[1.0,1.0,1.0]),
+ + #GOSNormalize([0.5,0.5,0.5],[1.0,1.0,1.0]),
+ ],
+ batch_size = hypar["batch_size_train"],
+ shuffle = True)
+ @@ -547,7 +547,7 @@ def main(train_datasets,
+ cache_size = hypar["cache_size"],
+ cache_boost = hypar["cache_boost_train"],
+ my_transforms = [
+ - GOSNormalize([0.5,0.5,0.5],[1.0,1.0,1.0]),
+ + #GOSNormalize([0.5,0.5,0.5],[1.0,1.0,1.0]),
+ ],
+ batch_size = hypar["batch_size_valid"],
+ shuffle = False)
+ @@ -561,7 +561,7 @@ def main(train_datasets,
+ cache_size = hypar["cache_size"],
+ cache_boost = hypar["cache_boost_valid"],
+ my_transforms = [
+ - GOSNormalize([0.5,0.5,0.5],[1.0,1.0,1.0]),
+ + #GOSNormalize([0.5,0.5,0.5],[1.0,1.0,1.0]),
+ # GOSResize(hypar["input_size"])
+ ],
+ batch_size=hypar["batch_size_valid"],
+ @@ -618,19 +618,19 @@ if __name__ == "__main__":
+ train_datasets, valid_datasets = [], []
+ dataset_1, dataset_1 = {}, {}
+
+ - dataset_tr = {"name": "DIS5K-TR",
+ - "im_dir": "../DIS5K/DIS-TR/im",
+ - "gt_dir": "../DIS5K/DIS-TR/gt",
+ - "im_ext": ".jpg",
+ + dataset_tr = {"name": "training",
+ + "im_dir": "../training/im",
+ + "gt_dir": "../training/gt",
+ + "im_ext": ".png",
+ "gt_ext": ".png",
+ - "cache_dir":"../DIS5K-Cache/DIS-TR"}
+ + "cache_dir":"../cache/training"}
+
+ - dataset_vd = {"name": "DIS5K-VD",
+ - "im_dir": "../DIS5K/DIS-VD/im",
+ - "gt_dir": "../DIS5K/DIS-VD/gt",
+ - "im_ext": ".jpg",
+ + dataset_vd = {"name": "validation",
+ + "im_dir": "../validation/im",
+ + "gt_dir": "../validation/gt",
+ + "im_ext": ".png",
+ "gt_ext": ".png",
+ - "cache_dir":"../DIS5K-Cache/DIS-VD"}
+ + "cache_dir":"../cache/validation"}
+
+ dataset_te1 = {"name": "DIS5K-TE1",
+ "im_dir": "../DIS5K/DIS-TE1/im",
+ @@ -685,7 +685,7 @@ if __name__ == "__main__":
+ if hypar["mode"] == "train":
+ hypar["valid_out_dir"] = "" ## for "train" model leave it as "", for "valid"("inference") mode: set it according to your local directory
+ hypar["model_path"] ="../saved_models/IS-Net-test" ## model weights saving (or restoring) path
+ - hypar["restore_model"] = "" ## name of the segmentation model weights .pth for resume training process from last stop or for the inferencing
+ + hypar["restore_model"] = "isnet-base-model.pth" ## name of the segmentation model weights .pth for resume training process from last stop or for the inferencing
+ hypar["start_ite"] = 0 ## start iteration for the training, can be changed to match the restored training process
+ hypar["gt_encoder_model"] = ""
+ else: ## configure the segmentation output path and the to-be-used model weights path
models/isnet-base-model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e1aafea58f0b55d0c35077e0ceade6ba1ba2bce372fd4f8f77215391f3fac13
+ size 176579397
models/ormbg.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e91dc17c7cd8eff882d06f293e34b0ca6d33e6f5d71c87b439bd59820f03c49
+ size 176180252
models/ormbg.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba387a8348526875024f59aa97d23af9cacfff77abf4e9af14332bf477c088fa
+ size 176719216
utils/__pycache__/isnet.cpython-312.pyc ADDED
Binary file (27.6 kB).
 
utils/isnet.py ADDED
@@ -0,0 +1,647 @@
+ import torch
+ import torch.nn as nn
+ from torchvision import models
+ import torch.nn.functional as F
+
+ # https://github.com/xuebinqin/DIS/blob/main/IS-Net/models/isnet.py
+
+ bce_loss = nn.BCELoss(reduction="mean")
+
+
+ # Deep-supervision loss: sums BCE over all side outputs; loss0 is the loss
+ # of the first (highest-resolution) output alone.
+ def muti_loss_fusion(preds, target):
+     loss0 = 0.0
+     loss = 0.0
+
+     for i in range(0, len(preds)):
+         # print("i: ", i, preds[i].shape)
+         if preds[i].shape[2] != target.shape[2] or preds[i].shape[3] != target.shape[3]:
+             # tmp_target = _upsample_like(target,preds[i])
+             tmp_target = F.interpolate(
+                 target, size=preds[i].size()[2:], mode="bilinear", align_corners=True
+             )
+             loss = loss + bce_loss(preds[i], tmp_target)
+         else:
+             loss = loss + bce_loss(preds[i], target)
+         if i == 0:
+             loss0 = loss
+     return loss0, loss
+
+
+ fea_loss = nn.MSELoss(reduction="mean")
+ kl_loss = nn.KLDivLoss(reduction="mean")
+ l1_loss = nn.L1Loss(reduction="mean")
+ smooth_l1_loss = nn.SmoothL1Loss(reduction="mean")
+
+
+ # Same as muti_loss_fusion, plus a feature loss between the decoder
+ # features (dfs) and the ground-truth-encoder features (fs).
+ def muti_loss_fusion_kl(preds, target, dfs, fs, mode="MSE"):
+     loss0 = 0.0
+     loss = 0.0
+
+     for i in range(0, len(preds)):
+         # print("i: ", i, preds[i].shape)
+         if preds[i].shape[2] != target.shape[2] or preds[i].shape[3] != target.shape[3]:
+             # tmp_target = _upsample_like(target,preds[i])
+             tmp_target = F.interpolate(
+                 target, size=preds[i].size()[2:], mode="bilinear", align_corners=True
+             )
+             loss = loss + bce_loss(preds[i], tmp_target)
+         else:
+             loss = loss + bce_loss(preds[i], target)
+         if i == 0:
+             loss0 = loss
+
+     for i in range(0, len(dfs)):
+         if mode == "MSE":
+             loss = loss + fea_loss(
+                 dfs[i], fs[i]
+             )  ### add the mse loss of features as additional constraints
+             # print("fea_loss: ", fea_loss(dfs[i],fs[i]).item())
+         elif mode == "KL":
+             loss = loss + kl_loss(F.log_softmax(dfs[i], dim=1), F.softmax(fs[i], dim=1))
+             # print("kl_loss: ", kl_loss(F.log_softmax(dfs[i],dim=1),F.softmax(fs[i],dim=1)).item())
+         elif mode == "MAE":
+             loss = loss + l1_loss(dfs[i], fs[i])
+             # print("ls_loss: ", l1_loss(dfs[i],fs[i]))
+         elif mode == "SmoothL1":
+             loss = loss + smooth_l1_loss(dfs[i], fs[i])
+             # print("SmoothL1: ", smooth_l1_loss(dfs[i],fs[i]).item())
+
+     return loss0, loss
+
+
+ class REBNCONV(nn.Module):
+     def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
+         super(REBNCONV, self).__init__()
+
+         self.conv_s1 = nn.Conv2d(
+             in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate, stride=stride
+         )
+         self.bn_s1 = nn.BatchNorm2d(out_ch)
+         self.relu_s1 = nn.ReLU(inplace=True)
+
+     def forward(self, x):
+
+         hx = x
+         xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
+
+         return xout
+
+
+ ## upsample tensor 'src' to have the same spatial size with tensor 'tar'
+ def _upsample_like(src, tar):
+
+     src = F.interpolate(src, size=tar.shape[2:], mode="bilinear")
+
+     return src
+
+
+ ### RSU-7 ###
+ class RSU7(nn.Module):
+
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
+         super(RSU7, self).__init__()
+
+         self.in_ch = in_ch
+         self.mid_ch = mid_ch
+         self.out_ch = out_ch
+
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)  ## 1 -> 1/2
+
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
+
+         self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
+
+         self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+     def forward(self, x):
+         b, c, h, w = x.shape
+
+         hx = x
+         hxin = self.rebnconvin(hx)
+
+         hx1 = self.rebnconv1(hxin)
+         hx = self.pool1(hx1)
+
+         hx2 = self.rebnconv2(hx)
+         hx = self.pool2(hx2)
+
+         hx3 = self.rebnconv3(hx)
+         hx = self.pool3(hx3)
+
+         hx4 = self.rebnconv4(hx)
+         hx = self.pool4(hx4)
+
+         hx5 = self.rebnconv5(hx)
+         hx = self.pool5(hx5)
+
+         hx6 = self.rebnconv6(hx)
+
+         hx7 = self.rebnconv7(hx6)
+
+         hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
+         hx6dup = _upsample_like(hx6d, hx5)
+
+         hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
+         hx5dup = _upsample_like(hx5d, hx4)
+
+         hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+         hx4dup = _upsample_like(hx4d, hx3)
+
+         hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+
+         hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+
+         hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+
+         return hx1d + hxin
+
+
+ ### RSU-6 ###
+ class RSU6(nn.Module):
+
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+         super(RSU6, self).__init__()
+
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+
+         self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
+
+         self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+     def forward(self, x):
+
+         hx = x
+
+         hxin = self.rebnconvin(hx)
+
+         hx1 = self.rebnconv1(hxin)
+         hx = self.pool1(hx1)
+
+         hx2 = self.rebnconv2(hx)
+         hx = self.pool2(hx2)
+
+         hx3 = self.rebnconv3(hx)
+         hx = self.pool3(hx3)
+
+         hx4 = self.rebnconv4(hx)
+         hx = self.pool4(hx4)
+
+         hx5 = self.rebnconv5(hx)
+
+         hx6 = self.rebnconv6(hx5)
+
+         hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
+         hx5dup = _upsample_like(hx5d, hx4)
+
+         hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+         hx4dup = _upsample_like(hx4d, hx3)
+
+         hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+
+         hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+
+         hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+
+         return hx1d + hxin
+
+
+ ### RSU-5 ###
+ class RSU5(nn.Module):
+
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+         super(RSU5, self).__init__()
+
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+
+         self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
+
+         self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+     def forward(self, x):
+
+         hx = x
+
+         hxin = self.rebnconvin(hx)
+
+         hx1 = self.rebnconv1(hxin)
+         hx = self.pool1(hx1)
+
+         hx2 = self.rebnconv2(hx)
+         hx = self.pool2(hx2)
+
+         hx3 = self.rebnconv3(hx)
+         hx = self.pool3(hx3)
+
+         hx4 = self.rebnconv4(hx)
+
+         hx5 = self.rebnconv5(hx4)
+
+         hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
+         hx4dup = _upsample_like(hx4d, hx3)
+
+         hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+
+         hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+
+         hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+
+         return hx1d + hxin
+
+
+ ### RSU-4 ###
+ class RSU4(nn.Module):
+
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+         super(RSU4, self).__init__()
+
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
+
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+     def forward(self, x):
+
+         hx = x
+
+         hxin = self.rebnconvin(hx)
+
+         hx1 = self.rebnconv1(hxin)
+         hx = self.pool1(hx1)
+
+         hx2 = self.rebnconv2(hx)
+         hx = self.pool2(hx2)
+
+         hx3 = self.rebnconv3(hx)
+
+         hx4 = self.rebnconv4(hx3)
+
+         hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+
+         hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+
+         hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+
+         return hx1d + hxin
+
+
+ ### RSU-4F ###
+ class RSU4F(nn.Module):
+
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+         super(RSU4F, self).__init__()
+
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
+
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
+
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+     def forward(self, x):
+
+         hx = x
+
+         hxin = self.rebnconvin(hx)
+
+         hx1 = self.rebnconv1(hxin)
+         hx2 = self.rebnconv2(hx1)
+         hx3 = self.rebnconv3(hx2)
+
+         hx4 = self.rebnconv4(hx3)
+
+         hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+         hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
+         hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
+
+         return hx1d + hxin
+
+
+ class myrebnconv(nn.Module):
+     def __init__(
+         self,
+         in_ch=3,
+         out_ch=1,
+         kernel_size=3,
+         stride=1,
+         padding=1,
+         dilation=1,
+         groups=1,
+     ):
+         super(myrebnconv, self).__init__()
+
+         self.conv = nn.Conv2d(
+             in_ch,
+             out_ch,
+             kernel_size=kernel_size,
+             stride=stride,
+             padding=padding,
+             dilation=dilation,
+             groups=groups,
+         )
+         self.bn = nn.BatchNorm2d(out_ch)
+         self.rl = nn.ReLU(inplace=True)
+
+     def forward(self, x):
+         return self.rl(self.bn(self.conv(x)))
+
+
+ class ISNetGTEncoder(nn.Module):
+
+     def __init__(self, in_ch=1, out_ch=1):
+         super(ISNetGTEncoder, self).__init__()
+
+         self.conv_in = myrebnconv(
+             in_ch, 16, 3, stride=2, padding=1
+         )  # nn.Conv2d(in_ch,64,3,stride=2,padding=1)
+
+         self.stage1 = RSU7(16, 16, 64)
+         self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage2 = RSU6(64, 16, 64)
+         self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage3 = RSU5(64, 32, 128)
+         self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage4 = RSU4(128, 32, 256)
+         self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage5 = RSU4F(256, 64, 512)
+         self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage6 = RSU4F(512, 64, 512)
+
+         self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
+         self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
+         self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
+         self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
+         self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
+         self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
+
+     def compute_loss(self, preds, targets):
+
+         return muti_loss_fusion(preds, targets)
+
+     def forward(self, x):
+
+         hx = x
+
+         hxin = self.conv_in(hx)
+         # hx = self.pool_in(hxin)
+
+         # stage 1
+         hx1 = self.stage1(hxin)
+         hx = self.pool12(hx1)
+
+         # stage 2
+         hx2 = self.stage2(hx)
+         hx = self.pool23(hx2)
+
+         # stage 3
+         hx3 = self.stage3(hx)
+         hx = self.pool34(hx3)
+
+         # stage 4
+         hx4 = self.stage4(hx)
+         hx = self.pool45(hx4)
+
+         # stage 5
+         hx5 = self.stage5(hx)
+         hx = self.pool56(hx5)
+
+         # stage 6
+         hx6 = self.stage6(hx)
+
+         # side output
+         d1 = self.side1(hx1)
+         d1 = _upsample_like(d1, x)
+
+         d2 = self.side2(hx2)
+         d2 = _upsample_like(d2, x)
+
+         d3 = self.side3(hx3)
+         d3 = _upsample_like(d3, x)
+
+         d4 = self.side4(hx4)
+         d4 = _upsample_like(d4, x)
+
+         d5 = self.side5(hx5)
+         d5 = _upsample_like(d5, x)
+
+         d6 = self.side6(hx6)
+         d6 = _upsample_like(d6, x)
+
+         # d0 = self.outconv(torch.cat((d1,d2,d3,d4,d5,d6),1))
+
+         return [
+             torch.sigmoid(d1),
+             torch.sigmoid(d2),
+             torch.sigmoid(d3),
+             torch.sigmoid(d4),
+             torch.sigmoid(d5),
+             torch.sigmoid(d6),
+         ], [hx1, hx2, hx3, hx4, hx5, hx6]
+
+
+ class ISNetDIS(nn.Module):
+
+     def __init__(self, in_ch=3, out_ch=1):
+         super(ISNetDIS, self).__init__()
+
+         self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
+         self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage1 = RSU7(64, 32, 64)
+         self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage2 = RSU6(64, 32, 128)
+         self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage3 = RSU5(128, 64, 256)
+         self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage4 = RSU4(256, 128, 512)
+         self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage5 = RSU4F(512, 256, 512)
+         self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+         self.stage6 = RSU4F(512, 256, 512)
+
+         # decoder
+         self.stage5d = RSU4F(1024, 256, 512)
+         self.stage4d = RSU4(1024, 128, 256)
+         self.stage3d = RSU5(512, 64, 128)
+         self.stage2d = RSU6(256, 32, 64)
+         self.stage1d = RSU7(128, 16, 64)
+
+         self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
+         self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
+         self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
+         self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
+         self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
+         self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
+
+         # self.outconv = nn.Conv2d(6*out_ch,out_ch,1)
+
+     def compute_loss_kl(self, preds, targets, dfs, fs, mode="MSE"):
+
+         # return muti_loss_fusion(preds,targets)
+         return muti_loss_fusion_kl(preds, targets, dfs, fs, mode=mode)
+
+     def compute_loss(self, preds, targets):
+
+         # return muti_loss_fusion(preds,targets)
+         return muti_loss_fusion(preds, targets)
+
+     def forward(self, x):
+
+         hx = x
+
+         hxin = self.conv_in(hx)
+         # hx = self.pool_in(hxin)
+
+         # stage 1
+         hx1 = self.stage1(hxin)
+         hx = self.pool12(hx1)
+
+         # stage 2
+         hx2 = self.stage2(hx)
+         hx = self.pool23(hx2)
+
+         # stage 3
+         hx3 = self.stage3(hx)
+         hx = self.pool34(hx3)
+
+         # stage 4
+         hx4 = self.stage4(hx)
+         hx = self.pool45(hx4)
+
+         # stage 5
+         hx5 = self.stage5(hx)
+         hx = self.pool56(hx5)
+
+         # stage 6
+         hx6 = self.stage6(hx)
+         hx6up = _upsample_like(hx6, hx5)
+
+         # -------------------- decoder --------------------
+         hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
+         hx5dup = _upsample_like(hx5d, hx4)
+
+         hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
+         hx4dup = _upsample_like(hx4d, hx3)
+
+         hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+
+         hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+
+         hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
+
+         # side output
+         d1 = self.side1(hx1d)
+         d1 = _upsample_like(d1, x)
+
+         d2 = self.side2(hx2d)
+         d2 = _upsample_like(d2, x)
+
+         d3 = self.side3(hx3d)
+         d3 = _upsample_like(d3, x)
+
+         d4 = self.side4(hx4d)
+         d4 = _upsample_like(d4, x)
+
+         d5 = self.side5(hx5d)
+         d5 = _upsample_like(d5, x)
+
+         d6 = self.side6(hx6)
+         d6 = _upsample_like(d6, x)
+
+         # d0 = self.outconv(torch.cat((d1,d2,d3,d4,d5,d6),1))
+
+         return [
+             torch.sigmoid(d1),
+             torch.sigmoid(d2),
+             torch.sigmoid(d3),
+             torch.sigmoid(d4),
+             torch.sigmoid(d5),
+             torch.sigmoid(d6),
+         ], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6]
utils/pth_to_onnx.py ADDED
@@ -0,0 +1,59 @@
+ import torch
+ import argparse
+ from isnet import ISNetDIS
+
+
+ def export_to_onnx(model_path, onnx_path):
+
+     net = ISNetDIS()
+
+     if torch.cuda.is_available():
+         net.load_state_dict(torch.load(model_path))
+         net = net.cuda()
+     else:
+         net.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
+
+     net.eval()
+
+     # Create a dummy input tensor. The size should match the model's input size.
+     # Adjust the dimensions as necessary; here it is assumed the input is a 3-channel image.
+     dummy_input = torch.randn(
+         1,
+         3,
+         1024,
+         1024,
+         device="cuda" if torch.cuda.is_available() else "cpu",
+     )
+
+     torch.onnx.export(
+         net,
+         dummy_input,
+         onnx_path,
+         export_params=True,
+         opset_version=10,
+         do_constant_folding=True,
+         input_names=["input"],
+         output_names=["output"],
+     )
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(
+         description="Export a trained model to ONNX format."
+     )
+     parser.add_argument(
+         "--model_path",
+         type=str,
+         default="./models/ormbg.pth",
+         help="The path to the trained model file.",
+     )
+     parser.add_argument(
+         "--onnx_path",
+         type=str,
+         default="./models/example.onnx",
+         help="The path where the ONNX model will be saved.",
+     )
+
+     args = parser.parse_args()
+
+     export_to_onnx(args.model_path, args.onnx_path)