Spaces:

anfruizhu
/

phenotyping_pipeline

Sleeping

App Files Files Community

Andres Felipe Ruiz-Hurtado committed on Nov 19, 2024

Commit

9f3ae4a

1 Parent(s): 173edf9

initial

Browse files

Files changed (9) hide show

.gitignore +162 -0
bgremover.py +744 -0
main.py +204 -0
requirements.txt +9 -0
u2net_utils/__init__.py +0 -0
u2net_utils/data_loader.py +266 -0
u2net_utils/model/__init__.py +2 -0
u2net_utils/model/u2net.py +525 -0
u2net_utils/model/u2net_refactor.py +168 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,162 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

bgremover.py ADDED Viewed

	@@ -0,0 +1,744 @@

+import cv2 as cv
+import numpy as np
+from PIL import Image
+import glob
+import pathlib
+import sys
+import u2net_utils
+import os
+from skimage import io, transform
+import torch
+import torchvision
+from torch.autograd import Variable
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+from torchvision import transforms#, utils
+# import torch.optim as optim
+from u2net_utils.data_loader import RescaleT
+from u2net_utils.data_loader import ToTensor
+from u2net_utils.data_loader import ToTensorLab
+from u2net_utils.data_loader import SalObjDataset
+from u2net_utils.model import U2NET # full size version 173.6 MB
+from u2net_utils.model import U2NETP # small version u2net 4.7 MB
+from torchvision import models
+import onnxruntime as ort
+import cv2 as cv
+import numpy as np
+from torchvision.transforms import v2 as transforms
+# MODEL_PATH = r"\\CATALOGUE.CGIARAD.ORG\AcceleratedBreedingInitiative\4.Scripts\AndresRuiz\local_mydata_gpu\models\u2net.pth"
+# MODEL_PATH = r"D:\CIAT\catalogue\AcceleratedBreedingInitiative\1.Data\16. Spidermites_AdrianK\best_models"
+# MODEL_PATH = r"D:\local_mydata\models\spidermites\best_models"
+MODEL_PATH = "./models"
+#************************
+# from loguru import logger
+# from segment_anything import build_sam, SamPredictor, SamAutomaticMaskGenerator
+# import subprocess
+# # Grounding DINO
+# import GroundingDINO.groundingdino.datasets.transforms as T
+# from GroundingDINO.groundingdino.models import build_model
+# from GroundingDINO.groundingdino.util import box_ops
+# from GroundingDINO.groundingdino.util.slconfig import SLConfig
+# from GroundingDINO.groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap
+# from huggingface_hub import hf_hub_download
+import gc
+def clear():
+    """Run the Python garbage collector, then empty PyTorch's CUDA cache."""
+    # Collect first so tensors that are no longer referenced are freed before
+    # the CUDA cache is emptied.
+    gc.collect()
+    torch.cuda.empty_cache()
+# normalize the predicted SOD probability map
+def normPRED(d):
+    ma = torch.max(d)
+    mi = torch.min(d)
+    dn = (d-mi)/(ma-mi)
+    return dn
+class BackgroundRemover():
+    def __init__(self):
+        #Load model
+        #model_dir = "/workspace/u2net.pth"
+        #model_dir = "D:/local_mydata/models/u2net.pth"
+        model_dir = r"\\CATALOGUE.CGIARAD.ORG\AcceleratedBreedingInitiative\4.Scripts\AndresRuiz\local_mydata_gpu\models\u2net.pth"
+        model_dir = os.path.join(MODEL_PATH, "u2net.pth")
+        ## Load model
+        net = U2NET(3,1)
+        if torch.cuda.is_available():
+            net.load_state_dict(torch.load(model_dir))
+            net.cuda()
+        else:
+            net.load_state_dict(torch.load(model_dir, map_location='cpu'))
+        net.eval()
+        self.net = net
+    def remove_background(self, filepath_image):
+        img_name_list = [filepath_image]
+        test_salobj_dataset = SalObjDataset(img_name_list = img_name_list,
+                                            lbl_name_list = [],
+                                            transform=transforms.Compose([RescaleT(320),
+                                                                        ToTensorLab(flag=0)])
+                                            )
+        test_salobj_dataloader = DataLoader(test_salobj_dataset,
+                                            batch_size=1,
+                                            shuffle=False,
+                                            num_workers=1)
+        net = self.net
+        for i_test, data_test in enumerate(test_salobj_dataloader):
+            print("inferencing:",img_name_list[i_test].split(os.sep)[-1])
+            inputs_test = data_test['image']
+            inputs_test = inputs_test.type(torch.FloatTensor)
+            if torch.cuda.is_available():
+                inputs_test = Variable(inputs_test.cuda())
+            else:
+                inputs_test = Variable(inputs_test)
+            d1,d2,d3,d4,d5,d6,d7= net(inputs_test)
+            # normalization
+            pred = d1[:,0,:,:]
+            pred = normPRED(pred)
+            # save results to test_results folder
+            #if not os.path.exists(prediction_dir):
+            #    os.makedirs(prediction_dir, exist_ok=True)
+            #save_output(img_name_list[i_test],pred,prediction_dir)
+            predict = pred
+            predict = predict.squeeze()
+            #mask_torch.permute(1, 2, 0).detach().cpu().numpy()
+            predict_np = predict.cpu().data.numpy()
+            img = cv.imread(filepath_image)
+            w = img.shape[1]
+            h = img.shape[0]
+            #im = Image.fromarray(predict_np*255).convert('RGB')
+            #image = io.imread(filepath_image)
+            #imo = im.resize((image.shape[1],image.shape[0]),resample=Image.BILINEAR)
+            imo = cv.resize(predict_np, (w,h), cv.INTER_LINEAR )
+            #del d1,d2,d3,d4,d5,d6,d7
+            return imo
+    def remove_background_save(self, path_in, path_out, path_out_mask = None):
+        print("remove_background_save")
+        mask_torch = self.remove_background(path_in)
+        mask = mask_torch*255
+        mask = mask.astype(np.uint8)
+        img = cv.imread(path_in)
+        mask0 = mask#cv.UMat(cv.imread(mask,0))
+        #127
+        #200
+        ret,binary_mask = cv.threshold(mask0,80,255,cv.THRESH_BINARY)
+        binary_mask = np.uint8(binary_mask)
+        res = cv.bitwise_and(img,img, mask = binary_mask)
+        cv.imwrite(path_out, res)
+        if not (path_out_mask == None):
+            cv.imwrite(path_out_mask, mask)
+    def remove_background_dir(self, path_in, path_out):
+        img_name_list = glob.glob(os.path.join(path_in, "*.jpg"))
+        for img_name in img_name_list:
+            img_name_output = img_name.replace(path_in, path_out)
+            if not os.path.exists(img_name_output):
+                self.remove_background_save(img_name, img_name_output)
+                print(img_name.replace(path_in, path_out))
+    def remove_background_gradio(self, np_image):
+        w = np_image.shape[1]
+        h = np_image.shape[0]
+        #image = torch.tensor(np_image)
+        #image = image.permute(2,0,1)
+        image = np_image#Image.fromarray(np_image)
+        imidx = np.array([0])
+        #label = "test"
+        #***
+        label_3 = np.zeros(image.shape)
+        label = np.zeros(label_3.shape[0:2])
+        if(3==len(label_3.shape)):
+            label = label_3[:,:,0]
+        elif(2==len(label_3.shape)):
+            label = label_3
+        if(3==len(image.shape) and 2==len(label.shape)):
+            label = label[:,:,np.newaxis]
+        elif(2==len(image.shape) and 2==len(label.shape)):
+            image = image[:,:,np.newaxis]
+            label = label[:,:,np.newaxis]
+        #***
+        sample = {'imidx':imidx, 'image':image, 'label':label}
+        print(image.shape)
+        print(label.shape)
+        eval_transform = transforms.Compose([RescaleT(320),ToTensorLab(flag=0)])
+        #eval_transform = transforms.Compose([RescaleT(320)])
+        #eval_transform = transforms.Compose([RescaleT(320)])
+        #eval_transform = transforms.Compose([ToTensorLab(flag=0)])
+        #eval_transform = transforms.Compose([transforms.Resize(320)
+        #                                     , transforms.ToTensor()])
+        #eval_transform = transforms.Compose([transforms.Resize(320)])
+        test_salobj_dataloader = DataLoader(sample,
+                                        batch_size=1,
+                                        shuffle=False,
+                                        num_workers=1)
+        sample = eval_transform(sample)
+        net = self.net
+        #for i_test, data_test in enumerate(test_salobj_dataloader):
+        #device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+        #x = eval_transform(sample)
+        #x = x[:3, ...].to(device)
+        inputs_test = sample['image']
+        inputs_test = inputs_test.type(torch.FloatTensor)
+        inputs_test = inputs_test.unsqueeze(0)
+        print(inputs_test.shape)
+        if torch.cuda.is_available():
+            inputs_test = Variable(inputs_test.cuda())
+        else:
+            inputs_test = Variable(inputs_test)
+        d1,d2,d3,d4,d5,d6,d7= net(inputs_test)
+        # normalization
+        pred = d1[:,0,:,:]
+        pred = normPRED(pred)
+        predict = pred
+        predict = predict.squeeze()
+        #mask_torch.permute(1, 2, 0).detach().cpu().numpy()
+        predict_np = predict.cpu().data.numpy()
+        imo = cv.resize(predict_np, (w,h), cv.INTER_LINEAR )
+        mask = imo*255
+        mask = mask.astype(np.uint8)
+        mask0 = mask#cv.UMat(cv.imread(mask,0))
+        #127
+        #200
+        ret,binary_mask = cv.threshold(mask0,80,255,cv.THRESH_BINARY)
+        #ret,binary_mask = cv.threshold(mask0,233,255,cv.THRESH_BINARY)
+        binary_mask = np.uint8(binary_mask)
+        res = cv.bitwise_and(np_image,np_image, mask = binary_mask)
+        return mask, res
+    def apply_mask(self, input, mask, threshold):
+        mask = cv.cvtColor(mask, cv.COLOR_BGR2GRAY)
+        ret,binary_mask = cv.threshold(mask,threshold,255,cv.THRESH_BINARY)
+        #binary_mask = np.uint8(binary_mask)
+        #binary_mask = mask
+        print("apply mask")
+        print(input.shape)
+        print(input.dtype)
+        print(binary_mask.shape)
+        print(binary_mask.dtype)
+        res = cv.bitwise_and(input,input, mask = binary_mask)
+        # foreground_alpha = mask.astype(np.float32) / 255.0
+        # # Create a new image to store the result with same size and type as foreground
+        # blended_image = np.zeros_like(input)
+        # # Loop through each pixel and apply alpha based on mask value
+        # for channel in range(3):  # Loop through BGR channels
+        #     blended_image[:, :, channel] = input[:, :, channel] * foreground_alpha
+        return res, binary_mask
+def get_transform(train = True):
+    transforms_list = []
+    #if train:
+    #    transforms.append(T.RandomHorizontalFlip(0.5))
+    transforms_list.append(transforms.Resize(256))
+    transforms_list.append(transforms.CenterCrop(256))
+    #transforms_list.append(transforms.ToDtype(torch.float, scale=True))
+    transforms_list.append(transforms.ToTensor())
+    #transforms_list.append(transforms.ToDtype(torch.float32, scale=True))
+    transforms_list.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
+    return transforms.Compose(transforms_list)
+class DamageClassifier():
+    def __init__(self):
+        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+        self.model_name =""
+    def initialize(self, model_name):
+        #Load model
+        if model_name == "Resnet18":
+            model_filepath = r"\\CATALOGUE.CGIARAD.ORG\AcceleratedBreedingInitiative\1.Data\16. Spidermites_AdrianK\best_models\resnet18_SpidermitesModel.pth"
+            model_filepath = os.path.join(MODEL_PATH, "resnet18_SpidermitesModel.pth")
+            model = models.resnet18(weights='IMAGENET1K_V1')
+        if model_name == "Resnet152":
+            model_filepath = r"\\CATALOGUE.CGIARAD.ORG\AcceleratedBreedingInitiative\1.Data\16. Spidermites_AdrianK\best_models\short_resnet152_SpidermitesModel_44_44.pth"
+            model_filepath = os.path.join(MODEL_PATH, "short_resnet152_SpidermitesModel_44_44.pth")
+            model = models.resnet152(weights='IMAGENET1K_V1')
+        if model_name == "Googlenet":
+            model_filepath = r"\\catalogue.cgiarad.org\AcceleratedBreedingInitiative\1.Data\16. Spidermites_AdrianK\best_models\regnet_x_32gf_SpidermitesModel.pth"
+            model_filepath = model_filepath = os.path.join(MODEL_PATH, "regnet_x_32gf_SpidermitesModel.pth")
+            model = models.regnet_x_32gf(weights='IMAGENET1K_V1')
+        if model_name == "Regnet32":
+            model_filepath = r"\\CATALOGUE.CGIARAD.ORG\AcceleratedBreedingInitiative\1.Data\16. Spidermites_AdrianK\best_models\short_resnet18_SpidermitesModel.pth"
+            model_filepath = model_filepath = os.path.join(MODEL_PATH, "short_resnet18_SpidermitesModel.pth")
+            model = models.resnet18(weights='IMAGENET1K_V1')
+        #Add fully connected layer at the end with num_classes as output
+        num_ftrs = model.fc.in_features
+        model.fc = nn.Linear(num_ftrs, 4)
+        if torch.cuda.is_available():
+            model.load_state_dict(torch.load(model_filepath))
+            model.cuda()
+        else:
+            model.load_state_dict(torch.load(model_filepath, map_location='cpu'))
+        model.eval()
+        self.model = model
+        self.model_name = model_name
+        return
+    def inference(self, np_image, model_name):
+        if model_name == "Regnet":
+            model_filepath = r"\\CATALOGUE.CGIARAD.ORG\AcceleratedBreedingInitiative\1.Data\16. Spidermites_AdrianK\best_models\regnet_x_32gf_SpidermitesModel.onnx"
+            model_filepath = model_filepath = os.path.join(MODEL_PATH, "regnet_x_32gf_SpidermitesModel.onnx")
+            ort_sess = ort.InferenceSession(model_filepath
+                                ,providers=ort.get_available_providers()
+                                )
+            transforms_list = []
+            transforms_list.append(transforms.ToTensor())
+            transforms_list.append(transforms.Resize(512))
+            transforms_list.append(transforms.CenterCrop(512))
+            transforms_list.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
+            apply_t =  transforms.Compose(transforms_list)
+            img = apply_t(np_image)
+            imgs = np.array([img.numpy()])
+            outputs = ort_sess.run(None, {'input': [img.numpy()]})
+            np_res = outputs[0][0]
+            final_res = {'0-(No damage)': np_res[0]
+                            ,'1-3-(Moderately damaged)': np_res[1]
+                            ,'4-7-(Damaged)': np_res[2]
+                            ,'8-10-(Severely damaged)': np_res[3]}
+            return final_res
+        else:
+            if self.model_name != model_name:
+                self.initialize(model_name)
+            with torch.no_grad():
+                print("inference")
+                print(np_image.shape)
+                pil_image = Image.fromarray(np_image.astype('uint8'))
+                data_transforms = get_transform(train = False)
+                img = data_transforms(pil_image)
+                inputs = img.to(self.device)
+                outputs = self.model(inputs.unsqueeze(0))
+                #_, preds = torch.max(outputs, 1)
+                print(outputs)
+                _, preds = torch.max(outputs, 1)
+                print(preds)
+                m = nn.Softmax(dim=1)
+                res = m(outputs)
+                print(res)
+                np_res = res[0].cpu().numpy()
+                print(np_res)
+                final_res = {'0-(No damage)': np_res[0]
+                            ,'1-3-(Moderately damaged)': np_res[1]
+                            ,'4-7-(Damaged)': np_res[2]
+                            ,'8-10-(Severely damaged)': np_res[3]}
+                return final_res
+class ColorCheckerDetector():
+    def __init__(self):
+        return
+    def process(self, np_image_mask, np_image):
+        ret,binary_mask = cv.threshold(np_image_mask,80,255,cv.THRESH_BINARY)
+        binary_mask_C = cv.cvtColor(binary_mask, cv.COLOR_BGR2GRAY) #change to single channel
+        (contours, hierarchy) = cv.findContours(binary_mask_C, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
+        main_contour = contours[0]
+        # compute the center of the contour
+        moments = cv.moments(main_contour)
+        cx = int(moments["m10"] / moments["m00"])
+        cy = int(moments["m01"] / moments["m00"])
+        # Bounding rect
+        bb_x,bb_y,bb_w,bb_h = cv.boundingRect(binary_mask_C)
+        # Min Bounding rect
+        rect = cv.minAreaRect(main_contour)
+        box = cv.boxPoints(rect)
+        box = np.int64(box)
+        # Fitting line
+        rows,cols = binary_mask_C.shape[:2]
+        #[vx,vy,x,y] = cv.fitLine(main_contour, cv.DIST_L2,0,0.01,0.01)
+        [vx,vy,x,y] = cv.fitLine(box, cv.DIST_L2,0,0.01,0.01)
+        lefty = int((-x*vy/vx) + y)
+        righty = int(((cols-x)*vy/vx)+y)
+        point1 = (cols-1,righty)
+        point2 = (0,lefty)
+        angle = np.arctan2(np.abs(righty-lefty),cols)
+        # rotation matrix
+        M_rot = cv.getRotationMatrix2D((cx, cy), -angle*180.0/np.pi, 1.0)
+        rotated = cv.warpAffine(np_image, M_rot, (binary_mask.shape[1], binary_mask.shape[0]))
+        #perspective transform
+        input_pts = box.astype(np.float32)
+        maxHeight = 200
+        maxWidth = 290
+        output_pts = np.float32([[0, 0],
+                        [maxWidth - 1, 0],
+                        [maxWidth - 1, maxHeight - 1] ,
+                        [0, maxHeight - 1]]
+                        )
+        M_per = cv.getPerspectiveTransform(input_pts,output_pts)
+        corrected = cv.warpPerspective(np_image,M_per,(maxWidth, maxHeight),flags=cv.INTER_LINEAR)
+        res = cv.drawContours(np_image, main_contour, -1, (255,255,0), 5)
+        res = cv.rectangle(res,(bb_x,bb_y),(bb_x+bb_w,bb_y+bb_h),(0,255,0),5)
+        res = cv.drawContours(res,[box],0,(0,0,255),5)
+        res = cv.line(res,(cols-1,righty),(0,lefty),(0,0,255),5)
+        return [res, rotated, corrected]
+class BatchProcessor():
+    def __init__(self):
+        return
+    def batch_process(self, input_dir, output_dir, output_suffixes = ["output"], format="jpg", pattern='**/*.tiff', processing_fc=None, output_format = None):
+        if processing_fc == None:
+            print("Processing function is None")
+            return
+        else:
+            if output_format == None:
+                output_format = format
+            # Get list of files in folder and subfolders
+            pattern = '**/*.'  + format
+            files = glob.glob(pattern, root_dir=input_dir, recursive=True)
+            for file in files:
+                filepath = os.path.join(input_dir, file)
+                basename = os.path.basename(filepath)
+                parent_dir = os.path.dirname(filepath)
+                extra_path = file.replace(basename,"")
+                output_dir = os.path.join(output_dir, extra_path)
+                # Create output filepath list
+                output_filepaths = []
+                for suffix in output_suffixes:
+                    output_filepaths.append(os.path.join(output_dir, basename.replace("." + format, "_" + suffix + "." + output_format)))
+                if not os.path.exists(output_filepaths[0]):# Process only if first output file does not exist
+                    if not os.path.exists(output_dir): # Create subfolders if necessary
+                        pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
+                    processing_fc(filepath, output_filepaths) # Process and save file
+                    print(file)
+                    print(output_filepaths[0])
+                    print("****")
+class Segmentor():
+    def __init__(self):
+        self.sam_predictor = None
+        self.groundingdino_model = None
+        #self.sam_checkpoint = './sam_vit_h_4b8939.pth'
+        #self.sam_checkpoint = r"\\CATALOGUE.CGIARAD.ORG\AcceleratedBreedingInitiative\4.Scripts\AndresRuiz\local_mydata_backup\model\sam_vit_h_4b8939.pth"
+        self.sam_checkpoint = r"D:\local_mydev\Grounded-Segment-Anything\sam_vit_h_4b8939.pth"
+        # self.config_file = 'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py'
+        # self.ckpt_repo_id = "ShilongLiu/GroundingDINO"
+        # self.ckpt_filename = "groundingdino_swint_ogc.pth"
+        self.config_file = r"D:\local_mydev\gsam\GroundingDINO\groundingdino\config\GroundingDINO_SwinT_OGC.py"
+        self.ckpt_repo_id = "ShilongLiu/GroundingDINO"
+        self.ckpt_filename = "groundingdino_swint_ogc.pth"
+        self.device ='cpu'
+        self.load_sam_model(self.device)
+        self.load_groundingdino_model(self.device)
+        return
+    def get_sam_vit_h_4b8939(self):
+        return
+        # if not os.path.exists('./sam_vit_h_4b8939.pth'):
+        #     logger.info(f"get sam_vit_h_4b8939.pth...")
+        #     result = subprocess.run(['wget', '-nv', 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth'], check=True)
+        #     print(f'wget sam_vit_h_4b8939.pth result = {result}')
+    def load_sam_model(self, device):
+        sam_checkpoint = self.sam_checkpoint
+        # initialize SAM
+        self.get_sam_vit_h_4b8939()
+        logger.info(f"initialize SAM model...")
+        sam_device = device
+        sam_model = build_sam(checkpoint=sam_checkpoint).to(sam_device)
+        self.sam_predictor = SamPredictor(sam_model)
+        self.sam_mask_generator = SamAutomaticMaskGenerator(sam_model)
+    def get_grounding_output(self, model, image, caption, box_threshold, text_threshold, with_logits=True, device="cpu"):
+        caption = caption.lower()
+        caption = caption.strip()
+        if not caption.endswith("."):
+            caption = caption + "."
+        model = model.to(device)
+        image = image.to(device)
+        with torch.no_grad():
+            outputs = model(image[None], captions=[caption])
+        logits = outputs["pred_logits"].cpu().sigmoid()[0]  # (nq, 256)
+        boxes = outputs["pred_boxes"].cpu()[0]  # (nq, 4)
+        logits.shape[0]
+        # filter output
+        logits_filt = logits.clone()
+        boxes_filt = boxes.clone()
+        filt_mask = logits_filt.max(dim=1)[0] > box_threshold
+        logits_filt = logits_filt[filt_mask]  # num_filt, 256
+        boxes_filt = boxes_filt[filt_mask]  # num_filt, 4
+        logits_filt.shape[0]
+        # get phrase
+        tokenlizer = model.tokenizer
+        tokenized = tokenlizer(caption)
+        # build pred
+        pred_phrases = []
+        for logit, box in zip(logits_filt, boxes_filt):
+            pred_phrase = get_phrases_from_posmap(logit > text_threshold, tokenized, tokenlizer)
+            if with_logits:
+                pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})")
+            else:
+                pred_phrases.append(pred_phrase)
+        return boxes_filt, pred_phrases
+    def load_model_hf(self, model_config_path, repo_id, filename, device='cpu'):
+        args = SLConfig.fromfile(model_config_path)
+        model = build_model(args)
+        args.device = device
+        cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
+        checkpoint = torch.load(cache_file, map_location=device)
+        print(checkpoint['model'])
+        log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
+        print("Model loaded from {} \n => {}".format(cache_file, log))
+        _ = model.eval()
+        return model
+    def load_groundingdino_model(self, device):
+        config_file = self.config_file
+        ckpt_repo_id = self.ckpt_repo_id
+        ckpt_filename = self.ckpt_filename
+        # initialize groundingdino model
+        logger.info(f"initialize groundingdino model...")
+        self.groundingdino_model = self.load_model_hf(config_file, ckpt_repo_id, ckpt_filename, device=device) #'cpu')
+        logger.info(f"initialize groundingdino model...{type(self.groundingdino_model)}")
+    def show_mask(self, mask, random_color=False):
+        if random_color:
+            color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
+        else:
+            color = np.array([30/255, 144/255, 255/255, 0.6])
+        color = np.array([1.0, 0, 0, 1.0])
+        h, w = mask.shape[-2:]
+        mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+        return mask_image
+    def process(self, np_image, text_prompt):
+        results = []
+        results.append(np_image)
+        #results.append(np_image)
+        sam_predictor = self.sam_predictor
+        groundingdino_model = self.groundingdino_model
+        image = np_image
+        #text_prompt = text_prompt.strip()
+        box_threshold = 0.3
+        text_threshold = 0.25
+        size = image.shape
+        H, W = size[1], size[0]
+        # RUN grounding dino model
+        groundingdino_device = 'cpu'
+        #image_dino = torch.from_numpy(image)
+        image_dino = Image.fromarray(image)
+        transform = T.Compose(
+            [
+                T.RandomResize([800], max_size=1333),
+                T.ToTensor(),
+                T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+            ]
+        )
+        print(image.shape)
+        image_dino, _ = transform(image_dino, None)  # 3, h, w
+        boxes_filt, pred_phrases =self.get_grounding_output(
+            groundingdino_model, image_dino, text_prompt, box_threshold, text_threshold, device=groundingdino_device
+        )
+        if sam_predictor:
+            sam_predictor.set_image(image)
+        if sam_predictor:
+            for i in range(boxes_filt.size(0)):
+                boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
+                boxes_filt[i][:2] -= boxes_filt[i][2:] / 2
+                boxes_filt[i][2:] += boxes_filt[i][:2]
+            transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2])
+            masks, _, _, _ = sam_predictor.predict_torch(
+                point_coords = None,
+                point_labels = None,
+                boxes = transformed_boxes,
+                multimask_output = False,
+            )
+            print("RESULTS*************")
+            print(len(masks))
+            # results = []
+            for mask in masks:
+                print(type(mask))
+                print(mask.shape)
+                #mask_img = mask.cpu().data.numpy()
+                mask_img =self.show_mask(mask.cpu().numpy())
+                print(type(mask_img))
+                print(mask_img.shape)
+                results.append(mask_img)
+            #     results.append(mask.cpu().numpy())
+            return results
+            #assert sam_checkpoint, 'sam_checkpoint is not found!'
+        return None

main.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import gradio as gr
+from bgremover import BackgroundRemover
+from bgremover import DamageClassifier
+from bgremover import clear
+from bgremover import ColorCheckerDetector
+from bgremover import Segmentor
+import rasterio
+import os
+from PIL import Image
+from gradio_client import Client
+# When True, the three models are built once at import time and stored in
+# module-level globals that the handler functions of this file use.
+PRELOAD_MODELS = False
+if PRELOAD_MODELS:
+    backgroundRemover = BackgroundRemover()
+    damage_classifier =  DamageClassifier()
+    segmentor = Segmentor()
+def process(input_img):
+    if PRELOAD_MODELS:
+        global backgroundRemover
+    else:
+        backgroundRemover = BackgroundRemover()
+    output_mask, output_img = backgroundRemover.remove_background_gradio(input_img)
+    return [output_img, output_mask]
+def process_classification(input_img, model_name):
+    if PRELOAD_MODELS:
+        global damage_classifier
+    else:
+        damage_classifier =  DamageClassifier()
+    res = damage_classifier.inference(input_img, model_name)
+    #return {'No damage': 0.1, 'Moderately damaged': 0.1,'Damaged': 0.7, 'Severy damaged': 0.1}
+    return res
+def segment_plant(threshold, input_im, im_mask):
+    if PRELOAD_MODELS:
+        global backgroundRemover
+    else:
+        backgroundRemover = BackgroundRemover()
+    print("segment plant", threshold)
+    res, mask = backgroundRemover.apply_mask(input_im, im_mask, threshold)
+    return res, mask
+def rectangle(im, im_mask):
+    colorCheckerDetector = ColorCheckerDetector()
+    return colorCheckerDetector.process(im_mask, im)
+def get_file_content(file):
+	with rasterio.open(file) as src:
+		# Read the image data
+		image_data = src.read()
+		image = Image.fromarray((image_data[0] * 255).astype(np.uint8))
+	return (gr.Image(value=image, type="pil"))
+def on_img_color_load(input):
+    print("on_img_color_load")
+    print(input)
def run_anything_task(input_image):
    """Segment the color checker in an image.

    Calls ``Segmentor.process`` with the fixed text prompt "color-checker".
    Uses the preloaded ``segmentor`` when ``PRELOAD_MODELS`` is set, otherwise
    builds a new ``Segmentor`` for this call.

    Returns:
        Whatever ``Segmentor.process`` returns; the UI shows it in a gallery.
    """
    global segmentor
    if not PRELOAD_MODELS:
        segmentor = Segmentor()
    # The unused ``task_type`` local was dropped: it was never passed on.
    text_prompt = "color-checker"
    return segmentor.process(input_image, text_prompt)
# UI layout. Components created with render=False are only placed on the page
# where an Interface/event below uses them.
with gr.Blocks(title="Phenotyping pipeline") as demo:
    gr.Markdown(
    """
    # Phenotyping pipeline
    Modular phenotyping pipeline.
    """)

    # Image components shared between tabs.
    input_im = gr.Image(render=False)
    im_result = gr.Image(render=False)
    im_mask = gr.Image(render=False)
    im_masked = gr.Image(render=False)
    im_color = gr.Image(render=False)
    im_color_orginal = gr.Image(render=False)
    im_color.change(on_img_color_load, im_color)
    im_color_checker_mask = gr.Image(render=False)

    with gr.Tab("Damage Classification"):
        model_option = gr.Dropdown(
            ["Regnet", "Resnet18", "Resnet152", "Googlenet"],
            label="Classification model",
            info="The classification model to use for inference",
            value="Regnet",
        )
        gr.Interface(
            fn=process_classification,
            inputs=[input_im, model_option],
            outputs="label",
            # NOTE(review): each example row supplies only the image although
            # the interface has two inputs -- confirm this is intended.
            examples=[
                ["183_Week_1_(28th_Aug_-_1st_Sept.)_2023_nd.jpg"],
                ["20_WEEK_5_(_FIELD_A)_md.jpg"],
                ["30_WEEK_5_(_FIELD_A)_damaged.jpg"],
                ["25_WEEK_4_(_Field_A)_sd.jpg"],
            ],
        )

    with gr.Tab("Color Checker detection"):
        gr.Interface(fn=run_anything_task, inputs=input_im, outputs=gr.Gallery())

    with gr.Tab("Color Calibration"):
        gr.Interface(
            fn=rectangle,
            inputs=[input_im, im_color_checker_mask],
            outputs=gr.Gallery(),
            examples=[["264_WEEK_5_(_FIELD_A).jpg", "264_mask.jpg"]],
        )
        # No click handler is attached to this button in this file.
        gr.Button("Calibrate")

    with gr.Tab("Plant segmentation"):
        with gr.Column(scale=1):
            gr.Interface(fn=process, inputs=input_im, outputs=[im_result, im_mask])
            slider_thresh = gr.Slider(
                minimum=0,
                maximum=255,
                value=100,
                step=1,
                label="Threshold",
                info="Segmentation threshold",
                interactive=True,
            )
            # Re-segment when the slider is released; the two results go to
            # freshly created image components.
            slider_thresh.release(
                fn=segment_plant,
                inputs=[slider_thresh, input_im, im_mask],
                outputs=[gr.Image(), gr.Image()],
            )

    # Not implemented yet (previously sketched as commented-out code): tabs for
    # damage segmentation and batch processing, and a "Tests" tab with a
    # "*.tif" gr.FileExplorer whose change event would call get_file_content.
if __name__ == "__main__":
    # The port can be overridden through GRADIO_SERVER_PORT; 7861 otherwise.
    port = int(os.environ.get("GRADIO_SERVER_PORT", 7861))
    demo.launch(
        allowed_paths=["30_WEEK_5_(_FIELD_A)_damaged.jpg"],
        server_port=port,
        share=True,
    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+matplotlib
+numpy
+opencv-python
+pillow
+scikit-image
+scikit-learn
+torch
+torchvision
+gradio

u2net_utils/__init__.py ADDED Viewed

File without changes

u2net_utils/data_loader.py ADDED Viewed

	@@ -0,0 +1,266 @@

+# data loader
+from __future__ import print_function, division
+import glob
+import torch
+from skimage import io, transform, color
+import numpy as np
+import random
+import math
+import matplotlib.pyplot as plt
+from torch.utils.data import Dataset, DataLoader
+from torchvision import transforms, utils
+from PIL import Image
+#==========================dataset load==========================
class RescaleT(object):
	"""Resize the image and label of a sample to a fixed output size.

	An int ``output_size`` produces a square ``output_size x output_size``
	result (aspect ratio is not preserved). A ``(height, width)`` tuple is
	used as the target shape directly.
	"""

	def __init__(self,output_size):
		assert isinstance(output_size,(int,tuple))
		self.output_size = output_size

	def __call__(self,sample):
		"""Return a new sample dict with 'image' and 'label' resized."""
		imidx, image, label = sample['imidx'], sample['image'],sample['label']

		# The original computed an aspect-preserving size and then ignored it,
		# and passed a tuple output_size to resize as a tuple of tuples.
		if isinstance(self.output_size,int):
			target = (self.output_size,self.output_size)
		else:
			target = tuple(self.output_size)

		img = transform.resize(image,target,mode='constant')
		# order=0 with preserve_range=True: the label keeps its value range
		# and is not interpolated between values.
		lbl = transform.resize(label,target,mode='constant', order=0, preserve_range=True)

		return {'imidx':imidx, 'image':img,'label':lbl}
class Rescale(object):
	"""Randomly flip a sample along its first axis, then resize it.

	With an int ``output_size`` the shorter side becomes ``output_size`` and
	the longer side is scaled to keep the aspect ratio; with a tuple the
	size is taken as ``(height, width)``.
	"""

	def __init__(self,output_size):
		assert isinstance(output_size,(int,tuple))
		self.output_size = output_size

	def __call__(self,sample):
		"""Return a new sample dict with 'image' and 'label' flipped/resized."""
		imidx = sample['imidx']
		image = sample['image']
		label = sample['label']

		# 50% chance of reversing the row order of both arrays.
		if random.random() >= 0.5:
			image, label = image[::-1], label[::-1]

		rows, cols = image.shape[:2]
		size = self.output_size
		if not isinstance(size,int):
			target_h, target_w = size
		elif rows > cols:
			target_h, target_w = size*rows/cols, size
		else:
			target_h, target_w = size, size*cols/rows
		shape = (int(target_h), int(target_w))

		# resize the image to new_h x new_w and convert image from range [0,255] to [0,1]
		resized_image = transform.resize(image,shape,mode='constant')
		resized_label = transform.resize(label,shape,mode='constant', order=0, preserve_range=True)

		return {'imidx':imidx, 'image':resized_image,'label':resized_label}
class RandomCrop(object):
	"""Randomly flip a sample along its first axis, then crop a random window.

	``output_size`` is the crop size: an int for a square crop or a
	``(height, width)`` tuple.
	"""

	def __init__(self,output_size):
		assert isinstance(output_size, (int, tuple))
		if isinstance(output_size, int):
			self.output_size = (output_size, output_size)
		else:
			assert len(output_size) == 2
			self.output_size = output_size

	def __call__(self,sample):
		"""Return a new sample dict with 'image' and 'label' cropped alike.

		Raises:
			ValueError: if the crop is larger than the image.
		"""
		imidx, image, label = sample['imidx'], sample['image'], sample['label']

		# 50% chance of reversing the row order of both arrays.
		if random.random() >= 0.5:
			image = image[::-1]
			label = label[::-1]

		h, w = image.shape[:2]
		new_h, new_w = self.output_size
		if new_h > h or new_w > w:
			raise ValueError(
				'crop size (%d, %d) is larger than image size (%d, %d)' % (new_h, new_w, h, w))

		# randint's upper bound is exclusive: +1 makes the last valid offset
		# reachable and lets a crop of exactly the image size through.
		top = np.random.randint(0, h - new_h + 1)
		left = np.random.randint(0, w - new_w + 1)

		image = image[top: top + new_h, left: left + new_w]
		label = label[top: top + new_h, left: left + new_w]

		return {'imidx':imidx,'image':image, 'label':label}
class ToTensor(object):
	"""Convert ndarrays in sample to Tensors."""

	def __call__(self, sample):
		"""Normalize an HxWxC image and HxWx1 label and return CxHxW tensors.

		The image is divided by its maximum and then shifted/scaled per
		channel with fixed constants (presumably the usual ImageNet channel
		statistics). A single-channel image is replicated to three channels
		using the first channel's constants. The label is divided by its
		maximum unless that maximum is below 1e-6.
		"""
		imidx, image, label = sample['imidx'], sample['image'], sample['label']

		# Guard the division: an all-zero image used to become all-NaN.
		peak = np.max(image)
		if peak != 0:
			image = image/peak

		label_peak = np.max(label)
		if not label_peak < 1e-6:
			label = label/label_peak

		tmpImg = np.zeros((image.shape[0],image.shape[1],3))
		if image.shape[2]==1:
			for c in range(3):
				tmpImg[:,:,c] = (image[:,:,0]-0.485)/0.229
		else:
			tmpImg[:,:,0] = (image[:,:,0]-0.485)/0.229
			tmpImg[:,:,1] = (image[:,:,1]-0.456)/0.224
			tmpImg[:,:,2] = (image[:,:,2]-0.406)/0.225

		tmpImg = tmpImg.transpose((2, 0, 1))
		# An undivided label may still be a flipped (negative-stride) view,
		# which torch.from_numpy rejects; make it contiguous first.
		tmpLbl = np.ascontiguousarray(label.transpose((2, 0, 1)))

		return {'imidx':torch.from_numpy(imidx), 'image': torch.from_numpy(tmpImg), 'label': torch.from_numpy(tmpLbl)}
class ToTensorLab(object):
	"""Convert ndarrays in sample to Tensors.

	``flag`` selects the color representation of the image tensor:
	0 (default) -> 3-channel RGB with fixed mean/std normalization,
	1 -> 3-channel Lab, min-max scaled then standardized per channel,
	2 -> 6 channels (RGB followed by Lab), each min-max scaled then
	standardized per channel.
	"""

	def __init__(self,flag=0):
		self.flag = flag

	@staticmethod
	def _min_max(channel):
		"""Scale a channel to [0, 1]; a constant channel maps to zeros, not NaN."""
		low = np.min(channel)
		span = np.max(channel)-low
		if span == 0:
			return np.zeros(channel.shape)
		return (channel-low)/span

	@staticmethod
	def _standardize(channel):
		"""Subtract the mean and divide by the std (skipped when the std is 0)."""
		centered = channel-np.mean(channel)
		spread = np.std(channel)
		if spread == 0:
			return centered
		return centered/spread

	@staticmethod
	def _as_three_channels(image):
		"""Return image unchanged if multi-channel, else its channel repeated 3x."""
		if image.shape[2] != 1:
			return image
		stacked = np.zeros((image.shape[0],image.shape[1],3))
		for c in range(3):
			stacked[:,:,c] = image[:,:,0]
		return stacked

	def __call__(self, sample):
		"""Return CxHxW tensors for 'image' and 'label' plus the index tensor."""
		imidx, image, label =sample['imidx'], sample['image'], sample['label']

		label_peak = np.max(label)
		if not label_peak < 1e-6:
			label = label/label_peak

		# change the color space
		if self.flag == 2: # with rgb and Lab colors
			rgb = self._as_three_channels(image)
			lab = color.rgb2lab(rgb)
			tmpImg = np.zeros((image.shape[0],image.shape[1],6))
			# normalize every channel to range [0,1] ...
			for c in range(3):
				tmpImg[:,:,c] = self._min_max(rgb[:,:,c])
				tmpImg[:,:,c+3] = self._min_max(lab[:,:,c])
			# ... then standardize it
			for c in range(6):
				tmpImg[:,:,c] = self._standardize(tmpImg[:,:,c])

		elif self.flag == 1: #with Lab color
			tmpImg = color.rgb2lab(self._as_three_channels(image))
			for c in range(3):
				tmpImg[:,:,c] = self._min_max(tmpImg[:,:,c])
			for c in range(3):
				tmpImg[:,:,c] = self._standardize(tmpImg[:,:,c])

		else: # with rgb color
			# Guard the division: an all-zero image used to become all-NaN.
			peak = np.max(image)
			if peak != 0:
				image = image/peak
			tmpImg = np.zeros((image.shape[0],image.shape[1],3))
			if image.shape[2]==1:
				for c in range(3):
					tmpImg[:,:,c] = (image[:,:,0]-0.485)/0.229
			else:
				tmpImg[:,:,0] = (image[:,:,0]-0.485)/0.229
				tmpImg[:,:,1] = (image[:,:,1]-0.456)/0.224
				tmpImg[:,:,2] = (image[:,:,2]-0.406)/0.225

		tmpImg = tmpImg.transpose((2, 0, 1))
		# An undivided label may still be a flipped (negative-stride) view,
		# which torch.from_numpy rejects; make it contiguous first.
		tmpLbl = np.ascontiguousarray(label.transpose((2, 0, 1)))

		return {'imidx':torch.from_numpy(imidx), 'image': torch.from_numpy(tmpImg), 'label': torch.from_numpy(tmpLbl)}
class SalObjDataset(Dataset):
	"""Dataset of image/label file pairs yielding sample dicts.

	Each item is ``{'imidx', 'image', 'label'}``, optionally passed through
	``transform``. When ``lbl_name_list`` is empty, an all-zero label shaped
	like the image is used instead of reading a file.
	"""

	def __init__(self,img_name_list,lbl_name_list,transform=None):
		self.image_name_list = img_name_list
		self.label_name_list = lbl_name_list
		self.transform = transform

	def __len__(self):
		return len(self.image_name_list)

	def __getitem__(self,idx):
		image = io.imread(self.image_name_list[idx])

		if len(self.label_name_list) == 0:
			raw_label = np.zeros(image.shape)
		else:
			raw_label = io.imread(self.label_name_list[idx])

		# Reduce the label to 2-D: first channel of a 3-D label, a 2-D label
		# as is, zeros for anything else.
		if raw_label.ndim == 3:
			label = raw_label[:,:,0]
		elif raw_label.ndim == 2:
			label = raw_label
		else:
			label = np.zeros(raw_label.shape[0:2])

		# Give the label (and a 2-D image) an explicit trailing channel axis.
		if label.ndim == 2:
			if image.ndim == 3:
				label = label[:,:,np.newaxis]
			elif image.ndim == 2:
				image = image[:,:,np.newaxis]
				label = label[:,:,np.newaxis]

		sample = {'imidx':np.array([idx]), 'image':image, 'label':label}
		if self.transform:
			sample = self.transform(sample)
		return sample

u2net_utils/model/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .u2net import U2NET
2	+ from .u2net import U2NETP

u2net_utils/model/u2net.py ADDED Viewed

	@@ -0,0 +1,525 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
class REBNCONV(nn.Module):
    """3x3 convolution followed by batch norm and ReLU.

    ``dirate`` is used both as dilation and as padding, so the spatial size
    of the input is preserved.
    """

    def __init__(self, in_ch=3, out_ch=3, dirate=1):
        super().__init__()
        self.conv_s1 = nn.Conv2d(in_ch, out_ch, 3, padding=dirate, dilation=dirate)
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self, x):
        features = self.conv_s1(x)
        features = self.bn_s1(features)
        return self.relu_s1(features)
+## upsample tensor 'src' to have the same spatial size with tensor 'tar'
+def _upsample_like(src,tar):
+    src = F.upsample(src,size=tar.shape[2:],mode='bilinear')
+    return src
+### RSU-7 ###
class RSU7(nn.Module):
    """Residual U-block with seven levels: five 2x poolings and a dilated bottom.

    The output has ``out_ch`` channels and the spatial size of the input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # bottom level: dilation instead of another pooling
        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder (inputs are concatenated with the encoder skip, hence *2)
        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(self.pool1(enc1))
        enc3 = self.rebnconv3(self.pool2(enc2))
        enc4 = self.rebnconv4(self.pool3(enc3))
        enc5 = self.rebnconv5(self.pool4(enc4))
        enc6 = self.rebnconv6(self.pool5(enc5))
        bottom = self.rebnconv7(enc6)

        dec = self.rebnconv6d(torch.cat((bottom, enc6), 1))
        dec = self.rebnconv5d(torch.cat((_upsample_like(dec, enc5), enc5), 1))
        dec = self.rebnconv4d(torch.cat((_upsample_like(dec, enc4), enc4), 1))
        dec = self.rebnconv3d(torch.cat((_upsample_like(dec, enc3), enc3), 1))
        dec = self.rebnconv2d(torch.cat((_upsample_like(dec, enc2), enc2), 1))
        dec = self.rebnconv1d(torch.cat((_upsample_like(dec, enc1), enc1), 1))

        return dec + residual
+### RSU-6 ###
class RSU6(nn.Module):
    """Residual U-block with six levels: four 2x poolings and a dilated bottom.

    The output has ``out_ch`` channels and the spatial size of the input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # bottom level: dilation instead of another pooling
        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder (inputs are concatenated with the encoder skip, hence *2)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(self.pool1(enc1))
        enc3 = self.rebnconv3(self.pool2(enc2))
        enc4 = self.rebnconv4(self.pool3(enc3))
        enc5 = self.rebnconv5(self.pool4(enc4))
        bottom = self.rebnconv6(enc5)

        dec = self.rebnconv5d(torch.cat((bottom, enc5), 1))
        dec = self.rebnconv4d(torch.cat((_upsample_like(dec, enc4), enc4), 1))
        dec = self.rebnconv3d(torch.cat((_upsample_like(dec, enc3), enc3), 1))
        dec = self.rebnconv2d(torch.cat((_upsample_like(dec, enc2), enc2), 1))
        dec = self.rebnconv1d(torch.cat((_upsample_like(dec, enc1), enc1), 1))

        return dec + residual
+### RSU-5 ###
class RSU5(nn.Module):
    """Residual U-block with five levels: three 2x poolings and a dilated bottom.

    The output has ``out_ch`` channels and the spatial size of the input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # bottom level: dilation instead of another pooling
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder (inputs are concatenated with the encoder skip, hence *2)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(self.pool1(enc1))
        enc3 = self.rebnconv3(self.pool2(enc2))
        enc4 = self.rebnconv4(self.pool3(enc3))
        bottom = self.rebnconv5(enc4)

        dec = self.rebnconv4d(torch.cat((bottom, enc4), 1))
        dec = self.rebnconv3d(torch.cat((_upsample_like(dec, enc3), enc3), 1))
        dec = self.rebnconv2d(torch.cat((_upsample_like(dec, enc2), enc2), 1))
        dec = self.rebnconv1d(torch.cat((_upsample_like(dec, enc1), enc1), 1))

        return dec + residual
+### RSU-4 ###
class RSU4(nn.Module):
    """Residual U-block with four levels: two 2x poolings and a dilated bottom.

    The output has ``out_ch`` channels and the spatial size of the input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # bottom level: dilation instead of another pooling
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder (inputs are concatenated with the encoder skip, hence *2)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(self.pool1(enc1))
        enc3 = self.rebnconv3(self.pool2(enc2))
        bottom = self.rebnconv4(enc3)

        dec = self.rebnconv3d(torch.cat((bottom, enc3), 1))
        dec = self.rebnconv2d(torch.cat((_upsample_like(dec, enc2), enc2), 1))
        dec = self.rebnconv1d(torch.cat((_upsample_like(dec, enc1), enc1), 1))

        return dec + residual
+### RSU-4F ###
class RSU4F(nn.Module):
    """Residual U-block without pooling: dilations 1, 2, 4, 8 replace downsampling.

    Every layer works at the input resolution, so no upsampling is needed
    in the decoder.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder with growing dilation
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)

        # decoder mirrors the encoder dilations
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(enc1)
        enc3 = self.rebnconv3(enc2)
        bottom = self.rebnconv4(enc3)

        dec = self.rebnconv3d(torch.cat((bottom, enc3), 1))
        dec = self.rebnconv2d(torch.cat((dec, enc2), 1))
        dec = self.rebnconv1d(torch.cat((dec, enc1), 1))

        return dec + residual
+##### U^2-Net ####
class U2NET(nn.Module):
    """Full-size U^2-Net: six encoder stages, five decoder stages, six side outputs.

    ``forward`` returns a 7-tuple ``(d0, d1, ..., d6)`` of sigmoid maps with
    ``out_ch`` channels at the input resolution: ``d0`` is the fusion of the
    six side outputs, ``d1``..``d6`` are the side outputs themselves.
    """

    def __init__(self,in_ch=3,out_ch=1):
        super(U2NET,self).__init__()

        # encoder
        self.stage1 = RSU7(in_ch,32,64)
        self.pool12 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage2 = RSU6(64,32,128)
        self.pool23 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage3 = RSU5(128,64,256)
        self.pool34 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage4 = RSU4(256,128,512)
        self.pool45 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage5 = RSU4F(512,256,512)
        self.pool56 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage6 = RSU4F(512,256,512)

        # decoder (input channels = upsampled deeper stage + encoder skip)
        self.stage5d = RSU4F(1024,256,512)
        self.stage4d = RSU4(1024,128,256)
        self.stage3d = RSU5(512,64,128)
        self.stage2d = RSU6(256,32,64)
        self.stage1d = RSU7(128,16,64)

        # side outputs, one per decoder stage plus the deepest encoder stage
        self.side1 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side2 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side3 = nn.Conv2d(128,out_ch,3,padding=1)
        self.side4 = nn.Conv2d(256,out_ch,3,padding=1)
        self.side5 = nn.Conv2d(512,out_ch,3,padding=1)
        self.side6 = nn.Conv2d(512,out_ch,3,padding=1)

        # 1x1 fusion of the six side outputs
        self.outconv = nn.Conv2d(6*out_ch,out_ch,1)

    def forward(self,x):
        hx = x

        #stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        #stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        #stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        #stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        #stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        #stage 6
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6,hx5)

        #-------------------- decoder --------------------
        hx5d = self.stage5d(torch.cat((hx6up,hx5),1))
        hx5dup = _upsample_like(hx5d,hx4)

        hx4d = self.stage4d(torch.cat((hx5dup,hx4),1))
        hx4dup = _upsample_like(hx4d,hx3)

        hx3d = self.stage3d(torch.cat((hx4dup,hx3),1))
        hx3dup = _upsample_like(hx3d,hx2)

        hx2d = self.stage2d(torch.cat((hx3dup,hx2),1))
        hx2dup = _upsample_like(hx2d,hx1)

        hx1d = self.stage1d(torch.cat((hx2dup,hx1),1))

        #side output, each brought to the resolution of d1
        d1 = self.side1(hx1d)
        d2 = _upsample_like(self.side2(hx2d),d1)
        d3 = _upsample_like(self.side3(hx3d),d1)
        d4 = _upsample_like(self.side4(hx4d),d1)
        d5 = _upsample_like(self.side5(hx5d),d1)
        d6 = _upsample_like(self.side6(hx6),d1)

        d0 = self.outconv(torch.cat((d1,d2,d3,d4,d5,d6),1))

        # torch.sigmoid replaces the deprecated F.sigmoid; values are identical.
        return tuple(torch.sigmoid(d) for d in (d0,d1,d2,d3,d4,d5,d6))
+### U^2-Net small ###
class U2NETP(nn.Module):
    """Small U^2-Net: same layout as U2NET with 64-channel stages throughout.

    ``forward`` returns a 7-tuple ``(d0, d1, ..., d6)`` of sigmoid maps with
    ``out_ch`` channels at the input resolution: ``d0`` is the fusion of the
    six side outputs, ``d1``..``d6`` are the side outputs themselves.
    """

    def __init__(self,in_ch=3,out_ch=1):
        super(U2NETP,self).__init__()

        # encoder
        self.stage1 = RSU7(in_ch,16,64)
        self.pool12 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage2 = RSU6(64,16,64)
        self.pool23 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage3 = RSU5(64,16,64)
        self.pool34 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage4 = RSU4(64,16,64)
        self.pool45 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage5 = RSU4F(64,16,64)
        self.pool56 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage6 = RSU4F(64,16,64)

        # decoder (input channels = upsampled deeper stage + encoder skip)
        self.stage5d = RSU4F(128,16,64)
        self.stage4d = RSU4(128,16,64)
        self.stage3d = RSU5(128,16,64)
        self.stage2d = RSU6(128,16,64)
        self.stage1d = RSU7(128,16,64)

        # side outputs, one per decoder stage plus the deepest encoder stage
        self.side1 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side2 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side3 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side4 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side5 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side6 = nn.Conv2d(64,out_ch,3,padding=1)

        # 1x1 fusion of the six side outputs
        self.outconv = nn.Conv2d(6*out_ch,out_ch,1)

    def forward(self,x):
        hx = x

        #stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        #stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        #stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        #stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        #stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        #stage 6
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6,hx5)

        #decoder
        hx5d = self.stage5d(torch.cat((hx6up,hx5),1))
        hx5dup = _upsample_like(hx5d,hx4)

        hx4d = self.stage4d(torch.cat((hx5dup,hx4),1))
        hx4dup = _upsample_like(hx4d,hx3)

        hx3d = self.stage3d(torch.cat((hx4dup,hx3),1))
        hx3dup = _upsample_like(hx3d,hx2)

        hx2d = self.stage2d(torch.cat((hx3dup,hx2),1))
        hx2dup = _upsample_like(hx2d,hx1)

        hx1d = self.stage1d(torch.cat((hx2dup,hx1),1))

        #side output, each brought to the resolution of d1
        d1 = self.side1(hx1d)
        d2 = _upsample_like(self.side2(hx2d),d1)
        d3 = _upsample_like(self.side3(hx3d),d1)
        d4 = _upsample_like(self.side4(hx4d),d1)
        d5 = _upsample_like(self.side5(hx5d),d1)
        d6 = _upsample_like(self.side6(hx6),d1)

        d0 = self.outconv(torch.cat((d1,d2,d3,d4,d5,d6),1))

        # torch.sigmoid replaces the deprecated F.sigmoid; values are identical.
        return tuple(torch.sigmoid(d) for d in (d0,d1,d2,d3,d4,d5,d6))

u2net_utils/model/u2net_refactor.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import torch
+import torch.nn as nn
+import math
+__all__ = ['U2NET_full', 'U2NET_lite']
+def _upsample_like(x, size):
+    return nn.Upsample(size=size, mode='bilinear', align_corners=False)(x)
+def _size_map(x, height):
+    # {height: size} for Upsample
+    size = list(x.shape[-2:])
+    sizes = {}
+    for h in range(1, height):
+        sizes[h] = size
+        size = [math.ceil(w / 2) for w in size]
+    return sizes
class REBNCONV(nn.Module):
    """3x3 convolution followed by batch norm and ReLU.

    ``dilate`` is used both as dilation and as padding, so the spatial size
    of the input is preserved.
    """

    def __init__(self, in_ch=3, out_ch=3, dilate=1):
        super().__init__()
        self.conv_s1 = nn.Conv2d(in_ch, out_ch, 3, padding=dilate, dilation=dilate)
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self, x):
        features = self.conv_s1(x)
        features = self.bn_s1(features)
        return self.relu_s1(features)
class RSU(nn.Module):
    """Configurable residual U-block.

    Args:
        name: label stored on the module (e.g. 'En_1').
        height: number of levels, including the bottom one.
        in_ch, mid_ch, out_ch: channel counts of input, inner layers, output.
        dilated: if True, no pooling/upsampling is done and the layers use
            growing dilation instead.
    """

    def __init__(self, name, height, in_ch, mid_ch, out_ch, dilated=False):
        super().__init__()
        self.name = name
        self.height = height
        self.dilated = dilated
        self._make_layers(height, in_ch, mid_ch, out_ch, dilated)

    def forward(self, x):
        sizes = _size_map(x, self.height)
        residual = self.rebnconvin(x)

        # U-Net like symmetric encoder-decoder structure, walked recursively
        def descend(feat, level=1):
            encode = getattr(self, f'rebnconv{level}')
            if level >= self.height:
                # bottom level: no decoder partner
                return encode(feat)

            skip = encode(feat)
            # pool before going deeper, except into the bottom level
            if not self.dilated and level < self.height - 1:
                deeper = descend(self.downsample(skip), level + 1)
            else:
                deeper = descend(skip, level + 1)

            merged = getattr(self, f'rebnconv{level}d')(torch.cat((deeper, skip), 1))
            if not self.dilated and level > 1:
                # bring the result back to the size of the level above
                return _upsample_like(merged, sizes[level - 1])
            return merged

        return residual + descend(residual)

    def _make_layers(self, height, in_ch, mid_ch, out_ch, dilated=False):
        """Register the conv layers as rebnconvin, rebnconv{i} and rebnconv{i}d."""
        self.add_module('rebnconvin', REBNCONV(in_ch, out_ch))
        self.add_module('downsample', nn.MaxPool2d(2, stride=2, ceil_mode=True))

        self.add_module('rebnconv1', REBNCONV(out_ch, mid_ch))
        self.add_module('rebnconv1d', REBNCONV(mid_ch * 2, out_ch))

        for level in range(2, height):
            rate = 2 ** (level - 1) if dilated else 1
            self.add_module(f'rebnconv{level}', REBNCONV(mid_ch, mid_ch, dilate=rate))
            self.add_module(f'rebnconv{level}d', REBNCONV(mid_ch * 2, mid_ch, dilate=rate))

        bottom_rate = 2 ** (height - 1) if dilated else 2
        self.add_module(f'rebnconv{height}', REBNCONV(mid_ch, mid_ch, dilate=bottom_rate))
class U2NET(nn.Module):
    """U^2-Net assembled from a stage configuration dict.

    Args:
        cfgs: ``{stage: [name, (height, in_ch, mid_ch, out_ch, dilated), side]}``
            where ``side`` is the input channel count of the stage's side
            output, or a non-positive value for stages without one.
        out_ch: number of channels of every saliency map.

    ``forward`` returns a list of sigmoid maps at the input resolution: the
    fused map first, then the side outputs from the shallowest stage down.
    """

    def __init__(self, cfgs, out_ch):
        super(U2NET, self).__init__()
        self.out_ch = out_ch
        self._make_layers(cfgs)

    def forward(self, x):
        sizes = _size_map(x, self.height)
        maps = []  # storage for maps

        # side saliency map
        def unet(x, height=1):
            # Depth follows self.height (6 for an 11-stage config) instead of
            # a hard-coded 6, consistent with ``sizes`` and ``outconv``.
            if height < self.height:
                x1 = getattr(self, f'stage{height}')(x)
                x2 = unet(getattr(self, 'downsample')(x1), height + 1)
                x = getattr(self, f'stage{height}d')(torch.cat((x2, x1), 1))
                side(x, height)
                return _upsample_like(x, sizes[height - 1]) if height > 1 else x
            else:
                x = getattr(self, f'stage{height}')(x)
                side(x, height)
                return _upsample_like(x, sizes[height - 1])

        def side(x, h):
            # side output saliency map (before sigmoid)
            x = getattr(self, f'side{h}')(x)
            x = _upsample_like(x, sizes[1])
            maps.append(x)

        def fuse():
            # fuse saliency probability maps
            # maps were appended deepest-first; reverse to get side1 first
            maps.reverse()
            x = torch.cat(maps, 1)
            x = getattr(self, 'outconv')(x)
            maps.insert(0, x)
            return [torch.sigmoid(x) for x in maps]

        unet(x)
        maps = fuse()
        return maps

    def _make_layers(self, cfgs):
        """Register the RSU stages, side convolutions and the fusion conv."""
        # an encoder/decoder with N levels has 2N - 1 stages
        self.height = int((len(cfgs) + 1) / 2)
        self.add_module('downsample', nn.MaxPool2d(2, stride=2, ceil_mode=True))
        for k, v in cfgs.items():
            # build rsu block
            self.add_module(k, RSU(v[0], *v[1]))
            if v[2] > 0:
                # build side layer; its index is the last character of the
                # stage name (e.g. 'De_5' -> 'side5')
                self.add_module(f'side{v[0][-1]}', nn.Conv2d(v[2], self.out_ch, 3, padding=1))
        # build fuse layer
        self.add_module('outconv', nn.Conv2d(int(self.height * self.out_ch), self.out_ch, 1))
+def U2NET_full():
+    full = {
+        # cfgs for building RSUs and sides
+        # {stage : [name, (height(L), in_ch, mid_ch, out_ch, dilated), side]}
+        'stage1': ['En_1', (7, 3, 32, 64), -1],
+        'stage2': ['En_2', (6, 64, 32, 128), -1],
+        'stage3': ['En_3', (5, 128, 64, 256), -1],
+        'stage4': ['En_4', (4, 256, 128, 512), -1],
+        'stage5': ['En_5', (4, 512, 256, 512, True), -1],
+        'stage6': ['En_6', (4, 512, 256, 512, True), 512],
+        'stage5d': ['De_5', (4, 1024, 256, 512, True), 512],
+        'stage4d': ['De_4', (4, 1024, 128, 256), 256],
+        'stage3d': ['De_3', (5, 512, 64, 128), 128],
+        'stage2d': ['De_2', (6, 256, 32, 64), 64],
+        'stage1d': ['De_1', (7, 128, 16, 64), 64],
+    }
+    return U2NET(cfgs=full, out_ch=1)
+def U2NET_lite():
+    lite = {
+        # cfgs for building RSUs and sides
+        # {stage : [name, (height(L), in_ch, mid_ch, out_ch, dilated), side]}
+        'stage1': ['En_1', (7, 3, 16, 64), -1],
+        'stage2': ['En_2', (6, 64, 16, 64), -1],
+        'stage3': ['En_3', (5, 64, 16, 64), -1],
+        'stage4': ['En_4', (4, 64, 16, 64), -1],
+        'stage5': ['En_5', (4, 64, 16, 64, True), -1],
+        'stage6': ['En_6', (4, 64, 16, 64, True), 64],
+        'stage5d': ['De_5', (4, 128, 16, 64, True), 64],
+        'stage4d': ['De_4', (4, 128, 16, 64), 64],
+        'stage3d': ['De_3', (5, 128, 16, 64), 64],
+        'stage2d': ['De_2', (6, 128, 16, 64), 64],
+        'stage1d': ['De_1', (7, 128, 16, 64), 64],
+    }
+    return U2NET(cfgs=lite, out_ch=1)