hylee committed on
Commit
de51c6d
1 Parent(s): ada0e20
U-2-Net/__pycache__/data_loader.cpython-38.pyc ADDED
Binary file (8.75 kB).
 
U-2-Net/data_loader.py ADDED
@@ -0,0 +1,266 @@
# data loader
from __future__ import print_function, division
import glob
import torch
from skimage import io, transform, color
import numpy as np
import random
import math
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image

#==========================dataset load==========================
class RescaleT(object):

    def __init__(self,output_size):
        assert isinstance(output_size,(int,tuple))
        self.output_size = output_size

    def __call__(self,sample):
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        h, w = image.shape[:2]

        if isinstance(self.output_size,int):
            if h > w:
                new_h, new_w = self.output_size*h/w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size*w/h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        # resize the image to new_h x new_w and convert image from range [0,255] to [0,1]
        # img = transform.resize(image,(new_h,new_w),mode='constant')
        # lbl = transform.resize(label,(new_h,new_w),mode='constant', order=0, preserve_range=True)

        img = transform.resize(image,(self.output_size,self.output_size),mode='constant')
        lbl = transform.resize(label,(self.output_size,self.output_size),mode='constant', order=0, preserve_range=True)

        return {'imidx':imidx, 'image':img, 'label':lbl}

class Rescale(object):

    def __init__(self,output_size):
        assert isinstance(output_size,(int,tuple))
        self.output_size = output_size

    def __call__(self,sample):
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        if random.random() >= 0.5:
            image = image[::-1]
            label = label[::-1]

        h, w = image.shape[:2]

        if isinstance(self.output_size,int):
            if h > w:
                new_h, new_w = self.output_size*h/w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size*w/h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        # resize the image to new_h x new_w and convert image from range [0,255] to [0,1]
        img = transform.resize(image,(new_h,new_w),mode='constant')
        lbl = transform.resize(label,(new_h,new_w),mode='constant', order=0, preserve_range=True)

        return {'imidx':imidx, 'image':img, 'label':lbl}

class RandomCrop(object):

    def __init__(self,output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self,sample):
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        if random.random() >= 0.5:
            image = image[::-1]
            label = label[::-1]

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        top = np.random.randint(0, h - new_h)
        left = np.random.randint(0, w - new_w)

        image = image[top: top + new_h, left: left + new_w]
        label = label[top: top + new_h, left: left + new_w]

        return {'imidx':imidx, 'image':image, 'label':label}

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):

        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        tmpImg = np.zeros((image.shape[0],image.shape[1],3))
        tmpLbl = np.zeros(label.shape)

        image = image/np.max(image)
        if(np.max(label)<1e-6):
            label = label
        else:
            label = label/np.max(label)

        if image.shape[2]==1:
            tmpImg[:,:,0] = (image[:,:,0]-0.485)/0.229
            tmpImg[:,:,1] = (image[:,:,0]-0.485)/0.229
            tmpImg[:,:,2] = (image[:,:,0]-0.485)/0.229
        else:
            tmpImg[:,:,0] = (image[:,:,0]-0.485)/0.229
            tmpImg[:,:,1] = (image[:,:,1]-0.456)/0.224
            tmpImg[:,:,2] = (image[:,:,2]-0.406)/0.225

        tmpLbl[:,:,0] = label[:,:,0]

        tmpImg = tmpImg.transpose((2, 0, 1))
        tmpLbl = label.transpose((2, 0, 1))

        return {'imidx':torch.from_numpy(imidx), 'image': torch.from_numpy(tmpImg), 'label': torch.from_numpy(tmpLbl)}

class ToTensorLab(object):
    """Convert ndarrays in sample to Tensors."""
    def __init__(self,flag=0):
        self.flag = flag

    def __call__(self, sample):

        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        tmpLbl = np.zeros(label.shape)

        if(np.max(label)<1e-6):
            label = label
        else:
            label = label/np.max(label)

        # change the color space
        if self.flag == 2: # with rgb and Lab colors
            tmpImg = np.zeros((image.shape[0],image.shape[1],6))
            tmpImgt = np.zeros((image.shape[0],image.shape[1],3))
            if image.shape[2]==1:
                tmpImgt[:,:,0] = image[:,:,0]
                tmpImgt[:,:,1] = image[:,:,0]
                tmpImgt[:,:,2] = image[:,:,0]
            else:
                tmpImgt = image
            tmpImgtl = color.rgb2lab(tmpImgt)

            # normalize image to range [0,1]
            tmpImg[:,:,0] = (tmpImgt[:,:,0]-np.min(tmpImgt[:,:,0]))/(np.max(tmpImgt[:,:,0])-np.min(tmpImgt[:,:,0]))
            tmpImg[:,:,1] = (tmpImgt[:,:,1]-np.min(tmpImgt[:,:,1]))/(np.max(tmpImgt[:,:,1])-np.min(tmpImgt[:,:,1]))
            tmpImg[:,:,2] = (tmpImgt[:,:,2]-np.min(tmpImgt[:,:,2]))/(np.max(tmpImgt[:,:,2])-np.min(tmpImgt[:,:,2]))
            tmpImg[:,:,3] = (tmpImgtl[:,:,0]-np.min(tmpImgtl[:,:,0]))/(np.max(tmpImgtl[:,:,0])-np.min(tmpImgtl[:,:,0]))
            tmpImg[:,:,4] = (tmpImgtl[:,:,1]-np.min(tmpImgtl[:,:,1]))/(np.max(tmpImgtl[:,:,1])-np.min(tmpImgtl[:,:,1]))
            tmpImg[:,:,5] = (tmpImgtl[:,:,2]-np.min(tmpImgtl[:,:,2]))/(np.max(tmpImgtl[:,:,2])-np.min(tmpImgtl[:,:,2]))

            # tmpImg = tmpImg/(np.max(tmpImg)-np.min(tmpImg))

            tmpImg[:,:,0] = (tmpImg[:,:,0]-np.mean(tmpImg[:,:,0]))/np.std(tmpImg[:,:,0])
            tmpImg[:,:,1] = (tmpImg[:,:,1]-np.mean(tmpImg[:,:,1]))/np.std(tmpImg[:,:,1])
            tmpImg[:,:,2] = (tmpImg[:,:,2]-np.mean(tmpImg[:,:,2]))/np.std(tmpImg[:,:,2])
            tmpImg[:,:,3] = (tmpImg[:,:,3]-np.mean(tmpImg[:,:,3]))/np.std(tmpImg[:,:,3])
            tmpImg[:,:,4] = (tmpImg[:,:,4]-np.mean(tmpImg[:,:,4]))/np.std(tmpImg[:,:,4])
            tmpImg[:,:,5] = (tmpImg[:,:,5]-np.mean(tmpImg[:,:,5]))/np.std(tmpImg[:,:,5])

        elif self.flag == 1: # with Lab color
            tmpImg = np.zeros((image.shape[0],image.shape[1],3))

            if image.shape[2]==1:
                tmpImg[:,:,0] = image[:,:,0]
                tmpImg[:,:,1] = image[:,:,0]
                tmpImg[:,:,2] = image[:,:,0]
            else:
                tmpImg = image

            tmpImg = color.rgb2lab(tmpImg)

            # tmpImg = tmpImg/(np.max(tmpImg)-np.min(tmpImg))

            tmpImg[:,:,0] = (tmpImg[:,:,0]-np.min(tmpImg[:,:,0]))/(np.max(tmpImg[:,:,0])-np.min(tmpImg[:,:,0]))
            tmpImg[:,:,1] = (tmpImg[:,:,1]-np.min(tmpImg[:,:,1]))/(np.max(tmpImg[:,:,1])-np.min(tmpImg[:,:,1]))
            tmpImg[:,:,2] = (tmpImg[:,:,2]-np.min(tmpImg[:,:,2]))/(np.max(tmpImg[:,:,2])-np.min(tmpImg[:,:,2]))

            tmpImg[:,:,0] = (tmpImg[:,:,0]-np.mean(tmpImg[:,:,0]))/np.std(tmpImg[:,:,0])
            tmpImg[:,:,1] = (tmpImg[:,:,1]-np.mean(tmpImg[:,:,1]))/np.std(tmpImg[:,:,1])
            tmpImg[:,:,2] = (tmpImg[:,:,2]-np.mean(tmpImg[:,:,2]))/np.std(tmpImg[:,:,2])

        else: # with rgb color
            tmpImg = np.zeros((image.shape[0],image.shape[1],3))
            image = image/np.max(image)
            if image.shape[2]==1:
                tmpImg[:,:,0] = (image[:,:,0]-0.485)/0.229
                tmpImg[:,:,1] = (image[:,:,0]-0.485)/0.229
                tmpImg[:,:,2] = (image[:,:,0]-0.485)/0.229
            else:
                tmpImg[:,:,0] = (image[:,:,0]-0.485)/0.229
                tmpImg[:,:,1] = (image[:,:,1]-0.456)/0.224
                tmpImg[:,:,2] = (image[:,:,2]-0.406)/0.225

        tmpLbl[:,:,0] = label[:,:,0]

        tmpImg = tmpImg.transpose((2, 0, 1))
        tmpLbl = label.transpose((2, 0, 1))

        return {'imidx':torch.from_numpy(imidx), 'image': torch.from_numpy(tmpImg), 'label': torch.from_numpy(tmpLbl)}

class SalObjDataset(Dataset):
    def __init__(self,img_name_list,lbl_name_list,transform=None):
        # self.root_dir = root_dir
        # self.image_name_list = glob.glob(image_dir+'*.png')
        # self.label_name_list = glob.glob(label_dir+'*.png')
        self.image_name_list = img_name_list
        self.label_name_list = lbl_name_list
        self.transform = transform

    def __len__(self):
        return len(self.image_name_list)

    def __getitem__(self,idx):

        # image = Image.open(self.image_name_list[idx])#io.imread(self.image_name_list[idx])
        # label = Image.open(self.label_name_list[idx])#io.imread(self.label_name_list[idx])

        image = io.imread(self.image_name_list[idx])
        imname = self.image_name_list[idx]
        imidx = np.array([idx])

        if(0==len(self.label_name_list)):
            label_3 = np.zeros(image.shape)
        else:
            label_3 = io.imread(self.label_name_list[idx])

        label = np.zeros(label_3.shape[0:2])
        if(3==len(label_3.shape)):
            label = label_3[:,:,0]
        elif(2==len(label_3.shape)):
            label = label_3

        if(3==len(image.shape) and 2==len(label.shape)):
            label = label[:,:,np.newaxis]
        elif(2==len(image.shape) and 2==len(label.shape)):
            image = image[:,:,np.newaxis]
            label = label[:,:,np.newaxis]

        sample = {'imidx':imidx, 'image':image, 'label':label}

        if self.transform:
            sample = self.transform(sample)

        return sample
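A minimal usage sketch of the loader above. The `images/*.jpg` glob pattern is an assumption for illustration; the transform composition and the empty label list (inference mode) mirror the test scripts later in this commit.

# Sketch only: file paths are hypothetical, the pipeline matches u2net_test.py.
import glob
from torch.utils.data import DataLoader
from torchvision import transforms
from data_loader import RescaleT, ToTensorLab, SalObjDataset

img_list = glob.glob('images/*.jpg')   # assumed input folder
dataset = SalObjDataset(img_name_list=img_list,
                        lbl_name_list=[],  # empty list -> zero labels, inference mode
                        transform=transforms.Compose([RescaleT(320), ToTensorLab(flag=0)]))
loader = DataLoader(dataset, batch_size=1, shuffle=False)
for sample in loader:
    print(sample['image'].shape)  # expected: torch.Size([1, 3, 320, 320])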
U-2-Net/gradio/demo.py ADDED
@@ -0,0 +1,37 @@
import cv2
import paddlehub as hub
import gradio as gr
import torch

# Images
torch.hub.download_url_to_file('https://cdn.pixabay.com/photo/2018/08/12/16/59/ara-3601194_1280.jpg', 'parrot.jpg')
torch.hub.download_url_to_file('https://cdn.pixabay.com/photo/2016/10/21/14/46/fox-1758183_1280.jpg', 'fox.jpg')

model = hub.Module(name='U2Net')

def infer(img):
    result = model.Segmentation(
        images=[cv2.imread(img.name)],
        paths=None,
        batch_size=1,
        input_size=320,
        output_dir='output',
        visualization=True)
    return result[0]['front'][:,:,::-1], result[0]['mask']

inputs = gr.inputs.Image(type='file', label="Original Image")
outputs = [
    gr.outputs.Image(type="numpy", label="Front"),
    gr.outputs.Image(type="numpy", label="Mask")
]

title = "U^2-Net"
description = "Demo for U^2-Net. To use it, simply upload your image, or click one of the examples to load it. Read more at the links below."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2005.09007'>U^2-Net: Going Deeper with Nested U-Structure for Salient Object Detection</a> | <a href='https://github.com/xuebinqin/U-2-Net'>Github Repo</a></p>"

examples = [
    ['fox.jpg'],
    ['parrot.jpg']
]

gr.Interface(infer, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()
U-2-Net/model/__init__.py ADDED
@@ -0,0 +1,2 @@
from .u2net import U2NET
from .u2net import U2NETP
U-2-Net/model/__pycache__/__init__.cpython-36.pyc ADDED
Binary file (257 Bytes).

U-2-Net/model/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (190 Bytes).

U-2-Net/model/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (203 Bytes).

U-2-Net/model/__pycache__/u2net.cpython-36.pyc ADDED
Binary file (11.6 kB).

U-2-Net/model/__pycache__/u2net.cpython-37.pyc ADDED
Binary file (11.1 kB).

U-2-Net/model/__pycache__/u2net.cpython-38.pyc ADDED
Binary file (10.5 kB).
U-2-Net/model/u2net.py ADDED
@@ -0,0 +1,525 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class REBNCONV(nn.Module):
    def __init__(self,in_ch=3,out_ch=3,dirate=1):
        super(REBNCONV,self).__init__()

        self.conv_s1 = nn.Conv2d(in_ch,out_ch,3,padding=1*dirate,dilation=1*dirate)
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self,x):

        hx = x
        xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))

        return xout

## upsample tensor 'src' to have the same spatial size as tensor 'tar'
def _upsample_like(src,tar):

    src = F.upsample(src,size=tar.shape[2:],mode='bilinear')

    return src


### RSU-7 ###
class RSU7(nn.Module):#UNet07DRES(nn.Module):

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU7,self).__init__()

        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)

        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
        self.pool1 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool2 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool3 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool4 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool5 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv6 = REBNCONV(mid_ch,mid_ch,dirate=1)

        self.rebnconv7 = REBNCONV(mid_ch,mid_ch,dirate=2)

        self.rebnconv6d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)

    def forward(self,x):

        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)
        hx = self.pool5(hx5)

        hx6 = self.rebnconv6(hx)

        hx7 = self.rebnconv7(hx6)

        hx6d = self.rebnconv6d(torch.cat((hx7,hx6),1))
        hx6dup = _upsample_like(hx6d,hx5)

        hx5d = self.rebnconv5d(torch.cat((hx6dup,hx5),1))
        hx5dup = _upsample_like(hx5d,hx4)

        hx4d = self.rebnconv4d(torch.cat((hx5dup,hx4),1))
        hx4dup = _upsample_like(hx4d,hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup,hx3),1))
        hx3dup = _upsample_like(hx3d,hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup,hx2),1))
        hx2dup = _upsample_like(hx2d,hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup,hx1),1))

        return hx1d + hxin

### RSU-6 ###
class RSU6(nn.Module):#UNet06DRES(nn.Module):

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU6,self).__init__()

        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)

        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
        self.pool1 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool2 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool3 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool4 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch,mid_ch,dirate=1)

        self.rebnconv6 = REBNCONV(mid_ch,mid_ch,dirate=2)

        self.rebnconv5d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)

    def forward(self,x):

        hx = x

        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)

        hx6 = self.rebnconv6(hx5)

        hx5d = self.rebnconv5d(torch.cat((hx6,hx5),1))
        hx5dup = _upsample_like(hx5d,hx4)

        hx4d = self.rebnconv4d(torch.cat((hx5dup,hx4),1))
        hx4dup = _upsample_like(hx4d,hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup,hx3),1))
        hx3dup = _upsample_like(hx3d,hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup,hx2),1))
        hx2dup = _upsample_like(hx2d,hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup,hx1),1))

        return hx1d + hxin

### RSU-5 ###
class RSU5(nn.Module):#UNet05DRES(nn.Module):

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU5,self).__init__()

        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)

        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
        self.pool1 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool2 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool3 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=1)

        self.rebnconv5 = REBNCONV(mid_ch,mid_ch,dirate=2)

        self.rebnconv4d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)

    def forward(self,x):

        hx = x

        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)

        hx5 = self.rebnconv5(hx4)

        hx4d = self.rebnconv4d(torch.cat((hx5,hx4),1))
        hx4dup = _upsample_like(hx4d,hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup,hx3),1))
        hx3dup = _upsample_like(hx3d,hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup,hx2),1))
        hx2dup = _upsample_like(hx2d,hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup,hx1),1))

        return hx1d + hxin

### RSU-4 ###
class RSU4(nn.Module):#UNet04DRES(nn.Module):

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4,self).__init__()

        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)

        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
        self.pool1 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=1)
        self.pool2 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=1)

        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=2)

        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)

    def forward(self,x):

        hx = x

        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)

        hx4 = self.rebnconv4(hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4,hx3),1))
        hx3dup = _upsample_like(hx3d,hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup,hx2),1))
        hx2dup = _upsample_like(hx2d,hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup,hx1),1))

        return hx1d + hxin

### RSU-4F ###
class RSU4F(nn.Module):#UNet04FRES(nn.Module):

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4F,self).__init__()

        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)

        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=4)

        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=8)

        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)

    def forward(self,x):

        hx = x

        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(hx1)
        hx3 = self.rebnconv3(hx2)

        hx4 = self.rebnconv4(hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4,hx3),1))
        hx2d = self.rebnconv2d(torch.cat((hx3d,hx2),1))
        hx1d = self.rebnconv1d(torch.cat((hx2d,hx1),1))

        return hx1d + hxin


##### U^2-Net ####
class U2NET(nn.Module):

    def __init__(self,in_ch=3,out_ch=1):
        super(U2NET,self).__init__()

        self.stage1 = RSU7(in_ch,32,64)
        self.pool12 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage2 = RSU6(64,32,128)
        self.pool23 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage3 = RSU5(128,64,256)
        self.pool34 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage4 = RSU4(256,128,512)
        self.pool45 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage5 = RSU4F(512,256,512)
        self.pool56 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage6 = RSU4F(512,256,512)

        # decoder
        self.stage5d = RSU4F(1024,256,512)
        self.stage4d = RSU4(1024,128,256)
        self.stage3d = RSU5(512,64,128)
        self.stage2d = RSU6(256,32,64)
        self.stage1d = RSU7(128,16,64)

        self.side1 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side2 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side3 = nn.Conv2d(128,out_ch,3,padding=1)
        self.side4 = nn.Conv2d(256,out_ch,3,padding=1)
        self.side5 = nn.Conv2d(512,out_ch,3,padding=1)
        self.side6 = nn.Conv2d(512,out_ch,3,padding=1)

        self.outconv = nn.Conv2d(6*out_ch,out_ch,1)

    def forward(self,x):

        hx = x

        #stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        #stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        #stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        #stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        #stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        #stage 6
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6,hx5)

        #-------------------- decoder --------------------
        hx5d = self.stage5d(torch.cat((hx6up,hx5),1))
        hx5dup = _upsample_like(hx5d,hx4)

        hx4d = self.stage4d(torch.cat((hx5dup,hx4),1))
        hx4dup = _upsample_like(hx4d,hx3)

        hx3d = self.stage3d(torch.cat((hx4dup,hx3),1))
        hx3dup = _upsample_like(hx3d,hx2)

        hx2d = self.stage2d(torch.cat((hx3dup,hx2),1))
        hx2dup = _upsample_like(hx2d,hx1)

        hx1d = self.stage1d(torch.cat((hx2dup,hx1),1))

        #side output
        d1 = self.side1(hx1d)

        d2 = self.side2(hx2d)
        d2 = _upsample_like(d2,d1)

        d3 = self.side3(hx3d)
        d3 = _upsample_like(d3,d1)

        d4 = self.side4(hx4d)
        d4 = _upsample_like(d4,d1)

        d5 = self.side5(hx5d)
        d5 = _upsample_like(d5,d1)

        d6 = self.side6(hx6)
        d6 = _upsample_like(d6,d1)

        d0 = self.outconv(torch.cat((d1,d2,d3,d4,d5,d6),1))

        return F.sigmoid(d0), F.sigmoid(d1), F.sigmoid(d2), F.sigmoid(d3), F.sigmoid(d4), F.sigmoid(d5), F.sigmoid(d6)

### U^2-Net small ###
class U2NETP(nn.Module):

    def __init__(self,in_ch=3,out_ch=1):
        super(U2NETP,self).__init__()

        self.stage1 = RSU7(in_ch,16,64)
        self.pool12 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage2 = RSU6(64,16,64)
        self.pool23 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage3 = RSU5(64,16,64)
        self.pool34 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage4 = RSU4(64,16,64)
        self.pool45 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage5 = RSU4F(64,16,64)
        self.pool56 = nn.MaxPool2d(2,stride=2,ceil_mode=True)

        self.stage6 = RSU4F(64,16,64)

        # decoder
        self.stage5d = RSU4F(128,16,64)
        self.stage4d = RSU4(128,16,64)
        self.stage3d = RSU5(128,16,64)
        self.stage2d = RSU6(128,16,64)
        self.stage1d = RSU7(128,16,64)

        self.side1 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side2 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side3 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side4 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side5 = nn.Conv2d(64,out_ch,3,padding=1)
        self.side6 = nn.Conv2d(64,out_ch,3,padding=1)

        self.outconv = nn.Conv2d(6*out_ch,out_ch,1)

    def forward(self,x):

        hx = x

        #stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        #stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        #stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        #stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        #stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        #stage 6
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6,hx5)

        #decoder
        hx5d = self.stage5d(torch.cat((hx6up,hx5),1))
        hx5dup = _upsample_like(hx5d,hx4)

        hx4d = self.stage4d(torch.cat((hx5dup,hx4),1))
        hx4dup = _upsample_like(hx4d,hx3)

        hx3d = self.stage3d(torch.cat((hx4dup,hx3),1))
        hx3dup = _upsample_like(hx3d,hx2)

        hx2d = self.stage2d(torch.cat((hx3dup,hx2),1))
        hx2dup = _upsample_like(hx2d,hx1)

        hx1d = self.stage1d(torch.cat((hx2dup,hx1),1))

        #side output
        d1 = self.side1(hx1d)

        d2 = self.side2(hx2d)
        d2 = _upsample_like(d2,d1)

        d3 = self.side3(hx3d)
        d3 = _upsample_like(d3,d1)

        d4 = self.side4(hx4d)
        d4 = _upsample_like(d4,d1)

        d5 = self.side5(hx5d)
        d5 = _upsample_like(d5,d1)

        d6 = self.side6(hx6)
        d6 = _upsample_like(d6,d1)

        d0 = self.outconv(torch.cat((d1,d2,d3,d4,d5,d6),1))

        return F.sigmoid(d0), F.sigmoid(d1), F.sigmoid(d2), F.sigmoid(d3), F.sigmoid(d4), F.sigmoid(d5), F.sigmoid(d6)
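A short smoke-test sketch for the two variants above. The 320x320 input size is borrowed from the test scripts in this commit; everything else is plain PyTorch.

# Sketch only: verifies the forward pass returns the fused map d0 plus six
# side outputs, each upsampled to the input resolution.
import torch
from model import U2NET, U2NETP

net = U2NETP(3, 1)               # small variant: 3-channel input, 1-channel saliency map
x = torch.randn(1, 3, 320, 320)  # assumed input resolution
with torch.no_grad():
    d0, d1, d2, d3, d4, d5, d6 = net(x)
print(d0.shape)                  # torch.Size([1, 1, 320, 320]); d1..d6 have the same shape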
U-2-Net/model/u2net_refactor.py ADDED
@@ -0,0 +1,168 @@
import torch
import torch.nn as nn

import math

__all__ = ['U2NET_full', 'U2NET_lite']


def _upsample_like(x, size):
    return nn.Upsample(size=size, mode='bilinear', align_corners=False)(x)


def _size_map(x, height):
    # {height: size} for Upsample
    size = list(x.shape[-2:])
    sizes = {}
    for h in range(1, height):
        sizes[h] = size
        size = [math.ceil(w / 2) for w in size]
    return sizes


class REBNCONV(nn.Module):
    def __init__(self, in_ch=3, out_ch=3, dilate=1):
        super(REBNCONV, self).__init__()

        self.conv_s1 = nn.Conv2d(in_ch, out_ch, 3, padding=1 * dilate, dilation=1 * dilate)
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu_s1(self.bn_s1(self.conv_s1(x)))


class RSU(nn.Module):
    def __init__(self, name, height, in_ch, mid_ch, out_ch, dilated=False):
        super(RSU, self).__init__()
        self.name = name
        self.height = height
        self.dilated = dilated
        self._make_layers(height, in_ch, mid_ch, out_ch, dilated)

    def forward(self, x):
        sizes = _size_map(x, self.height)
        x = self.rebnconvin(x)

        # U-Net like symmetric encoder-decoder structure
        def unet(x, height=1):
            if height < self.height:
                x1 = getattr(self, f'rebnconv{height}')(x)
                if not self.dilated and height < self.height - 1:
                    x2 = unet(getattr(self, 'downsample')(x1), height + 1)
                else:
                    x2 = unet(x1, height + 1)

                x = getattr(self, f'rebnconv{height}d')(torch.cat((x2, x1), 1))
                return _upsample_like(x, sizes[height - 1]) if not self.dilated and height > 1 else x
            else:
                return getattr(self, f'rebnconv{height}')(x)

        return x + unet(x)

    def _make_layers(self, height, in_ch, mid_ch, out_ch, dilated=False):
        self.add_module('rebnconvin', REBNCONV(in_ch, out_ch))
        self.add_module('downsample', nn.MaxPool2d(2, stride=2, ceil_mode=True))

        self.add_module(f'rebnconv1', REBNCONV(out_ch, mid_ch))
        self.add_module(f'rebnconv1d', REBNCONV(mid_ch * 2, out_ch))

        for i in range(2, height):
            dilate = 1 if not dilated else 2 ** (i - 1)
            self.add_module(f'rebnconv{i}', REBNCONV(mid_ch, mid_ch, dilate=dilate))
            self.add_module(f'rebnconv{i}d', REBNCONV(mid_ch * 2, mid_ch, dilate=dilate))

        dilate = 2 if not dilated else 2 ** (height - 1)
        self.add_module(f'rebnconv{height}', REBNCONV(mid_ch, mid_ch, dilate=dilate))


class U2NET(nn.Module):
    def __init__(self, cfgs, out_ch):
        super(U2NET, self).__init__()
        self.out_ch = out_ch
        self._make_layers(cfgs)

    def forward(self, x):
        sizes = _size_map(x, self.height)
        maps = []  # storage for maps

        # side saliency map
        def unet(x, height=1):
            if height < 6:
                x1 = getattr(self, f'stage{height}')(x)
                x2 = unet(getattr(self, 'downsample')(x1), height + 1)
                x = getattr(self, f'stage{height}d')(torch.cat((x2, x1), 1))
                side(x, height)
                return _upsample_like(x, sizes[height - 1]) if height > 1 else x
            else:
                x = getattr(self, f'stage{height}')(x)
                side(x, height)
                return _upsample_like(x, sizes[height - 1])

        def side(x, h):
            # side output saliency map (before sigmoid)
            x = getattr(self, f'side{h}')(x)
            x = _upsample_like(x, sizes[1])
            maps.append(x)

        def fuse():
            # fuse saliency probability maps
            maps.reverse()
            x = torch.cat(maps, 1)
            x = getattr(self, 'outconv')(x)
            maps.insert(0, x)
            return [torch.sigmoid(x) for x in maps]

        unet(x)
        maps = fuse()
        return maps

    def _make_layers(self, cfgs):
        self.height = int((len(cfgs) + 1) / 2)
        self.add_module('downsample', nn.MaxPool2d(2, stride=2, ceil_mode=True))
        for k, v in cfgs.items():
            # build rsu block
            self.add_module(k, RSU(v[0], *v[1]))
            if v[2] > 0:
                # build side layer
                self.add_module(f'side{v[0][-1]}', nn.Conv2d(v[2], self.out_ch, 3, padding=1))
        # build fuse layer
        self.add_module('outconv', nn.Conv2d(int(self.height * self.out_ch), self.out_ch, 1))


def U2NET_full():
    full = {
        # cfgs for building RSUs and sides
        # {stage : [name, (height(L), in_ch, mid_ch, out_ch, dilated), side]}
        'stage1': ['En_1', (7, 3, 32, 64), -1],
        'stage2': ['En_2', (6, 64, 32, 128), -1],
        'stage3': ['En_3', (5, 128, 64, 256), -1],
        'stage4': ['En_4', (4, 256, 128, 512), -1],
        'stage5': ['En_5', (4, 512, 256, 512, True), -1],
        'stage6': ['En_6', (4, 512, 256, 512, True), 512],
        'stage5d': ['De_5', (4, 1024, 256, 512, True), 512],
        'stage4d': ['De_4', (4, 1024, 128, 256), 256],
        'stage3d': ['De_3', (5, 512, 64, 128), 128],
        'stage2d': ['De_2', (6, 256, 32, 64), 64],
        'stage1d': ['De_1', (7, 128, 16, 64), 64],
    }
    return U2NET(cfgs=full, out_ch=1)


def U2NET_lite():
    lite = {
        # cfgs for building RSUs and sides
        # {stage : [name, (height(L), in_ch, mid_ch, out_ch, dilated), side]}
        'stage1': ['En_1', (7, 3, 16, 64), -1],
        'stage2': ['En_2', (6, 64, 16, 64), -1],
        'stage3': ['En_3', (5, 64, 16, 64), -1],
        'stage4': ['En_4', (4, 64, 16, 64), -1],
        'stage5': ['En_5', (4, 64, 16, 64, True), -1],
        'stage6': ['En_6', (4, 64, 16, 64, True), 64],
        'stage5d': ['De_5', (4, 128, 16, 64, True), 64],
        'stage4d': ['De_4', (4, 128, 16, 64), 64],
        'stage3d': ['De_3', (5, 128, 16, 64), 64],
        'stage2d': ['De_2', (6, 128, 16, 64), 64],
        'stage1d': ['De_1', (7, 128, 16, 64), 64],
    }
    return U2NET(cfgs=lite, out_ch=1)
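For comparison, a hedged sketch of driving the refactored builder; U2NET_full() generates the same topology as the hand-written U2NET above from its cfg dict. The import path assumes the repo root is on sys.path, and the input size is borrowed from the test scripts.

# Sketch only: the forward pass returns a list of seven sigmoid maps, fused map first.
import torch
from model.u2net_refactor import U2NET_full

net = U2NET_full()
x = torch.randn(1, 3, 320, 320)    # assumed input resolution
with torch.no_grad():
    maps = net(x)                  # [d0, d1, ..., d6]
print(len(maps), maps[0].shape)    # 7 torch.Size([1, 1, 320, 320])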
U-2-Net/requirements.txt ADDED
@@ -0,0 +1,9 @@
numpy==1.15.2
scikit-image==0.14.0
torch
torchvision
pillow==8.1.1
opencv-python
paddlepaddle
paddlehub
gradio
U-2-Net/saved_models/face_detection_cv2/haarcascade_frontalface_default.xml ADDED
The diff for this file is too large to render.
 
U-2-Net/saved_models/u2net_portrait/u2net_portrait.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fb9f0378a16868d08e2325c8b36eae2b174b040b91bf64781fbb5dd4d31712b4
size 176315791
U-2-Net/setup_model_weights.py ADDED
@@ -0,0 +1,13 @@
import os
import gdown

os.makedirs('./saved_models/u2net', exist_ok=True)
os.makedirs('./saved_models/u2net_portrait', exist_ok=True)

gdown.download('https://drive.google.com/uc?id=1ao1ovG1Qtx4b7EoskHXmi2E9rp5CHLcZ',
               './saved_models/u2net/u2net.pth',
               quiet=False)

gdown.download('https://drive.google.com/uc?id=1IG3HdpcRiDoWNookbncQjeaPN28t90yW',
               './saved_models/u2net_portrait/u2net_portrait.pth',
               quiet=False)
U-2-Net/u2net_human_seg_test.py ADDED
@@ -0,0 +1,117 @@
import os
from skimage import io, transform
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms#, utils
# import torch.optim as optim

import numpy as np
from PIL import Image
import glob

from data_loader import RescaleT
from data_loader import ToTensor
from data_loader import ToTensorLab
from data_loader import SalObjDataset

from model import U2NET # full size version 173.6 MB

# normalize the predicted SOD probability map
def normPRED(d):
    ma = torch.max(d)
    mi = torch.min(d)

    dn = (d-mi)/(ma-mi)

    return dn

def save_output(image_name,pred,d_dir):

    predict = pred
    predict = predict.squeeze()
    predict_np = predict.cpu().data.numpy()

    im = Image.fromarray(predict_np*255).convert('RGB')
    img_name = image_name.split(os.sep)[-1]
    image = io.imread(image_name)
    imo = im.resize((image.shape[1],image.shape[0]),resample=Image.BILINEAR)

    pb_np = np.array(imo)

    aaa = img_name.split(".")
    bbb = aaa[0:-1]
    imidx = bbb[0]
    for i in range(1,len(bbb)):
        imidx = imidx + "." + bbb[i]

    imo.save(d_dir+imidx+'.png')

def main():

    # --------- 1. get image path and name ---------
    model_name='u2net'

    image_dir = os.path.join(os.getcwd(), 'test_data', 'test_human_images')
    prediction_dir = os.path.join(os.getcwd(), 'test_data', 'test_human_images' + '_results' + os.sep)
    model_dir = os.path.join(os.getcwd(), 'saved_models', model_name+'_human_seg', model_name + '_human_seg.pth')

    img_name_list = glob.glob(image_dir + os.sep + '*')
    print(img_name_list)

    # --------- 2. dataloader ---------
    test_salobj_dataset = SalObjDataset(img_name_list = img_name_list,
                                        lbl_name_list = [],
                                        transform=transforms.Compose([RescaleT(320),
                                                                      ToTensorLab(flag=0)])
                                        )
    test_salobj_dataloader = DataLoader(test_salobj_dataset,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=1)

    # --------- 3. model define ---------
    if(model_name=='u2net'):
        print("...load U2NET---173.6 MB")
        net = U2NET(3,1)

    if torch.cuda.is_available():
        net.load_state_dict(torch.load(model_dir))
        net.cuda()
    else:
        net.load_state_dict(torch.load(model_dir, map_location='cpu'))
    net.eval()

    # --------- 4. inference for each image ---------
    for i_test, data_test in enumerate(test_salobj_dataloader):

        print("inferencing:",img_name_list[i_test].split(os.sep)[-1])

        inputs_test = data_test['image']
        inputs_test = inputs_test.type(torch.FloatTensor)

        if torch.cuda.is_available():
            inputs_test = Variable(inputs_test.cuda())
        else:
            inputs_test = Variable(inputs_test)

        d1,d2,d3,d4,d5,d6,d7 = net(inputs_test)

        # normalization
        pred = d1[:,0,:,:]
        pred = normPRED(pred)

        # save results to test_results folder
        if not os.path.exists(prediction_dir):
            os.makedirs(prediction_dir, exist_ok=True)
        save_output(img_name_list[i_test],pred,prediction_dir)

        del d1,d2,d3,d4,d5,d6,d7

if __name__ == "__main__":
    main()
U-2-Net/u2net_portrait_composite.py ADDED
@@ -0,0 +1,141 @@
import os
from skimage import io, transform
from skimage.filters import gaussian
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms#, utils
# import torch.optim as optim

import numpy as np
from PIL import Image
import glob

from data_loader import RescaleT
from data_loader import ToTensor
from data_loader import ToTensorLab
from data_loader import SalObjDataset

from model import U2NET # full size version 173.6 MB
from model import U2NETP # small version u2net 4.7 MB

import argparse

# normalize the predicted SOD probability map
def normPRED(d):
    ma = torch.max(d)
    mi = torch.min(d)

    dn = (d-mi)/(ma-mi)

    return dn

def save_output(image_name,pred,d_dir,sigma=2,alpha=0.5):

    predict = pred
    predict = predict.squeeze()
    predict_np = predict.cpu().data.numpy()

    image = io.imread(image_name)
    pd = transform.resize(predict_np,image.shape[0:2],order=2)
    pd = pd/(np.amax(pd)+1e-8)*255
    pd = pd[:,:,np.newaxis]

    print(image.shape)
    print(pd.shape)

    ## fuse the original portrait image and the portrait drawing into one composite image
    ## 1. use a gaussian filter to blur the original image
    image = gaussian(image, sigma=sigma, preserve_range=True)

    ## 2. fuse the original image and the portrait with weight alpha
    im_comp = image*alpha+pd*(1-alpha)

    print(im_comp.shape)

    img_name = image_name.split(os.sep)[-1]
    aaa = img_name.split(".")
    bbb = aaa[0:-1]
    imidx = bbb[0]
    for i in range(1,len(bbb)):
        imidx = imidx + "." + bbb[i]
    io.imsave(d_dir+'/'+imidx+'_sigma_' + str(sigma) + '_alpha_' + str(alpha) + '_composite.png',im_comp)

def main():

    parser = argparse.ArgumentParser(description="image and portrait composite")
    parser.add_argument('-s',action='store',dest='sigma')
    parser.add_argument('-a',action='store',dest='alpha')
    args = parser.parse_args()
    print(args.sigma)
    print(args.alpha)
    print("--------------------")

    # --------- 1. get image path and name ---------
    model_name='u2net_portrait'#u2netp

    image_dir = './test_data/test_portrait_images/your_portrait_im'
    prediction_dir = './test_data/test_portrait_images/your_portrait_results'
    if(not os.path.exists(prediction_dir)):
        os.mkdir(prediction_dir)

    model_dir = './saved_models/u2net_portrait/u2net_portrait.pth'

    img_name_list = glob.glob(image_dir+'/*')
    print("Number of images: ", len(img_name_list))

    # --------- 2. dataloader ---------
    test_salobj_dataset = SalObjDataset(img_name_list = img_name_list,
                                        lbl_name_list = [],
                                        transform=transforms.Compose([RescaleT(512),
                                                                      ToTensorLab(flag=0)])
                                        )
    test_salobj_dataloader = DataLoader(test_salobj_dataset,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=1)

    # --------- 3. model define ---------
    print("...load U2NET---173.6 MB")
    net = U2NET(3,1)

    net.load_state_dict(torch.load(model_dir))
    if torch.cuda.is_available():
        net.cuda()
    net.eval()

    # --------- 4. inference for each image ---------
    for i_test, data_test in enumerate(test_salobj_dataloader):

        print("inferencing:",img_name_list[i_test].split(os.sep)[-1])

        inputs_test = data_test['image']
        inputs_test = inputs_test.type(torch.FloatTensor)

        if torch.cuda.is_available():
            inputs_test = Variable(inputs_test.cuda())
        else:
            inputs_test = Variable(inputs_test)

        d1,d2,d3,d4,d5,d6,d7 = net(inputs_test)

        # normalization
        pred = 1.0 - d1[:,0,:,:]
        pred = normPRED(pred)

        # save results to test_results folder
        save_output(img_name_list[i_test],pred,prediction_dir,sigma=float(args.sigma),alpha=float(args.alpha))

        del d1,d2,d3,d4,d5,d6,d7

if __name__ == "__main__":
    main()
U-2-Net/u2net_portrait_demo.py ADDED
@@ -0,0 +1,175 @@
import cv2
import torch
from model import U2NET
from torch.autograd import Variable
import numpy as np
from glob import glob
import os

def detect_single_face(face_cascade,img):
    # Convert into grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Detect faces
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    if(len(faces)==0):
        print("Warning: no face detected, the portrait u2net will run on the whole image!")
        return None

    # filter to keep the largest face
    wh = 0
    idx = 0
    for i in range(0,len(faces)):
        (x,y,w,h) = faces[i]
        if(wh<w*h):
            idx = i
            wh = w*h

    return faces[idx]

# crop, pad and resize face region to 512x512 resolution
def crop_face(img, face):

    # no face detected, return the whole image and the inference will run on the whole image
    if(face is None):
        return img
    (x, y, w, h) = face

    height,width = img.shape[0:2]

    # crop the face with a bigger bbox
    hmw = h - w
    # hpad = int(h/2)+1
    # wpad = int(w/2)+1

    l,r,t,b = 0,0,0,0
    lpad = int(float(w)*0.4)
    left = x-lpad
    if(left<0):
        l = lpad-x
        left = 0

    rpad = int(float(w)*0.4)
    right = x+w+rpad
    if(right>width):
        r = right-width
        right = width

    tpad = int(float(h)*0.6)
    top = y - tpad
    if(top<0):
        t = tpad-y
        top = 0

    bpad = int(float(h)*0.2)
    bottom = y+h+bpad
    if(bottom>height):
        b = bottom-height
        bottom = height

    im_face = img[top:bottom,left:right]
    if(len(im_face.shape)==2):
        im_face = np.repeat(im_face[:,:,np.newaxis],(1,1,3))

    im_face = np.pad(im_face,((t,b),(l,r),(0,0)),mode='constant',constant_values=((255,255),(255,255),(255,255)))

    # pad to a square shape to avoid face deformation after resizing
    hf,wf = im_face.shape[0:2]
    if(hf-2>wf):
        wfp = int((hf-wf)/2)
        im_face = np.pad(im_face,((0,0),(wfp,wfp),(0,0)),mode='constant',constant_values=((255,255),(255,255),(255,255)))
    elif(wf-2>hf):
        hfp = int((wf-hf)/2)
        im_face = np.pad(im_face,((hfp,hfp),(0,0),(0,0)),mode='constant',constant_values=((255,255),(255,255),(255,255)))

    # resize to 512x512 resolution
    im_face = cv2.resize(im_face, (512,512), interpolation = cv2.INTER_AREA)

    return im_face

def normPRED(d):
    ma = torch.max(d)
    mi = torch.min(d)

    dn = (d-mi)/(ma-mi)

    return dn

def inference(net,input):

    # normalize the input
    tmpImg = np.zeros((input.shape[0],input.shape[1],3))
    input = input/np.max(input)

    # convert BGR to RGB and apply the channel-wise statistics
    tmpImg[:,:,0] = (input[:,:,2]-0.406)/0.225
    tmpImg[:,:,1] = (input[:,:,1]-0.456)/0.224
    tmpImg[:,:,2] = (input[:,:,0]-0.485)/0.229

    # HWC to CHW, add a batch dimension and convert to a torch tensor
    tmpImg = tmpImg.transpose((2, 0, 1))
    tmpImg = tmpImg[np.newaxis,:,:,:]
    tmpImg = torch.from_numpy(tmpImg)

    tmpImg = tmpImg.type(torch.FloatTensor)

    if torch.cuda.is_available():
        tmpImg = Variable(tmpImg.cuda())
    else:
        tmpImg = Variable(tmpImg)

    # inference
    d1,d2,d3,d4,d5,d6,d7 = net(tmpImg)

    # normalization
    pred = 1.0 - d1[:,0,:,:]
    pred = normPRED(pred)

    # convert torch tensor to numpy array
    pred = pred.squeeze()
    pred = pred.cpu().data.numpy()

    del d1,d2,d3,d4,d5,d6,d7

    return pred

def main():

    # get the image path list for inference
    im_list = glob('./test_data/test_portrait_images/your_portrait_im/*')
    print("Number of images: ",len(im_list))
    # indicate the output directory
    out_dir = './test_data/test_portrait_images/your_portrait_results'
    if(not os.path.exists(out_dir)):
        os.mkdir(out_dir)

    # Load the cascade face detection model
    face_cascade = cv2.CascadeClassifier('./saved_models/face_detection_cv2/haarcascade_frontalface_default.xml')
    # u2net_portrait path
    model_dir = './saved_models/u2net_portrait/u2net_portrait.pth'

    # load u2net_portrait model
    net = U2NET(3,1)
    net.load_state_dict(torch.load(model_dir))
    if torch.cuda.is_available():
        net.cuda()
    net.eval()

    # do the inference one-by-one
    for i in range(0,len(im_list)):
        print("--------------------------")
        print("inferencing ", i, "/", len(im_list), im_list[i])

        # load each image
        img = cv2.imread(im_list[i])
        height,width = img.shape[0:2]
        face = detect_single_face(face_cascade,img)
        im_face = crop_face(img, face)
        im_portrait = inference(net,im_face)

        # save the output
        cv2.imwrite(out_dir+"/"+im_list[i].split('/')[-1][0:-4]+'.png',(im_portrait*255).astype(np.uint8))

if __name__ == '__main__':
    main()
U-2-Net/u2net_portrait_test.py ADDED
@@ -0,0 +1,117 @@
import os
from skimage import io, transform
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms#, utils
# import torch.optim as optim

import numpy as np
from PIL import Image
import glob

from data_loader import RescaleT
from data_loader import ToTensor
from data_loader import ToTensorLab
from data_loader import SalObjDataset

from model import U2NET # full size version 173.6 MB
from model import U2NETP # small version u2net 4.7 MB

# normalize the predicted SOD probability map
def normPRED(d):
    ma = torch.max(d)
    mi = torch.min(d)

    dn = (d-mi)/(ma-mi)

    return dn

def save_output(image_name,pred,d_dir):

    predict = pred
    predict = predict.squeeze()
    predict_np = predict.cpu().data.numpy()

    im = Image.fromarray(predict_np*255).convert('RGB')
    img_name = image_name.split(os.sep)[-1]
    image = io.imread(image_name)
    imo = im.resize((image.shape[1],image.shape[0]),resample=Image.BILINEAR)

    pb_np = np.array(imo)

    aaa = img_name.split(".")
    bbb = aaa[0:-1]
    imidx = bbb[0]
    for i in range(1,len(bbb)):
        imidx = imidx + "." + bbb[i]

    imo.save(d_dir+'/'+imidx+'.png')

def main():

    # --------- 1. get image path and name ---------
    model_name='u2net_portrait'#u2netp

    image_dir = './test_data/test_portrait_images/portrait_im'
    prediction_dir = './test_data/test_portrait_images/portrait_results'
    if(not os.path.exists(prediction_dir)):
        os.mkdir(prediction_dir)

    model_dir = './saved_models/u2net_portrait/u2net_portrait.pth'

    img_name_list = glob.glob(image_dir+'/*')
    print("Number of images: ", len(img_name_list))

    # --------- 2. dataloader ---------
    test_salobj_dataset = SalObjDataset(img_name_list = img_name_list,
                                        lbl_name_list = [],
                                        transform=transforms.Compose([RescaleT(512),
                                                                      ToTensorLab(flag=0)])
                                        )
    test_salobj_dataloader = DataLoader(test_salobj_dataset,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=1)

    # --------- 3. model define ---------
    print("...load U2NET---173.6 MB")
    net = U2NET(3,1)

    net.load_state_dict(torch.load(model_dir))
    if torch.cuda.is_available():
        net.cuda()
    net.eval()

    # --------- 4. inference for each image ---------
    for i_test, data_test in enumerate(test_salobj_dataloader):

        print("inferencing:",img_name_list[i_test].split(os.sep)[-1])

        inputs_test = data_test['image']
        inputs_test = inputs_test.type(torch.FloatTensor)

        if torch.cuda.is_available():
            inputs_test = Variable(inputs_test.cuda())
        else:
            inputs_test = Variable(inputs_test)

        d1,d2,d3,d4,d5,d6,d7 = net(inputs_test)

        # normalization
        pred = 1.0 - d1[:,0,:,:]
        pred = normPRED(pred)

        # save results to test_results folder
        save_output(img_name_list[i_test],pred,prediction_dir)

        del d1,d2,d3,d4,d5,d6,d7

if __name__ == "__main__":
    main()
U-2-Net/u2net_test.py ADDED
@@ -0,0 +1,122 @@
+ import os
+ from skimage import io, transform
+ import torch
+ import torchvision
+ from torch.autograd import Variable
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torch.utils.data import Dataset, DataLoader
+ from torchvision import transforms#, utils
+ # import torch.optim as optim
+
+ import numpy as np
+ from PIL import Image
+ import glob
+
+ from data_loader import RescaleT
+ from data_loader import ToTensor
+ from data_loader import ToTensorLab
+ from data_loader import SalObjDataset
+
+ from model import U2NET # full size version 173.6 MB
+ from model import U2NETP # small version u2net 4.7 MB
+
+ # normalize the predicted SOD probability map to [0,1] via min-max scaling
+ def normPRED(d):
+     ma = torch.max(d)
+     mi = torch.min(d)
+
+     dn = (d-mi)/(ma-mi)
+
+     return dn
+
+ def save_output(image_name,pred,d_dir):
+
+     predict = pred
+     predict = predict.squeeze()
+     predict_np = predict.cpu().data.numpy()
+
+     # cast to uint8 before building the PIL image; float arrays are rejected
+     im = Image.fromarray((predict_np*255).astype(np.uint8)).convert('RGB')
+     img_name = image_name.split(os.sep)[-1]
+     image = io.imread(image_name)
+     imo = im.resize((image.shape[1],image.shape[0]),resample=Image.BILINEAR)
+
+     pb_np = np.array(imo)
+
+     # strip the extension but keep any dots inside the file name itself
+     aaa = img_name.split(".")
+     bbb = aaa[0:-1]
+     imidx = bbb[0]
+     for i in range(1,len(bbb)):
+         imidx = imidx + "." + bbb[i]
+
+     imo.save(d_dir+imidx+'.png')
+
+ def main():
+
+     # --------- 1. get image path and name ---------
+     model_name='u2net'#u2netp
+
+     image_dir = os.path.join(os.getcwd(), 'test_data', 'test_images')
+     prediction_dir = os.path.join(os.getcwd(), 'test_data', model_name + '_results' + os.sep)
+     model_dir = os.path.join(os.getcwd(), 'saved_models', model_name, model_name + '.pth')
+
+     img_name_list = glob.glob(image_dir + os.sep + '*')
+     print(img_name_list)
+
+     # --------- 2. dataloader ---------
+     test_salobj_dataset = SalObjDataset(img_name_list = img_name_list,
+                                         lbl_name_list = [],
+                                         transform=transforms.Compose([RescaleT(320),
+                                                                       ToTensorLab(flag=0)])
+                                         )
+     test_salobj_dataloader = DataLoader(test_salobj_dataset,
+                                         batch_size=1,
+                                         shuffle=False,
+                                         num_workers=1)
+
+     # --------- 3. model define ---------
+     if(model_name=='u2net'):
+         print("...load U2NET---173.6 MB")
+         net = U2NET(3,1)
+     elif(model_name=='u2netp'):
+         print("...load U2NETP---4.7 MB")
+         net = U2NETP(3,1)
+
+     if torch.cuda.is_available():
+         net.load_state_dict(torch.load(model_dir))
+         net.cuda()
+     else:
+         net.load_state_dict(torch.load(model_dir, map_location='cpu'))
+     net.eval()
+
+     # --------- 4. inference for each image ---------
+     for i_test, data_test in enumerate(test_salobj_dataloader):
+
+         print("inferencing:",img_name_list[i_test].split(os.sep)[-1])
+
+         inputs_test = data_test['image']
+         inputs_test = inputs_test.type(torch.FloatTensor)
+
+         if torch.cuda.is_available():
+             inputs_test = Variable(inputs_test.cuda())
+         else:
+             inputs_test = Variable(inputs_test)
+
+         d1,d2,d3,d4,d5,d6,d7 = net(inputs_test)
+
+         # normalization: d1 is the fused side output, channel 0 is the saliency map
+         pred = d1[:,0,:,:]
+         pred = normPRED(pred)
+
+         # save results to test_results folder
+         if not os.path.exists(prediction_dir):
+             os.makedirs(prediction_dir, exist_ok=True)
+         save_output(img_name_list[i_test],pred,prediction_dir)
+
+         del d1,d2,d3,d4,d5,d6,d7
+
+ if __name__ == "__main__":
+     main()
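Note: for quick checks outside the dataloader pipeline, the same weights can be applied to a single image. The sketch below is illustrative only — the weight path and 'my_image.jpg' are placeholders, the hand-rolled preprocessing only approximates RescaleT(320) + ToTensorLab(flag=0) from data_loader.py, and it reuses normPRED from u2net_test.py.

import numpy as np
import torch
from PIL import Image
from skimage import transform

from model import U2NET
from u2net_test import normPRED

net = U2NET(3, 1)
net.load_state_dict(torch.load('saved_models/u2net/u2net.pth', map_location='cpu'))
net.eval()

img = np.array(Image.open('my_image.jpg').convert('RGB'))           # HxWx3 uint8
im = transform.resize(img, (320, 320), mode='constant')             # float in [0,1]
im = im / np.max(im)                                                # mirrors ToTensorLab(flag=0)
im = (im - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])
x = torch.from_numpy(im.transpose(2, 0, 1)).unsqueeze(0).float()

with torch.no_grad():
    d1, *rest = net(x)                                              # seven side outputs
mask = normPRED(d1[:, 0, :, :]).squeeze().numpy()                   # HxW in [0,1]
Image.fromarray((mask * 255).astype(np.uint8)).save('mask.png')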
U-2-Net/u2net_train.py ADDED
@@ -0,0 +1,164 @@
+ import os
+ import torch
+ import torchvision
+ from torch.autograd import Variable
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ from torch.utils.data import Dataset, DataLoader
+ from torchvision import transforms, utils
+ import torch.optim as optim
+ import torchvision.transforms as standard_transforms
+
+ import numpy as np
+ import glob
+
+ from data_loader import Rescale
+ from data_loader import RescaleT
+ from data_loader import RandomCrop
+ from data_loader import ToTensor
+ from data_loader import ToTensorLab
+ from data_loader import SalObjDataset
+
+ from model import U2NET
+ from model import U2NETP
+
+ # ------- 1. define loss function --------
+
+ bce_loss = nn.BCELoss(reduction='mean')  # equivalent to the deprecated size_average=True
+
+ # per-side-output BCE losses, fused by summation (deep supervision)
+ def muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, labels_v):
+
+     loss0 = bce_loss(d0,labels_v)
+     loss1 = bce_loss(d1,labels_v)
+     loss2 = bce_loss(d2,labels_v)
+     loss3 = bce_loss(d3,labels_v)
+     loss4 = bce_loss(d4,labels_v)
+     loss5 = bce_loss(d5,labels_v)
+     loss6 = bce_loss(d6,labels_v)
+
+     loss = loss0 + loss1 + loss2 + loss3 + loss4 + loss5 + loss6
+     print("l0: %.3f, l1: %.3f, l2: %.3f, l3: %.3f, l4: %.3f, l5: %.3f, l6: %.3f\n"%(loss0.data.item(),loss1.data.item(),loss2.data.item(),loss3.data.item(),loss4.data.item(),loss5.data.item(),loss6.data.item()))
+
+     return loss0, loss
+
+
+ # ------- 2. set the directory of training dataset --------
+
+ model_name = 'u2net' #'u2netp'
+
+ data_dir = os.path.join(os.getcwd(), 'train_data' + os.sep)
+ tra_image_dir = os.path.join('DUTS', 'DUTS-TR', 'DUTS-TR', 'im_aug' + os.sep)
+ tra_label_dir = os.path.join('DUTS', 'DUTS-TR', 'DUTS-TR', 'gt_aug' + os.sep)
+
+ image_ext = '.jpg'
+ label_ext = '.png'
+
+ model_dir = os.path.join(os.getcwd(), 'saved_models', model_name + os.sep)
+
+ epoch_num = 100000
+ batch_size_train = 12
+ batch_size_val = 1
+ train_num = 0
+ val_num = 0
+
+ tra_img_name_list = glob.glob(data_dir + tra_image_dir + '*' + image_ext)
+
+ # derive each label path from its image name (same stem, label extension)
+ tra_lbl_name_list = []
+ for img_path in tra_img_name_list:
+     img_name = img_path.split(os.sep)[-1]
+
+     aaa = img_name.split(".")
+     bbb = aaa[0:-1]
+     imidx = bbb[0]
+     for i in range(1,len(bbb)):
+         imidx = imidx + "." + bbb[i]
+
+     tra_lbl_name_list.append(data_dir + tra_label_dir + imidx + label_ext)
+
+ print("---")
+ print("train images: ", len(tra_img_name_list))
+ print("train labels: ", len(tra_lbl_name_list))
+ print("---")
+
+ train_num = len(tra_img_name_list)
+
+ salobj_dataset = SalObjDataset(
+     img_name_list=tra_img_name_list,
+     lbl_name_list=tra_lbl_name_list,
+     transform=transforms.Compose([
+         RescaleT(320),
+         RandomCrop(288),
+         ToTensorLab(flag=0)]))
+ salobj_dataloader = DataLoader(salobj_dataset, batch_size=batch_size_train, shuffle=True, num_workers=1)
+
+ # ------- 3. define model --------
+ # define the net
+ if(model_name=='u2net'):
+     net = U2NET(3, 1)
+ elif(model_name=='u2netp'):
+     net = U2NETP(3,1)
+
+ if torch.cuda.is_available():
+     net.cuda()
+
+ # ------- 4. define optimizer --------
+ print("---define optimizer...")
+ optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
+
+ # ------- 5. training process --------
+ print("---start training...")
+ ite_num = 0
+ running_loss = 0.0
+ running_tar_loss = 0.0
+ ite_num4val = 0
+ save_frq = 2000 # save the model every 2000 iterations
+
+ for epoch in range(0, epoch_num):
+     net.train()
+
+     for i, data in enumerate(salobj_dataloader):
+         ite_num = ite_num + 1
+         ite_num4val = ite_num4val + 1
+
+         inputs, labels = data['image'], data['label']
+
+         inputs = inputs.type(torch.FloatTensor)
+         labels = labels.type(torch.FloatTensor)
+
+         # wrap them in Variable
+         if torch.cuda.is_available():
+             inputs_v, labels_v = Variable(inputs.cuda(), requires_grad=False), Variable(labels.cuda(), requires_grad=False)
+         else:
+             inputs_v, labels_v = Variable(inputs, requires_grad=False), Variable(labels, requires_grad=False)
+
+         # zero the parameter gradients
+         optimizer.zero_grad()
+
+         # forward + backward + optimize
+         d0, d1, d2, d3, d4, d5, d6 = net(inputs_v)
+         # loss2 is the fusion output's loss (loss0 from the helper); loss is the sum
+         loss2, loss = muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, labels_v)
+
+         loss.backward()
+         optimizer.step()
+
+         # print statistics
+         running_loss += loss.data.item()
+         running_tar_loss += loss2.data.item()
+
+         # del temporary outputs and loss
+         del d0, d1, d2, d3, d4, d5, d6, loss2, loss
+
+         print("[epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %.3f, tar: %.3f " % (
+             epoch + 1, epoch_num, (i + 1) * batch_size_train, train_num, ite_num, running_loss / ite_num4val, running_tar_loss / ite_num4val))
+
+         if ite_num % save_frq == 0:
+             torch.save(net.state_dict(), model_dir + model_name + "_bce_itr_%d_train_%.3f_tar_%.3f.pth" % (ite_num, running_loss / ite_num4val, running_tar_loss / ite_num4val))
+             running_loss = 0.0
+             running_tar_loss = 0.0
+             net.train() # resume train
+             ite_num4val = 0
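Note: training always starts from a random initialization, and torch.save assumes saved_models/u2net/ already exists. A sketch of a warm start that could sit just before the training loop — the checkpoint-selection policy (most recently written file) is an assumption, not part of this commit:

import glob
import os
import torch

os.makedirs(model_dir, exist_ok=True)              # avoid failing at the first torch.save
ckpts = glob.glob(model_dir + model_name + '_bce_itr_*.pth')
if ckpts:
    latest = max(ckpts, key=os.path.getmtime)      # assumed policy: newest file wins
    net.load_state_dict(torch.load(latest, map_location='cpu'))
    print("resuming from", latest)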
app.py ADDED
@@ -0,0 +1,25 @@
+ import os
+
+ import gradio as gr
+
+
+ # placeholder callback: the demo currently returns the input image unchanged
+ def process(im):
+     return im
+
+ title = "U-2-Net"
+ description = "Gradio demo for U-2-Net, https://github.com/xuebinqin/U-2-Net"
+ article = ""
+
+ gr.Interface(
+     process,
+     [gr.inputs.Image(type="pil", label="Input")],
+     gr.outputs.Image(type="pil", label="Output"),
+     title=title,
+     description=description,
+     article=article,
+     examples=[],
+     allow_flagging=False,
+     allow_screenshot=False
+ ).launch(enable_queue=True, cache_examples=True)
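Note: as committed, process is an identity function, so the Space simply echoes its input. A sketch of wiring the saliency model into the callback — the sys.path entry, weight location, and 320x320 preprocessing mirror u2net_test.py but are assumptions here, not part of this commit:

import sys
sys.path.append('U-2-Net')                          # make model/ and u2net_test importable

import numpy as np
import torch
from PIL import Image
from model import U2NET
from u2net_test import normPRED

net = U2NET(3, 1)
net.load_state_dict(torch.load('U-2-Net/saved_models/u2net/u2net.pth', map_location='cpu'))
net.eval()

def process(im):
    w, h = im.size
    arr = np.asarray(im.convert('RGB').resize((320, 320)), dtype=np.float32) / 255.0
    arr = (arr - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
    x = torch.from_numpy(arr.transpose(2, 0, 1)).unsqueeze(0).float()
    with torch.no_grad():
        d1, *rest = net(x)
    mask = normPRED(d1[:, 0, :, :]).squeeze().numpy()
    out = Image.fromarray((mask * 255).astype(np.uint8)).convert('RGB')
    return out.resize((w, h), resample=Image.BILINEAR)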
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ numpy==1.15.2
+ scikit-image==0.14.0
+ torch
+ torchvision
+ pillow==8.1.1
+ opencv-python-headless
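Note: the numpy and scikit-image pins date from 2018 and may constrain which Python version the Space can run on; torch and torchvision are left unpinned. A quick startup sanity check (a sketch) can confirm the resolved environment:

import numpy
import skimage
print("numpy", numpy.__version__, "| scikit-image", skimage.__version__)  # expect 1.15.2 / 0.14.0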