|
import torch
|
|
import torchvision.transforms as transforms
|
|
import cv2
|
|
import numpy as np
|
|
|
|
from .model import BiSeNet
|
|
|
|
|
|
def init_parser(pth_path, device=None):
    """Build a BiSeNet parsing network and load its weights from disk.

    Args:
        pth_path: Path to a checkpoint containing the model's state dict.
        device: Device to place the network on. When ``None`` (the default),
            CUDA is used if it is available and the CPU otherwise.

    Returns:
        The 19-class ``BiSeNet`` instance, with the checkpoint weights loaded
        and switched to evaluation mode.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    n_classes = 19
    net = BiSeNet(n_classes=n_classes)

    # Deserialize onto the CPU first so that a checkpoint saved from a GPU
    # that does not exist on this host still loads; the weights are moved to
    # the requested device afterwards.
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(pth_path, map_location="cpu")
    net.load_state_dict(state_dict)

    net.to(device)
    net.eval()
    return net
|
|
|
|
|
|
def image_to_parsing(img, net):
    """Run the parsing network on an image and return a per-pixel label map.

    Args:
        img: Image array indexed as (row, column, channel). The channel axis
            is reversed before inference (presumably OpenCV BGR -> RGB;
            confirm against callers).
        net: Parsing network. It is called on a single-image batch and the
            first element of its output is used as the per-class score map.

    Returns:
        NumPy integer array holding, for every pixel of the network output,
        the index of the highest-scoring class. The input is resized to
        512x512 before inference.
    """
    # Run on whatever device the network lives on instead of assuming CUDA.
    params = net.parameters() if hasattr(net, "parameters") else iter(())
    first_param = next(params, None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    resized = cv2.resize(img, (512, 512))
    # Reverse the channel axis; copy so the negatively-strided view becomes a
    # regular array that can be converted to a tensor.
    flipped = resized[:, :, ::-1].copy()

    preprocess = transforms.Compose([
        transforms.ToTensor(),
        # Channel-wise mean / std (the commonly used ImageNet statistics).
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    batch = torch.unsqueeze(preprocess(flipped), 0)

    with torch.no_grad():
        batch = batch.to(device)
        out = net(batch)[0]
        # Drop the batch axis, then pick the best class along the class axis.
        parsing = out.squeeze(0).cpu().numpy().argmax(0)

    return parsing
|
|
|
|
|
|
def get_mask(parsing, classes):
    """Return a boolean mask of the entries of ``parsing`` found in ``classes``.

    Args:
        parsing: NumPy array of class labels.
        classes: Iterable of label values to select. It may be empty, in
            which case the returned mask is all ``False``.

    Returns:
        Boolean NumPy array with the same shape as ``parsing``; an entry is
        ``True`` where the corresponding label equals any value in
        ``classes``.
    """
    # list() so that any iterable (tuple, set, generator) is accepted;
    # np.isin does the membership test in one vectorized call.
    return np.isin(parsing, list(classes))
|
|
|
|
|
|
def swap_regions(source, target, net, face_classes=(1, 11, 12, 13)):
    """Replace selected parsed regions of ``source`` with pixels of ``target``.

    ``source`` is parsed with ``net``; wherever the parsing label is one of
    ``face_classes`` the pixel is taken from ``target``, elsewhere from
    ``source``. Blending is done on 512x512 resized copies of both images and
    the result is resized back to the height and width of ``source``.

    Args:
        source: Image array that is parsed; it supplies the pixels outside
            the mask and the output size.
        target: Image array supplying the pixels inside the mask.
        net: Parsing network passed on to ``image_to_parsing``.
        face_classes: Label values whose regions are taken from ``target``.
            Defaults to ``(1, 11, 12, 13)`` (presumably the skin and
            mouth/lip labels -- verify against the model's label map).

    Returns:
        ``float32`` image array with the height and width of ``source``.
    """
    work_size = (512, 512)

    parsing = image_to_parsing(source, net)
    mask = get_mask(parsing, face_classes)

    source_small = cv2.resize(source, work_size)
    target_small = cv2.resize(target, work_size)

    # Broadcast the 2-D mask over the channel axis and select per pixel;
    # this yields the same values as (1 - mask) * source + mask * target
    # without building wide integer temporaries.
    blended = np.where(mask[:, :, np.newaxis], target_small, source_small)

    height, width = source.shape[:2]
    # cv2.resize expects the size as (width, height).
    return cv2.resize(blended.astype("float32"), (width, height))
|
|
|