import time

import cv2
import numpy as np
import onnx
import onnxruntime
import torch
from onnx import numpy_helper
from skimage import transform as trans
from tqdm import tqdm

# ArcFace five-point landmark template (eye centers, nose tip, mouth corners)
# for a 112x112 aligned crop.
arcface_dst = np.array(
    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
     [41.5493, 92.3655], [70.7299, 92.2041]],
    dtype=np.float32)


def estimate_norm(lmk, image_size=112, mode='arcface'):
    """Estimate the similarity transform that maps five facial landmarks
    onto the ArcFace template, scaled to `image_size`."""
    assert lmk.shape == (5, 2)
    assert image_size % 112 == 0 or image_size % 128 == 0
    if image_size % 112 == 0:
        ratio = float(image_size) / 112.0
        diff_x = 0
    else:
        # 128-based sizes use the same template, shifted right by 8 px.
        ratio = float(image_size) / 128.0
        diff_x = 8.0 * ratio
    dst = arcface_dst * ratio
    dst[:, 0] += diff_x
    tform = trans.SimilarityTransform()
    tform.estimate(lmk, dst)
    M = tform.params[0:2, :]
    return M


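# Illustrative self-check (not part of the original module): mapping the
# template onto itself should yield roughly the identity transform.
# `_demo_estimate_norm` is a hypothetical helper added for exposition.
def _demo_estimate_norm():
    lmk = arcface_dst.copy()
    M = estimate_norm(lmk, image_size=112)
    # Apply the 2x3 affine to homogeneous points and compare to the template.
    homogeneous = np.hstack([lmk, np.ones((5, 1), dtype=np.float32)])
    mapped = homogeneous @ M.T
    assert np.allclose(mapped, arcface_dst, atol=1e-3)

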
def norm_crop2(img, landmark, image_size=112, mode='arcface'):
    """Align a face to the ArcFace template; returns the warped crop and the
    2x3 affine matrix used, so the crop can be pasted back later."""
    M = estimate_norm(landmark, image_size, mode)
    warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
    return warped, M


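# Illustrative only (hypothetical helper): align a synthetic image using
# shifted template landmarks; in practice `landmark` comes from a detector.
def _demo_norm_crop2():
    img = np.zeros((256, 256, 3), dtype=np.uint8)
    lmk = arcface_dst + 64.0  # pretend the detector found a shifted face
    crop, M = norm_crop2(img, lmk, image_size=128)
    assert crop.shape == (128, 128, 3) and M.shape == (2, 3)

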
class Inswapper:
    def __init__(self, model_file=None, batch_size=32, providers=None):
        self.model_file = model_file
        self.batch_size = batch_size
        providers = providers or ['CPUExecutionProvider']

        # The last initializer of the ONNX graph holds the projection matrix
        # that maps an ArcFace embedding into the swapper's latent space.
        model = onnx.load(self.model_file)
        graph = model.graph
        self.emap = numpy_helper.to_array(graph.initializer[-1])
        self.input_mean = 0.0
        self.input_std = 255.0

        self.session_options = onnxruntime.SessionOptions()
        self.session = onnxruntime.InferenceSession(self.model_file, sess_options=self.session_options, providers=providers)

        # Cache input/output metadata: the model takes an image blob plus a
        # latent identity vector and produces a single swapped-face blob.
        inputs = self.session.get_inputs()
        self.input_names = [inp.name for inp in inputs]
        outputs = self.session.get_outputs()
        self.output_names = [out.name for out in outputs]
        assert len(self.output_names) == 1
        self.output_shape = outputs[0].shape
        input_cfg = inputs[0]
        input_shape = input_cfg.shape
        self.input_shape = input_shape
        self.input_size = tuple(input_shape[2:4][::-1])  # (width, height)

    def forward(self, imgs, latents):
        # `imgs` are expected to be preprocessed NCHW float blobs; each one
        # is normalized and run through the model with its matching latent.
        batch_preds = []
        for img, latent in zip(imgs, latents):
            img = (img - self.input_mean) / self.input_std
            pred = self.session.run(self.output_names, {self.input_names[0]: img, self.input_names[1]: latent})[0]
            batch_preds.append(pred)
        return batch_preds

    def get(self, imgs, target_faces, source_faces):
        batch_preds = []
        batch_aimgs = []
        batch_ms = []
        for img, target_face, source_face in zip(imgs, target_faces, source_faces):
            if isinstance(img, str):
                img = cv2.imread(img)
            # Align the target face to the model's input size, keeping the
            # affine matrix so the result can be pasted back later.
            aimg, M = norm_crop2(img, target_face.kps, self.input_size[0])
            blob = cv2.dnn.blobFromImage(aimg, 1.0 / self.input_std, self.input_size,
                                         (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
            # Project the source identity embedding into the swapper's latent
            # space and re-normalize it.
            latent = source_face.normed_embedding.reshape((1, -1))
            latent = np.dot(latent, self.emap)
            latent /= np.linalg.norm(latent)
            pred = self.session.run(self.output_names, {self.input_names[0]: blob, self.input_names[1]: latent})[0]
            # NCHW float RGB in [0, 1] -> HWC uint8 BGR.
            pred = pred.transpose((0, 2, 3, 1))[0]
            pred = np.clip(255 * pred, 0, 255).astype(np.uint8)[:, :, ::-1]
            batch_preds.append(pred)
            batch_aimgs.append(aimg)
            batch_ms.append(M)
        return batch_preds, batch_aimgs, batch_ms

    def batch_forward(self, img_list, target_f_list, source_f_list):
        # Process the inputs in chunks of `batch_size` to bound memory use.
        num_samples = len(img_list)
        num_batches = (num_samples + self.batch_size - 1) // self.batch_size

        preds = []
        aimgs = []
        ms = []
        for i in tqdm(range(num_batches), desc="Swapping faces in batches"):
            start_idx = i * self.batch_size
            end_idx = min((i + 1) * self.batch_size, num_samples)

            batch_img = img_list[start_idx:end_idx]
            batch_target_f = target_f_list[start_idx:end_idx]
            batch_source_f = source_f_list[start_idx:end_idx]

            batch_pred, batch_aimg, batch_m = self.get(batch_img, batch_target_f, batch_source_f)
            preds.extend(batch_pred)
            aimgs.extend(batch_aimg)
            ms.extend(batch_m)
        return preds, aimgs, ms


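# Hedged usage sketch, not part of the original module: it assumes the
# insightface FaceAnalysis detector, whose face objects expose the `kps`
# landmarks and `normed_embedding` attributes that `Inswapper.get` expects.
# All paths are placeholders.
def _demo_swap(model_path, target_path, source_path):
    from insightface.app import FaceAnalysis
    app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
    app.prepare(ctx_id=0, det_size=(640, 640))
    target_img = cv2.imread(target_path)
    source_img = cv2.imread(source_path)
    target_face = app.get(target_img)[0]
    source_face = app.get(source_img)[0]
    swapper = Inswapper(model_file=model_path)
    preds, aimgs, ms = swapper.get([target_img], [target_face], [source_face])
    return paste_to_whole(preds[0], aimgs[0], ms[0], target_img)

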
def laplacian_blending(A, B, m, num_levels=4):
    """Blend images A and B with mask m (1.0 selects A) using Laplacian
    pyramids, which hides the seam across multiple frequency bands."""
    assert A.shape == B.shape
    assert B.shape == m.shape
    height = m.shape[0]
    width = m.shape[1]
    # Pad to a square power-of-two canvas so pyrDown/pyrUp sizes stay exact.
    size_list = np.array([4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096])
    size = size_list[np.where(size_list > max(height, width))][0]
    GA = np.zeros((size, size, 3), dtype=np.float32)
    GA[:height, :width, :] = A
    GB = np.zeros((size, size, 3), dtype=np.float32)
    GB[:height, :width, :] = B
    GM = np.zeros((size, size, 3), dtype=np.float32)
    GM[:height, :width, :] = m
    # Gaussian pyramids of both images and the mask.
    gpA = [GA]
    gpB = [GB]
    gpM = [GM]
    for i in range(num_levels):
        GA = cv2.pyrDown(GA)
        GB = cv2.pyrDown(GB)
        GM = cv2.pyrDown(GM)
        gpA.append(np.float32(GA))
        gpB.append(np.float32(GB))
        gpM.append(np.float32(GM))
    # Laplacian pyramids (coarsest level first), with matching mask levels.
    lpA = [gpA[num_levels]]
    lpB = [gpB[num_levels]]
    gpMr = [gpM[num_levels]]
    for i in range(num_levels, 0, -1):
        LA = np.subtract(gpA[i - 1], cv2.pyrUp(gpA[i]))
        LB = np.subtract(gpB[i - 1], cv2.pyrUp(gpB[i]))
        lpA.append(LA)
        lpB.append(LB)
        gpMr.append(gpM[i - 1])
    # Blend each band, then collapse the pyramid back to full resolution.
    LS = []
    for la, lb, gm in zip(lpA, lpB, gpMr):
        ls = la * gm + lb * (1.0 - gm)
        LS.append(ls)
    ls_ = LS[0]
    for i in range(1, num_levels + 1):
        ls_ = cv2.pyrUp(ls_)
        ls_ = cv2.add(ls_, LS[i])
    ls_ = np.clip(ls_[:height, :width, :], 0, 255)
    return ls_


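# Minimal illustration (hypothetical helper added for exposition): blending a
# white and a black image with a half-plane mask should produce a smooth
# horizontal ramp across the seam instead of a hard edge.
def _demo_laplacian_blending():
    A = np.full((100, 80, 3), 255, dtype=np.float32)
    B = np.zeros((100, 80, 3), dtype=np.float32)
    m = np.zeros((100, 80, 3), dtype=np.float32)
    m[:, :40] = 1.0  # left half comes from A
    return laplacian_blending(A, B, m)

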
def paste_to_whole(bgr_fake, aimg, M, whole_img, laplacian_blend=True, crop_mask=(0, 0, 0, 0)):
    """Warp the swapped crop back into the full frame and composite it with
    a feathered (optionally Laplacian-blended) mask."""
    IM = cv2.invertAffineTransform(M)

    img_white = np.full((aimg.shape[0], aimg.shape[1]), 255, dtype=np.float32)

    # Optionally zero out border rows/columns (top, bottom, left, right) to
    # exclude them from the paste-back mask.
    top = int(crop_mask[0])
    bottom = int(crop_mask[1])
    if top + bottom < aimg.shape[0]:
        if top > 0: img_white[:top, :] = 0
        if bottom > 0: img_white[-bottom:, :] = 0

    left = int(crop_mask[2])
    right = int(crop_mask[3])
    if left + right < aimg.shape[1]:
        if left > 0: img_white[:, :left] = 0
        if right > 0: img_white[:, -right:] = 0

    # Map both the swapped crop and its mask back into frame coordinates.
    bgr_fake = cv2.warpAffine(
        bgr_fake, IM, (whole_img.shape[1], whole_img.shape[0]), borderValue=0.0
    )
    img_white = cv2.warpAffine(
        img_white, IM, (whole_img.shape[1], whole_img.shape[0]), borderValue=0.0
    )
    img_white[img_white > 20] = 255
    img_mask = img_white
    mask_h_inds, mask_w_inds = np.where(img_mask == 255)
    mask_h = np.max(mask_h_inds) - np.min(mask_h_inds)
    mask_w = np.max(mask_w_inds) - np.min(mask_w_inds)
    mask_size = int(np.sqrt(mask_h * mask_w))

    # Shrink the mask so it sits inside the face, then feather its edge.
    k = max(mask_size // 10, 10)
    img_mask = cv2.erode(img_mask, np.ones((k, k), np.uint8), iterations=1)

    k = max(mask_size // 20, 5)
    kernel_size = (k, k)
    blur_size = tuple(2 * i + 1 for i in kernel_size)
    img_mask = cv2.GaussianBlur(img_mask, blur_size, 0) / 255
    img_mask = np.tile(np.expand_dims(img_mask, axis=-1), (1, 1, 3))

    if laplacian_blend:
        bgr_fake = laplacian_blending(bgr_fake.astype("float32").clip(0, 255),
                                      whole_img.astype("float32").clip(0, 255),
                                      img_mask.clip(0, 1))
        bgr_fake = bgr_fake.astype("float32")

    fake_merged = img_mask * bgr_fake + (1 - img_mask) * whole_img.astype(np.float32)
    return fake_merged.astype("uint8")
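
# Hypothetical entry point for quick manual testing; the file names below are
# placeholders, not assets shipped with this code.
if __name__ == "__main__":
    result = _demo_swap("inswapper_128.onnx", "target.jpg", "source.jpg")
    cv2.imwrite("swapped.jpg", result)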