Spaces:

victorisgeek
/

DeepSwapFace

Running

App Files Files Community

DeepSwapFace / face_swapper.py

Harisreedhar

update

71c9afb about 1 year ago

raw

history blame

No virus

7.57 kB

	import time
	import torch
	import onnx
	import cv2
	import onnxruntime
	import numpy as np
	from tqdm import tqdm
	from onnx import numpy_helper
	from skimage import transform as trans

	# Destination template of five (x, y) landmark positions, expressed for a
	# 112-pixel crop; estimate_norm rescales it for other crop sizes.
	# NOTE(review): presumably ordered left eye, right eye, nose tip, left and
	# right mouth corner (ArcFace alignment convention) — confirm against the
	# landmark order produced by the face detector in use.
	arcface_dst = np.array(
	    [
	        [38.2946, 51.6963],
	        [73.5318, 51.5014],
	        [56.0252, 71.7366],
	        [41.5493, 92.3655],
	        [70.7299, 92.2041],
	    ],
	    dtype=np.float32,
	)

	def estimate_norm(lmk, image_size=112, mode='arcface'):
	    """Return the 2x3 similarity matrix that maps ``lmk`` onto the template.

	    Args:
	        lmk: Array of shape (5, 2) holding the source landmark coordinates.
	        image_size: Side of the target crop; must be a multiple of 112 or
	            of 128.
	        mode: Accepted for interface compatibility; not read by this
	            function.

	    Returns:
	        The first two rows of the estimated similarity transform's 3x3
	        parameter matrix.
	    """
	    assert lmk.shape == (5, 2)
	    is_112_multiple = image_size % 112 == 0
	    assert is_112_multiple or image_size % 128 == 0

	    if is_112_multiple:
	        scale = float(image_size) / 112.0
	        x_offset = 0
	    else:
	        # 128-based crops use the same template scaled to 128 and shifted
	        # right by 8 template pixels.
	        scale = float(image_size) / 128.0
	        x_offset = 8.0 * scale

	    # The multiplication creates a new array, so the module-level template
	    # is not modified by the in-place shift below.
	    template = arcface_dst * scale
	    template[:, 0] += x_offset

	    similarity = trans.SimilarityTransform()
	    similarity.estimate(lmk, template)
	    return similarity.params[0:2, :]


	def norm_crop2(img, landmark, image_size=112, mode='arcface'):
	    """Align ``img`` to the landmark template and return the crop and matrix.

	    Args:
	        img: Image array to warp.
	        landmark: Landmarks forwarded to ``estimate_norm``.
	        image_size: Side length of the square output crop.
	        mode: Forwarded unchanged to ``estimate_norm``.

	    Returns:
	        Tuple ``(warped, M)``: the ``image_size`` x ``image_size`` crop and
	        the 2x3 affine matrix used to produce it.
	    """
	    affine = estimate_norm(landmark, image_size, mode)
	    output_size = (image_size, image_size)
	    # Pixels that fall outside the source image are filled with 0.
	    aligned = cv2.warpAffine(img, affine, output_size, borderValue=0.0)
	    return aligned, affine


	class Inswapper():
	    """Face-swap inference wrapper around an ONNX model run with onnxruntime.

	    The model file is read twice: once with ``onnx`` to extract the last
	    graph initializer (kept as ``emap`` and multiplied into the source-face
	    embedding in ``get``), and once by ``onnxruntime.InferenceSession`` for
	    inference.
	    """

	    def __init__(self, model_file=None, batch_size=32, providers=None):
	        """Load the model and cache its input/output metadata.

	        Args:
	            model_file: Path handed to ``onnx.load`` and to the inference
	                session. NOTE(review): the ``None`` default looks like a
	                placeholder; a real path appears to be required.
	            batch_size: Number of samples per chunk in ``batch_forward``.
	            providers: onnxruntime execution providers. ``None`` selects
	                ``['CPUExecutionProvider']``.
	        """
	        # Build the default list per call: a list literal in the signature
	        # would be a single object shared by every instance.
	        if providers is None:
	            providers = ['CPUExecutionProvider']

	        self.model_file = model_file
	        self.batch_size = batch_size

	        model = onnx.load(self.model_file)
	        graph = model.graph
	        # Last initializer of the graph; applied as a matrix to the source
	        # embedding in get().
	        self.emap = numpy_helper.to_array(graph.initializer[-1])
	        self.input_mean = 0.0
	        self.input_std = 255.0

	        self.session_options = onnxruntime.SessionOptions()
	        self.session = onnxruntime.InferenceSession(
	            self.model_file,
	            sess_options=self.session_options,
	            providers=providers,
	        )

	        inputs = self.session.get_inputs()
	        self.input_names = [inp.name for inp in inputs]
	        outputs = self.session.get_outputs()
	        self.output_names = [out.name for out in outputs]
	        assert len(self.output_names) == 1
	        self.output_shape = outputs[0].shape
	        input_shape = inputs[0].shape
	        self.input_shape = input_shape
	        # Dims 2 and 3 of the first input, reversed. Assuming an NCHW input
	        # this is (width, height) — TODO confirm the model layout.
	        self.input_size = tuple(input_shape[2:4][::-1])

	    def forward(self, imgs, latents):
	        """Run the session once per ``(img, latent)`` pair.

	        Each ``img`` is normalized with ``input_mean`` / ``input_std`` before
	        being fed as the first model input; ``latent`` is fed as the second.

	        Returns:
	            List with the first session output for every pair.
	        """
	        batch_preds = []
	        for img, latent in zip(imgs, latents):
	            img = (img - self.input_mean) / self.input_std
	            feeds = {self.input_names[0]: img, self.input_names[1]: latent}
	            batch_preds.append(self.session.run(self.output_names, feeds)[0])
	        return batch_preds

	    def get(self, imgs, target_faces, source_faces):
	        """Swap one face per ``(img, target_face, source_face)`` triple.

	        Args:
	            imgs: Images as arrays, or file paths that are read with
	                ``cv2.imread``.
	            target_faces: Objects exposing ``kps`` (landmarks used to align
	                the crop).
	            source_faces: Objects exposing ``normed_embedding``.

	        Returns:
	            Tuple of three lists: swapped crops as uint8 images, the aligned
	            input crops, and the 2x3 alignment matrices.

	        Raises:
	            OSError: If an image path cannot be read.
	        """
	        batch_preds = []
	        batch_aimgs = []
	        batch_ms = []
	        for img, target_face, source_face in zip(imgs, target_faces, source_faces):
	            if isinstance(img, str):
	                path = img
	                img = cv2.imread(path)
	                # cv2.imread signals failure by returning None instead of
	                # raising; fail here with the offending path rather than
	                # deep inside warpAffine.
	                if img is None:
	                    raise OSError(f"could not read image file: {path!r}")
	            aimg, M = norm_crop2(img, target_face.kps, self.input_size[0])
	            # Subtracts the mean, scales by 1/input_std, and swaps the first
	            # and last channels (swapRB).
	            blob = cv2.dnn.blobFromImage(
	                aimg,
	                1.0 / self.input_std,
	                self.input_size,
	                (self.input_mean, self.input_mean, self.input_mean),
	                swapRB=True,
	            )
	            # Project the source embedding through emap and L2-normalize it.
	            latent = source_face.normed_embedding.reshape((1, -1))
	            latent = np.dot(latent, self.emap)
	            latent /= np.linalg.norm(latent)
	            feeds = {self.input_names[0]: blob, self.input_names[1]: latent}
	            pred = self.session.run(self.output_names, feeds)[0]
	            # Move axis 1 last and drop the leading axis; assumes an
	            # (N, C, H, W) output — TODO confirm.
	            pred = pred.transpose((0, 2, 3, 1))[0]
	            # Scale to 0..255 and reverse the channel axis, undoing swapRB.
	            pred = np.clip(255 * pred, 0, 255).astype(np.uint8)[:, :, ::-1]
	            batch_preds.append(pred)
	            batch_aimgs.append(aimg)
	            batch_ms.append(M)
	        return batch_preds, batch_aimgs, batch_ms

	    def batch_forward(self, img_list, target_f_list, source_f_list):
	        """Run ``get`` over the inputs in chunks of ``batch_size``.

	        The chunking drives the tqdm progress bar; inside ``get`` the samples
	        are still passed through the session one at a time.

	        Returns:
	            Tuple of three lists (swapped crops, aligned crops, matrices)
	            concatenated over all chunks.
	        """
	        num_samples = len(img_list)
	        # Ceiling division: a trailing partial chunk still counts.
	        num_batches = (num_samples + self.batch_size - 1) // self.batch_size

	        preds = []
	        aimgs = []
	        ms = []
	        for i in tqdm(range(num_batches), desc="Swapping face by batch"):
	            start_idx = i * self.batch_size
	            end_idx = min((i + 1) * self.batch_size, num_samples)

	            batch_pred, batch_aimg, batch_m = self.get(
	                img_list[start_idx:end_idx],
	                target_f_list[start_idx:end_idx],
	                source_f_list[start_idx:end_idx],
	            )
	            preds.extend(batch_pred)
	            aimgs.extend(batch_aimg)
	            ms.extend(batch_m)
	        return preds, aimgs, ms


	def laplacian_blending(A, B, m, num_levels=4):
	    """Blend ``A`` over ``B`` with per-pixel weights ``m`` via Laplacian pyramids.

	    At every pyramid level the result is ``A * m + B * (1 - m)``, using the
	    Gaussian pyramid of ``m`` as the weights; the levels are then collapsed
	    back to full resolution.

	    Args:
	        A: Image weighted by ``m``. Written into a 3-channel float32 buffer.
	        B: Image weighted by ``1 - m``; must have the same shape as ``A``.
	        m: Weight array with the same shape as ``B``.
	        num_levels: Number of pyramid levels that are blended.

	    Returns:
	        float32 array of the input height and width, clipped to [0, 255].
	    """
	    assert A.shape == B.shape
	    assert B.shape == m.shape
	    height = m.shape[0]
	    width = m.shape[1]

	    # Work on a square canvas whose side is the smallest power of two that
	    # is strictly greater than the longer image side (minimum 4), so that
	    # repeated halving keeps the sides even and each pyrUp result has the
	    # shape of the level above it. Computing the size, rather than looking
	    # it up in a table ending at 4096, keeps images with a side of 4096 or
	    # more from failing with IndexError.
	    size = max(4, 1 << int(max(height, width)).bit_length())

	    GA = np.zeros((size, size, 3), dtype=np.float32)
	    GA[:height, :width, :] = A
	    GB = np.zeros((size, size, 3), dtype=np.float32)
	    GB[:height, :width, :] = B
	    GM = np.zeros((size, size, 3), dtype=np.float32)
	    GM[:height, :width, :] = m

	    # Gaussian pyramids, index 0 = full resolution. Only levels
	    # 0 .. num_levels - 1 are read below, so no further level is built.
	    gpA = [GA]
	    gpB = [GB]
	    gpM = [GM]
	    for _ in range(num_levels - 1):
	        GA = cv2.pyrDown(GA)
	        GB = cv2.pyrDown(GB)
	        GM = cv2.pyrDown(GM)
	        gpA.append(np.float32(GA))
	        gpB.append(np.float32(GB))
	        gpM.append(np.float32(GM))

	    # Laplacian pyramids ordered coarse -> fine: the coarsest Gaussian level
	    # first, then the detail lost between each pair of adjacent levels. The
	    # mask pyramid is re-ordered the same way.
	    lpA = [gpA[num_levels - 1]]
	    lpB = [gpB[num_levels - 1]]
	    gpMr = [gpM[num_levels - 1]]
	    for i in range(num_levels - 1, 0, -1):
	        lpA.append(np.subtract(gpA[i - 1], cv2.pyrUp(gpA[i])))
	        lpB.append(np.subtract(gpB[i - 1], cv2.pyrUp(gpB[i])))
	        gpMr.append(gpM[i - 1])

	    # Blend each level with the mask at the matching resolution.
	    LS = [la * gm + lb * (1.0 - gm) for la, lb, gm in zip(lpA, lpB, gpMr)]

	    # Collapse: upsample the running result and add the next finer level.
	    ls_ = LS[0]
	    for i in range(1, num_levels):
	        ls_ = cv2.pyrUp(ls_)
	        ls_ = cv2.add(ls_, LS[i])

	    # Drop the padding and keep values in the 8-bit range.
	    ls_ = np.clip(ls_[:height, :width, :], 0, 255)
	    return ls_


	def paste_to_whole(bgr_fake, aimg, M, whole_img, laplacian_blend=True, crop_mask=(0,0,0,0)):
	    """Warp a swapped face crop back into the full frame and blend it in.

	    Args:
	        bgr_fake: Swapped face image in the aligned-crop coordinate system
	            (presumably BGR, per the name).
	        aimg: The aligned crop; only its height and width are read.
	        M: 2x3 affine matrix that mapped the frame to the crop; its inverse
	            is used to map the crop back.
	        whole_img: Full frame to paste into.
	        laplacian_blend: When true, pre-blend the warped face with the frame
	            using ``laplacian_blending`` before the final alpha blend.
	        crop_mask: ``(top, bottom, left, right)`` pixel counts to exclude
	            from the edges of the crop.

	    Returns:
	        uint8 array: the frame with the face blended in. If no mask pixel
	        lands inside the frame, the frame is returned without blending.
	    """
	    IM = cv2.invertAffineTransform(M)
	    crop_h, crop_w = aimg.shape[0], aimg.shape[1]
	    frame_size = (whole_img.shape[1], whole_img.shape[0])

	    # Mask in crop space: 255 where the swapped face may be used.
	    img_white = np.full((crop_h, crop_w), 255, dtype=np.float32)

	    # top/bottom blank out rows, so they are bounded by the crop height.
	    top = int(crop_mask[0])
	    bottom = int(crop_mask[1])
	    if top + bottom < crop_h:
	        if top > 0:
	            img_white[:top, :] = 0
	        if bottom > 0:
	            img_white[-bottom:, :] = 0

	    # left/right blank out columns, so they are bounded by the crop width.
	    left = int(crop_mask[2])
	    right = int(crop_mask[3])
	    if left + right < crop_w:
	        if left > 0:
	            img_white[:, :left] = 0
	        if right > 0:
	            img_white[:, -right:] = 0

	    bgr_fake = cv2.warpAffine(bgr_fake, IM, frame_size, borderValue=0.0)
	    img_white = cv2.warpAffine(img_white, IM, frame_size, borderValue=0.0)
	    # Interpolation leaves intermediate values along the warped edge; snap
	    # everything above 20 back to fully opaque.
	    img_white[img_white > 20] = 255
	    img_mask = img_white

	    mask_h_inds, mask_w_inds = np.where(img_mask == 255)
	    if mask_h_inds.size == 0:
	        # No opaque mask pixel inside the frame: there is nothing to paste,
	        # and np.max/np.min below would raise on the empty index arrays.
	        return whole_img.astype("uint8")
	    mask_h = np.max(mask_h_inds) - np.min(mask_h_inds)
	    mask_w = np.max(mask_w_inds) - np.min(mask_w_inds)
	    mask_size = int(np.sqrt(mask_h * mask_w))

	    # Shrink the mask so the seam sits inside the warped face area.
	    k = max(mask_size // 10, 10)
	    img_mask = cv2.erode(img_mask, np.ones((k, k), np.uint8), iterations=1)

	    # Feather the edge; the Gaussian kernel side must be odd, hence 2k + 1.
	    k = max(mask_size // 20, 5)
	    blur_size = (2 * k + 1, 2 * k + 1)
	    img_mask = cv2.GaussianBlur(img_mask, blur_size, 0) / 255
	    # Repeat the single-channel mask across three channels.
	    img_mask = np.tile(np.expand_dims(img_mask, axis=-1), (1, 1, 3))

	    if laplacian_blend:
	        bgr_fake = laplacian_blending(
	            bgr_fake.astype("float32").clip(0, 255),
	            whole_img.astype("float32").clip(0, 255),
	            img_mask.clip(0, 1),
	        )
	    bgr_fake = bgr_fake.astype("float32")

	    fake_merged = img_mask * bgr_fake + (1 - img_mask) * whole_img.astype(np.float32)
	    return fake_merged.astype("uint8")