# roop-docker/roop/face_util.py: face detection, alignment and cropping helpers
import threading
from typing import Any

import cv2
import insightface
import numpy as np
from skimage import transform as trans

import roop.globals
from roop.capturer import get_video_frame
from roop.typing import Frame, Face
from roop.utilities import resolve_relative_path, conditional_download
FACE_ANALYSER = None
FACE_SWAPPER = None

THREAD_LOCK_ANALYSER = threading.Lock()
THREAD_LOCK_SWAPPER = threading.Lock()
def get_face_analyser() -> Any:
    global FACE_ANALYSER

    with THREAD_LOCK_ANALYSER:
        # (re)build the analyser when it does not exist yet or when the desired
        # set of analysis modules changed since the last build
        if FACE_ANALYSER is None or roop.globals.g_current_face_analysis != roop.globals.g_desired_face_analysis:
            model_path = resolve_relative_path('..')
            # removed genderage
            allowed_modules = roop.globals.g_desired_face_analysis
            roop.globals.g_current_face_analysis = roop.globals.g_desired_face_analysis
            if roop.globals.CFG.force_cpu:
                print("Forcing CPU for Face Analysis")
                FACE_ANALYSER = insightface.app.FaceAnalysis(
                    name="buffalo_l",
                    root=model_path,
                    providers=["CPUExecutionProvider"],
                    allowed_modules=allowed_modules,
                )
            else:
                FACE_ANALYSER = insightface.app.FaceAnalysis(
                    name="buffalo_l",
                    root=model_path,
                    providers=roop.globals.execution_providers,
                    allowed_modules=allowed_modules,
                )
            FACE_ANALYSER.prepare(
                ctx_id=0,
                det_size=(640, 640) if roop.globals.default_det_size else (320, 320),
            )
    return FACE_ANALYSER
def get_first_face(frame: Frame) -> Any:
    try:
        faces = get_face_analyser().get(frame)
        # left-most face; swap in the commented line below to pick the largest instead
        return min(faces, key=lambda x: x.bbox[0])
        # return sorted(faces, reverse=True, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[0]
    except Exception:
        # no faces detected (min() over an empty list) or detection failed
        return None
def get_all_faces(frame: Frame) -> Any:
    try:
        faces = get_face_analyser().get(frame)
        # sort faces left to right by the x coordinate of their bounding box
        return sorted(faces, key=lambda x: x.bbox[0])
    except Exception:
        return None
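# Usage sketch ("example.jpg" is a hypothetical path):
#   frame = cv2.imread("example.jpg")
#   first = get_first_face(frame)    # left-most face, or None
#   faces = get_all_faces(frame)     # all faces sorted left to right, or None
#   if first is not None:
#       print(first.bbox)            # [startX, startY, endX, endY]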
def extract_face_images(source_filename, video_info, extra_padding=-1.0):
    face_data = []

    if video_info[0]:
        frame = get_video_frame(source_filename, video_info[1])
        if frame is None:
            return face_data
        source_image = frame
    else:
        # np.fromfile + imdecode also handles paths cv2.imread cannot open directly
        source_image = cv2.imdecode(np.fromfile(source_filename, dtype=np.uint8), cv2.IMREAD_COLOR)

    faces = get_all_faces(source_image)
    if faces is None:
        return face_data

    for face in faces:
        (startX, startY, endX, endY) = face["bbox"].astype("int")
        startX, endX, startY, endY = clamp_cut_values(startX, endX, startY, endY, source_image)
        if extra_padding > 0.0:
            if source_image.shape[:2] == (512, 512):
                face_data.append([face, source_image])
                continue

            # retry with growing padding until the face is re-detected in the cutout
            found = False
            for attempt in range(1, 3):
                (startX, startY, endX, endY) = face["bbox"].astype("int")
                startX, endX, startY, endY = clamp_cut_values(startX, endX, startY, endY, source_image)
                cutout_padding = extra_padding
                # top needs extra room for detection
                padding = int((endY - startY) * cutout_padding)
                oldY = startY
                startY -= padding
                # pad the remaining sides less, widening on the second attempt
                factor = 0.25 if attempt == 1 else 0.5
                cutout_padding = factor
                padding = int((endY - oldY) * cutout_padding)
                endY += padding
                padding = int((endX - startX) * cutout_padding)
                startX -= padding
                endX += padding
                startX, endX, startY, endY = clamp_cut_values(
                    startX, endX, startY, endY, source_image
                )
                face_temp = source_image[startY:endY, startX:endX]
                face_temp = resize_image_keep_content(face_temp)
                testfaces = get_all_faces(face_temp)
                if testfaces is not None and len(testfaces) > 0:
                    face_data.append([testfaces[0], face_temp])
                    found = True
                    break

            if not found:
                print("No face found after resizing, this shouldn't happen!")
            continue

        face_temp = source_image[startY:endY, startX:endX]
        if face_temp.size < 1:
            continue
        face_data.append([face, face_temp])
    return face_data
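# Usage sketch: video_info is (is_video, frame_number), matching the unpacking
# above; paths are hypothetical. Each entry pairs a detected Face with its crop:
#   for face, crop in extract_face_images("photo.jpg", (False, 0)):
#       print(face.bbox, crop.shape)
#   # with extra_padding > 0 each crop is a re-detected 512x512 letterboxed cutout:
#   pairs = extract_face_images("clip.mp4", (True, 100), extra_padding=0.1)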
def clamp_cut_values(startX, endX, startY, endY, image):
    # keep the crop rectangle inside the image bounds
    startX = max(startX, 0)
    endX = min(endX, image.shape[1])
    startY = max(startY, 0)
    endY = min(endY, image.shape[0])
    return startX, endX, startY, endY
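# Worked example: on a 480x640 BGR image (shape (480, 640, 3)), a detection
# box spilling past the borders is clipped to the frame:
#   clamp_cut_values(-10, 700, -5, 500, img)  ->  (0, 640, 0, 480)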
def face_offset_top(face: Face, offset):
    # shift the bounding box and all 106 2D landmarks down by `offset` pixels
    face["bbox"][1] += offset
    face["bbox"][3] += offset
    lm106 = face.landmark_2d_106
    add = np.full_like(lm106, [0, offset])
    face["landmark_2d_106"] = lm106 + add
    return face
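# Usage sketch (assumes `face` came from get_first_face): move the bbox and
# landmarks 20 px down before cropping:
#   face = face_offset_top(face, 20)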
def resize_image_keep_content(image, new_width=512, new_height=512):
    (h, w) = image.shape[:2]
    if h > w:
        # fit to height and scale the width by the same ratio
        r = new_height / float(h)
        dim = (int(w * r), new_height)
    else:
        # fit to width and scale the height by the same ratio
        r = new_width / float(w)
        dim = (new_width, int(h * r))
    image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
    (h, w) = image.shape[:2]
    if h == new_height and w == new_width:
        return image

    # letterbox: center the resized image on a black canvas of the target size
    resize_img = np.zeros(shape=(new_height, new_width, 3), dtype=image.dtype)
    offs = (new_width - w) if h == new_height else (new_height - h)
    startoffs = int(offs // 2) if offs % 2 == 0 else int(offs // 2) + 1
    offs = int(offs // 2)
    if h == new_height:
        resize_img[0:new_height, startoffs : new_width - offs] = image
    else:
        resize_img[startoffs : new_height - offs, 0:new_width] = image
    return resize_img
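# Worked example: a 600x300 (h x w) image scales to 512x256 and is centered
# horizontally on a black 512x512 canvas:
#   out = resize_image_keep_content(np.zeros((600, 300, 3), dtype=np.uint8))
#   # out.shape == (512, 512, 3); content occupies columns 128:384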
def rotate_image_90(image, rotate=True):
    # rotate=True -> 90 degrees anticlockwise, rotate=False -> 90 degrees clockwise
    if rotate:
        return np.rot90(image)
    else:
        return np.rot90(image, 1, (1, 0))


def rotate_anticlockwise(frame):
    return rotate_image_90(frame)


def rotate_clockwise(frame):
    return rotate_image_90(frame, False)


def rotate_image_180(image):
    # a true 180 degree rotation flips both axes; np.flip(image, 0) alone would
    # only mirror the image vertically
    return np.rot90(image, 2)
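# Example: shapes after rotation of a 480x640 frame:
#   img = np.zeros((480, 640, 3), dtype=np.uint8)
#   rotate_anticlockwise(img).shape  ->  (640, 480, 3)
#   rotate_clockwise(img).shape      ->  (640, 480, 3)
#   rotate_image_180(img).shape      ->  (480, 640, 3)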
# alignment code from insightface https://github.com/deepinsight/insightface/blob/master/python-package/insightface/utils/face_align.py
arcface_dst = np.array(
[
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041],
],
dtype=np.float32,
)
def estimate_norm(lmk, image_size=112, mode="arcface"):
    # least-squares similarity transform mapping the 5 detected keypoints onto
    # the canonical arcface template, scaled to the requested crop size
    assert lmk.shape == (5, 2)
    assert image_size % 112 == 0 or image_size % 128 == 0
    if image_size % 112 == 0:
        ratio = float(image_size) / 112.0
        diff_x = 0
    else:
        # the 128-based template is shifted 8 px to the right
        ratio = float(image_size) / 128.0
        diff_x = 8.0 * ratio
    dst = arcface_dst * ratio
    dst[:, 0] += diff_x
    tform = trans.SimilarityTransform()
    tform.estimate(lmk, dst)
    M = tform.params[0:2, :]
    return M
def align_crop(img, landmark, image_size=112, mode="arcface"):
    # warp img so the 5-point landmarks land on the arcface template
    M = estimate_norm(landmark, image_size, mode)
    warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
    return warped, M
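# Usage sketch (assumes `face` came from get_first_face and `frame` is a
# BGR image):
#   aligned, M = align_crop(frame, face.kps, 512)
#   # M is the 2x3 affine that maps source pixels into the 512x512 crop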
def square_crop(im, S):
    # letterbox into an S x S canvas, anchored top-left; also return the scale
    # factor needed to map detections back to the original image
    if im.shape[0] > im.shape[1]:
        height = S
        width = int(float(im.shape[1]) / im.shape[0] * S)
        scale = float(S) / im.shape[0]
    else:
        width = S
        height = int(float(im.shape[0]) / im.shape[1] * S)
        scale = float(S) / im.shape[1]
    resized_im = cv2.resize(im, (width, height))
    det_im = np.zeros((S, S, 3), dtype=np.uint8)
    det_im[: resized_im.shape[0], : resized_im.shape[1], :] = resized_im
    return det_im, scale
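# Worked example: a 480x640 frame letterboxed into a 320x320 detector input
# (content anchored top-left, unlike resize_image_keep_content):
#   det_im, scale = square_crop(frame, 320)
#   # det_im.shape == (320, 320, 3); scale == 320 / 640 == 0.5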
def transform(data, center, output_size, scale, rotation):
    # compose: scale, move the center to the origin, rotate, then move the
    # origin to the middle of the output crop
    scale_ratio = scale
    rot = float(rotation) * np.pi / 180.0
    t1 = trans.SimilarityTransform(scale=scale_ratio)
    cx = center[0] * scale_ratio
    cy = center[1] * scale_ratio
    t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy))
    t3 = trans.SimilarityTransform(rotation=rot)
    t4 = trans.SimilarityTransform(translation=(output_size / 2, output_size / 2))
    t = t1 + t2 + t3 + t4
    M = t.params[0:2]
    cropped = cv2.warpAffine(data, M, (output_size, output_size), borderValue=0.0)
    return cropped, M
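# Usage sketch: cut a 256x256 patch around a hypothetical pixel (320, 240),
# zoomed 2x and rotated by 30 degrees:
#   patch, M = transform(frame, (320, 240), 256, 2.0, 30)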
def trans_points2d(pts, M):
    # apply the 2x3 affine matrix M to every (x, y) point
    new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
    for i in range(pts.shape[0]):
        pt = pts[i]
        new_pt = np.array([pt[0], pt[1], 1.0], dtype=np.float32)
        new_pt = np.dot(M, new_pt)
        new_pts[i] = new_pt[0:2]
    return new_pts
def trans_points3d(pts, M):
    # recover the isotropic scale of M from its first row
    scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1])
    new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
    for i in range(pts.shape[0]):
        pt = pts[i]
        new_pt = np.array([pt[0], pt[1], 1.0], dtype=np.float32)
        new_pt = np.dot(M, new_pt)
        new_pts[i][0:2] = new_pt[0:2]
        # z is not part of the 2D transform; it is only rescaled
        new_pts[i][2] = pts[i][2] * scale
    return new_pts
def trans_points(pts, M):
    # dispatch on point dimensionality: (N, 2) -> 2D, (N, 3) -> 3D
    if pts.shape[1] == 2:
        return trans_points2d(pts, M)
    else:
        return trans_points3d(pts, M)
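# Usage sketch: carry the 106-point landmarks into an aligned crop using the
# matrix from align_crop (assumes `face` came from get_first_face):
#   aligned, M = align_crop(frame, face.kps, 128)
#   lmk_crop = trans_points(face.landmark_2d_106, M)   # (106, 2) crop coords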
def create_blank_image(width, height):
    # np.zeros already yields fully transparent [0, 0, 0, 0] RGBA pixels
    return np.zeros((height, width, 4), dtype=np.uint8)
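if __name__ == "__main__":
    # Minimal smoke test of the pure-numpy helpers above; runs without
    # downloading any insightface models.
    img = np.zeros((600, 300, 3), dtype=np.uint8)
    assert resize_image_keep_content(img).shape == (512, 512, 3)
    assert rotate_clockwise(img).shape == (300, 600, 3)
    assert rotate_image_180(img).shape == (600, 300, 3)
    assert clamp_cut_values(-10, 700, -5, 500, img) == (0, 300, 0, 500)
    assert create_blank_image(512, 512).shape == (512, 512, 4)
    print("face_util smoke test passed")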