|
|
|
import time |
|
from typing import Any, Dict, Union, List, Tuple |
|
|
|
|
|
import numpy as np |
|
|
|
|
|
from deepface.modules import representation, detection, modeling |
|
from deepface.models.FacialRecognition import FacialRecognition |
|
from deepface.commons import logger as log |
|
|
|
logger = log.get_singletonish_logger() |
|
|
|
|
|
def verify(
    img1_path: Union[str, np.ndarray, List[float]],
    img2_path: Union[str, np.ndarray, List[float]],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    distance_metric: str = "cosine",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
    silent: bool = False,
) -> Dict[str, Any]:
    """
    Verify if an image pair represents the same person or different persons.

    The verification function converts facial images to vectors and calculates the similarity
    between those vectors. Vectors of images of the same person should exhibit higher similarity
    (or lower distance) than vectors of images of different persons.

    Args:
        img1_path (str or np.ndarray or List[float]): Path to the first image.
            Accepts exact image path as a string, numpy array (BGR), base64 encoded images
            or pre-calculated embeddings.

        img2_path (str or np.ndarray or List[float]): Path to the second image.
            Accepts exact image path as a string, numpy array (BGR), base64 encoded images
            or pre-calculated embeddings.

        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
            (default is opencv)

        distance_metric (string): Metric for measuring similarity. Options: 'cosine',
            'euclidean', 'euclidean_l2' (default is cosine).

        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Set to False to avoid the exception for low-resolution images (default is True).

        align (bool): Flag to enable face alignment (default is True).

        expand_percentage (int): expand detected facial area with a percentage (default is 0).

        normalization (string): Normalize the input image before feeding it to the model.
            Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)

        silent (boolean): Suppress or allow some log messages for a quieter analysis process
            (default is False).

    Returns:
        result (dict): A dictionary containing verification results.

        - 'verified' (bool): Indicates whether the images represent the same person (True)
            or different persons (False).

        - 'distance' (float): The distance measure between the face vectors.
            A lower distance indicates higher similarity.

        - 'threshold' (float): The maximum distance threshold used for verification.
            If the distance is below this threshold, the images are considered a match.

        - 'model' (str): The chosen face recognition model.

        - 'detector_backend' (str): The chosen face detector backend.

        - 'similarity_metric' (str): The chosen similarity metric for measuring distances.

        - 'facial_areas' (dict): Rectangular regions of interest for faces in both images.
            - 'img1': {'x': int, 'y': int, 'w': int, 'h': int}
                    Region of interest for the first image.
            - 'img2': {'x': int, 'y': int, 'w': int, 'h': int}
                    Region of interest for the second image.

        - 'time' (float): Time taken for the verification process in seconds.
    """

    tic = time.time()

    model: FacialRecognition = modeling.build_model(model_name)
    dims = model.output_shape

    # both inputs go through the same resolution logic (embedding list vs. image)
    img1_embeddings, img1_facial_areas = _resolve_verify_input(
        img_path=img1_path,
        index=1,
        dims=dims,
        model_name=model_name,
        detector_backend=detector_backend,
        enforce_detection=enforce_detection,
        align=align,
        expand_percentage=expand_percentage,
        normalization=normalization,
        silent=silent,
    )

    img2_embeddings, img2_facial_areas = _resolve_verify_input(
        img_path=img2_path,
        index=2,
        dims=dims,
        model_name=model_name,
        detector_backend=detector_backend,
        enforce_detection=enforce_detection,
        align=align,
        expand_percentage=expand_percentage,
        normalization=normalization,
        silent=silent,
    )

    # placeholder region reported when an input was a pre-calculated embedding
    no_facial_area = {
        "x": None,
        "y": None,
        "w": None,
        "h": None,
        "left_eye": None,
        "right_eye": None,
    }

    # compare every detected face in img1 against every detected face in img2
    # and keep the closest pair as the verification result
    distances = []
    facial_areas = []
    for idx, img1_embedding in enumerate(img1_embeddings):
        for idy, img2_embedding in enumerate(img2_embeddings):
            distance = find_distance(img1_embedding, img2_embedding, distance_metric)
            distances.append(distance)
            facial_areas.append(
                (img1_facial_areas[idx] or no_facial_area, img2_facial_areas[idy] or no_facial_area)
            )

    threshold = find_threshold(model_name, distance_metric)
    distance = float(min(distances))
    facial_areas = facial_areas[np.argmin(distances)]

    toc = time.time()

    resp_obj = {
        "verified": distance <= threshold,
        "distance": distance,
        "threshold": threshold,
        "model": model_name,
        "detector_backend": detector_backend,
        "similarity_metric": distance_metric,
        "facial_areas": {"img1": facial_areas[0], "img2": facial_areas[1]},
        "time": round(toc - tic, 2),
    }

    return resp_obj


def _resolve_verify_input(
    img_path: Union[str, np.ndarray, List[float]],
    index: int,
    dims: int,
    model_name: str,
    detector_backend: str,
    enforce_detection: bool,
    align: bool,
    expand_percentage: int,
    normalization: str,
    silent: bool,
) -> Tuple[List[List[float]], List[Union[dict, None]]]:
    """
    Resolve one verify() input into parallel lists of embeddings and facial areas.

    A list input is treated as a single pre-calculated embedding (validated for item
    type and dimensionality); any other input is run through face detection and
    representation.

    Args:
        img_path (str or np.ndarray or List[float]): one of verify()'s image inputs
        index (int): 1 for img1_path, 2 for img2_path - used only in messages
        dims (int): expected embedding dimensionality of the chosen model
        (remaining args are forwarded unchanged from verify())
    Returns:
        embeddings (List[List[float]]): one embedding per detected face
        facial_areas (List[dict or None]): region per face; [None] for embedding input
    Raises:
        ValueError: invalid embedding list, or face extraction failure
    """
    if isinstance(img_path, list):
        if not all(isinstance(dim, float) for dim in img_path):
            raise ValueError(
                f"When passing img{index}_path as a list, ensure that all its items are of type float."
            )

        if silent is False:
            ordinal = "1st" if index == 1 else "2nd"
            logger.warn(
                f"You passed {ordinal} image as pre-calculated embeddings."
                f"Please ensure that embeddings have been calculated for the {model_name} model."
            )

        if len(img_path) != dims:
            raise ValueError(
                f"embeddings of {model_name} should have {dims} dimensions,"
                f" but it has {len(img_path)} dimensions input"
            )

        # no facial region is known for a raw embedding input
        return [img_path], [None]

    try:
        return __extract_faces_and_embeddings(
            img_path=img_path,
            model_name=model_name,
            detector_backend=detector_backend,
            enforce_detection=enforce_detection,
            align=align,
            expand_percentage=expand_percentage,
            normalization=normalization,
        )
    except ValueError as err:
        raise ValueError(f"Exception while processing img{index}_path") from err
|
|
|
|
|
def __extract_faces_and_embeddings(
    img_path: Union[str, np.ndarray],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
) -> Tuple[List[List[float]], List[dict]]:
    """
    Extract facial areas and find corresponding embeddings for given image
    Returns:
        embeddings (List[float])
        facial areas (List[dict])
    """
    face_objs = detection.extract_faces(
        img_path=img_path,
        detector_backend=detector_backend,
        grayscale=False,
        enforce_detection=enforce_detection,
        align=align,
        expand_percentage=expand_percentage,
    )

    embeddings: List[List[float]] = []
    facial_areas: List[dict] = []

    for face_obj in face_objs:
        # faces are already cropped above, so detection is skipped here
        repr_objs = representation.represent(
            img_path=face_obj["face"],
            model_name=model_name,
            enforce_detection=enforce_detection,
            detector_backend="skip",
            align=align,
            normalization=normalization,
        )
        embeddings.append(repr_objs[0]["embedding"])
        facial_areas.append(face_obj["facial_area"])

    return embeddings, facial_areas
|
|
|
|
|
def find_cosine_distance(
    source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> np.float64:
    """
    Find cosine distance between two given vectors
    Args:
        source_representation (np.ndarray or list): 1st vector
        test_representation (np.ndarray or list): 2nd vector
    Returns
        distance (np.float64): calculated cosine distance
    """
    # lists are accepted as well; convert them to arrays up front
    vec_a = np.asarray(source_representation)
    vec_b = np.asarray(test_representation)

    # cosine distance = 1 - cos(angle) = 1 - (a.b) / (|a| * |b|)
    similarity = np.dot(vec_a, vec_b) / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))
    return 1 - similarity
|
|
|
|
|
def find_euclidean_distance(
    source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> np.float64:
    """
    Find euclidean distance between two given vectors
    Args:
        source_representation (np.ndarray or list): 1st vector
        test_representation (np.ndarray or list): 2nd vector
    Returns
        distance (np.float64): calculated euclidean distance
    """
    # lists are accepted as well; convert them to arrays up front
    vec_a = np.asarray(source_representation)
    vec_b = np.asarray(test_representation)

    # euclidean distance = sqrt(sum((a - b)^2))
    diff = vec_a - vec_b
    return np.sqrt(np.dot(diff, diff))
|
|
|
|
|
def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray:
    """
    Normalize input vector with l2
    Args:
        x (np.ndarray or list): given vector
    Returns:
        y (np.ndarray): l2 normalized vector
    """
    # lists are accepted as well; convert them to arrays up front
    vec = np.asarray(x)
    return vec / np.linalg.norm(vec)
|
|
|
|
|
def find_distance(
    alpha_embedding: Union[np.ndarray, list],
    beta_embedding: Union[np.ndarray, list],
    distance_metric: str,
) -> np.float64:
    """
    Wrapper to find distance between vectors according to the given distance metric
    Args:
        alpha_embedding (np.ndarray or list): 1st vector
        beta_embedding (np.ndarray or list): 2nd vector
        distance_metric (str): one of 'cosine', 'euclidean' or 'euclidean_l2'
    Returns
        distance (np.float64): distance under the chosen metric
    Raises:
        ValueError: if an unknown distance metric name is given
    """
    if distance_metric == "cosine":
        return find_cosine_distance(alpha_embedding, beta_embedding)
    if distance_metric == "euclidean":
        return find_euclidean_distance(alpha_embedding, beta_embedding)
    if distance_metric == "euclidean_l2":
        # normalize both vectors first, then take the plain euclidean distance
        return find_euclidean_distance(
            l2_normalize(alpha_embedding), l2_normalize(beta_embedding)
        )
    raise ValueError("Invalid distance_metric passed - ", distance_metric)
|
|
|
|
|
def find_threshold(model_name: str, distance_metric: str) -> float:
    """
    Retrieve pre-tuned threshold values for a model and distance metric pair
    Args:
        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
        distance_metric (str): distance metric name. Options are cosine, euclidean
            and euclidean_l2.
    Returns:
        threshold (float): threshold value for that model name and distance metric
            pair. Distances less than this threshold will be classified same person.
    """
    # fallback values per metric, used for models without a tuned entry
    default_thresholds = {"cosine": 0.40, "euclidean": 0.55, "euclidean_l2": 0.75}

    # pre-tuned thresholds per (model, metric) pair
    tuned_thresholds = {
        "VGG-Face": {"cosine": 0.68, "euclidean": 1.17, "euclidean_l2": 1.17},
        "Facenet": {"cosine": 0.40, "euclidean": 10, "euclidean_l2": 0.80},
        "Facenet512": {"cosine": 0.30, "euclidean": 23.56, "euclidean_l2": 1.04},
        "ArcFace": {"cosine": 0.68, "euclidean": 4.15, "euclidean_l2": 1.13},
        "Dlib": {"cosine": 0.07, "euclidean": 0.6, "euclidean_l2": 0.4},
        "SFace": {"cosine": 0.593, "euclidean": 10.734, "euclidean_l2": 1.055},
        "OpenFace": {"cosine": 0.10, "euclidean": 0.55, "euclidean_l2": 0.55},
        "DeepFace": {"cosine": 0.23, "euclidean": 64, "euclidean_l2": 0.64},
        "DeepID": {"cosine": 0.015, "euclidean": 45, "euclidean_l2": 0.17},
        "GhostFaceNet": {"cosine": 0.65, "euclidean": 35.71, "euclidean_l2": 1.10},
    }

    # unknown model -> per-metric defaults; unknown metric -> 0.4
    per_metric = tuned_thresholds.get(model_name, default_thresholds)
    return per_metric.get(distance_metric, 0.4)
|
|