Spaces:
Running
Running
import os | |
import io | |
import time | |
import json | |
import math | |
import base64 | |
from uuid import uuid4 | |
from PIL import Image as PILImage | |
os.environ["DEEPFACE_HOME"] = "." | |
import pyzipper | |
import numpy as np | |
import gradio as gr | |
from annoy import AnnoyIndex | |
from deepface import DeepFace | |
# Annoy index over 512-dimensional face embeddings, compared by euclidean distance.
index = AnnoyIndex(512, "euclidean")
index.load("face.db")

# Lookup from Annoy item number to performer id (see search_performer).
# Opened with a context manager so the file handle is closed after parsing.
with open("face.json") as _index_file:
    ANNOY_INDEX = json.load(_index_file)

# Performer records keyed by performer id, stored in an AES-encrypted zip.
# The archive password is read from the VISAGE_KEY environment variable.
with pyzipper.AESZipFile('persons.zip') as zf:
    password = os.getenv("VISAGE_KEY","").encode('ascii')
    zf.setpassword(password)
    PERFORMER_DB = json.loads(zf.read('performers.json'))
## Prediction functions | |
def image_search_performer(image, threshold=20.0, results=3):
    """Search for a performer in an image

    Only the first face returned by DeepFace is searched.

    Returns a list of performers with at least following keys:
    - id: the performer's id
    - distance: the distance between the face in the image and the performer's face
    - confidence: a confidence score between 0 and 100
    - hits: the number of times the performer was found in our database

    Raises gr.Error when DeepFace raises ValueError, which this file treats
    as "no face detected" (same handling as image_search_performers).
    """
    image_array = np.array(image)
    try:
        faces = DeepFace.represent(
            img_path=image_array,
            detector_backend='retinaface',
            model_name='Facenet512',
            normalization="Facenet2018",
        )
    except ValueError as e:
        # Surface a readable message in the UI instead of a raw traceback.
        print(e)
        raise gr.Error("No faces found in the image") from e

    return search_performer(faces[0]['embedding'], threshold, results)
def image_search_performers(image, threshold=20.0, results=3):
    """Search for performers for every face found in an image

    `image` must support `.crop()` (the interface passes a PIL image).

    Returns a list with one entry per detected face, each with the keys:
    - image: the cropped face as a base64 encoded JPEG
    - confidence: the face detection confidence reported by DeepFace
    - performers: the search_performer() matches for that face

    Raises gr.Error when DeepFace raises ValueError (treated as no face found).
    """
    image_array = np.array(image)

    t = time.time()
    try:
        # retinaface is the detector in use; yolov8 and mtcnn were previously
        # tried here as alternative detector backends.
        faces = DeepFace.represent(
            img_path=image_array,
            detector_backend='retinaface',
            model_name='Facenet512',
            normalization="Facenet2018",
        )
    except ValueError as e:
        print(e)
        # Chain the original error so the root cause stays in the traceback.
        raise gr.Error("No faces found in the image") from e
    print(f"Time to find faces: {time.time() - t}")

    response = []
    for face in faces:
        area = face['facial_area']
        # facial_area is x/y/w/h; PIL's crop wants left/top/right/bottom.
        cimage = image.crop((area['x'], area['y'], area['x'] + area['w'], area['y'] + area['h']))
        buf = io.BytesIO()
        cimage.save(buf, format='JPEG')
        response.append({
            'image': base64.b64encode(buf.getvalue()).decode('ascii'),
            'confidence': face['face_confidence'],
            'performers': search_performer(face['embedding'], threshold, results)
        })
    return response
def vector_search_performer(vector_json, threshold=20.0, results=3):
    """Search for a performer from a vector

    The vector should be created with Deepface and should be a 512 vector,
    passed as a JSON array of numbers.

    For best results use the following settings:
    - detector_backend: retinaface
    - model: Facenet512
    - normalization: Facenet2018

    Returns a list of performers with at least following keys:
    - id: the performer's id
    - distance: the distance between the face in the image and the performer's face
    - confidence: a confidence score between 0 and 100
    - hits: the number of times the performer was found in our database

    Raises gr.Error when the input is not valid JSON, is not numeric, or
    does not contain exactly 512 values.
    """
    try:
        # json.JSONDecodeError is a ValueError; TypeError covers a None input.
        vector = np.array(json.loads(vector_json), dtype=float)
    except (TypeError, ValueError) as e:
        raise gr.Error("The vector must be a JSON array of 512 numbers") from e

    # The index holds 512-dimensional embeddings; reject anything else up front.
    if vector.shape != (512,):
        raise gr.Error("The vector must be a JSON array of 512 numbers")

    return search_performer(vector, threshold, results)
def search_performer(vector, threshold=20.0, results=3):
    """Look up the performers closest to a face embedding

    The 50 nearest index entries are grouped by performer. The first
    (closest) entry of a performer sets its distance; every additional entry
    of the same performer adds a hit and lowers the distance by 0.5.

    Falsy `threshold` / `results` values fall back to 20.0 / 3. The threshold
    only scales the confidence; matches are not filtered by it.

    Returns up to `results` performer dicts sorted by ascending distance.
    """
    threshold = threshold if threshold else 20.0
    results = results if results else 3

    neighbour_items, neighbour_distances = index.get_nns_by_vector(
        vector, 50, search_k=100000, include_distances=True
    )

    matches = {}
    for item, item_distance in zip(neighbour_items, neighbour_distances):
        performer_id = ANNOY_INDEX[item]
        entry = matches.get(performer_id)

        if entry is not None:
            # Performer already seen: reward the repeated hit.
            entry["hits"] += 1
            entry["distance"] -= 0.5
            entry["confidence"] = normalize_confidence_from_distance(entry["distance"], threshold)
            continue

        entry = {
            "id": performer_id,
            "distance": round(item_distance, 2),
            "confidence": normalize_confidence_from_distance(item_distance, threshold),
            "hits": 1,
        }
        # Merge in the stored performer record when one exists.
        if performer_id in PERFORMER_DB:
            entry.update(PERFORMER_DB.get(performer_id))
        matches[performer_id] = entry

    ranked = sorted(matches.values(), key=lambda match: match["distance"])
    return ranked[:results]
def normalize_confidence_from_distance(distance, threshold=20.0):
    """Convert a face distance into an integer confidence percentage

    The value from face_distance_to_conf() is scaled by 100 and truncated
    with int().
    """
    confidence = face_distance_to_conf(distance, threshold)
    percentage = confidence * 100.0
    return int(percentage)
def face_distance_to_conf(face_distance, face_match_threshold=20.0, max_distance=None):
    """Using a face distance, calculate a similarity confidence value

    The result is 1.0 at distance 0, 0.5 at the threshold, and never
    increases as the distance grows.

    - face_distance: distance between two embeddings; negative values are
      treated as 0.
    - face_match_threshold: distance at which the confidence is 0.5.
    - max_distance: distance at which the confidence reaches 0. Defaults to
      1.0 for thresholds below 1.0 (distances normalized to 0..1) and to
      twice the threshold otherwise.

    Returns a float between 0.0 and 1.0.
    """
    if max_distance is None:
        # A fixed maximum of 1.0 only makes sense for normalized distances.
        # With larger thresholds it made far faces score higher than
        # borderline ones, and divided by zero at a threshold of exactly 1.0.
        if face_match_threshold < 1.0:
            max_distance = 1.0
        else:
            max_distance = 2.0 * face_match_threshold

    # A distance cannot be negative; values pushed below zero count as exact.
    face_distance = max(face_distance, 0.0)

    if face_distance > face_match_threshold:
        # The face is far away, so give it a low confidence
        far_range = max_distance - face_match_threshold
        if far_range <= 0:
            return 0.0
        linear_val = (max_distance - face_distance) / (far_range * 2.0)
        return max(linear_val, 0.0)

    if face_match_threshold <= 0:
        # Only reachable when distance and threshold are both 0: exact match.
        return 1.0

    # The face is close, so give it a high confidence
    close_range = face_match_threshold
    linear_val = 1.0 - (face_distance / (close_range * 2.0))
    # But adjust this value by a curve so that we don't get a linear
    # transition from close to far. We want it to be more confident
    # the closer it is.
    return linear_val + ((1.0 - linear_val) * math.pow((linear_val - 0.5) * 2, 0.2))
def find_faces_in_sprite(image, vtt):
    """Find the sprite frames that contain a face

    `image` is the sprite sheet as an array and `vtt` the base64 encoded VTT
    text, optionally prefixed with "data:text/vtt;base64,".

    Every region yielded by getVTToffsets() is cropped from the sprite and
    scanned for faces; detections with a confidence of 0.6 or lower are
    ignored.

    Returns one dict per frame that has a face, with the keys id (a fresh
    uuid), offset, frame, time and size (w * h of the first kept face).
    """
    vtt_bytes = base64.b64decode(vtt.replace("data:text/vtt;base64,", ""))
    sprite = PILImage.fromarray(image)

    found = []
    for frame_no, cue in enumerate(getVTToffsets(vtt_bytes)):
        left, top, right, bottom, time_seconds = cue
        # `right` / `bottom` come from xywh, so they are added to the origin.
        tile = sprite.crop((left, top, left + right, top + bottom))
        detections = DeepFace.extract_faces(
            np.asarray(tile),
            detector_backend="mediapipe",
            enforce_detection=False,
            align=False,
        )
        confident = [hit for hit in detections if hit['confidence'] > 0.6]
        if not confident:
            continue

        first_area = confident[0]['facial_area']
        size = first_area['w'] * first_area['h']
        found.append({
            'id': str(uuid4()),
            "offset": (left, top, right, bottom),
            "frame": frame_no,
            "time": time_seconds,
            'size': size,
        })

    return found
def getVTToffsets(vtt):
    """Yield the sprite region and start time of every cue in a VTT file

    `vtt` is the raw VTT file content as UTF-8 encoded bytes.

    For each line carrying an `xywh=x,y,w,h` fragment this yields
    `(left, top, width, height, time_seconds)`, where `time_seconds` is the
    start of the most recent timing line (0 if there was none yet).
    Regions at x offset 0 are yielded like any other region.
    """
    time_seconds = 0
    for line in vtt.decode("utf-8").split("\n"):
        line = line.strip()

        if "-->" in line:
            # grab the start time
            # 00:00:00.000 --> 00:00:41.000
            start = line.split("-->")[0].strip()
            # convert to seconds; WebVTT allows the hours field to be
            # omitted (mm:ss.ttt), so fold the fields instead of indexing.
            time_seconds = 0.0
            for part in start.split(":"):
                time_seconds = time_seconds * 60 + float(part)
        elif "xywh=" in line:
            left, top, width, height = (
                int(value) for value in line.split("xywh=")[-1].split(",")
            )
            yield left, top, width, height, time_seconds
def _search_sliders():
    """Create a new threshold/results slider pair for one search tab."""
    return [
        gr.Slider(label="threshold",minimum=0.0, maximum=30.0, value=20.0),
        gr.Slider(label="results", minimum=0, maximum=50, value=3, step=1),
    ]


# Tab 1: search using only the first face found in the uploaded image.
image_search = gr.Interface(
    fn=image_search_performer,
    inputs=[gr.Image(), *_search_sliders()],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="Upload an image of a person and we'll tell you who it is.",
)

# Tab 2: search every face found in the image (the function needs a PIL image).
image_search_multiple = gr.Interface(
    fn=image_search_performers,
    inputs=[gr.Image(type="pil"), *_search_sliders()],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="Upload an image of a person(s) and we'll tell you who it is.",
)

# Tab 3: search from an embedding pasted as JSON text.
vector_search = gr.Interface(
    fn=vector_search_performer,
    inputs=[gr.Textbox(), *_search_sliders()],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="512 vector created with deepface of a person and we'll tell you who it is.",
)

# Tab 4: list the frames of a sprite sheet that contain a face.
faces_in_sprite = gr.Interface(
    fn=find_faces_in_sprite,
    inputs=[
        gr.Image(),
        gr.Textbox(label="VTT file"),
    ],
    outputs=gr.JSON(label=""),
)

# Combine the four tabs and serve them on all network interfaces.
app = gr.TabbedInterface(
    [image_search, image_search_multiple, vector_search, faces_in_sprite]
)
app.queue().launch(server_name="0.0.0.0")