Spaces:
Running
Running
File size: 3,411 Bytes
73fa238 de1c4fe 6e0675c 73fa238 20623c6 de1c4fe 20623c6 de1c4fe 20623c6 de1c4fe 20623c6 de1c4fe 20623c6 de1c4fe d98a2aa de1c4fe 20623c6 c568917 6e0675c e8eeca3 c568917 20623c6 de1c4fe 20623c6 de1c4fe 73fa238 20623c6 9f2a335 20623c6 9f2a335 73fa238 20623c6 1f9775e 574d7d7 73fa238 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import datasets
import gradio as gr
import numpy as np
import torchvision.transforms as transforms
import mediapipe as mp
import cv2
# Alternative, higher-capacity CLIP backbone (kept for reference, unused):
#model = SentenceTransformer('clip-ViT-B-16')
# CLIP image encoder used to embed the cropped faces for cosine-similarity scoring.
model = SentenceTransformer('clip-ViT-B-32')
# Demo dataset of celebrity face images; individual images are used below as
# default values for the two webcam inputs.
dataset = datasets.load_dataset('brendenc/celeb-identities')
def _locate_face(img, results):
    """Crop the detected face out of ``img`` and draw its bounding box.

    The box is built from mediapipe FaceMesh landmarks 234/454 (left/right
    cheek edges) and 10/152 (forehead/chin).  Coordinates are clamped to the
    image so a landmark slightly outside the frame cannot produce a negative
    index, and a degenerate (empty) box falls back to the whole image so the
    later PIL conversion cannot receive an empty array.

    Args:
        img: HxWx3 numpy image; mutated in place by the rectangle drawing.
        results: the FaceMesh result object for ``img``.

    Returns:
        The cropped face region, or a copy of the full image when no face
        was detected (or the detected box was empty).
    """
    face = img.copy()
    if results.multi_face_landmarks:
        img_h, img_w, _ = img.shape
        for landmarks in results.multi_face_landmarks:
            left = max(0, int(landmarks.landmark[234].x * img_w))
            top = max(0, int(landmarks.landmark[10].y * img_h))
            right = min(img_w, int(landmarks.landmark[454].x * img_w))
            bottom = min(img_h, int(landmarks.landmark[152].y * img_h))
            # Only crop when the box has positive area; always draw it so the
            # user can see what (if anything) was detected.
            if right > left and bottom > top:
                face = img[top:bottom, left:right]
            cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
    return face


def predict(im1, im2, threshold=0.82):
    """Decide whether two face images show the same person.

    Each image is converted to a numpy array, its face located with
    mediapipe FaceMesh (the bounding box is drawn onto the full image),
    and the two face crops embedded with the CLIP model.  The cosine
    similarity of the embeddings is compared against ``threshold``.

    Args:
        im1: first input image (PIL Image from Gradio).
        im2: second input image (PIL Image from Gradio).
        threshold: similarity cutoff above which the two faces are declared
            the same person.  Default 0.82 (the original hard-coded value,
            tuned for clip-ViT-B-32 embeddings).

    Returns:
        Tuple ``(annotated im1, annotated im2, similarity, message)`` where
        the images are PIL Images with the detected face boxes drawn.
    """
    im1 = np.array(im1)
    im2 = np.array(im2)

    # static_image_mode=True: the two photos are unrelated stills, so each
    # must be detected independently.  The previous default (video mode)
    # *tracked* landmarks across process() calls, letting the first image's
    # detection influence the second.
    mp_face_mesh = mp.solutions.face_mesh
    with mp_face_mesh.FaceMesh(static_image_mode=True,
                               max_num_faces=1,
                               refine_landmarks=True,
                               min_detection_confidence=0.5) as face_mesh:
        face1 = _locate_face(im1, face_mesh.process(im1))
        face2 = _locate_face(im2, face_mesh.process(im2))

    # Convert the numpy arrays back to PIL Images: model.encode and the
    # Gradio Image outputs both expect PIL.
    face1 = transforms.ToPILImage()(face1)
    im1 = transforms.ToPILImage()(im1)
    face2 = transforms.ToPILImage()(face2)
    im2 = transforms.ToPILImage()(im2)

    # Off-diagonal entry of the 2x2 similarity matrix = similarity of the
    # two face embeddings.
    embeddings = model.encode([face1, face2])
    sim = cosine_similarity(embeddings)[0, 1]

    if sim > threshold:
        return im1, im2, sim, "SAME PERSON, AUTHORIZE PAYMENT"
    return im1, im2, sim, "DIFFERENT PEOPLE, DON'T AUTHORIZE PAYMENT"
# Default example faces from the demo dataset; the user can replace either
# one via the webcam capture.
example_face_a = dataset['train']['image'][10]
example_face_b = dataset['train']['image'][17]

input_widgets = [
    gr.Image(value=example_face_a, type="pil", source="webcam"),
    gr.Image(value=example_face_b, type="pil", source="webcam"),
]
output_widgets = [
    gr.Image(),
    gr.Image(),
    gr.Number(label="Similarity"),
    gr.Textbox(label="Message"),
]

# Wire the predict function into a two-image-in, four-value-out Gradio app.
interface = gr.Interface(
    fn=predict,
    inputs=input_widgets,
    outputs=output_widgets,
    title='Face ID',
    description='This app uses face biometrics and a similarity to function as a Face ID application.The similarity score ranges from -1 to 1.',
)
interface.launch(debug=True)
#interface.launch(share=True)
|