from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import datasets
import gradio as gr
import numpy as np
import torchvision.transforms as transforms
import mediapipe as mp
import cv2

# CLIP image encoder used to embed the face crops; the commented-out B-16
# variant (finer 16x16 patches, slower) is a drop-in alternative
#model = SentenceTransformer('clip-ViT-B-16')
model = SentenceTransformer('clip-ViT-B-32')

# Celebrity-identity dataset, used only to pre-populate the demo inputs below
dataset = datasets.load_dataset('brendenc/celeb-identities')

def predict(im1, im2):
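    """Compare two face images and report whether they appear to be the same person.

    Returns both annotated images, the cosine similarity of their CLIP
    embeddings, and an authorize/deny message.
    """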
    # Convert the PIL Images to numpy arrays (RGB)
    im1 = np.array(im1)
    im2 = np.array(im2)
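    # Fallback: if no face is detected, the full frames are used as the "faces"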
    face1 = im1.copy()
    face2 = im2.copy()

    img1_h, img1_w, _ = im1.shape
    img2_h, img2_w, _ = im2.shape

    # Locate face using mediapipe
    mp_face_mesh = mp.solutions.face_mesh

    # static_image_mode runs detection on each image independently (the inputs
    # are two unrelated stills, not video frames, so tracking does not apply)
    with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True,
                               min_detection_confidence=0.5) as face_mesh:
        # MediaPipe expects RGB input, which the PIL-derived arrays already are
        results1 = face_mesh.process(im1)
        results2 = face_mesh.process(im2)

        if results1.multi_face_landmarks:
            for face_landmarks in results1.multi_face_landmarks:
                # Bounding box from canonical FaceMesh landmark indices:
                # 234/454 = left/right face edges, 10 = forehead top, 152 = chin
                top_x = int(face_landmarks.landmark[234].x * img1_w)
                top_y = int(face_landmarks.landmark[10].y * img1_h)
                bottom_x = int(face_landmarks.landmark[454].x * img1_w)
                bottom_y = int(face_landmarks.landmark[152].y * img1_h)
                
                face1 = im1[top_y:bottom_y, top_x:bottom_x]
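                # Draw the box on the display image only; the crop above stays unmarked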
                cv2.rectangle(im1, (top_x, top_y), (bottom_x, bottom_y), (0, 255, 0), 2)

        if results2.multi_face_landmarks:
            for face_landmarks in results2.multi_face_landmarks:
                # Same bounding-box landmarks as for the first image
                top_x2 = int(face_landmarks.landmark[234].x * img2_w)
                top_y2 = int(face_landmarks.landmark[10].y * img2_h)
                bottom_x2 = int(face_landmarks.landmark[454].x * img2_w)
                bottom_y2 = int(face_landmarks.landmark[152].y * img2_h)
                
                face2 = im2[top_y2:bottom_y2, top_x2:bottom_x2]
                cv2.rectangle(im2, (top_x2, top_y2), (bottom_x2, bottom_y2), (0, 255, 0), 2)

    # Convert the numpy arrays back to PIL Images for CLIP encoding and display
    face1 = transforms.ToPILImage()(face1)
    im1 = transforms.ToPILImage()(im1)
    face2 = transforms.ToPILImage()(face2)
    im2 = transforms.ToPILImage()(im2)

    # Embed both face crops with CLIP and compare them by cosine similarity
    embeddings = model.encode([face1, face2])
    sim = cosine_similarity(embeddings)[0, 1]

    # Decision threshold for this demo; cosine similarity ranges from -1 to 1
    if sim > 0.82:
        return im1, im2, sim, "SAME PERSON, AUTHORIZE PAYMENT"
    else:
        return im1, im2, sim, "DIFFERENT PEOPLE, DON'T AUTHORIZE PAYMENT"


interface = gr.Interface(fn=predict,
                         inputs=[gr.Image(value=dataset['train']['image'][10], type="pil", source="webcam"),
                                 gr.Image(value=dataset['train']['image'][17], type="pil", source="webcam")],
                         outputs=[gr.Image(),
                                  gr.Image(),
                                  gr.Number(label="Similarity"),
                                  gr.Textbox(label="Message")],
                         title='Face ID',
                         description='This app uses face biometrics and a similarity score to function as a Face ID application. The similarity score ranges from -1 to 1.')

interface.launch(debug=True)  # debug=True surfaces errors in the console
#interface.launch(share=True)  # share=True also creates a temporary public link