from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import datasets
import gradio as gr
import numpy as np
import torchvision.transforms as transforms
import mediapipe as mp
import cv2

# Three CLIP backbones are compared side by side; the B-32 score drives the decision.
model = SentenceTransformer('clip-ViT-B-32')
model2 = SentenceTransformer('clip-ViT-B-16')
model3 = SentenceTransformer('clip-ViT-L-14')

dataset = datasets.load_dataset('brendenc/celeb-identities')


def predict(im1, im2):
    # Convert the PIL Images to numpy arrays (RGB, as MediaPipe expects)
    im1 = np.array(im1)
    im2 = np.array(im2)

    # Fall back to the full frames if no face is detected below
    face1 = im1.copy()
    face2 = im2.copy()

    img1_h, img1_w, _ = im1.shape
    img2_h, img2_w, _ = im2.shape

    # Locate each face with MediaPipe Face Mesh. static_image_mode=True treats
    # each input as an independent still image; without it, FaceMesh tracks
    # landmarks across calls as if the two images were consecutive video frames.
    mp_face_mesh = mp.solutions.face_mesh
    with mp_face_mesh.FaceMesh(static_image_mode=True,
                               max_num_faces=1,
                               refine_landmarks=True,
                               min_detection_confidence=0.5) as face_mesh:
        results1 = face_mesh.process(im1)
        results2 = face_mesh.process(im2)

    if results1.multi_face_landmarks:
        for face_landmarks in results1.multi_face_landmarks:
            # Bounding box from the outermost landmarks:
            # 234/454 are the left/right face edges, 10/152 the top/bottom.
            top_x = int(face_landmarks.landmark[234].x * img1_w)
            top_y = int(face_landmarks.landmark[10].y * img1_h)
            bottom_x = int(face_landmarks.landmark[454].x * img1_w)
            bottom_y = int(face_landmarks.landmark[152].y * img1_h)

            # Copy the crop so the rectangle drawn on im1 below doesn't
            # bleed into the face image that gets embedded.
            face1 = im1[top_y:bottom_y, top_x:bottom_x].copy()
            cv2.rectangle(im1, (top_x, top_y), (bottom_x, bottom_y), (0, 255, 0), 2)

    if results2.multi_face_landmarks:
        for face_landmarks in results2.multi_face_landmarks:
            top_x = int(face_landmarks.landmark[234].x * img2_w)
            top_y = int(face_landmarks.landmark[10].y * img2_h)
            bottom_x = int(face_landmarks.landmark[454].x * img2_w)
            bottom_y = int(face_landmarks.landmark[152].y * img2_h)

            face2 = im2[top_y:bottom_y, top_x:bottom_x].copy()
            cv2.rectangle(im2, (top_x, top_y), (bottom_x, bottom_y), (0, 255, 0), 2)

    # Convert the arrays back to PIL Images for display and CLIP encoding
    face1 = transforms.ToPILImage()(face1)
    im1 = transforms.ToPILImage()(im1)
    face2 = transforms.ToPILImage()(face2)
    im2 = transforms.ToPILImage()(im2)

    # Embed the cropped faces with each model; the [0, 1] entry of the
    # similarity matrix is the cosine similarity between the two faces.
    embeddings = model.encode([face1, face2])
    embeddings2 = model2.encode([face1, face2])
    embeddings3 = model3.encode([face1, face2])

    sim = cosine_similarity(embeddings)[0, 1]
    sim2 = cosine_similarity(embeddings2)[0, 1]
    sim3 = cosine_similarity(embeddings3)[0, 1]

    # Empirically chosen threshold on the B-32 similarity
    if sim > 0.82:
        return im1, im2, sim, sim2, sim3, "SAME PERSON, AUTHORIZE PAYMENT"
    else:
        return im1, im2, sim, sim2, sim3, "DIFFERENT PEOPLE, DON'T AUTHORIZE PAYMENT"


interface = gr.Interface(fn=predict,
                         inputs=[gr.Image(value=dataset['train']['image'][10], type="pil", source="webcam"),
                                 gr.Image(value=dataset['train']['image'][17], type="pil", source="webcam")],
                         outputs=[gr.Image(),
                                  gr.Image(),
                                  gr.Number(label="Similarity"),
                                  gr.Number(label="Similarity_b16"),
                                  gr.Number(label="Similarity_l14"),
                                  gr.Textbox(label="Message")],
                         title='Face ID',
                         description='This app uses face biometrics and a similarity score to function as a Face ID application. The similarity score ranges from -1 to 1.')

interface.launch(debug=True)
#interface.launch(share=True)
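
# Optional quick check of predict() without launching the UI (uncomment to run;
# the dataset indices are arbitrary sample images, matching the defaults above):
# im_a, im_b = dataset['train']['image'][10], dataset['train']['image'][17]
# _, _, s_b32, s_b16, s_l14, msg = predict(im_a, im_b)
# print(s_b32, s_b16, s_l14, msg)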