# stashface/app.py
import os
import json
import math
import base64
from io import BytesIO
from uuid import uuid4

from PIL import Image as PILImage

# Must be set before deepface is imported so model weights are cached locally.
os.environ["DEEPFACE_HOME"] = "."
import pyzipper
import numpy as np
import gradio as gr
from annoy import AnnoyIndex
from deepface import DeepFace

# 512-dimensional Facenet512 embeddings indexed with Annoy (euclidean metric).
index = AnnoyIndex(512, "euclidean")
index.load("face.db")
ANNOY_INDEX = json.load(open("face.json"))

# Performer metadata ships in an AES-encrypted zip; the key comes from the environment.
with pyzipper.AESZipFile('persons.zip') as zf:
    password = os.getenv("VISAGE_KEY", "").encode('ascii')
    zf.setpassword(password)
    PERFORMER_DB = json.loads(zf.read('performers.json'))
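# Assumed data shapes (inferred from the usage below): ANNOY_INDEX is a list
# mapping an Annoy item number to a performer id, and PERFORMER_DB maps a
# performer id to a metadata dict that gets merged into search results.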
## Prediction functions
def image_search_performer(image, threshold=20.0, results=3):
    """Search for a performer in an image.

    Returns a list of performers, each with at least the following keys:
    - id: the performer's id
    - distance: the distance between the face in the image and the performer's face
    - confidence: a confidence score between 0 and 100
    - hits: the number of times the performer was found in our database
    """
    image_array = np.array(image)
    face = DeepFace.represent(
        img_path=image_array,
        detector_backend='retinaface',
        model_name='Facenet512',
        normalization="Facenet2018",
    )[0]['embedding']
    return search_performer(face, threshold, results)
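# Quick local check (hypothetical file name), bypassing the Gradio UI:
#   img = PILImage.open("query.jpg")
#   for match in image_search_performer(img):
#       print(match["id"], match["distance"], match["confidence"])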
def image_search_performers(image, threshold=20.0, results=3):
    """Search for every face found in an image.

    Returns one entry per detected face: the cropped face as a base64 JPEG,
    the detector's confidence, and the matching performers.
    """
    image_array = np.array(image)
    response = []
    faces = DeepFace.represent(
        img_path=image_array,
        detector_backend='retinaface',
        model_name='Facenet512',
        normalization="Facenet2018",
    )
    # faces = DeepFace.represent(img_path=image_array, detector_backend='yolov8', model_name='SFace')
    for face in faces:
        embedding = face['embedding']
        area = face['facial_area']
        confidence = face['face_confidence']
        cimage = image.crop((area['x'], area['y'], area['x'] + area['w'], area['y'] + area['h']))

        # Encode the cropped face as a base64 JPEG for display (in memory, no temp file).
        buffer = BytesIO()
        cimage.save(buffer, format='JPEG')
        im_b64 = base64.b64encode(buffer.getvalue())

        response.append({
            'image': im_b64.decode('ascii'),
            'confidence': confidence,
            'performers': search_performer(embedding, threshold, results),
        })
    return response
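# Each response entry has the shape:
#   {"image": "<base64 JPEG of the cropped face>",
#    "confidence": <detector confidence>,
#    "performers": [<search_performer() results>]}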
def vector_search_performer(vector_json, threshold=20.0, results=3):
    """Search for a performer from a raw embedding vector.

    The vector should be created with DeepFace and must be 512-dimensional.
    For best results use the following settings:
    - detector_backend: retinaface
    - model: Facenet512
    - normalization: Facenet2018

    Returns a list of performers, each with at least the following keys:
    - id: the performer's id
    - distance: the distance between the face in the image and the performer's face
    - confidence: a confidence score between 0 and 100
    - hits: the number of times the performer was found in our database
    """
    vector = np.array(json.loads(vector_json))
    return search_performer(vector, threshold, results)
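# Sketch of producing a compatible vector client-side, using the settings the
# docstring above recommends (image path is hypothetical):
#   emb = DeepFace.represent(img_path="face.jpg",
#                            detector_backend="retinaface",
#                            model_name="Facenet512",
#                            normalization="Facenet2018")[0]["embedding"]
#   vector_search_performer(json.dumps(emb))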
def search_performer(vector, threshold=20.0, results=3):
    threshold = threshold or 20.0
    results = results or 3

    # Pull the 50 nearest neighbours, then aggregate them per performer.
    ids, distances = index.get_nns_by_vector(
        vector, 50, search_k=10000, include_distances=True
    )
    persons = {}
    for p, distance in zip(ids, distances):
        person_id = ANNOY_INDEX[p]
        if person_id in persons:
            # Repeat hits make a match more credible: count them and apply a
            # small distance bonus so multi-hit performers rank higher.
            persons[person_id]["hits"] += 1
            persons[person_id]["distance"] -= 0.5
            persons[person_id]["confidence"] = normalize_confidence_from_distance(persons[person_id]["distance"], threshold)
            continue

        persons[person_id] = {
            "id": person_id,
            "distance": round(distance, 2),
            "confidence": normalize_confidence_from_distance(distance, threshold),
            "hits": 1,
        }

        if person_id in PERFORMER_DB:
            persons[person_id].update(PERFORMER_DB.get(person_id))

    persons = sorted(persons.values(), key=lambda x: x["distance"])
    # persons = [p for p in persons if p["distance"] < threshold]
    return persons[:results]
def normalize_confidence_from_distance(distance, threshold=20.0):
    """Normalize confidence to a 0-100 scale."""
    confidence = face_distance_to_conf(distance, threshold)
    return int(confidence * 100.0)
def face_distance_to_conf(face_distance, face_match_threshold=20.0):
    """Using a face distance, calculate a similarity confidence value in [0, 1]."""
    if face_distance > face_match_threshold:
        # The face is far away, so give it a low confidence.
        span = 1.0 - face_match_threshold
        linear_val = (1.0 - face_distance) / (span * 2.0)
        return linear_val
    else:
        # The face is close, so give it a high confidence.
        span = face_match_threshold
        linear_val = 1.0 - (face_distance / (span * 2.0))
        # But adjust this value by a curve so that we don't get a linear
        # transition from close to far. We want it to be more confident
        # the closer it is.
        return linear_val + ((1.0 - linear_val) * math.pow((linear_val - 0.5) * 2, 0.2))
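# Worked examples of the curve with the default threshold of 20.0:
#   distance  5.0 -> linear 0.875, curved ~0.993 -> confidence 99
#   distance 15.0 -> linear 0.625, curved ~0.909 -> confidence 90
#   distance 19.0 -> linear 0.525, curved ~0.786 -> confidence 78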
def find_faces_in_sprite(image, vtt):
    """Scan a sprite sheet of video frames for faces, using the VTT cues as frame offsets."""
    vtt = base64.b64decode(vtt.replace("data:text/vtt;base64,", ""))
    sprite = PILImage.fromarray(image)

    results = []
    for i, (x, y, w, h, time_seconds) in enumerate(get_vtt_offsets(vtt)):
        cut_frame = sprite.crop((x, y, x + w, y + h))
        faces = DeepFace.extract_faces(np.asarray(cut_frame), detector_backend="mediapipe", enforce_detection=False, align=False)
        faces = [face for face in faces if face['confidence'] > 0.75]
        if faces:
            size = faces[0]['facial_area']['w'] * faces[0]['facial_area']['h']
            data = {'id': str(uuid4()), "offset": (x, y, w, h), "frame": i, "time": time_seconds, 'size': size}
            results.append(data)

    # Sort by face size, largest first.
    results = sorted(results, key=lambda r: r['size'], reverse=True)
    return results
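# Each result entry:
#   {"id": <uuid>, "offset": (x, y, w, h), "frame": <sprite cell index>,
#    "time": <cue start in seconds>, "size": <w*h of the detected face>}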
def get_vtt_offsets(vtt):
    """Yield (x, y, w, h, time_seconds) for each cue in a sprite VTT file."""
    time_seconds = 0
    x = y = w = h = None
    for line in vtt.decode("utf-8").split("\n"):
        line = line.strip()
        if "-->" in line:
            # Grab the cue's start time, e.g. "00:00:00.000 --> 00:00:41.000",
            # and convert it to seconds.
            start = line.split("-->")[0].strip().split(":")
            time_seconds = (
                int(start[0]) * 3600
                + int(start[1]) * 60
                + float(start[2])
            )
            x = y = w = h = None
        elif "xywh=" in line:
            x, y, w, h = (
                int(v) for v in line.split("xywh=")[-1].split(",")
            )
        else:
            continue

        # Only yield once the xywh fragment is known; test "x is None" rather
        # than "not x" so a legitimate x == 0 offset isn't skipped.
        if x is None:
            continue
        yield x, y, w, h, time_seconds
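# A sprite VTT cue in the format the parser above expects:
#   00:00:05.000 --> 00:00:10.000
#   sprite.jpg#xywh=160,90,160,90
# i.e. a standard WebVTT timing line followed by a media-fragment xywh offset.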
image_search = gr.Interface(
    fn=image_search_performer,
    inputs=[
        gr.Image(),
        gr.Slider(label="threshold", minimum=0.0, maximum=30.0, value=20.0),
        gr.Slider(label="results", minimum=0, maximum=50, value=3, step=1),
    ],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="Upload an image of a person and we'll tell you who it is.",
)

image_search_multiple = gr.Interface(
    fn=image_search_performers,
    inputs=[
        gr.Image(type='pil'),
        gr.Slider(label="threshold", minimum=0.0, maximum=30.0, value=20.0),
        gr.Slider(label="results", minimum=0, maximum=50, value=3, step=1),
    ],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="Upload an image of one or more people and we'll tell you who they are.",
)

vector_search = gr.Interface(
    fn=vector_search_performer,
    inputs=[
        gr.Textbox(),
        gr.Slider(label="threshold", minimum=0.0, maximum=30.0, value=20.0),
        gr.Slider(label="results", minimum=0, maximum=50, value=3, step=1),
    ],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="Paste a 512-dimensional vector created with DeepFace and we'll tell you who it is.",
)

faces_in_sprite = gr.Interface(
    fn=find_faces_in_sprite,
    inputs=[
        gr.Image(),
        gr.Textbox(label="VTT file"),
    ],
    outputs=gr.JSON(label=""),
)

gr.TabbedInterface(
    [image_search, image_search_multiple, vector_search, faces_in_sprite]
).queue().launch(server_name="0.0.0.0")