Spaces: Build error

Commit a9640c3
Parent(s): 1bee3df (init commit)

Files changed:
- README.md +2 -4
- app.py +60 -0
- faceNet/faceDetection.py +82 -0
- faceNet/faceNet.py +189 -0
- model_utils.py +11 -0
- models/faceNet.onnx +3 -0
- requirements.txt +6 -0
README.md
CHANGED
@@ -1,13 +1,11 @@
 ---
 title: Face Detection
-emoji:
+emoji: App 👤
 colorFrom: purple
-colorTo:
+colorTo: blue
 sdk: gradio
 sdk_version: 3.15.0
 app_file: app.py
 pinned: false
 license: mit
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py
ADDED
@@ -0,0 +1,60 @@
from model_utils import detect_face
import gradio as gr
import numpy as np

# Function to run the app


def run_model(image: np.ndarray):
    return gr.Image.update(value=detect_face(image))


def interface() -> None:
    """
    Create and launch the graphical user interface face detection app.
    """

    # Create the blocks for the interface
    with gr.Blocks() as app:
        # Add a title and opening HTML element
        gr.HTML(
            """
            <div style="text-align: center; max-width: 650px; margin: 0 auto; padding-top: 7px;">
              <div
                style="
                  display: inline-flex;
                  align-items: center;
                  gap: 0.8rem;
                  font-size: 1.85rem;
                "
              >
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                  Face Detection App 👤
                </h1>
              </div>
            </div>
            """
        )
        with gr.Group():
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        webcam_image_in = gr.Webcam(label="Webcam input")
                    with gr.Row():
                        gr.Text(
                            label="⚠️ Reminder", value="Do not forget to click the camera button to freeze and get the webcam image 📷!", interactive=False)
                with gr.Column():
                    with gr.Row():
                        face_detected_image_out = gr.Image(
                            label="Face detected", interactive=False)
                    with gr.Row():
                        detect_button = gr.Button(value="Detect face 👤")

        detect_button.click(fn=run_model, inputs=[
                            webcam_image_in], outputs=face_detected_image_out)

    app.launch()


if __name__ == '__main__':
    interface()  # Run the interface

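Because run_model only wraps detect_face in an image update, the callback can be smoke-tested without launching the Gradio UI. A minimal sketch, assuming a saved frame named webcam_frame.jpg (a hypothetical file, not included in this commit):

```python
import cv2
from app import run_model

# cv2.imread returns BGR; the webcam component delivers RGB, so convert before calling the callback.
frame = cv2.cvtColor(cv2.imread("webcam_frame.jpg"), cv2.COLOR_BGR2RGB)
update = run_model(frame)  # gr.Image.update(...) payload carrying the detected face crop
print(type(update))
```
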
faceNet/faceDetection.py
ADDED
@@ -0,0 +1,82 @@
import cv2
import typing
import numpy as np
import mediapipe as mp

class MPFaceDetection:
    """Object to create and run mediapipe face detection, more about it:
    https://google.github.io/mediapipe/solutions/face_detection.html
    """
    def __init__(
        self,
        model_selection: int = 1,
        confidence: float = 0.5,
        mp_drawing_utils: bool = True,
        color: typing.Tuple[int, int, int] = (255, 255, 255),
        thickness: int = 2,
        ) -> None:
        """
        Args:
            model_selection: (int) - 0 for the short-range face detector (faces within ~2 m), 1 for the full-range detector (faces within ~5 m).
            confidence: (float) - minimum confidence for a face detection to be considered confirmed, range (0.0-1.0).
            mp_drawing_utils: (bool) - whether to use mp_drawing utils or our own drawing. Defaults to True.
            color: (typing.Tuple[int, int, int]) - color for drawing the annotation. Defaults to white.
            thickness: (int) - thickness for drawing the annotation. Defaults to 2 pixels.
        """
        self.mp_drawing_utils = mp_drawing_utils
        self.color = color
        self.thickness = thickness
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_face_detection = mp.solutions.face_detection
        self.face_detection = self.mp_face_detection.FaceDetection(model_selection=model_selection, min_detection_confidence=confidence)

    def tlbr(self, frame: np.ndarray, mp_detections: typing.List) -> np.ndarray:
        """Return coordinates as typing.Iterable([[Top, Left, Bottom, Right]])
        Args:
            frame: (np.ndarray) - frame on which we want to apply detections
            mp_detections: (typing.List) - list of mediapipe detections
        Returns:
            detections: (np.ndarray) - list of detections in [Top, Left, Bottom, Right] coordinates
        """
        detections = []
        frame_height, frame_width, _ = frame.shape
        for detection in mp_detections:
            height = int(detection.location_data.relative_bounding_box.height * frame_height)
            width = int(detection.location_data.relative_bounding_box.width * frame_width)
            left = max(0, int(detection.location_data.relative_bounding_box.xmin * frame_width))
            top = max(0, int(detection.location_data.relative_bounding_box.ymin * frame_height))

            detections.append([top, left, top + height, left + width])

        return np.array(detections)

    def __call__(self, frame: np.ndarray, return_tlbr: bool = False) -> np.ndarray:
        """Main function to do face detection
        Args:
            frame: (np.ndarray) - frame to execute face detection on
            return_tlbr: (bool) - whether to return coordinates instead of the frame with drawn detections
        Returns:
            typing.Union[
                frame: (np.ndarray) - processed frame with detected faces,
                detections: (typing.List) - detections in [Top, Left, Bottom, Right]
            ]
        """
        results = self.face_detection.process(frame)

        if return_tlbr:
            if results.detections:
                return self.tlbr(frame, results.detections)
            return []

        if results.detections:
            if self.mp_drawing_utils:
                # Draw face detections of each face using mediapipe drawing utils.
                for detection in results.detections:
                    self.mp_drawing.draw_detection(frame, detection)

            else:
                # Draw face detections of each face using our own tlbr and cv2.rectangle
                for tlbr in self.tlbr(frame, results.detections):
                    cv2.rectangle(frame, tlbr[:2][::-1], tlbr[2:][::-1], self.color, self.thickness)

        return frame

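For reference, a minimal sketch of using MPFaceDetection on its own, assuming a hypothetical local test image person.jpg; MediaPipe expects RGB frames, so an image read with OpenCV is converted first:

```python
import cv2
from faceNet.faceDetection import MPFaceDetection

detector = MPFaceDetection(model_selection=1, confidence=0.5)

# cv2.imread returns BGR; MediaPipe's face detector expects RGB.
frame = cv2.cvtColor(cv2.imread("person.jpg"), cv2.COLOR_BGR2RGB)

boxes = detector(frame, return_tlbr=True)  # rows of [top, left, bottom, right], or [] if nothing is found
annotated = detector(frame)                # the same call without return_tlbr draws the detections instead
print(boxes)
```
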
faceNet/faceNet.py
ADDED
@@ -0,0 +1,189 @@
import cv2
import stow
import typing
import numpy as np
import onnxruntime as ort

class FaceNet:
    """FaceNet class object, which can be used for simplified face recognition
    """
    def __init__(
        self,
        detector: object,
        onnx_model_path: str = "models/faceNet.onnx",
        anchors: typing.Union[str, dict] = 'faces',
        force_cpu: bool = False,
        threshold: float = 0.5,
        color: tuple = (255, 255, 255),
        thickness: int = 2,
        ) -> None:
        """Object for face recognition
        Params:
            detector: (object) - detector object to detect faces in image
            onnx_model_path: (str) - path to onnx model
            force_cpu: (bool) - if True, onnx model will be run on CPU
            anchors: (str or dict) - path to directory with faces or dictionary with anchor names as keys and anchor encodings as values
            threshold: (float) - threshold for face recognition
            color: (tuple) - color of bounding box and text
            thickness: (int) - thickness of bounding box and text
        """
        if not stow.exists(onnx_model_path):
            raise Exception(f"Model doesn't exist at {onnx_model_path}")

        self.detector = detector
        self.threshold = threshold
        self.color = color
        self.thickness = thickness

        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

        providers = providers if ort.get_device() == "GPU" and not force_cpu else providers[::-1]

        self.ort_sess = ort.InferenceSession(onnx_model_path, providers=providers)

        self.input_shape = self.ort_sess._inputs_meta[0].shape[1:3]

        self.anchors = self.load_anchors(anchors) if isinstance(anchors, str) else anchors

    def normalize(self, img: np.ndarray) -> np.ndarray:
        """Normalize image

        Args:
            img: (np.ndarray) - image to be normalized

        Returns:
            img: (np.ndarray) - normalized image
        """
        mean, std = img.mean(), img.std()
        return (img - mean) / std

    def l2_normalize(self, x: np.ndarray, axis: int = -1, epsilon: float = 1e-10) -> np.ndarray:
        """l2 normalization function

        Args:
            x: (np.ndarray) - input array
            axis: (int) - axis to normalize
            epsilon: (float) - epsilon to avoid division by zero

        Returns:
            x: (np.ndarray) - normalized array
        """
        output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
        return output

    def detect_save_faces(self, image: np.ndarray, output_dir: str = "faces"):
        """Detect faces in a given image and return the last detected crop
        (saving the crops to output_dir is disabled in this Space)

        Args:
            image: (np.ndarray) - image to be processed
            output_dir: (str) - directory where faces would be saved

        Returns:
            crop: (np.ndarray) - last detected face crop, or False if no face was detected
        """
        face_crops = [image[t:b, l:r] for t, l, b, r in self.detector(image, return_tlbr=True)]

        if face_crops == []:
            return False

        # Saving crops to disk and reloading anchors is disabled in this Space:
        # stow.mkdir(output_dir)
        # for index, crop in enumerate(face_crops):
        #     output_path = stow.join(output_dir, f"face_{str(index)}.png")
        #     cv2.imwrite(output_path, crop)
        #     print("Crop saved to:", output_path)
        # self.anchors = self.load_anchors(output_dir)

        return face_crops[-1]

    def load_anchors(self, faces_path: str):
        """Generate anchors for a given faces path

        Args:
            faces_path: (str) - path to directory with faces

        Returns:
            anchors: (dict) - dictionary with anchor names as keys and anchor encodings as values
        """
        anchors = {}
        if not stow.exists(faces_path):
            return {}

        for face_path in stow.ls(faces_path):
            anchors[stow.basename(face_path)] = self.encode(cv2.imread(face_path.path))

        return anchors

    def encode(self, face_image: np.ndarray) -> np.ndarray:
        """Encode face image with FaceNet model

        Args:
            face_image: (np.ndarray) - face image to be encoded

        Returns:
            face_encoding: (np.ndarray) - face encoding
        """
        face = self.normalize(face_image)
        face = cv2.resize(face, self.input_shape).astype(np.float32)

        encode = self.ort_sess.run(None, {self.ort_sess._inputs_meta[0].name: np.expand_dims(face, axis=0)})[0][0]
        normalized_encode = self.l2_normalize(encode)

        return normalized_encode

    def cosine_distance(self, a: np.ndarray, b: typing.Union[np.ndarray, list]) -> np.ndarray:
        """Cosine similarity between vector a and vectors b

        Args:
            a: (np.ndarray) - first vector
            b: (np.ndarray) - second list of vectors

        Returns:
            distance: (float) - cosine similarity between a and each vector in b
        """
        if isinstance(a, list):
            a = np.array(a)

        if isinstance(b, list):
            b = np.array(b)

        return np.dot(a, b.T) / (np.linalg.norm(a) * np.linalg.norm(b))

    def draw(self, image: np.ndarray, face_crops: dict):
        """Draw face crops on image

        Args:
            image: (np.ndarray) - image to be drawn on
            face_crops: (dict) - dictionary with face crops as values and face names as keys

        Returns:
            image: (np.ndarray) - image with drawn face crops
        """
        for value in face_crops.values():
            t, l, b, r = value["tlbr"]
            cv2.rectangle(image, (l, t), (r, b), self.color, self.thickness)
            cv2.putText(image, stow.name(value['name']), (l, t - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, self.color, self.thickness)

        return image

    def __call__(self, frame: np.ndarray) -> np.ndarray:
        """Face recognition pipeline

        Args:
            frame: (np.ndarray) - image to be processed

        Returns:
            frame: (np.ndarray) - image with drawn face recognition results
        """
        face_crops = {index: {"name": "Unknown", "tlbr": tlbr} for index, tlbr in enumerate(self.detector(frame, return_tlbr=True))}
        for key, value in face_crops.items():
            t, l, b, r = value["tlbr"]
            face_encoding = self.encode(frame[t:b, l:r])
            distances = self.cosine_distance(face_encoding, list(self.anchors.values()))
            if np.max(distances) > self.threshold:
                face_crops[key]["name"] = list(self.anchors.keys())[np.argmax(distances)]

        frame = self.draw(frame, face_crops)

        return frame

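For context, a minimal sketch of the full recognition flow outside Gradio, assuming a local faces/ directory with one anchor image per known person and a test photo group.jpg (both hypothetical, not part of this commit):

```python
import cv2
from faceNet.faceDetection import MPFaceDetection
from faceNet.faceNet import FaceNet

facenet = FaceNet(
    detector=MPFaceDetection(),
    onnx_model_path="models/faceNet.onnx",
    anchors="faces",   # anchor encodings are built from this directory; file names become the labels
    force_cpu=True,
    threshold=0.5,
)

frame = cv2.cvtColor(cv2.imread("group.jpg"), cv2.COLOR_BGR2RGB)
result = facenet(frame)  # draws a box per face and the best-matching anchor name above the threshold
cv2.imwrite("result.png", cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
```
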
model_utils.py
ADDED
@@ -0,0 +1,11 @@
from faceNet.faceDetection import MPFaceDetection
from faceNet.faceNet import FaceNet
import numpy as np

def detect_face(image: np.ndarray) -> np.ndarray:
    # Build the detection + recognition pipeline on CPU and return the detected face crop.
    facenet = FaceNet(
        detector=MPFaceDetection(),
        onnx_model_path="models/faceNet.onnx",
        force_cpu=True,
    )
    return facenet.detect_save_faces(image)

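A quick way to exercise detect_face without the web UI, assuming a hypothetical test image me.jpg:

```python
import cv2
from model_utils import detect_face

image = cv2.cvtColor(cv2.imread("me.jpg"), cv2.COLOR_BGR2RGB)
crop = detect_face(image)  # last detected face crop, or False when no face is found
if crop is not False:
    cv2.imwrite("face_crop.png", cv2.cvtColor(crop, cv2.COLOR_RGB2BGR))
```
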
models/faceNet.onnx
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:57979299c43729011d3d91025ec4758892731f23c263e422371885ee9f48ec34
size 91243087

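The weights themselves live in Git LFS; once pulled, the model's expected input can be checked with onnxruntime, which is also where FaceNet derives its input_shape. A small sketch:

```python
import onnxruntime as ort

# Inspect the input tensor; FaceNet.encode resizes face crops to shape[1:3] before inference.
sess = ort.InferenceSession("models/faceNet.onnx", providers=["CPUExecutionProvider"])
inp = sess.get_inputs()[0]
print(inp.name, inp.shape)
```
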
requirements.txt
ADDED
@@ -0,0 +1,6 @@
gradio==3.1.4
numpy==1.23.1
mediapipe==0.9.0
onnxruntime==1.13.1
opencv-python==4.6.0.66
stow==1.1.6