carlosabadia committed on
Commit a9640c3 · 1 Parent(s): 1bee3df

init commit

README.md CHANGED
@@ -1,13 +1,11 @@
 ---
 title: Face Detection
-emoji: 🌍
+emoji: App 👤
 colorFrom: purple
-colorTo: red
+colorTo: blue
 sdk: gradio
 sdk_version: 3.15.0
 app_file: app.py
 pinned: false
 license: mit
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py ADDED
@@ -0,0 +1,60 @@
+from model_utils import detect_face
+import gradio as gr
+import numpy as np
+
+
+# Function to run the face detection model on the webcam image
+def run_model(image: np.ndarray):
+    return gr.Image.update(value=detect_face(image))
+
+
+def interface() -> None:
+    """
+    Create and launch the graphical user interface for the face detection app.
+    """
+
+    # Create the blocks for the interface
+    with gr.Blocks() as app:
+        # Add a title and opening HTML element
+        gr.HTML(
+            """
+            <div style="text-align: center; max-width: 650px; margin: 0 auto; padding-top: 7px;">
+                <div
+                    style="
+                        display: inline-flex;
+                        align-items: center;
+                        gap: 0.8rem;
+                        font-size: 1.85rem;
+                    "
+                >
+                    <h1 style="font-weight: 900; margin-bottom: 7px;">
+                        Face Detection App 👤
+                    </h1>
+                </div>
+            </div>
+            """
+        )
+        with gr.Group():
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        webcam_image_in = gr.Webcam(label="Webcam input")
+                    with gr.Row():
+                        gr.Text(
+                            label="⚠️ Reminder",
+                            value="Do not forget to click the camera button to freeze and get the webcam image 📷!",
+                            interactive=False)
+                with gr.Column():
+                    with gr.Row():
+                        face_detected_image_out = gr.Image(
+                            label="Face detected", interactive=False)
+                    with gr.Row():
+                        detect_button = gr.Button(value="Detect face 👤")
+
+        detect_button.click(fn=run_model, inputs=[webcam_image_in],
+                            outputs=face_detected_image_out)
+
+    app.launch()
+
+
+if __name__ == '__main__':
+    interface()  # Run the interface
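
As a quick sanity check (not part of the commit), the same pipeline that app.py wires into Gradio can be exercised from a plain script; a minimal sketch, assuming a hypothetical local test image person.jpg:

# Sketch: run the face-detection pipeline without the Gradio UI.
import cv2
from model_utils import detect_face

image = cv2.cvtColor(cv2.imread("person.jpg"), cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB
face_crop = detect_face(image)  # first detected face crop, or False if no face was found
if face_crop is not False:
    cv2.imwrite("face_crop.png", cv2.cvtColor(face_crop, cv2.COLOR_RGB2BGR))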
faceNet/faceDetection.py ADDED
@@ -0,0 +1,82 @@
+import cv2
+import typing
+import numpy as np
+import mediapipe as mp
+
+
+class MPFaceDetection:
+    """Object to create and run MediaPipe face detection, more about it:
+    https://google.github.io/mediapipe/solutions/face_detection.html
+    """
+    def __init__(
+        self,
+        model_selection: int = 1,
+        confidence: float = 0.5,
+        mp_drawing_utils: bool = True,
+        color: typing.Tuple[int, int, int] = (255, 255, 255),
+        thickness: int = 2,
+    ) -> None:
+        """
+        Args:
+            model_selection: (int) - 0 for the short-range face detector, 1 for the full-range face detector.
+            confidence: (float) - minimum confidence for a detection to be confirmed, range (0.0-1.0).
+            mp_drawing_utils: (bool) - whether to draw with mp_drawing utils or our own drawing. Defaults to True.
+            color: (typing.Tuple[int, int, int]) - color for drawing the annotation. Defaults to white.
+            thickness: (int) - thickness for drawing the annotation. Defaults to 2 pixels.
+        """
+        self.mp_drawing_utils = mp_drawing_utils
+        self.color = color
+        self.thickness = thickness
+        self.mp_drawing = mp.solutions.drawing_utils
+        self.mp_face_detection = mp.solutions.face_detection
+        self.face_detection = self.mp_face_detection.FaceDetection(
+            model_selection=model_selection, min_detection_confidence=confidence)
+
+    def tlbr(self, frame: np.ndarray, mp_detections: typing.List) -> np.ndarray:
+        """Return coordinates as typing.Iterable([[Top, Left, Bottom, Right]])
+        Args:
+            frame: (np.ndarray) - frame on which we want to apply the detections
+            mp_detections: (typing.List) - list of MediaPipe detections
+        Returns:
+            detections: (np.ndarray) - list of detections in [Top, Left, Bottom, Right] coordinates
+        """
+        detections = []
+        frame_height, frame_width, _ = frame.shape
+        for detection in mp_detections:
+            height = int(detection.location_data.relative_bounding_box.height * frame_height)
+            width = int(detection.location_data.relative_bounding_box.width * frame_width)
+            left = max(0, int(detection.location_data.relative_bounding_box.xmin * frame_width))
+            top = max(0, int(detection.location_data.relative_bounding_box.ymin * frame_height))
+
+            detections.append([top, left, top + height, left + width])
+
+        return np.array(detections)
+
+    def __call__(self, frame: np.ndarray, return_tlbr: bool = False) -> np.ndarray:
+        """Main function to do face detection
+        Args:
+            frame: (np.ndarray) - frame to execute face detection on
+            return_tlbr: (bool) - whether to return coordinates instead of the frame with drawn detections
+        Returns:
+            typing.Union[
+                frame: (np.ndarray) - processed frame with detected faces,
+                detections: (typing.List) - detections in [Top, Left, Bottom, Right]
+            ]
+        """
+        results = self.face_detection.process(frame)
+
+        if return_tlbr:
+            if results.detections:
+                return self.tlbr(frame, results.detections)
+            return []
+
+        if results.detections:
+            if self.mp_drawing_utils:
+                # Draw each detected face using MediaPipe drawing utils.
+                for detection in results.detections:
+                    self.mp_drawing.draw_detection(frame, detection)
+
+            else:
+                # Draw each detected face using our own tlbr boxes and cv2.rectangle.
+                for tlbr in self.tlbr(frame, results.detections):
+                    cv2.rectangle(frame, tuple(tlbr[:2][::-1]), tuple(tlbr[2:][::-1]), self.color, self.thickness)
+
+        return frame
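
For illustration only (not part of the commit), MPFaceDetection can also be used standalone; a minimal sketch, assuming a hypothetical image file sample.jpg:

# Sketch: standalone use of MPFaceDetection on a single RGB frame.
import cv2
from faceNet.faceDetection import MPFaceDetection

detector = MPFaceDetection(confidence=0.5)
rgb_frame = cv2.cvtColor(cv2.imread("sample.jpg"), cv2.COLOR_BGR2RGB)

boxes = detector(rgb_frame, return_tlbr=True)  # [[top, left, bottom, right], ...]
annotated = detector(rgb_frame)                # same frame with detections drawn in place
print(f"Detected {len(boxes)} face(s)")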
faceNet/faceNet.py ADDED
@@ -0,0 +1,189 @@
+import cv2
+import stow
+import typing
+import numpy as np
+import onnxruntime as ort
+
+
+class FaceNet:
+    """FaceNet class object, which can be used for simplified face recognition
+    """
+    def __init__(
+        self,
+        detector: object,
+        onnx_model_path: str = "models/faceNet.onnx",
+        anchors: typing.Union[str, dict] = 'faces',
+        force_cpu: bool = False,
+        threshold: float = 0.5,
+        color: tuple = (255, 255, 255),
+        thickness: int = 2,
+    ) -> None:
+        """Object for face recognition
+        Params:
+            detector: (object) - detector object to detect faces in image
+            onnx_model_path: (str) - path to onnx model
+            force_cpu: (bool) - if True, onnx model will be run on CPU
+            anchors: (str or dict) - path to directory with faces or dictionary with anchor names as keys and anchor encodings as values
+            threshold: (float) - threshold for face recognition
+            color: (tuple) - color of bounding box and text
+            thickness: (int) - thickness of bounding box and text
+        """
+        if not stow.exists(onnx_model_path):
+            raise Exception(f"Model doesn't exist at {onnx_model_path}")
+
+        self.detector = detector
+        self.threshold = threshold
+        self.color = color
+        self.thickness = thickness
+
+        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+
+        # Prefer GPU execution when available, unless force_cpu is set
+        providers = providers if ort.get_device() == "GPU" and not force_cpu else providers[::-1]
+
+        self.ort_sess = ort.InferenceSession(onnx_model_path, providers=providers)
+
+        self.input_shape = self.ort_sess._inputs_meta[0].shape[1:3]
+
+        self.anchors = self.load_anchors(anchors) if isinstance(anchors, str) else anchors
+
+    def normalize(self, img: np.ndarray) -> np.ndarray:
+        """Normalize image
+
+        Args:
+            img: (np.ndarray) - image to be normalized
+
+        Returns:
+            img: (np.ndarray) - normalized image
+        """
+        mean, std = img.mean(), img.std()
+        return (img - mean) / std
+
+    def l2_normalize(self, x: np.ndarray, axis: int = -1, epsilon: float = 1e-10) -> np.ndarray:
+        """l2 normalization function
+
+        Args:
+            x: (np.ndarray) - input array
+            axis: (int) - axis to normalize
+            epsilon: (float) - epsilon to avoid division by zero
+
+        Returns:
+            x: (np.ndarray) - normalized array
+        """
+        output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
+        return output
+
+    def detect_save_faces(self, image: np.ndarray, output_dir: str = "faces"):
+        """Detect faces in a given image and return the first detected face crop
+
+        Args:
+            image: (np.ndarray) - image to be processed
+            output_dir: (str) - directory where faces would be saved (saving is currently disabled)
+
+        Returns:
+            crop: (np.ndarray) - first detected face crop, or False if no face was detected
+        """
+        face_crops = [image[t:b, l:r] for t, l, b, r in self.detector(image, return_tlbr=True)]
+
+        if face_crops == []:
+            return False
+
+        # Saving crops to disk is disabled; only the first crop is returned.
+        # stow.mkdir(output_dir)
+        # for index, crop in enumerate(face_crops):
+        #     output_path = stow.join(output_dir, f"face_{str(index)}.png")
+        #     cv2.imwrite(output_path, crop)
+        #     print("Crop saved to:", output_path)
+        # self.anchors = self.load_anchors(output_dir)
+
+        return face_crops[0]
+
+    def load_anchors(self, faces_path: str):
+        """Generate anchors for given faces path
+
+        Args:
+            faces_path: (str) - path to directory with faces
+
+        Returns:
+            anchors: (dict) - dictionary with anchor names as keys and anchor encodings as values
+        """
+        anchors = {}
+        if not stow.exists(faces_path):
+            return {}
+
+        for face_path in stow.ls(faces_path):
+            anchors[stow.basename(face_path)] = self.encode(cv2.imread(face_path.path))
+
+        return anchors
+
+    def encode(self, face_image: np.ndarray) -> np.ndarray:
+        """Encode face image with FaceNet model
+
+        Args:
+            face_image: (np.ndarray) - face image to be encoded
+
+        Returns:
+            face_encoding: (np.ndarray) - face encoding
+        """
+        face = self.normalize(face_image)
+        face = cv2.resize(face, self.input_shape).astype(np.float32)
+
+        encode = self.ort_sess.run(None, {self.ort_sess._inputs_meta[0].name: np.expand_dims(face, axis=0)})[0][0]
+        normalized_encode = self.l2_normalize(encode)
+
+        return normalized_encode
+
+    def cosine_distance(self, a: np.ndarray, b: typing.Union[np.ndarray, list]) -> np.ndarray:
+        """Cosine similarity between vector a and vectors b (higher means more similar)
+
+        Args:
+            a: (np.ndarray) - first vector
+            b: (np.ndarray or list) - second vector or list of vectors
+
+        Returns:
+            distance: (np.ndarray) - cosine similarity values
+        """
+        if isinstance(a, list):
+            a = np.array(a)
+
+        if isinstance(b, list):
+            b = np.array(b)
+
+        return np.dot(a, b.T) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+    def draw(self, image: np.ndarray, face_crops: dict):
+        """Draw face crops on image
+
+        Args:
+            image: (np.ndarray) - image to be drawn on
+            face_crops: (dict) - dictionary whose values hold the face name and [Top, Left, Bottom, Right] coordinates
+
+        Returns:
+            image: (np.ndarray) - image with drawn face crops
+        """
+        for value in face_crops.values():
+            t, l, b, r = value["tlbr"]
+            cv2.rectangle(image, (l, t), (r, b), self.color, self.thickness)
+            cv2.putText(image, stow.name(value['name']), (l, t - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, self.color, self.thickness)
+
+        return image
+
+    def __call__(self, frame: np.ndarray) -> np.ndarray:
+        """Face recognition pipeline
+
+        Args:
+            frame: (np.ndarray) - image to be processed
+
+        Returns:
+            frame: (np.ndarray) - image with drawn face recognition results
+        """
+        face_crops = {index: {"name": "Unknown", "tlbr": tlbr} for index, tlbr in enumerate(self.detector(frame, return_tlbr=True))}
+        for key, value in face_crops.items():
+            t, l, b, r = value["tlbr"]
+            face_encoding = self.encode(frame[t:b, l:r])
+            distances = self.cosine_distance(face_encoding, list(self.anchors.values()))
+            if np.max(distances) > self.threshold:
+                face_crops[key]["name"] = list(self.anchors.keys())[np.argmax(distances)]
+
+        frame = self.draw(frame, face_crops)
+
+        return frame
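
For context (not part of the commit), the full recognition pipeline in FaceNet.__call__ can be driven as below; a sketch assuming a faces/ directory of reference crops and a hypothetical image group_photo.jpg:

# Sketch: detection + recognition against anchor faces stored in "faces/".
import cv2
from faceNet.faceDetection import MPFaceDetection
from faceNet.faceNet import FaceNet

facenet = FaceNet(
    detector=MPFaceDetection(),
    onnx_model_path="models/faceNet.onnx",
    anchors="faces",   # directory with one reference image per person
    force_cpu=True,
)

frame = cv2.cvtColor(cv2.imread("group_photo.jpg"), cv2.COLOR_BGR2RGB)
result = facenet(frame)  # boxes and recognized names drawn onto the frame
cv2.imwrite("result.png", cv2.cvtColor(result, cv2.COLOR_RGB2BGR))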
model_utils.py ADDED
@@ -0,0 +1,11 @@
+from faceNet.faceDetection import MPFaceDetection
+from faceNet.faceNet import FaceNet
+import numpy as np
+
+
+def detect_face(image: np.ndarray) -> np.ndarray:
+    # Build the detector + FaceNet pipeline and return the first detected face crop
+    facenet = FaceNet(
+        detector=MPFaceDetection(),
+        onnx_model_path="models/faceNet.onnx",
+        force_cpu=True,
+    )
+    return facenet.detect_save_faces(image)
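
One possible refinement, sketched here rather than part of the commit: detect_face rebuilds the FaceNet object, and with it the ONNX session, on every call; caching a single instance avoids reloading the ~91 MB model each time:

# Sketch: cache the FaceNet instance so the ONNX model is loaded only once.
from functools import lru_cache

import numpy as np

from faceNet.faceDetection import MPFaceDetection
from faceNet.faceNet import FaceNet


@lru_cache(maxsize=1)
def _get_facenet() -> FaceNet:
    return FaceNet(
        detector=MPFaceDetection(),
        onnx_model_path="models/faceNet.onnx",
        force_cpu=True,
    )


def detect_face(image: np.ndarray) -> np.ndarray:
    return _get_facenet().detect_save_faces(image)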
models/faceNet.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57979299c43729011d3d91025ec4758892731f23c263e422371885ee9f48ec34
+size 91243087
requirements.txt ADDED
@@ -0,0 +1,6 @@
+gradio==3.1.4
+numpy==1.23.1
+mediapipe==0.9.0
+onnxruntime==1.13.1
+opencv-python==4.6.0.66
+stow==1.1.6