akhaliq committed
Commit c003417
1 Parent(s): 07a12d1

Create mtcnn_detector.py

Files changed (1)
  1. mtcnn_detector.py +650 -0
mtcnn_detector.py ADDED
@@ -0,0 +1,650 @@
# SPDX-License-Identifier: Apache-2.0
# coding: utf-8
import os
import mxnet as mx
import numpy as np
import math
import cv2
from multiprocessing import Pool
from itertools import repeat
from helper import nms, adjust_input, generate_bbox, detect_first_stage_warpper
try:
    from itertools import izip as zip  # Python 2 compatibility; a no-op on Python 3
except ImportError:
    pass

class MtcnnDetector(object):
    """
        Joint Face Detection and Alignment using Multi-task Cascaded
        Convolutional Neural Networks,
        see https://github.com/kpzhang93/MTCNN_face_detection_alignment
        this is an mxnet version
    """
    def __init__(self,
                 model_folder='.',
                 minsize=20,
                 threshold=[0.6, 0.7, 0.8],
                 factor=0.709,
                 num_worker=1,
                 accurate_landmark=False,
                 ctx=mx.cpu()):
        """
            Initialize the detector

            Parameters:
            ----------
                model_folder : string
                    path to the model files
                minsize : float number
                    minimal face size to detect
                threshold : list of 3 float numbers
                    detection thresholds for the 3 stages
                factor : float number
                    scale factor of the image pyramid
                num_worker : int number
                    number of processes used for the first stage
                accurate_landmark : bool
                    use accurate landmark localization or not
        """
        self.num_worker = num_worker
        self.accurate_landmark = accurate_landmark

        # load the 4 models from the folder
        models = ['det1', 'det2', 'det3', 'det4']
        models = [os.path.join(model_folder, f) for f in models]

        # one PNet replica per worker so the first stage can run in parallel
        self.PNets = []
        for i in range(num_worker):
            worker_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx)
            self.PNets.append(worker_net)

        self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx)
        self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx)
        self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx)

        self.minsize = float(minsize)
        self.factor = float(factor)
        self.threshold = threshold

    def convert_to_square(self, bbox):
        """
            convert bbox to square

            Parameters:
            ----------
                bbox: numpy array, shape n x 5
                    input bbox

            Returns:
            -------
                square bbox
        """
        square_bbox = bbox.copy()

        h = bbox[:, 3] - bbox[:, 1] + 1
        w = bbox[:, 2] - bbox[:, 0] + 1
        max_side = np.maximum(h, w)
        square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5
        square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5
        square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
        square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
        return square_bbox

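    # Worked example for convert_to_square: a 10x20 box [0, 0, 9, 19] becomes
    # the 20x20 square [-5, 0, 14, 19]; the longer side wins and the box is
    # re-centered, so coordinates may go negative and are clipped later by pad().
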
    def calibrate_box(self, bbox, reg):
        """
            calibrate bboxes

            Parameters:
            ----------
                bbox: numpy array, shape n x 5
                    input bboxes
                reg: numpy array, shape n x 4
                    bbox adjustment

            Returns:
            -------
                bboxes after refinement
        """
        w = bbox[:, 2] - bbox[:, 0] + 1
        w = np.expand_dims(w, 1)
        h = bbox[:, 3] - bbox[:, 1] + 1
        h = np.expand_dims(h, 1)
        reg_m = np.hstack([w, h, w, h])
        aug = reg_m * reg
        bbox[:, 0:4] = bbox[:, 0:4] + aug
        return bbox

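    # The network regression output is expressed in units of the box size:
    # e.g. a 100x100 box [0, 0, 99, 99] with reg [0.1, 0.1, -0.1, -0.1]
    # is refined to [10, 10, 89, 89] by calibrate_box.
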
    def pad(self, bboxes, w, h):
        """
            pad the bboxes, also restrict their size

            Parameters:
            ----------
                bboxes: numpy array, n x 5
                    input bboxes
                w: float number
                    width of the input image
                h: float number
                    height of the input image
            Returns:
            -------
                dy, dx : numpy array, n x 1
                    start point of the bbox in the target image
                edy, edx : numpy array, n x 1
                    end point of the bbox in the target image
                y, x : numpy array, n x 1
                    start point of the bbox in the original image
                ey, ex : numpy array, n x 1
                    end point of the bbox in the original image
                tmph, tmpw: numpy array, n x 1
                    height and width of the bbox
        """
        tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1, bboxes[:, 3] - bboxes[:, 1] + 1
        num_box = bboxes.shape[0]

        dx, dy = np.zeros((num_box, )), np.zeros((num_box, ))
        edx, edy = tmpw.copy() - 1, tmph.copy() - 1

        x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]

        # clip boxes that run past the right/bottom image border
        tmp_index = np.where(ex > w-1)
        edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
        ex[tmp_index] = w - 1

        tmp_index = np.where(ey > h-1)
        edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
        ey[tmp_index] = h - 1

        # clip boxes that run past the left/top image border
        tmp_index = np.where(x < 0)
        dx[tmp_index] = 0 - x[tmp_index]
        x[tmp_index] = 0

        tmp_index = np.where(y < 0)
        dy[tmp_index] = 0 - y[tmp_index]
        y[tmp_index] = 0

        return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
        return_list = [item.astype(np.int32) for item in return_list]

        return return_list

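    # Example: a box starting 5 px left of the image gives dx = 5 and x = 0,
    # so the crop is read from column 0 of the image but written starting at
    # column 5 of the zero-padded buffer, keeping the face centered.
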
    def slice_index(self, number):
        """
            slice the indices [0, number) into chunks of num_worker elements;
            the last chunk may be smaller

            Parameters:
            ----------
                number: int number
                    total number of indices
        """
        def chunks(l, n):
            """Yield successive n-sized chunks from l."""
            for i in range(0, len(l), n):
                yield l[i:i + n]
        num_list = range(number)
        return list(chunks(num_list, self.num_worker))

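    # e.g. with num_worker=2, slice_index(5) yields the scale batches
    # (0, 1), (2, 3), (4), so each batch feeds at most two PNet workers.
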
    def detect_face_limited(self, img, det_type=2):
        """
            treat the whole image as a single face proposal and run only the
            later cascade stages: with det_type >= 2 the proposal is first
            refined by RNet, otherwise it goes straight to ONet
        """
        height, width, _ = img.shape
        if det_type >= 2:
            total_boxes = np.array([[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32)
            num_box = total_boxes.shape[0]

            # pad the bbox
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
            # (3, 24, 24) is the input shape for RNet
            input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

            for i in range(num_box):
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
                input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

            output = self.RNet.predict(input_buf)

            # filter total_boxes with the stage-2 threshold
            passed = np.where(output[1][:, 1] > self.threshold[1])
            total_boxes = total_boxes[passed]

            if total_boxes.size == 0:
                return None

            total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
            reg = output[0][passed]

            # nms
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick]
            total_boxes = self.calibrate_box(total_boxes, reg[pick])
            total_boxes = self.convert_to_square(total_boxes)
            total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
        else:
            total_boxes = np.array([[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32)

        num_box = total_boxes.shape[0]
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)

        # filter total_boxes with the stage-3 threshold
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
        patchw = np.round(patchw*0.25)

        # make the patch size even
        patchw[np.where(np.mod(patchw, 2) == 1)] += 1

        # LNet takes the 5 landmark patches stacked along the channel axis
        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            x, y = points[:, i], points[:, i+5]
            x, y = np.round(x - 0.5*patchw), np.round(y - 0.5*patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
                                                                    width,
                                                                    height)
            for j in range(num_box):
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
                input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not allow a large movement
            tmp_index = np.where(np.abs(output[k]-0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw
            pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points

    def detect_face(self, img, det_type=0):
        """
            detect faces over an image
            Parameters:
            ----------
                img: numpy array, bgr order of shape (h, w, 3)
                    input image
            Returns:
            -------
                bboxes: numpy array, n x 5 (x1, y1, x2, y2, score)
                    bboxes
                points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ... y5)
                    landmarks
        """
        # check input
        if img is None:
            return None

        # only works for color images
        if len(img.shape) != 3:
            return None

        height, width, _ = img.shape

        if det_type == 0:
            MIN_DET_SIZE = 12

            # detected boxes
            total_boxes = []

            minl = min(height, width)

            # get all the valid scales
            scales = []
            m = MIN_DET_SIZE/self.minsize
            minl *= m
            factor_count = 0
            while minl > MIN_DET_SIZE:
                scales.append(m*self.factor**factor_count)
                minl *= self.factor
                factor_count += 1

            #############################################
            # first stage
            #############################################

            sliced_index = self.slice_index(len(scales))
            total_boxes = []
            for batch in sliced_index:
                local_boxes = map(detect_first_stage_warpper,
                                  zip(repeat(img), self.PNets[:len(batch)],
                                      [scales[i] for i in batch], repeat(self.threshold[0])))
                total_boxes.extend(local_boxes)

            # remove the Nones
            total_boxes = [i for i in total_boxes if i is not None]

            if len(total_boxes) == 0:
                return None

            total_boxes = np.vstack(total_boxes)

            if total_boxes.size == 0:
                return None

            # merge the detections from the first stage
            pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
            total_boxes = total_boxes[pick]

            bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
            bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1

            # refine the bboxes
            total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw,
                                     total_boxes[:, 1]+total_boxes[:, 6] * bbh,
                                     total_boxes[:, 2]+total_boxes[:, 7] * bbw,
                                     total_boxes[:, 3]+total_boxes[:, 8] * bbh,
                                     total_boxes[:, 4]
                                     ])

            total_boxes = total_boxes.T
            total_boxes = self.convert_to_square(total_boxes)
            total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
        else:
            total_boxes = np.array([[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32)

        #############################################
        # second stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
        # (3, 24, 24) is the input shape for RNet
        input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

        output = self.RNet.predict(input_buf)

        # filter total_boxes with the stage-2 threshold
        passed = np.where(output[1][:, 1] > self.threshold[1])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
        reg = output[0][passed]

        # nms
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick]
        total_boxes = self.calibrate_box(total_boxes, reg[pick])
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)

        # filter total_boxes with the stage-3 threshold
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
        patchw = np.round(patchw*0.25)

        # make the patch size even
        patchw[np.where(np.mod(patchw, 2) == 1)] += 1

        # LNet takes the 5 landmark patches stacked along the channel axis
        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            x, y = points[:, i], points[:, i+5]
            x, y = np.round(x - 0.5*patchw), np.round(y - 0.5*patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
                                                                    width,
                                                                    height)
            for j in range(num_box):
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
                input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not allow a large movement
            tmp_index = np.where(np.abs(output[k]-0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw
            pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points

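    # Pyramid note: the first stage scans scales m * factor^k with
    # m = MIN_DET_SIZE / minsize, stopping once the rescaled shorter image
    # side drops below PNet's 12-pixel input, so minsize directly bounds
    # the smallest detectable face.
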
    def list2colmatrix(self, pts_list):
        """
            convert a list of points to a column matrix
            Parameters:
            ----------
                pts_list:
                    input list of (x, y) points
            Returns:
            -------
                colMat: numpy matrix, 2n x 1
                    points flattened as [x1, y1, x2, y2, ...]
        """
        assert len(pts_list) > 0
        colMat = []
        for i in range(len(pts_list)):
            colMat.append(pts_list[i][0])
            colMat.append(pts_list[i][1])
        colMat = np.matrix(colMat).transpose()
        return colMat

    def find_tfrom_between_shapes(self, from_shape, to_shape):
        """
            find the similarity transform between two shapes
            Parameters:
            ----------
                from_shape: numpy matrix, 2n x 1
                to_shape: numpy matrix, 2n x 1
            Returns:
            -------
                tran_m: 2 x 2 rotation/scale matrix
                tran_b: 2 x 1 translation vector
        """
        assert from_shape.shape[0] == to_shape.shape[0] and from_shape.shape[0] % 2 == 0

        sigma_from = 0.0
        sigma_to = 0.0
        cov = np.matrix([[0.0, 0.0], [0.0, 0.0]])

        # compute the mean and covariance
        from_shape_points = from_shape.reshape(from_shape.shape[0]//2, 2)
        to_shape_points = to_shape.reshape(to_shape.shape[0]//2, 2)
        mean_from = from_shape_points.mean(axis=0)
        mean_to = to_shape_points.mean(axis=0)

        for i in range(from_shape_points.shape[0]):
            temp_dis = np.linalg.norm(from_shape_points[i] - mean_from)
            sigma_from += temp_dis * temp_dis
            temp_dis = np.linalg.norm(to_shape_points[i] - mean_to)
            sigma_to += temp_dis * temp_dis
            cov += (to_shape_points[i].transpose() - mean_to.transpose()) * (from_shape_points[i] - mean_from)

        sigma_from = sigma_from / to_shape_points.shape[0]
        sigma_to = sigma_to / to_shape_points.shape[0]
        cov = cov / to_shape_points.shape[0]

        # compute the affine matrix
        s = np.matrix([[1.0, 0.0], [0.0, 1.0]])
        u, d, vt = np.linalg.svd(cov)

        if np.linalg.det(cov) < 0:
            if d[1] < d[0]:
                s[1, 1] = -1
            else:
                s[0, 0] = -1
        r = u * s * vt
        c = 1.0
        if sigma_from != 0:
            c = 1.0 / sigma_from * np.trace(np.diag(d) * s)

        tran_b = mean_to.transpose() - c * r * mean_from.transpose()
        tran_m = c * r

        return tran_m, tran_b

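    # This is the classic least-squares (Umeyama-style) similarity estimate:
    # it finds the scale c, rotation r and translation b that minimize
    # sum_i ||to_i - (c * r * from_i + b)||^2, with the sign matrix s
    # preventing a reflection when det(cov) < 0.
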
    def extract_image_chips(self, img, points, desired_size=256, padding=0):
        """
            crop and align faces
            Parameters:
            ----------
                img: numpy array, bgr order of shape (h, w, 3)
                    input image
                points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ... y5)
                desired_size: default 256
                padding: default 0
            Returns:
            -------
                crop_imgs: list, n
                    cropped and aligned faces
        """
        crop_imgs = []
        for p in points:
            shape = []
            for k in range(len(p)//2):
                shape.append(p[k])
                shape.append(p[k+5])

            padding = max(padding, 0)
            # average positions of face points
            mean_face_shape_x = [0.224152, 0.75610125, 0.490127, 0.254149, 0.726104]
            mean_face_shape_y = [0.2119465, 0.2119465, 0.628106, 0.780233, 0.780233]

            from_points = []
            to_points = []

            for i in range(len(shape)//2):
                x = (padding + mean_face_shape_x[i]) / (2 * padding + 1) * desired_size
                y = (padding + mean_face_shape_y[i]) / (2 * padding + 1) * desired_size
                to_points.append([x, y])
                from_points.append([shape[2*i], shape[2*i+1]])

            # convert the points to Mat
            from_mat = self.list2colmatrix(from_points)
            to_mat = self.list2colmatrix(to_points)

            # compute the similarity transform
            tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat)

            probe_vec = np.matrix([1.0, 0.0]).transpose()
            probe_vec = tran_m * probe_vec

            scale = np.linalg.norm(probe_vec)
            angle = 180.0 / math.pi * math.atan2(probe_vec[1, 0], probe_vec[0, 0])

            from_center = [(shape[0]+shape[2])/2.0, (shape[1]+shape[3])/2.0]
            to_center = [0, 0]
            to_center[1] = desired_size * 0.4
            to_center[0] = desired_size * 0.5

            ex = to_center[0] - from_center[0]
            ey = to_center[1] - from_center[1]

            rot_mat = cv2.getRotationMatrix2D((from_center[0], from_center[1]), -1*angle, scale)
            rot_mat[0][2] += ex
            rot_mat[1][2] += ey

            chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size))
            crop_imgs.append(chips)

        return crop_imgs
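
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the upstream detector).
# It assumes the four MXNet checkpoints det1-det4 sit in the current folder
# and that 'test.jpg' names any BGR image readable by OpenCV.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    detector = MtcnnDetector(model_folder='.', num_worker=1,
                             accurate_landmark=True, ctx=mx.cpu())
    img = cv2.imread('test.jpg')
    ret = detector.detect_face(img)
    if ret is not None:
        bboxes, points = ret
        print('found %d face(s)' % bboxes.shape[0])
        # align each face into a 128x128 chip; padding=0.37 is a value
        # commonly used with this five-point template
        chips = detector.extract_image_chips(img, points, 128, 0.37)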