import cv2
import time
import numpy as np
import torch
from utils.models import PNet, RNet, ONet
import utils.tool as utils
import utils.dataloader as image_tools
def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
    pnet, rnet, onet = None, None, None

    if p_model_path is not None:
        pnet = PNet(use_cuda=use_cuda)
        if use_cuda:
            print('p_model_path: {0}'.format(p_model_path))
            pnet.load_state_dict(torch.load(p_model_path))
            pnet.cuda()
        else:
            # force all GPU tensors onto the CPU while loading
            pnet.load_state_dict(torch.load(p_model_path, map_location='cpu'))
        pnet.eval()

    if r_model_path is not None:
        rnet = RNet(use_cuda=use_cuda)
        if use_cuda:
            print('r_model_path: {0}'.format(r_model_path))
            rnet.load_state_dict(torch.load(r_model_path))
            rnet.cuda()
        else:
            rnet.load_state_dict(torch.load(r_model_path, map_location='cpu'))
        rnet.eval()

    if o_model_path is not None:
        onet = ONet(use_cuda=use_cuda)
        if use_cuda:
            print('o_model_path: {0}'.format(o_model_path))
            onet.load_state_dict(torch.load(o_model_path))
            onet.cuda()
        else:
            onet.load_state_dict(torch.load(o_model_path, map_location='cpu'))
        onet.eval()

    return pnet, rnet, onet
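
# Typical usage (a minimal sketch; the .pt paths are hypothetical placeholders,
# see the __main__ example at the bottom of this file for a full run):
#     pnet, rnet, onet = create_mtcnn_net(p_model_path='pnet.pt',
#                                         r_model_path='rnet.pt',
#                                         o_model_path='onet.pt',
#                                         use_cuda=False)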
class MtcnnDetector(object):
    """
    P-Net, R-Net and O-Net face detection and landmark alignment
    """
    def __init__(self,
                 pnet=None,
                 rnet=None,
                 onet=None,
                 min_face_size=12,
                 stride=2,
                 threshold=[0.6, 0.7, 0.7],
                 scale_factor=0.709,
                 ):
        self.pnet_detector = pnet
        self.rnet_detector = rnet
        self.onet_detector = onet
        self.min_face_size = min_face_size
        self.stride = stride
        self.thresh = threshold
        self.scale_factor = scale_factor
    def unique_image_format(self, im):
        """Convert a PIL image to a numpy array with a suitable dtype."""
        if not isinstance(im, np.ndarray):
            if im.mode == 'I':
                im = np.array(im, np.int32, copy=False)
            elif im.mode == 'I;16':
                im = np.array(im, np.int16, copy=False)
            else:
                im = np.asarray(im)
        return im
    def square_bbox(self, bbox):
        """
        convert bbox to a square bbox
        Parameters:
        ----------
        bbox: numpy array, shape n x 5
            input bbox
        Returns:
        -------
        a square bbox
        """
        square_bbox = bbox.copy()

        h = bbox[:, 3] - bbox[:, 1] + 1
        w = bbox[:, 2] - bbox[:, 0] + 1
        l = np.maximum(h, w)
        # center a square of side l on the original box
        square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - l * 0.5
        square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - l * 0.5
        square_bbox[:, 2] = square_bbox[:, 0] + l - 1
        square_bbox[:, 3] = square_bbox[:, 1] + l - 1
        return square_bbox
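
    # Worked example (illustrative values): the box [x1, y1, x2, y2] =
    # [0, 0, 9, 19] has w=10, h=20, so l=20 and the result is [-5, 0, 14, 19]:
    # widened symmetrically into a 20x20 square. Coordinates that fall outside
    # the image are clipped later by pad().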
    def generate_bounding_box(self, cls_map, reg, scale, threshold):
        """
        generate bbox from feature map
        Parameters:
        ----------
        cls_map: numpy array, n x m x 1
            detection score for each position
        reg: numpy array, n x m x 4
            bbox regression offsets
        scale: float number
            scale of this detection
        threshold: float number
            detection threshold
        Returns:
        -------
        bbox array
        """
        stride = 2
        cellsize = 12  # receptive field of PNet

        t_index = np.where(cls_map[:, :, 0] > threshold)

        # found nothing
        if t_index[0].size == 0:
            return np.array([])

        # reg shape: (1, n, m, 4); keep only the offsets whose score is above the threshold
        dx1, dy1, dx2, dy2 = [reg[0, t_index[0], t_index[1], i] for i in range(4)]
        reg = np.array([dx1, dy1, dx2, dy2])

        # classification scores of the positions that passed the threshold
        score = cls_map[t_index[0], t_index[1], 0]

        # t_index[0] indexes rows (y), t_index[1] indexes columns (x);
        # map each feature-map cell back to a 12x12 window in the original image
        boundingbox = np.vstack([np.round((stride * t_index[1]) / scale),             # x1 in the original image
                                 np.round((stride * t_index[0]) / scale),             # y1 in the original image
                                 np.round((stride * t_index[1] + cellsize) / scale),  # x2 in the original image
                                 np.round((stride * t_index[0] + cellsize) / scale),  # y2 in the original image
                                 score,
                                 reg,
                                 ])
        return boundingbox.T
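
    # Worked example (illustrative values): with stride=2, cellsize=12 and
    # scale=0.5, a hit at feature-map cell (row=3, col=7) maps back to
    # x1 = round(2*7/0.5) = 28,  y1 = round(2*3/0.5) = 12,
    # x2 = round((2*7+12)/0.5) = 52,  y2 = round((2*3+12)/0.5) = 36.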
    def resize_image(self, img, scale):
        """
        resize image by a scale factor
        Parameters:
        ----------
        img: numpy array, height x width x channel
            input image, channels in BGR order here
        scale: float number
            scale factor of the resize operation
        Returns:
        -------
        resized image, numpy array, new_height x new_width x channel
        """
        height, width, channels = img.shape
        new_height = int(height * scale)  # resized new height
        new_width = int(width * scale)    # resized new width
        new_dim = (new_width, new_height)
        img_resized = cv2.resize(img, new_dim, interpolation=cv2.INTER_LINEAR)
        return img_resized
    def pad(self, bboxes, w, h):
        """
        pad the boxes that exceed the image boundary
        Parameters:
        ----------
        bboxes: numpy array, n x 5
            input bboxes
        w: float number
            width of the input image
        h: float number
            height of the input image
        Returns:
        -------
        dy, dx: numpy array, n x 1
            start point of the bbox in the target image
        edy, edx: numpy array, n x 1
            end point of the bbox in the target image
        y, x: numpy array, n x 1
            start point of the bbox in the original image
        ey, ex: numpy array, n x 1
            end point of the bbox in the original image
        tmph, tmpw: numpy array, n x 1
            height and width of the bbox
        """
        # width and height of each box
        tmpw = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32)
        tmph = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32)
        numbox = bboxes.shape[0]

        dx = np.zeros((numbox,))
        dy = np.zeros((numbox,))
        edx, edy = tmpw.copy() - 1, tmph.copy() - 1
        # x, y: start point of the bbox in the original image
        # ex, ey: end point of the bbox in the original image
        x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]

        # clip boxes that extend past the right or bottom edge
        tmp_index = np.where(ex > w - 1)
        edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
        ex[tmp_index] = w - 1

        tmp_index = np.where(ey > h - 1)
        edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
        ey[tmp_index] = h - 1

        # clip boxes that extend past the left or top edge
        tmp_index = np.where(x < 0)
        dx[tmp_index] = 0 - x[tmp_index]
        x[tmp_index] = 0

        tmp_index = np.where(y < 0)
        dy[tmp_index] = 0 - y[tmp_index]
        y[tmp_index] = 0

        return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
        return_list = [item.astype(np.int32) for item in return_list]
        return return_list
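
    # Worked example (illustrative values): in a 100x100 image the box
    # [90, 20, 109, 39] (tmpw = tmph = 20) sticks out on the right, so pad()
    # clips ex to 99 and sets edx = 9: image columns 90..99 are copied into
    # columns 0..9 of a zero-filled 20x20 patch.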
    def detect_pnet(self, im):
        """Get face candidates through pnet

        Parameters:
        ----------
        im: numpy array
            input image array (one image)
        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration
        boxes_align: numpy array
            boxes after calibration
        """
        h, w, c = im.shape
        net_size = 12

        current_scale = float(net_size) / self.min_face_size  # initial scale
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        # run the fully convolutional PNet over an image pyramid
        all_boxes = list()
        while min(current_height, current_width) > net_size:
            feed_imgs = []
            image_tensor = image_tools.convert_image_to_tensor(im_resized)
            feed_imgs.append(image_tensor)
            # inference only: no gradient tracking is needed on the inputs
            feed_imgs = torch.stack(feed_imgs)

            if self.pnet_detector.use_cuda:
                feed_imgs = feed_imgs.cuda()

            # self.pnet_detector is a trained pnet torch model; each output
            # cell corresponds to a 12x12 receptive field and yields a
            # classification score and a bounding-box regression
            cls_map, reg = self.pnet_detector(feed_imgs)

            cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())  # (1, n, m, 1)
            reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())          # (1, n, m, 4)

            # boxes = [x1, y1, x2, y2, score, reg]
            boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np, current_scale, self.thresh[0])

            # next level of the image pyramid
            current_scale *= self.scale_factor  # self.scale_factor = 0.709
            im_resized = self.resize_image(im, current_scale)
            current_height, current_width, _ = im_resized.shape

            if boxes.size == 0:
                continue

            # non-maximum suppression within this scale
            keep = utils.nms(boxes[:, :5], 0.5, 'Union')
            boxes = boxes[keep]
            all_boxes.append(boxes)

        if len(all_boxes) == 0:
            return None, None

        all_boxes = np.vstack(all_boxes)

        # merge the detections from all scales of the first stage
        keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
        all_boxes = all_boxes[keep]

        bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
        bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

        boxes = np.vstack([all_boxes[:, 0],
                           all_boxes[:, 1],
                           all_boxes[:, 2],
                           all_boxes[:, 3],
                           all_boxes[:, 4],
                           ])
        boxes = boxes.T

        # all_boxes = [x1, y1, x2, y2, score, reg], reg = [px1, py1, px2, py2]
        align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
        align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
        align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
        align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

        # refine the boxes with the regression offsets
        boxes_align = np.vstack([align_topx,
                                 align_topy,
                                 align_bottomx,
                                 align_bottomy,
                                 all_boxes[:, 4],
                                 ])
        boxes_align = boxes_align.T

        # remove boxes that are too small or fall outside the image
        valindex = [True for _ in range(boxes_align.shape[0])]
        for i in range(boxes_align.shape[0]):
            if boxes_align[i][2] - boxes_align[i][0] <= 3 or boxes_align[i][3] - boxes_align[i][1] <= 3:
                valindex[i] = False
            elif boxes_align[i][2] < 1 or boxes_align[i][0] > w - 2 or boxes_align[i][3] < 1 or boxes_align[i][1] > h - 2:
                valindex[i] = False
        boxes_align = boxes_align[valindex, :]
        boxes = boxes[valindex, :]

        return boxes, boxes_align
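
    # Worked example (illustrative values): with min_face_size=12 the initial
    # scale is 12/12 = 1.0 and successive pyramid levels shrink by
    # scale_factor=0.709, giving scales 1.0, 0.709, 0.503, 0.356, ... until
    # the resized image is no larger than 12 px on its shorter side. With
    # min_face_size=24 the pyramid starts at 0.5 instead, roughly quartering
    # the work at the first level.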
    def detect_rnet(self, im, dets):
        """Get face candidates using rnet

        Parameters:
        ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of pnet
        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration
        boxes_align: numpy array
            boxes after calibration
        """
        h, w, c = im.shape

        if dets is None:
            return None, None
        if dets.shape[0] == 0:
            return None, None

        # dets: n x 5 = [x1, y1, x2, y2, score]
        detss = dets
        # square up the candidate boxes and round the coordinates
        dets = self.square_bbox(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        # crop each candidate, zero-pad the parts that leave the image,
        # and resize to the 24x24 RNet input
        cropped_ims_tensors = []
        for i in range(num_boxes):
            try:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            except Exception:
                print(dy[i], edy[i], dx[i], edx[i], y[i], ey[i], x[i], ex[i], tmpw[i], tmph[i])
                print(dets[i])
                print(detss[i])
                print(h, w)
                exit()
            crop_im = cv2.resize(tmp, (24, 24))
            crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
            cropped_ims_tensors.append(crop_im_tensor)
        feed_imgs = torch.stack(cropped_ims_tensors)

        if self.rnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg = self.rnet_detector(feed_imgs)
        cls_map = cls_map.cpu().data.numpy()
        reg = reg.cpu().data.numpy()

        keep_inds = np.where(cls_map > self.thresh[1])[0]
        if len(keep_inds) > 0:
            boxes = dets[keep_inds]
            cls = cls_map[keep_inds]
            reg = reg[keep_inds]
        else:
            return None, None

        keep = utils.nms(boxes, 0.7)
        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        boxes = np.vstack([keep_boxes[:, 0],
                           keep_boxes[:, 1],
                           keep_boxes[:, 2],
                           keep_boxes[:, 3],
                           keep_cls[:, 0],
                           ])

        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        boxes_align = np.vstack([align_topx,
                                 align_topy,
                                 align_bottomx,
                                 align_bottomy,
                                 keep_cls[:, 0],
                                 ])
        boxes = boxes.T
        boxes_align = boxes_align.T

        # remove boxes that are too small or fall outside the image
        valindex = [True for _ in range(boxes_align.shape[0])]
        for i in range(boxes_align.shape[0]):
            if boxes_align[i][2] - boxes_align[i][0] <= 3 or boxes_align[i][3] - boxes_align[i][1] <= 3:
                valindex[i] = False
                print('rnet has one smaller than 3')
            elif boxes_align[i][2] < 1 or boxes_align[i][0] > w - 2 or boxes_align[i][3] < 1 or boxes_align[i][1] > h - 2:
                valindex[i] = False
                print('rnet has one out')
        boxes_align = boxes_align[valindex, :]
        boxes = boxes[valindex, :]

        return boxes, boxes_align
    def detect_onet(self, im, dets):
        """Get face candidates and landmarks using onet

        Parameters:
        ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of rnet
        Returns:
        -------
        boxes_align: numpy array
            boxes after calibration
        landmarks_align: numpy array
            landmarks after calibration
        """
        h, w, c = im.shape

        if dets is None:
            return None, None
        if dets.shape[0] == 0:
            return None, None

        detss = dets
        dets = self.square_bbox(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        # crop each candidate and resize to the 48x48 ONet input
        cropped_ims_tensors = []
        for i in range(num_boxes):
            try:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            except Exception:
                # fall through with the zero-filled patch so tensor and box
                # indices stay aligned
                print(dy[i], edy[i], dx[i], edx[i], y[i], ey[i], x[i], ex[i], tmpw[i], tmph[i])
                print(dets[i])
                print(detss[i])
                print(h, w)
            crop_im = cv2.resize(tmp, (48, 48))
            crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
            cropped_ims_tensors.append(crop_im_tensor)
        feed_imgs = torch.stack(cropped_ims_tensors)

        if self.onet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg, landmark = self.onet_detector(feed_imgs)
        cls_map = cls_map.cpu().data.numpy()
        reg = reg.cpu().data.numpy()
        landmark = landmark.cpu().data.numpy()

        keep_inds = np.where(cls_map > self.thresh[2])[0]
        if len(keep_inds) > 0:
            boxes = dets[keep_inds]
            cls = cls_map[keep_inds]
            reg = reg[keep_inds]
            landmark = landmark[keep_inds]
        else:
            return None, None

        keep = utils.nms(boxes, 0.7, mode="Minimum")
        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]
        keep_landmark = landmark[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        align_landmark_topx = keep_boxes[:, 0]
        align_landmark_topy = keep_boxes[:, 1]

        boxes_align = np.vstack([align_topx,
                                 align_topy,
                                 align_bottomx,
                                 align_bottomy,
                                 keep_cls[:, 0],
                                 ])
        boxes_align = boxes_align.T

        # landmark offsets are relative to the top-left corner of the
        # (pre-regression) box, scaled by its width and height
        landmark = np.vstack([
            align_landmark_topx + keep_landmark[:, 0] * bw,
            align_landmark_topy + keep_landmark[:, 1] * bh,
            align_landmark_topx + keep_landmark[:, 2] * bw,
            align_landmark_topy + keep_landmark[:, 3] * bh,
            align_landmark_topx + keep_landmark[:, 4] * bw,
            align_landmark_topy + keep_landmark[:, 5] * bh,
            align_landmark_topx + keep_landmark[:, 6] * bw,
            align_landmark_topy + keep_landmark[:, 7] * bh,
            align_landmark_topx + keep_landmark[:, 8] * bw,
            align_landmark_topy + keep_landmark[:, 9] * bh,
        ])
        landmark_align = landmark.T

        return boxes_align, landmark_align
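
    # Each row of landmark_align is x/y-interleaved for five points; in the
    # usual MTCNN labelling these are left eye, right eye, nose, and the left
    # and right mouth corners.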
    def detect_face(self, img):
        """Detect faces over an image by running the PNet, RNet and ONet cascade."""
        boxes_align = np.array([])
        landmark_align = np.array([])
        p_boxes, r_boxes = None, None
        t1 = t2 = t3 = 0.0

        t = time.time()

        # pnet
        if self.pnet_detector:
            p_boxes, boxes_align = self.detect_pnet(img)
            if boxes_align is None:
                return np.array([]), np.array([]), np.array([]), np.array([])
            t1 = time.time() - t
            t = time.time()

        # rnet
        if self.rnet_detector:
            r_boxes, boxes_align = self.detect_rnet(img, boxes_align)
            if boxes_align is None:
                return np.array([]), np.array([]), np.array([]), np.array([])
            t2 = time.time() - t
            t = time.time()

        # onet
        if self.onet_detector:
            boxes_align, landmark_align = self.detect_onet(img, boxes_align)
            if boxes_align is None:
                return np.array([]), np.array([]), np.array([]), np.array([])
            t3 = time.time() - t

        print("time cost " + '{:.3f}'.format(t1 + t2 + t3) +
              ' pnet {:.3f} rnet {:.3f} onet {:.3f}'.format(t1, t2, t3))

        return p_boxes, r_boxes, boxes_align, landmark_align
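

# Minimal end-to-end sketch (hedged example: the model paths and 'test.jpg'
# below are hypothetical placeholders, not files known to ship with this repo).
if __name__ == '__main__':
    pnet, rnet, onet = create_mtcnn_net(p_model_path='./model/pnet_epoch.pt',
                                        r_model_path='./model/rnet_epoch.pt',
                                        o_model_path='./model/onet_epoch.pt',
                                        use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24)

    img = cv2.imread('test.jpg')  # BGR image, height x width x channel
    p_boxes, r_boxes, boxes_align, landmark_align = mtcnn_detector.detect_face(img)

    # draw the final calibrated boxes and the five landmark points per face
    for box in boxes_align:
        x1, y1, x2, y2 = [int(v) for v in box[:4]]
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    for lm in landmark_align:
        for j in range(5):
            cv2.circle(img, (int(lm[2 * j]), int(lm[2 * j + 1])), 2, (0, 0, 255), -1)
    cv2.imwrite('result.jpg', img)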