DATID-3D / pose_estimation /batch_mtcnn.py
gwang-kim's picture
u
f12ab4c
import argparse
import cv2
import os
from mtcnn import MTCNN
import random
from tqdm import tqdm
import numpy as np
detector = MTCNN()
parser = argparse.ArgumentParser()
parser.add_argument('--in_root', type=str, default="", help='process folder')
args = parser.parse_args()
in_root = args.in_root
out_root = os.path.join(in_root, "debug")
out_detection = os.path.join(in_root, "detections")
if not os.path.exists(out_root):
os.makedirs(out_root)
if not os.path.exists(out_detection):
os.makedirs(out_detection)
imgs = sorted([x for x in os.listdir(in_root) if x.endswith(".jpg") or x.endswith(".png")])
random.shuffle(imgs)
for img in tqdm(imgs):
src = os.path.join(in_root, img)
dst = os.path.join(out_detection, img.replace(".jpg", ".txt").replace(".png", ".txt"))
if not os.path.exists(dst):
image = cv2.cvtColor(cv2.imread(src), cv2.COLOR_BGR2RGB)
print(image.shape)
result = detector.detect_faces(image)
if len(result)>0:
index = 0
if len(result)>1: # if multiple faces, take the biggest face
# size = -100000
lowest_dist = float('Inf')
for r in range(len(result)):
# print(result[r]["box"][0], result[r]["box"][1])
face_pos = np.array(result[r]["box"][:2]) + np.array(result[r]["box"][2:])/2
dist_from_center = np.linalg.norm(face_pos - np.array([1500./2, 1500./2]))
if dist_from_center < lowest_dist:
lowest_dist = dist_from_center
index=r
# size_ = result[r]["box"][2] + result[r]["box"][3]
# if size < size_:
# size = size_
# index = r
# Result is an array with all the bounding boxes detected. We know that for 'ivan.jpg' there is only one.
bounding_box = result[index]['box']
keypoints = result[index]['keypoints']
if result[index]["confidence"] > 0.9:
cv2.rectangle(image,
(bounding_box[0], bounding_box[1]),
(bounding_box[0]+bounding_box[2], bounding_box[1] + bounding_box[3]),
(0,155,255),
2)
cv2.circle(image,(keypoints['left_eye']), 2, (0,155,255), 2)
cv2.circle(image,(keypoints['right_eye']), 2, (0,155,255), 2)
cv2.circle(image,(keypoints['nose']), 2, (0,155,255), 2)
cv2.circle(image,(keypoints['mouth_left']), 2, (0,155,255), 2)
cv2.circle(image,(keypoints['mouth_right']), 2, (0,155,255), 2)
dst = os.path.join(out_root, img)
# cv2.imwrite(dst, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
dst = os.path.join(out_detection, img.replace(".jpg", ".txt").replace(".png", ".txt"))
outLand = open(dst, "w")
outLand.write(str(float(keypoints['left_eye'][0])) + " " + str(float(keypoints['left_eye'][1])) + "\n")
outLand.write(str(float(keypoints['right_eye'][0])) + " " + str(float(keypoints['right_eye'][1])) + "\n")
outLand.write(str(float(keypoints['nose'][0])) + " " + str(float(keypoints['nose'][1])) + "\n")
outLand.write(str(float(keypoints['mouth_left'][0])) + " " + str(float(keypoints['mouth_left'][1])) + "\n")
outLand.write(str(float(keypoints['mouth_right'][0])) + " " + str(float(keypoints['mouth_right'][1])) + "\n")
outLand.close()