"""Scan the GRID corpus image tree and report sentences whose mouth-crop
images are missing.

For every (speaker, sentence) pair that has both extracted frames and an
alignment file, each source ``*.jpg`` frame is checked for a matching file
in the crop directory; missing crops are printed and collected in
``bad_filepaths`` / ``bad_sentences``.
"""

import os.path
import sys

sys.path.append('..')

from multiprocessing import Pool

import cv2
import dlib
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm

import options
import options as opt

# dlib face detector + 68-point landmark model (parts 48-67 are the mouth).
predictor_path = '../pretrain/shape_predictor_68_face_landmarks.dat'
predictor = dlib.shape_predictor(predictor_path)
detector = dlib.get_frontal_face_detector()

RUN_PARALLEL = True
FORCE_RATIO = True
BORDER = 10

base = os.path.abspath('..')
image_dir = os.path.join(base, options.images_dir)
anno_dir = os.path.join(base, options.alignments_dir)
crop_dir = os.path.join(base, options.crop_images_dir)


def get_mouth_marks(shape):
    """Return the mouth bounding box of a dlib 68-point landmark result.

    Collects landmark parts 48-67 (the mouth) and returns the extreme
    points as ``(x_left, y_left, x_right, y_right)`` — i.e. the top-left
    and bottom-right corners of the mouth region.
    """
    marks = np.zeros((2, 20))
    for co, ii in enumerate(range(48, 68)):
        part = shape.part(ii)
        marks[0, co] = part.x
        marks[1, co] = part.y
    # Compute each axis reduction once instead of once per coordinate.
    mins = np.amin(marks, axis=1)
    maxs = np.amax(marks, axis=1)
    return int(mins[0]), int(mins[1]), int(maxs[0]), int(maxs[1])


# Collect every (speaker, sentence) pair that has both extracted frames
# and an alignment file; these are the pairs we expect crops for.
translate_pairs = []
for speaker_no in range(1, 35):
    speaker_name = f's{speaker_no}'
    speaker_image_dir = os.path.join(image_dir, speaker_name)
    speaker_crop_dir = os.path.join(crop_dir, speaker_name)
    speaker_anno_dir = os.path.join(anno_dir, speaker_name)
    if not os.path.exists(speaker_image_dir):
        continue
    # exist_ok avoids the check-then-create race of the exists()/mkdir pair.
    os.makedirs(speaker_crop_dir, exist_ok=True)
    for sentence in os.listdir(speaker_image_dir):
        anno_filepath = os.path.join(speaker_anno_dir, f'{sentence}.align')
        if not os.path.exists(anno_filepath):
            continue
        translate_pairs.append((speaker_no, sentence))

print('PAIRS', len(translate_pairs))

bad_sentences = set()
bad_filepaths = []


def extract_mouth_image(speaker_no, sentence):
    """Verify every frame of (speaker_no, sentence) has a crop file.

    NOTE(review): despite the name, this function does not generate crops —
    it only checks for their presence, appending each missing path to the
    module-level ``bad_filepaths`` list and flagging the sentence in
    ``bad_sentences``.
    """
    speaker_name = f's{speaker_no}'
    speaker_image_dir = os.path.join(image_dir, speaker_name)
    speaker_crop_dir = os.path.join(crop_dir, speaker_name)
    img_sentence_dir = os.path.join(speaker_image_dir, sentence)
    crop_sentence_dir = os.path.join(speaker_crop_dir, sentence)
    os.makedirs(crop_sentence_dir, exist_ok=True)
    for filename in os.listdir(img_sentence_dir):
        img_filepath = os.path.join(img_sentence_dir, filename)
        if not img_filepath.endswith('.jpg'):
            continue
        crop_filepath = os.path.join(crop_sentence_dir, filename)
        if not os.path.exists(crop_filepath):
            bad_filepaths.append(crop_filepath)
            print('BAD FILEPATH', (speaker_no, sentence, filename))
            bad_sentences.add((speaker_no, sentence))


for translate_pair in tqdm(translate_pairs):
    extract_mouth_image(*translate_pair)

print('BAD SENTENCES', list(bad_sentences))
print('>>>')
# extract_mouth_image(1, 'pbio7a')