from math import floor
from os import listdir
from os.path import isfile, join

import cv2
import imageio
import numpy as np
# `scipy.ndimage.interpolation` is a deprecated namespace; the same functions
# are exposed directly by `scipy.ndimage`.
from scipy.ndimage import zoom, rotate


## Face extraction

class Video:
    """Thin wrapper around an imageio/ffmpeg reader for one video file.

    Attributes set by the constructor:
        path: the path the reader was opened with.
        container: the imageio reader object.
        length: frame count as reported by ``container.count_frames()``.
        fps: the ``'fps'`` entry of the reader's metadata.
    """

    def __init__(self, path):
        self.path = path
        self.container = imageio.get_reader(path, 'ffmpeg')
        self.length = self.container.count_frames()
        self.fps = self.container.get_meta_data()['fps']

    def init_head(self):
        """Reset the reader's sequential position to frame 0."""
        self.container.set_image_index(0)

    def next_frame(self):
        """Return the next frame in sequential order.

        The original implementation read the frame but discarded it.
        """
        return self.container.get_next_data()

    def get(self, key):
        """Return the frame at index ``key``."""
        return self.container.get_data(key)

    def close(self):
        """Release the underlying reader."""
        self.container.close()

    def __call__(self, key):
        return self.get(key)

    def __len__(self):
        return self.length


class FaceFinder(Video):
    """Video that locates one face per frame with an OpenCV Haar cascade.

    Detected boxes are stored in ``self.faces`` as ``{frame_index: box}``
    where ``box`` is ``(y0, x1, y1, x0)``, i.e. (top, right, bottom, left).
    """

    def __init__(self, path, load_first_face=True):
        """Open ``path`` and optionally detect a face on the first frame.

        Args:
            path: video file path, forwarded to ``Video.__init__``.
            load_first_face: when true, run detection on frame 0 and use the
                first returned box (with margin) as ``last_location``.
        """
        super().__init__(path)
        self.faces = {}
        # Legacy attribute: (center, rotation, length) records. Nothing in
        # this class fills it; `find_faces` writes to `self.faces` instead.
        self.coordinates = {}
        self.last_frame = self.get(0)
        self.frame_shape = self.last_frame.shape[:2]
        # Fallback box in the same (y0, x1, y1, x0) order used by `faces`.
        self.last_location = (0, 200, 200, 0)

        # Initialize OpenCV's Haar Cascade for face detection
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )

        if load_first_face:
            face_positions = self.detect_faces(self.last_frame)
            # `len(...)` rather than truthiness: the detector may return a
            # NumPy array, whose truth value is ambiguous.
            if len(face_positions) > 0:
                self.last_location = self.expand_location_zone(face_positions[0])

    def detect_faces(self, frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)):
        """Detect faces using Haar Cascade.

        The frame is converted with ``cv2.COLOR_RGB2GRAY`` and passed to
        ``detectMultiScale`` with the given tuning parameters. Returns the
        detector's result unchanged (a sequence of ``(x, y, w, h)`` boxes).
        """
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        return self.face_cascade.detectMultiScale(
            gray_frame,
            scaleFactor=scaleFactor,
            minNeighbors=minNeighbors,
            minSize=minSize,
        )

    def expand_location_zone(self, loc, margin=0.2):
        """Adds a margin around a frame slice.

        Args:
            loc: ``(x, y, w, h)`` box.
            margin: fraction of the box width/height added on each side.

        Returns:
            ``(y0, x1, y1, x0)`` clipped to the frame bounds, as plain ints.
        """
        x, y, w, h = loc
        offset_x = round(margin * w)
        offset_y = round(margin * h)
        y0 = max(y - offset_y, 0)
        x1 = min(x + w + offset_x, self.frame_shape[1])
        y1 = min(y + h + offset_y, self.frame_shape[0])
        x0 = max(x - offset_x, 0)
        # Plain ints keep the stored boxes independent of NumPy scalar types.
        return (int(y0), int(x1), int(y1), int(x0))

    def find_faces(self, resize=0.5, stop=0, skipstep=0, cut_left=0, cut_right=-1):
        """The core function to extract faces from frames.

        Args:
            resize: accepted for backward compatibility; not used.
            stop: if non-zero, only frames with index < ``stop`` are scanned.
            skipstep: number of frames skipped between two scanned frames;
                negative values are treated as 0.
            cut_left: if non-zero, columns ``[:cut_left]`` are blanked before
                detection.
            cut_right: if not -1, columns ``[cut_right:]`` are blanked before
                detection. -1 means "no right cut".

        Side effects:
            Fills ``self.faces`` and updates ``self.last_location``; prints a
            message for every frame without a detection and a final summary.
        """
        # A step below 1 would make `range` raise (0) or iterate nothing (<0).
        step = max(int(skipstep), 0) + 1
        last = min(self.length, stop) if stop != 0 else self.length

        for i in range(0, last, step):
            frame = self.get(i)

            if cut_left != 0 or cut_right != -1:
                # Work on a copy so the reader's frame is not modified.
                frame = np.array(frame)
                if cut_left != 0:
                    frame[:, :cut_left] = 0
                # Only blank the right side when a cut was requested; slicing
                # from the -1 sentinel would erase the last pixel column.
                if cut_right != -1:
                    frame[:, cut_right:] = 0

            # Detect faces in the current frame
            face_positions = self.detect_faces(frame)

            if len(face_positions) > 0:
                # Use the largest detected face (by w * h)
                largest_face = max(face_positions, key=lambda f: f[2] * f[3])
                self.faces[i] = self.expand_location_zone(largest_face)
                self.last_location = self.faces[i]
            else:
                print(f"No face detected in frame {i}")

        print(f"Face extraction completed: {len(self.faces)} faces detected.")

    def get_face(self, i):
        """Extract the face region for the given frame index.

        Returns the crop described by ``self.faces[i]`` when frame ``i`` has a
        stored box, otherwise the whole frame.
        """
        frame = self.get(i)
        if i in self.faces:
            y0, x1, y1, x0 = self.faces[i]
            return frame[y0:y1, x0:x1]
        return frame


## Face prediction

def _face_source(finder):
    """Return ``(index_map, extractor)`` describing a finder's detections.

    Finders that fill ``coordinates`` and implement ``get_aligned_face`` keep
    using that pair. Otherwise (as with ``FaceFinder`` above, which only fills
    ``faces``) the ``faces`` mapping and ``get_face`` are used.
    """
    coordinates = getattr(finder, 'coordinates', None)
    aligned = getattr(finder, 'get_aligned_face', None)
    if coordinates and callable(aligned):
        return coordinates, aligned
    return finder.faces, finder.get_face


class FaceBatchGenerator:
    '''
    Made to deal with framesubsets of video.

    Walks the finder's frame indices in order and yields batches of face
    patches resized to ``target_size`` x ``target_size``.
    '''

    def __init__(self, face_finder, target_size=256):
        self.finder = face_finder
        self.target_size = target_size
        self.head = 0  # next frame index to inspect
        self.length = int(face_finder.length)

    def resize_patch(self, patch):
        """Rescale the first two axes of ``patch`` to ``target_size``.

        The zoom tuple has three entries, so ``patch`` must be 3-D; the last
        axis is left unscaled.
        """
        m, n = patch.shape[:2]
        return zoom(patch, (self.target_size / m, self.target_size / n, 1))

    def next_batch(self, batch_size=50):
        """Return up to ``batch_size`` resized face patches.

        Advances ``self.head`` past every inspected frame, so consecutive
        calls continue where the previous one stopped.

        Returns:
            float64 array of shape ``(k, target_size, target_size, 3)`` with
            ``0 <= k <= batch_size``; ``k == 0`` once the video is exhausted.
        """
        index_map, extract = _face_source(self.finder)
        patches = []
        while len(patches) < batch_size and self.head < self.length:
            if self.head in index_map:
                patch = extract(self.head)
                # An empty crop cannot be rescaled (division by zero).
                if patch.size > 0:
                    patches.append(self.resize_patch(patch))
            self.head += 1

        if not patches:
            return np.zeros((0, self.target_size, self.target_size, 3))
        # float64 matches the dtype the previous concatenate-based code produced.
        return np.asarray(np.stack(patches), dtype=np.float64)


def predict_faces(generator, classifier, batch_size=50, output_size=1):
    '''
    Compute predictions for a face batch generator

    Args:
        generator: a ``FaceBatchGenerator``.
        classifier: object exposing ``predict(batch)`` that returns one row of
            ``output_size`` values per input patch.
        batch_size: number of faces per ``predict`` call.
        output_size: width of one prediction row.

    Returns:
        float64 array of shape ``(n_predictions, output_size)``; empty when no
        face was available.
    '''
    index_map, _ = _face_source(generator.finder)
    n_faces = len(index_map)

    # The zero-row seed fixes the dtype/width and makes the empty case valid.
    chunks = [np.zeros((0, output_size))]
    for _ in range(n_faces // batch_size + 1):
        face_batch = generator.next_batch(batch_size=batch_size)
        if len(face_batch) == 0:
            # Generator exhausted; do not call the classifier on nothing.
            break
        prediction = classifier.predict(face_batch)
        if len(prediction) > 0:
            chunks.append(prediction)
    return np.concatenate(chunks)


def compute_accuracy(classifier, dirname, frame_subsample_count=30):
    '''
    Extraction + Prediction over a video

    Processes every ``.mp4`` / ``.avi`` / ``.mov`` file directly inside
    ``dirname``.

    Args:
        classifier: object exposing ``predict(batch)``.
        dirname: directory containing the videos.
        frame_subsample_count: divisor used to derive the frame skip step
            (``floor(length / frame_subsample_count)``); a non-positive value
            disables subsampling.

    Returns:
        dict mapping the file name without its 4-character extension to
        ``(fraction of predictions > 0.5, predictions)``. The fraction is NaN
        when no prediction was produced for that video.
    '''
    video_exts = ('.mp4', '.avi', '.mov')
    filenames = [
        f for f in listdir(dirname)
        if isfile(join(dirname, f)) and f.endswith(video_exts)
    ]
    predictions = {}

    for vid in filenames:
        print('Dealing with video ', vid)

        # Compute face locations and store them in the face finder
        face_finder = FaceFinder(join(dirname, vid), load_first_face=False)
        try:
            if frame_subsample_count > 0:
                skipstep = max(floor(face_finder.length / frame_subsample_count), 0)
            else:
                skipstep = 0
            face_finder.find_faces(resize=0.5, skipstep=skipstep)

            print('Predicting ', vid)
            gen = FaceBatchGenerator(face_finder)
            p = predict_faces(gen, classifier)
        finally:
            # One ffmpeg reader per video; release it before the next one.
            face_finder.close()

        # np.mean of an empty array warns and yields NaN; make that explicit.
        score = np.mean(p > 0.5) if p.size else np.nan
        predictions[vid[:-4]] = (score, p)

    return predictions