from os import listdir
from os.path import isfile, join
from math import floor

import numpy as np
from scipy.ndimage import zoom

import imageio
import cv2


class Video:
    """Thin wrapper around an imageio ffmpeg reader for indexed frame access."""

    def __init__(self, path):
        self.path = path
        self.container = imageio.get_reader(path, 'ffmpeg')
        self.length = self.container.count_frames()
        self.fps = self.container.get_meta_data()['fps']

    def init_head(self):
        """Reset the reading head to the first frame."""
        self.container.set_image_index(0)

    def next_frame(self):
        """Read and return the frame at the current head position."""
        return self.container.get_next_data()

    def get(self, key):
        """Return the frame at index `key`."""
        return self.container.get_data(key)

    def __call__(self, key):
        return self.get(key)

    def __len__(self):
        return self.length


class FaceFinder(Video):
    """Video reader that locates the largest face on a subset of frames."""

    def __init__(self, path, load_first_face=True):
        super().__init__(path)
        self.faces = {}  # frame index -> (y0, x1, y1, x0) face box
        self.last_frame = self.get(0)
        self.frame_shape = self.last_frame.shape[:2]
        self.last_location = (0, 200, 200, 0)

        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

        if load_first_face:
            face_positions = self.detect_faces(self.last_frame)
            if len(face_positions) > 0:
                self.last_location = self.expand_location_zone(face_positions[0])

    def detect_faces(self, frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)):
        """Detect faces using the Haar cascade; returns (x, y, w, h) boxes."""
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        faces = self.face_cascade.detectMultiScale(gray_frame,
                                                   scaleFactor=scaleFactor,
                                                   minNeighbors=minNeighbors,
                                                   minSize=minSize)
        return faces

    def expand_location_zone(self, loc, margin=0.2):
        """Add a margin around an (x, y, w, h) box, clipped to the frame."""
        x, y, w, h = loc
        offset_x = round(margin * w)
        offset_y = round(margin * h)
        y0 = max(y - offset_y, 0)
        x1 = min(x + w + offset_x, self.frame_shape[1])
        y1 = min(y + h + offset_y, self.frame_shape[0])
        x0 = max(x - offset_x, 0)
        return (y0, x1, y1, x0)

    def find_faces(self, resize=0.5, stop=0, skipstep=0, cut_left=0, cut_right=-1):
        """Core loop: detect the largest face on a subset of frames.

        Detection runs on a copy downscaled by `resize` for speed; the
        resulting box is mapped back to full-frame coordinates.
        """
        if stop != 0:
            finder_frameset = range(0, min(self.length, stop), skipstep + 1)
        else:
            finder_frameset = range(0, self.length, skipstep + 1)

        for i in finder_frameset:
            frame = self.get(i)
            # Optionally blank out the left/right borders before detection.
            if cut_left != 0:
                frame[:, :cut_left] = 0
            if cut_right != -1:
                frame[:, cut_right:] = 0

            if 0 < resize < 1:
                small = cv2.resize(frame, (0, 0), fx=resize, fy=resize)
            else:
                small = frame
            face_positions = self.detect_faces(small)

            if len(face_positions) > 0:
                largest_face = max(face_positions, key=lambda f: f[2] * f[3])
                if 0 < resize < 1:
                    largest_face = [int(v / resize) for v in largest_face]
                self.faces[i] = self.expand_location_zone(largest_face)
                self.last_location = self.faces[i]
            else:
                print(f"No face detected in frame {i}")

        print(f"Face extraction completed: {len(self.faces)} faces detected.")

    def get_face(self, i):
        """Return the face crop for frame i, or the full frame if no face was found."""
        frame = self.get(i)
        if i in self.faces:
            y0, x1, y1, x0 = self.faces[i]
            return frame[y0:y1, x0:x1]
        return frame


class FaceBatchGenerator:
    '''
    Iterates over the face crops of a FaceFinder in fixed-size batches.
    '''
    def __init__(self, face_finder, target_size=256):
        self.finder = face_finder
        self.target_size = target_size
        self.head = 0
        self.length = int(face_finder.length)

    def resize_patch(self, patch):
        """Resize a face crop to (target_size, target_size, 3)."""
        m, n = patch.shape[:2]
        return zoom(patch, (self.target_size / m, self.target_size / n, 1))

    def next_batch(self, batch_size=50):
        batch = np.zeros((1, self.target_size, self.target_size, 3))
        i = 0
        while (i < batch_size) and (self.head < self.length):
            if self.head in self.finder.faces:
                patch = self.finder.get_face(self.head)
                batch = np.concatenate((batch, np.expand_dims(self.resize_patch(patch), axis=0)),
                                       axis=0)
                i += 1
            self.head += 1
        return batch[1:]


def predict_faces(generator, classifier, batch_size=50, output_size=1):
    '''
    Compute predictions for a face batch generator.
    '''
    n = len(generator.finder.faces)
    profile = np.zeros((1, output_size))
    for epoch in range(n // batch_size + 1):
        face_batch = generator.next_batch(batch_size=batch_size)
        prediction = classifier.predict(face_batch)
        if len(prediction) > 0:
            profile = np.concatenate((profile, prediction))
    return profile[1:]


def compute_accuracy(classifier, dirname, frame_subsample_count=30):
    '''
    Extraction + prediction over all videos in a directory.
    '''
    filenames = [f for f in listdir(dirname)
                 if isfile(join(dirname, f)) and f.lower().endswith(('.mp4', '.avi', '.mov'))]
    predictions = {}

    for vid in filenames:
        print('Dealing with video ', vid)

        face_finder = FaceFinder(join(dirname, vid), load_first_face=False)
        skipstep = max(floor(face_finder.length / frame_subsample_count), 0)
        face_finder.find_faces(resize=0.5, skipstep=skipstep)

        print('Predicting ', vid)
        gen = FaceBatchGenerator(face_finder)
        p = predict_faces(gen, classifier)

        # Score: fraction of sampled faces whose prediction exceeds 0.5, plus raw scores.
        predictions[vid[:-4]] = (np.mean(p > 0.5), p)
    return predictions
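

# Minimal usage sketch (not part of the pipeline above): exercises the whole
# extraction + prediction chain with a stand-in classifier. Any object exposing
# a Keras-style .predict(batch) method that accepts arrays of shape
# (n, 256, 256, 3) and returns one score per face can be substituted; the
# 'test_videos' directory name below is a placeholder.
if __name__ == '__main__':
    import sys

    class _MeanPixelClassifier:
        """Stand-in classifier used only to exercise the pipeline end to end."""
        def predict(self, batch):
            if len(batch) == 0:
                return np.zeros((0, 1))
            # One score per face: the normalised mean pixel intensity.
            return batch.reshape(len(batch), -1).mean(axis=1, keepdims=True) / 255.0

    test_dir = sys.argv[1] if len(sys.argv) > 1 else 'test_videos'
    results = compute_accuracy(_MeanPixelClassifier(), test_dir)
    for video_name, (score, scores) in results.items():
        print(f'{video_name}: fraction of faces scored above 0.5 = {score:.3f} ({len(scores)} faces)')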