VideoDetection

Runtime error

File size: 9,280 Bytes

fc3814c

"""
Video Face Manipulation Detection Through Ensemble of CNNs

Image and Sound Processing Lab - Politecnico di Milano

Nicolò Bonettini
Edoardo Daniele Cannas
Sara Mandelli
Luca Bondi
Paolo Bestagini
"""
from pprint import pprint
from typing import Iterable, List

import albumentations as A
import cv2
import numpy as np
import scipy
import torch
from PIL import Image
from albumentations.pytorch import ToTensorV2
from matplotlib import pyplot as plt
from torch import nn as nn
from torchvision import transforms


def extract_meta_av(path: str) -> (int, int, int):
    """
    Extract video height, width and number of frames to index the files, using PyAV.

    The container is closed before returning, whether or not the read succeeds.

    :param path: path of the video file to probe
    :return: (height, width, frames) as reported by the first video stream, or (0, 0, 0) if the
             file cannot be opened or contains no video stream
    """
    import av
    try:
        # The context manager closes the container on every exit path (return or exception)
        with av.open(path) as video:
            video_stream = video.streams.video[0]
            return video_stream.height, video_stream.width, video_stream.frames
    except av.AVError as e:
        print('Error while reading file: {}'.format(path))
        print(e)
        return 0, 0, 0
    except IndexError as e:
        # Raised by streams.video[0] when the container has no video stream
        print('Error while processing file: {}'.format(path))
        print(e)
        return 0, 0, 0


def extract_meta_cv(path: str) -> (int, int, int):
    """
    Extract video height, width and number of frames to index the files, using OpenCV.

    The capture handle is released before returning, whether or not the read succeeds.

    :param path: path of the video file to probe
    :return: (height, width, num_frames) read from the capture properties, or (0, 0, 0) if an
             exception is raised while reading them
    """
    vid = None
    try:
        vid = cv2.VideoCapture(path)
        num_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        return height, width, num_frames
    except Exception as e:
        # Deliberately broad: this is a best-effort probe that reports failure as (0, 0, 0)
        print('Error while reading file: {}'.format(path))
        print(e)
        return 0, 0, 0
    finally:
        # Free the underlying decoder/file handle; vid is None only if the constructor itself raised
        if vid is not None:
            vid.release()


def adapt_bb(frame_height: int, frame_width: int, bb_height: int, bb_width: int, left: int, top: int, right: int,
             bottom: int) -> (
        int, int, int, int):
    """
    Build a box of the requested size centered on the given box and limited to the frame.

    The start of each side is clamped to 0 and the end to the frame extent, so the returned box
    may be smaller than bb_width x bb_height near the frame borders.

    :param frame_height: height of the frame
    :param frame_width: width of the frame
    :param bb_height: requested height of the output box
    :param bb_width: requested width of the output box
    :param left: left coordinate of the input box
    :param top: top coordinate of the input box
    :param right: right coordinate of the input box
    :param bottom: bottom coordinate of the input box
    :return: (new_left, new_top, new_right, new_bottom)
    """

    def _window(centre, extent, limit):
        # Start half an extent before the centre (not below 0), end one extent later (not past limit)
        start = max(centre - extent // 2, 0)
        return start, min(start + extent, limit)

    new_left, new_right = _window((left + right) // 2, bb_width, frame_width)
    new_top, new_bottom = _window((bottom + top) // 2, bb_height, frame_height)
    return new_left, new_top, new_right, new_bottom


def extract_bb(frame: Image.Image, bb: Iterable, scale: str, size: int) -> Image.Image:
    """
    Cut a face out of a frame, following the requested scale policy.

    :param frame: the whole frame
    :param bb: bounding box (left, top, right, bottom) expressed in frame coordinates
    :param scale: "scale" crops a square whose side is the larger of the face height and width,
                  then resizes it to size x size;
                  "crop" crops a size x size square around the face center, without resizing;
                  "tight" crops at the bounding box, without resizing
    :param size: side of the output face (used by "scale" and "crop")
    :return: the cropped face
    :raises ValueError: if scale is not one of the policies above
    """
    left, top, right, bottom = bb

    if scale == "tight":
        box = adapt_bb(frame.height, frame.width, bottom - top, right - left, left, top, right, bottom)
        return frame.crop(box)

    if scale == "crop":
        # Fixed-size square centered on the bounding box
        box = adapt_bb(frame.height, frame.width, size, size, left, top, right, bottom)
        return frame.crop(box)

    if scale == "scale":
        face_width = int(right) - int(left)
        face_height = int(bottom) - int(top)
        if face_width > 0 and face_height > 0:
            # Ratio that brings the longer face side to `size`
            ratio = min(size / face_height, size / face_width)
        else:
            # Degenerate box: fall back to a size x size crop
            ratio = 1.
        side = int(size / ratio)
        box = adapt_bb(frame.height, frame.width, side, side, left, top, right, bottom)
        return frame.crop(box).resize((size, size), Image.BILINEAR)

    raise ValueError('Unknown scale value: {}'.format(scale))


def showimage(img_tensor: torch.Tensor):
    """
    Show a normalized image tensor in a new matplotlib figure.

    Before display the tensor is multiplied channel-wise by (0.229, 0.224, 0.225) and then shifted
    by (0.485, 0.456, 0.406), i.e. a normalization with that mean and std is undone, and the result
    is converted to a PIL image.

    :param img_tensor: image tensor to display (three channels, given the per-channel constants)
    """
    channel_std = (0.229, 0.224, 0.225)
    channel_mean = (0.485, 0.456, 0.406)

    # Normalize computes (x - mean) / std, so std = 1 / s multiplies by s ...
    undo_scale = transforms.Normalize(mean=[0, 0, 0], std=[1 / s for s in channel_std])
    # ... and mean = -m with unit std adds m back
    undo_shift = transforms.Normalize(mean=[-m for m in channel_mean], std=[1, 1, 1])
    to_pil = transforms.Compose([undo_scale, undo_shift, transforms.ToPILImage()])

    plt.figure()
    plt.imshow(to_pil(img_tensor))
    plt.show()


def make_train_tag(net_class: nn.Module,
                   face_policy: str,
                   patch_size: int,
                   traindb: List[str],
                   seed: int,
                   suffix: str,
                   debug: bool,
                   ):
    """
    Build the tag string identifying a training run, printing the parameters and the tag.

    The tag is a '_'-separated list of 'key-value' pairs (net, traindb, face, size, seed),
    prefixed by 'debug_' when debug is set and followed by '_<suffix>' when a suffix is given.

    :param net_class: network class; its __name__ is used in the tag
    :param face_policy: face extraction policy
    :param patch_size: size of the patch
    :param traindb: names of the training datasets, joined with '-'
    :param seed: random seed
    :param suffix: optional trailing string, skipped when None
    :param debug: whether to mark the tag as a debug run
    :return: the tag string
    """
    tag_params = {
        'net': net_class.__name__,
        'traindb': '-'.join(traindb),
        'face': face_policy,
        'size': patch_size,
        'seed': seed,
    }
    print('Parameters')
    pprint(tag_params)

    pieces = [name + '-' + str(value) for name, value in tag_params.items()]
    tag = ('debug_' if debug else '') + '_'.join(pieces)
    if suffix is not None:
        tag = tag + '_' + suffix

    print('Tag: {:s}'.format(tag))
    return tag


def get_transformer(face_policy: str, patch_size: int, net_normalizer: transforms.Normalize, train: bool):
    """
    Assemble the albumentations pipeline applied to a face before it is fed to the network.

    The pipeline is, in order: policy-dependent loading steps, an optional train-time downscale,
    optional train-time augmentations, then normalization and conversion to tensor.

    :param face_policy: 'scale' (pad if needed, then resize to patch_size x patch_size) or
                        'tight' (resize the longest side to patch_size, then pad to a square)
    :param patch_size: side of the square patch produced by the loading steps
    :param net_normalizer: object whose mean and std attributes are passed to A.Normalize
    :param train: when True, the downscale and augmentation steps are included
    :return: the composed transformation
    :raises ValueError: if face_policy is neither 'scale' nor 'tight'
    """
    # Policy-dependent steps bringing every face to patch_size x patch_size
    if face_policy == 'scale':
        # The loader crops the face isotropically then scales it to a square
        load_steps = [
            A.PadIfNeeded(min_height=patch_size, min_width=patch_size,
                          border_mode=cv2.BORDER_CONSTANT, value=0, always_apply=True),
            A.Resize(height=patch_size, width=patch_size, always_apply=True),
        ]
    elif face_policy == 'tight':
        # The loader crops the face tightly without any scaling
        load_steps = [
            A.LongestMaxSize(max_size=patch_size, always_apply=True),
            A.PadIfNeeded(min_height=patch_size, min_width=patch_size,
                          border_mode=cv2.BORDER_CONSTANT, value=0, always_apply=True),
        ]
    else:
        raise ValueError('Unknown value for face_policy: {}'.format(face_policy))

    # The train-time downscale is the same for both policies (replaces scaled dataset)
    downscale_steps = [A.Downscale(scale_max=0.5, scale_min=0.5, p=0.5)] if train else []

    augmentation_steps = []
    if train:
        color_jitter = A.OneOf([
            A.RandomBrightnessContrast(),
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=20),
        ])
        noise = A.OneOf([
            A.ISONoise(),
            A.IAAAdditiveGaussianNoise(scale=(0.01 * 255, 0.03 * 255)),
        ])
        augmentation_steps.append(
            A.Compose([
                A.HorizontalFlip(),
                color_jitter,
                noise,
                A.Downscale(scale_min=0.7, scale_max=0.9, interpolation=cv2.INTER_LINEAR),
                A.ImageCompression(quality_lower=50, quality_upper=99),
            ], )
        )

    # Steps shared by every configuration
    closing_steps = [
        A.Normalize(mean=net_normalizer.mean, std=net_normalizer.std, ),
        ToTensorV2(),
    ]

    return A.Compose(load_steps + downscale_steps + augmentation_steps + closing_steps)


def aggregate(x, deadzone: float, pre_mult: float, policy: str, post_mult: float, clipmargin: float, params={}):
    x = x.copy()
    if deadzone > 0:
        x = x[(x > deadzone) | (x < -deadzone)]
        if len(x) == 0:
            x = np.asarray([0, ])
    if policy == 'mean':
        x = np.mean(x)
        x = scipy.special.expit(x * pre_mult)
        x = (x - 0.5) * post_mult + 0.5
    elif policy == 'sigmean':
        x = scipy.special.expit(x * pre_mult).mean()
        x = (x - 0.5) * post_mult + 0.5
    elif policy == 'meanp':
        pow_coeff = params.pop('p', 3)
        x = np.mean(np.sign(x) * (np.abs(x) ** pow_coeff))
        x = np.sign(x) * (np.abs(x) ** (1 / pow_coeff))
        x = scipy.special.expit(x * pre_mult)
        x = (x - 0.5) * post_mult + 0.5
    elif policy == 'median':
        x = scipy.special.expit(np.median(x) * pre_mult)
        x = (x - 0.5) * post_mult + 0.5
    elif policy == 'sigmedian':
        x = np.median(scipy.special.expit(x * pre_mult))
        x = (x - 0.5) * post_mult + 0.5
    elif policy == 'maxabs':
        x = np.min(x) if abs(np.min(x)) > abs(np.max(x)) else np.max(x)
        x = scipy.special.expit(x * pre_mult)
        x = (x - 0.5) * post_mult + 0.5
    elif policy == 'avgvoting':
        x = np.mean(np.sign(x))
        x = (x * post_mult + 1) / 2
    elif policy == 'voting':
        x = np.sign(np.mean(x * pre_mult))
        x = (x - 0.5) * post_mult + 0.5
    else:
        raise NotImplementedError()
    return np.clip(x, clipmargin, 1 - clipmargin)