VideoDetection

Runtime error

VideoDetection / icpr2020dfdc /isplutils /utils.py

Mohamed Almukhtar

Duplicate from malmukhtar/ImageDetection

fc3814c over 2 years ago

9.28 kB

	"""
	Video Face Manipulation Detection Through Ensemble of CNNs

	Image and Sound Processing Lab - Politecnico di Milano

	Nicolò Bonettini
	Edoardo Daniele Cannas
	Sara Mandelli
	Luca Bondi
	Paolo Bestagini
	"""
	from pprint import pprint
	from typing import Iterable, List, Optional, Tuple

	import albumentations as A
	import cv2
	import numpy as np
	import scipy
	import torch
	from PIL import Image
	from albumentations.pytorch import ToTensorV2
	from matplotlib import pyplot as plt
	from torch import nn as nn
	from torchvision import transforms


	def extract_meta_av(path: str) -> Tuple[int, int, int]:
	    """
	    Extract video height, width and number of frames to index the files, using PyAV
	    :param path: path of the video file to open
	    :return: (height, width, frames) of the first video stream, or (0, 0, 0) if the file
	    cannot be read or contains no video stream
	    """
	    import av
	    # NOTE(review): recent PyAV releases appear to have dropped the AVError alias in favour of
	    # av.error.FFmpegError; prefer AVError when it exists so older installs behave as before.
	    av_error = getattr(av, 'AVError', None) or av.error.FFmpegError
	    try:
	        # The context manager closes the container even when reading the stream info fails
	        with av.open(path) as video:
	            video_stream = video.streams.video[0]
	            return video_stream.height, video_stream.width, video_stream.frames
	    except av_error as e:
	        print('Error while reading file: {}'.format(path))
	        print(e)
	        return 0, 0, 0
	    except IndexError as e:
	        # Raised by video.streams.video[0] when the container has no video stream
	        print('Error while processing file: {}'.format(path))
	        print(e)
	        return 0, 0, 0


	def extract_meta_cv(path: str) -> Tuple[int, int, int]:
	    """
	    Extract video height, width and number of frames to index the files, using OpenCV
	    :param path: path of the video file to open
	    :return: (height, width, num_frames) as reported by cv2.VideoCapture, or (0, 0, 0) if
	    any exception is raised while reading
	    """
	    vid = None
	    try:
	        vid = cv2.VideoCapture(path)
	        # NOTE(review): OpenCV usually signals an unreadable file by returning 0 for these
	        # properties rather than raising - confirm against the installed version
	        num_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
	        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
	        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
	        return height, width, num_frames
	    except Exception as e:
	        print('Error while reading file: {}'.format(path))
	        print(e)
	        return 0, 0, 0
	    finally:
	        # Release the capture handle on every path so indexing many files does not leak handles
	        if vid is not None:
	            vid.release()


	def adapt_bb(frame_height: int, frame_width: int, bb_height: int, bb_width: int, left: int, top: int, right: int,
	             bottom: int) -> Tuple[int, int, int, int]:
	    """
	    Build a box of the requested size around the center of the given bounding box
	    The top-left corner is clamped to 0 and the bottom-right corner to the frame size; the box is
	    clamped, not shifted, so it can be smaller than requested near the bottom/right border
	    :param frame_height: height of the frame
	    :param frame_width: width of the frame
	    :param bb_height: requested height of the new box
	    :param bb_width: requested width of the new box
	    :param left: left coordinate of the original box
	    :param top: top coordinate of the original box
	    :param right: right coordinate of the original box
	    :param bottom: bottom coordinate of the original box
	    :return: (new_left, new_top, new_right, new_bottom)
	    """
	    x_ctr = (left + right) // 2
	    y_ctr = (bottom + top) // 2
	    new_top = max(y_ctr - bb_height // 2, 0)
	    new_bottom = min(new_top + bb_height, frame_height)
	    new_left = max(x_ctr - bb_width // 2, 0)
	    new_right = min(new_left + bb_width, frame_width)
	    return new_left, new_top, new_right, new_bottom


	def extract_bb(frame: Image.Image, bb: Iterable, scale: str, size: int) -> Image.Image:
	    """
	    Cut a face out of a frame, following the given bounding box and scale policy
	    :param frame: Entire frame
	    :param bb: Bounding box (left,top,right,bottom) in the reference system of the frame
	    :param scale: "scale" to crop a square whose side is the larger of the face height and width, then resize it to size
	    "crop" to crop a fixed square of side size around the face center,
	    "tight" to crop the face at the bounding box with no scaling
	    :param size: size of the face
	    :return: the cropped (and, for "scale", resized) face
	    :raises ValueError: if scale is not one of the three policies above
	    """
	    left, top, right, bottom = bb
	    if scale == "scale":
	        face_w = int(right) - int(left)
	        face_h = int(bottom) - int(top)
	        if face_w > 0 and face_h > 0:
	            bb_to_desired_ratio = min(size / face_h, size / face_w)
	        else:
	            # Degenerate box: fall back to a square of side size
	            bb_to_desired_ratio = 1.
	        # Height and width of the crop are the same value, so the crop is a square
	        side = int(size / bb_to_desired_ratio)
	        box = adapt_bb(frame.height, frame.width, side, side, left, top, right, bottom)
	        return frame.crop(tuple(box)).resize((size, size), Image.BILINEAR)
	    if scale == "crop":
	        # Find the center of the bounding box and cut an area of size x size around it
	        box = adapt_bb(frame.height, frame.width, size, size, left, top, right, bottom)
	        return frame.crop(tuple(box))
	    if scale == "tight":
	        box = adapt_bb(frame.height, frame.width, bottom - top, right - left, left, top, right, bottom)
	        return frame.crop(tuple(box))
	    raise ValueError('Unknown scale value: {}'.format(scale))


	def showimage(img_tensor: torch.Tensor):
	    """
	    Revert the normalization applied to an image tensor and show it in a new matplotlib figure
	    The constants are presumably the usual ImageNet mean/std - confirm against the normalizer in use
	    :param img_tensor: normalized image tensor
	    :return: None
	    """
	    # Normalize computes (x - mean) / std, so std=1/s multiplies by s and mean=-m adds m
	    undo_std = transforms.Normalize(mean=[0, 0, 0, ], std=[1 / 0.229, 1 / 0.224, 1 / 0.225])
	    undo_mean = transforms.Normalize(mean=[-0.485, -0.456, -0.406], std=[1, 1, 1])
	    topil = transforms.Compose([undo_std, undo_mean, transforms.ToPILImage()])
	    plt.figure()
	    pil_img = topil(img_tensor)
	    plt.imshow(pil_img)
	    plt.show()


	def make_train_tag(net_class: nn.Module,
	                   face_policy: str,
	                   patch_size: int,
	                   traindb: List[str],
	                   seed: int,
	                   suffix: str,
	                   debug: bool,
	                   ):
	    """
	    Build the tag string that identifies a training run and print it with its parameters
	    :param net_class: network class; only its __name__ is used
	    :param face_policy: face extraction policy, stored under the "face" key
	    :param patch_size: patch size, stored under the "size" key
	    :param traindb: training dataset names, joined with "-"
	    :param seed: random seed
	    :param suffix: optional text appended after a "_"; skipped when None
	    :param debug: if True the tag is prefixed with "debug_"
	    :return: the tag, made of "key-value" pairs joined with "_"
	    """
	    # Training parameters and tag
	    tag_params = {
	        'net': net_class.__name__,
	        'traindb': '-'.join(traindb),
	        'face': face_policy,
	        'size': patch_size,
	        'seed': seed,
	    }
	    print('Parameters')
	    pprint(tag_params)
	    pieces = [name + '-' + str(value) for name, value in tag_params.items()]
	    prefix = 'debug_' if debug else ''
	    tag = prefix + '_'.join(pieces)
	    if suffix is not None:
	        tag = tag + '_' + suffix
	    print('Tag: {:s}'.format(tag))
	    return tag


	def get_transformer(face_policy: str, patch_size: int, net_normalizer: transforms.Normalize, train: bool):
	    """
	    Assemble the albumentations pipeline: loading transforms for the face policy, then (train only)
	    downscaling and augmentations, then normalization and conversion to tensor
	    :param face_policy: "scale" or "tight"
	    :param patch_size: side of the square patch produced by the loading transforms
	    :param net_normalizer: normalizer whose mean and std are reused for A.Normalize
	    :param train: if True the train-time downscale and augmentations are included
	    :return: the composed albumentations transform
	    :raises ValueError: if face_policy is neither "scale" nor "tight"
	    """
	    def zero_pad():
	        # Constant (black) padding up to patch_size x patch_size
	        return A.PadIfNeeded(min_height=patch_size, min_width=patch_size,
	                             border_mode=cv2.BORDER_CONSTANT, value=0, always_apply=True)
	    # Transformers and traindb
	    if face_policy == 'scale':
	        # The loader crops the face isotropically then scales to a square of size patch_size_load
	        loading_transformations = [
	            zero_pad(),
	            A.Resize(height=patch_size, width=patch_size, always_apply=True),
	        ]
	    elif face_policy == 'tight':
	        # The loader crops the face tightly without any scaling
	        loading_transformations = [
	            A.LongestMaxSize(max_size=patch_size, always_apply=True),
	            zero_pad(),
	        ]
	    else:
	        raise ValueError('Unknown value for face_policy: {}'.format(face_policy))
	    downsample_train_transformations = []
	    aug_transformations = []
	    if train:
	        # Both face policies use the same train-time downscale (replaces scaled dataset)
	        downsample_train_transformations.append(A.Downscale(scale_max=0.5, scale_min=0.5, p=0.5))
	        color_jitter = A.OneOf([
	            A.RandomBrightnessContrast(),
	            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=20),
	        ])
	        noise = A.OneOf([
	            A.ISONoise(),
	            A.IAAAdditiveGaussianNoise(scale=(0.01 * 255, 0.03 * 255)),
	        ])
	        aug_transformations.append(A.Compose([
	            A.HorizontalFlip(),
	            color_jitter,
	            noise,
	            A.Downscale(scale_min=0.7, scale_max=0.9, interpolation=cv2.INTER_LINEAR),
	            A.ImageCompression(quality_lower=50, quality_upper=99),
	        ], ))
	    # Common final transformations
	    final_transformations = [
	        A.Normalize(mean=net_normalizer.mean, std=net_normalizer.std, ),
	        ToTensorV2(),
	    ]
	    pipeline = loading_transformations + downsample_train_transformations
	    pipeline = pipeline + aug_transformations + final_transformations
	    return A.Compose(pipeline)


	def aggregate(x, deadzone: float, pre_mult: float, policy: str, post_mult: float, clipmargin: float,
	              params: Optional[dict] = None):
	    """
	    Aggregate a set of scores into a single value clipped to [clipmargin, 1 - clipmargin]
	    :param x: array-like of scores; it is copied and never modified
	    :param deadzone: if > 0, scores with absolute value not above deadzone are discarded first
	    :param pre_mult: factor applied to the scores before the sigmoid (or before the sign, for "voting")
	    :param policy: one of "mean", "sigmean", "meanp", "median", "sigmedian", "maxabs", "avgvoting", "voting"
	    :param post_mult: factor stretching the aggregated value around its midpoint
	    :param clipmargin: distance from 0 and 1 at which the result is clipped
	    :param params: optional extra parameters; "p" is the exponent of "meanp" (default 3).
	    The dictionary is only read, never modified
	    :return: the aggregated score
	    :raises NotImplementedError: if policy is unknown
	    """
	    # Imported here because "import scipy" alone does not guarantee scipy.special is loaded
	    from scipy.special import expit
	    params = {} if params is None else params
	    x = np.array(x)
	    if deadzone > 0:
	        x = x[(x > deadzone) | (x < -deadzone)]
	    # With nothing left to aggregate (empty input or everything in the deadzone) use a single 0 score
	    if x.size == 0:
	        x = np.asarray([0, ])
	    if policy == 'avgvoting':
	        # Average of the signs lies in [-1, 1]; map it linearly onto [0, 1]
	        x = np.mean(np.sign(x))
	        x = (x * post_mult + 1) / 2
	    else:
	        if policy == 'mean':
	            x = expit(np.mean(x) * pre_mult)
	        elif policy == 'sigmean':
	            x = expit(x * pre_mult).mean()
	        elif policy == 'meanp':
	            # Signed power mean: raise to p keeping the sign, average, then take the signed p-th root
	            pow_coeff = params.get('p', 3)
	            x = np.mean(np.sign(x) * (np.abs(x) ** pow_coeff))
	            x = np.sign(x) * (np.abs(x) ** (1 / pow_coeff))
	            x = expit(x * pre_mult)
	        elif policy == 'median':
	            x = expit(np.median(x) * pre_mult)
	        elif policy == 'sigmedian':
	            x = np.median(expit(x * pre_mult))
	        elif policy == 'maxabs':
	            # Keep the score with the largest magnitude; ties go to the maximum
	            lowest, highest = np.min(x), np.max(x)
	            x = lowest if abs(lowest) > abs(highest) else highest
	            x = expit(x * pre_mult)
	        elif policy == 'voting':
	            # NOTE(review): the sign is in {-1, 0, 1} but is rescaled around 0.5 like a sigmoid
	            # output, unlike "avgvoting" - kept as is, confirm this is intended
	            x = np.sign(np.mean(x * pre_mult))
	        else:
	            raise NotImplementedError('Unknown aggregation policy: {}'.format(policy))
	        # Stretch around the sigmoid midpoint 0.5
	        x = (x - 0.5) * post_mult + 0.5
	    return np.clip(x, clipmargin, 1 - clipmargin)